OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryExecutionContext.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_QUERYEXECUTIONCONTEXT_H
18 #define QUERYENGINE_QUERYEXECUTIONCONTEXT_H
19 
20 #include "CompilationOptions.h"
22 #include "GpuMemUtils.h"
23 #include "QueryMemoryInitializer.h"
24 #include "Rendering/RenderInfo.h"
25 #include "ResultSet.h"
26 
27 #include <boost/core/noncopyable.hpp>
28 #include <vector>
29 
30 class CompilationContext;
33 
34 struct RelAlgExecutionUnit;
36 class Executor;
37 
38 class QueryExecutionContext : boost::noncopyable {
39  public:
40  // TODO(alex): remove device_type
41  QueryExecutionContext(const RelAlgExecutionUnit& ra_exe_unit,
42  const QueryMemoryDescriptor&,
43  const Executor* executor,
44  const ExecutorDeviceType device_type,
45  const ExecutorDispatchMode dispatch_mode,
46  const int device_id,
47  const shared::TableKey& outer_table_key,
48  const int64_t num_rows,
49  const std::vector<std::vector<const int8_t*>>& col_buffers,
50  const std::vector<std::vector<uint64_t>>& frag_offsets,
51  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
52  const bool output_columnar,
53  const bool sort_on_gpu,
54  const size_t thread_idx,
55  RenderInfo*);
56 
57  ResultSetPtr getRowSet(const RelAlgExecutionUnit& ra_exe_unit,
58  const QueryMemoryDescriptor& query_mem_desc) const;
59 
60  ResultSetPtr groupBufferToResults(const size_t i) const;
61 
62  std::vector<int64_t*> launchGpuCode(
63  const RelAlgExecutionUnit& ra_exe_unit,
64  const CompilationContext* compilation_context,
65  const bool hoist_literals,
66  const std::vector<int8_t>& literal_buff,
67  std::vector<std::vector<const int8_t*>> col_buffers,
68  const std::vector<std::vector<int64_t>>& num_rows,
69  const std::vector<std::vector<uint64_t>>& frag_row_offsets,
70  const int32_t scan_limit,
71  Data_Namespace::DataMgr* data_mgr,
72  const unsigned block_size_x,
73  const unsigned grid_size_x,
74  const int device_id,
75  const size_t shared_memory_size,
76  int32_t* error_code,
77  const uint32_t num_tables,
78  const bool allow_runtime_interrupt,
79  const std::vector<int8_t*>& join_hash_tables,
80  RenderAllocatorMap* render_allocator_map,
81  bool optimize_cuda_block_and_grid_sizes);
82 
83  std::vector<int64_t*> launchCpuCode(
84  const RelAlgExecutionUnit& ra_exe_unit,
85  const CpuCompilationContext* fn_ptrs,
86  const bool hoist_literals,
87  const std::vector<int8_t>& literal_buff,
88  std::vector<std::vector<const int8_t*>> col_buffers,
89  const std::vector<std::vector<int64_t>>& num_rows,
90  const std::vector<std::vector<uint64_t>>& frag_row_offsets,
91  const int32_t scan_limit,
92  int32_t* error_code,
93  const uint32_t start_rowid,
94  const uint32_t num_tables,
95  const std::vector<int8_t*>& join_hash_tables,
96  const int64_t num_rows_to_process = -1);
97 
98  int64_t getAggInitValForIndex(const size_t index) const;
99 
100  private:
101  // enum must be kept in sync w/ prepareKernelParams().
102  enum {
118  };
119  using KernelParamSizes = std::array<size_t, KERN_PARAM_COUNT>;
120  using KernelParams = std::array<int8_t*, KERN_PARAM_COUNT>;
121 
122  size_t sizeofColBuffers(
123  std::vector<std::vector<int8_t const*>> const& col_buffers) const;
124  void copyColBuffersToDevice(
125  int8_t* device_ptr,
126  std::vector<std::vector<int8_t const*>> const& col_buffers) const;
127 
128  template <typename T>
129  size_t sizeofFlattened2dVec(uint32_t const expected_subvector_size,
130  std::vector<std::vector<T>> const& vec2d) const;
131  template <typename T>
132  void copyFlattened2dVecToDevice(int8_t* device_ptr,
133  uint32_t const expected_subvector_size,
134  std::vector<std::vector<T>> const& vec2d) const;
135 
136  size_t sizeofInitAggVals(bool const is_group_by,
137  std::vector<int64_t> const& init_agg_vals) const;
138  void copyInitAggValsToDevice(int8_t* device_ptr,
139  bool const is_group_by,
140  std::vector<int64_t> const& init_agg_vals) const;
141 
142  size_t sizeofJoinHashTables(std::vector<int8_t*> const& join_hash_tables) const;
143  int8_t* copyJoinHashTablesToDevice(int8_t* device_ptr,
144  std::vector<int8_t*> const& join_hash_tables) const;
145 
146  size_t sizeofLiterals(std::vector<int8_t> const& literal_buff) const;
147  int8_t* copyLiteralsToDevice(int8_t* device_ptr,
148  std::vector<int8_t> const& literal_buff) const;
149 
150  template <typename T>
151  void copyValueToDevice(int8_t* device_ptr, T const value) const;
152 
153  template <typename T>
154  size_t sizeofVector(std::vector<T> const& vec) const;
155  template <typename T>
156  void copyVectorToDevice(int8_t* device_ptr, std::vector<T> const& vec) const;
157 
158  KernelParams prepareKernelParams(
159  const std::vector<std::vector<const int8_t*>>& col_buffers,
160  const std::vector<int8_t>& literal_buff,
161  const std::vector<std::vector<int64_t>>& num_rows,
162  const std::vector<std::vector<uint64_t>>& frag_offsets,
163  const int32_t scan_limit,
164  const std::vector<int64_t>& init_agg_vals,
165  const std::vector<int32_t>& error_codes,
166  const uint32_t num_tables,
167  const std::vector<int8_t*>& join_hash_tables,
168  Data_Namespace::DataMgr* data_mgr,
169  const int device_id,
170  const bool hoist_literals,
171  const bool is_group_by) const;
172 
173  ResultSetPtr groupBufferToDeinterleavedResults(const size_t i) const;
174 
175  std::unique_ptr<DeviceAllocator> gpu_allocator_;
176 
177  // TODO(adb): convert to shared_ptr
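178  QueryMemoryDescriptor query_mem_desc_;
180  const ExecutorDeviceType device_type_;
181  const ExecutorDispatchMode dispatch_mode_;
182  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;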
183  const bool output_columnar_;
184  std::unique_ptr<QueryMemoryInitializer> query_buffers_;
185  mutable std::unique_ptr<ResultSet> estimator_result_set_;
186 
187  friend class Executor;
188 };
189 
190 #endif // QUERYENGINE_QUERYEXECUTIONCONTEXT_H
int8_t * copyJoinHashTablesToDevice(int8_t *device_ptr, std::vector< int8_t * > const &join_hash_tables) const
std::unique_ptr< DeviceAllocator > gpu_allocator_
QueryExecutionContext(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *)
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
std::vector< int64_t * > launchCpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CpuCompilationContext *fn_ptrs, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, int32_t *error_code, const uint32_t start_rowid, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, const int64_t num_rows_to_process=-1)
const ExecutorDispatchMode dispatch_mode_
size_t num_rows_to_process(const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining)
std::shared_ptr< ResultSet > ResultSetPtr
std::array< int8_t *, KERN_PARAM_COUNT > KernelParams
void copyColBuffersToDevice(int8_t *device_ptr, std::vector< std::vector< int8_t const * >> const &col_buffers) const
const ExecutorDeviceType device_type_
ExecutorDeviceType
ExecutorDispatchMode
std::unique_ptr< QueryMemoryInitializer > query_buffers_
ResultSetPtr getRowSet(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc) const
int8_t * copyLiteralsToDevice(int8_t *device_ptr, std::vector< int8_t > const &literal_buff) const
void copyValueToDevice(int8_t *device_ptr, T const value) const
size_t sizeofFlattened2dVec(uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const
void copyFlattened2dVecToDevice(int8_t *device_ptr, uint32_t const expected_subvector_size, std::vector< std::vector< T >> const &vec2d) const
int64_t getAggInitValForIndex(const size_t index) const
size_t sizeofInitAggVals(bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const
void copyVectorToDevice(int8_t *device_ptr, std::vector< T > const &vec) const
size_t sizeofJoinHashTables(std::vector< int8_t * > const &join_hash_tables) const
QueryMemoryDescriptor query_mem_desc_
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:276
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
ResultSetPtr groupBufferToDeinterleavedResults(const size_t i) const
std::array< size_t, KERN_PARAM_COUNT > KernelParamSizes
ResultSetPtr groupBufferToResults(const size_t i) const
KernelParams prepareKernelParams(const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< int8_t > &literal_buff, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, const int32_t scan_limit, const std::vector< int64_t > &init_agg_vals, const std::vector< int32_t > &error_codes, const uint32_t num_tables, const std::vector< int8_t * > &join_hash_tables, Data_Namespace::DataMgr *data_mgr, const int device_id, const bool hoist_literals, const bool is_group_by) const
def error_code
Definition: report.py:234
void copyInitAggValsToDevice(int8_t *device_ptr, bool const is_group_by, std::vector< int64_t > const &init_agg_vals) const
Basic constructors and methods of the row set interface.
Allocate GPU memory using GpuBuffers via DataMgr.
std::unique_ptr< ResultSet > estimator_result_set_
size_t sizeofVector(std::vector< T > const &vec) const
size_t sizeofColBuffers(std::vector< std::vector< int8_t const * >> const &col_buffers) const
std::vector< int64_t * > launchGpuCode(const RelAlgExecutionUnit &ra_exe_unit, const CompilationContext *compilation_context, const bool hoist_literals, const std::vector< int8_t > &literal_buff, std::vector< std::vector< const int8_t * >> col_buffers, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_row_offsets, const int32_t scan_limit, Data_Namespace::DataMgr *data_mgr, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const size_t shared_memory_size, int32_t *error_code, const uint32_t num_tables, const bool allow_runtime_interrupt, const std::vector< int8_t * > &join_hash_tables, RenderAllocatorMap *render_allocator_map, bool optimize_cuda_block_and_grid_sizes)
size_t sizeofLiterals(std::vector< int8_t > const &literal_buff) const