QueryFragmentDescriptor.h
/*
 * Copyright 2022 HEAVY.AI, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
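
/**
 * @file    QueryFragmentDescriptor.h
 * @brief   Descriptor for the fragments required to execute a relational algebra
 *          execution unit, and their assignment to devices and execution kernels.
 */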
#pragma once

#include <deque>
#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <ostream>
#include <set>
#include <unordered_map>
#include <vector>

#include "DataMgr/ChunkMetadata.h"
#include "Logger/Logger.h"
#include "QueryEngine/CompilationOptions.h"
#include "Shared/DbObjectKeys.h"
namespace Fragmenter_Namespace {
class FragmentInfo;
}

namespace Data_Namespace {
struct MemoryInfo;
}

class Executor;
class InputDescriptor;
struct InputTableInfo;
struct RelAlgExecutionUnit;

struct FragmentsPerTable {
  shared::TableKey table_key;
  std::vector<size_t> fragment_ids;
};

using FragmentsList = std::vector<FragmentsPerTable>;
using TableFragments = std::vector<Fragmenter_Namespace::FragmentInfo>;

struct ExecutionKernelDescriptor {
  int device_id;
  FragmentsList fragments;
  std::optional<size_t> outer_tuple_count;  // only for fragments with an exact tuple
                                            // count available in metadata
};
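
// Illustrative shape of the structures above (the values are assumptions, for
// exposition only): a FragmentsList holds one FragmentsPerTable entry per input
// table, e.g. { {outer_table_key, {0, 2, 5}}, {inner_table_key, {0}} }, and an
// ExecutionKernelDescriptor pairs such a list with the device that will execute it.

/**
 * Tracks which fragments of the query's input tables are assigned to which device and
 * execution kernel. buildFragmentKernelMap() populates the per-device kernel map and
 * the assignFragsTo*Dispatch() templates hand the resulting fragment lists to a
 * caller-supplied dispatch callable.
 *
 * A minimal usage sketch; ra_exe_unit, query_infos, gpu_mem_infos, frag_offsets,
 * device_count and executor are assumed to come from the surrounding execution
 * context:
 *
 * @code
 *   QueryFragmentDescriptor fragment_descriptor(ra_exe_unit,
 *                                               query_infos,
 *                                               gpu_mem_infos,
 *                                               0.9,  // gpu_input_mem_limit_percent
 *                                               {});  // allowed_outer_fragment_indices
 *   fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
 *                                              frag_offsets,
 *                                              device_count,
 *                                              ExecutorDeviceType::GPU,
 *                                              false,  // enable_multifrag_kernels
 *                                              false,  // enable_inner_join_fragment_skipping
 *                                              executor);
 *   fragment_descriptor.assignFragsToKernelDispatch(
 *       [&](int device_id, const FragmentsList& frag_list, int64_t rowid_lookup_key) {
 *         // enqueue an execution kernel for frag_list on device_id
 *       },
 *       ra_exe_unit);
 * @endcode
 */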
class QueryFragmentDescriptor {
 public:
  QueryFragmentDescriptor(const RelAlgExecutionUnit& ra_exe_unit,
                          const std::vector<InputTableInfo>& query_infos,
                          const std::vector<Data_Namespace::MemoryInfo>& gpu_mem_infos,
                          const double gpu_input_mem_limit_percent,
                          const std::vector<size_t> allowed_outer_fragment_indices);

  static void computeAllTablesFragments(
      std::map<shared::TableKey, const TableFragments*>& all_tables_fragments,
      const RelAlgExecutionUnit& ra_exe_unit,
      const std::vector<InputTableInfo>& query_infos);

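  /**
   * Populates the per-device kernel map (execution_kernels_per_device_) for the given
   * execution unit. The fragment-to-kernel strategy (one kernel per fragment,
   * multi-fragment kernels, or the UNION variant) follows from the execution unit and
   * the enable_multifrag_kernels flag and is delegated to the protected build*
   * helpers declared below.
   */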
  void buildFragmentKernelMap(const RelAlgExecutionUnit& ra_exe_unit,
                              const std::vector<uint64_t>& frag_offsets,
                              const int device_count,
                              const ExecutorDeviceType& device_type,
                              const bool enable_multifrag_kernels,
                              const bool enable_inner_join_fragment_skipping,
                              Executor* executor);

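  /**
   * Dispatches multi-fragment kernels. Every device in execution_kernels_per_device_
   * is expected to hold exactly one kernel descriptor (enforced by the CHECK_EQ
   * below) whose fragments list covers all fragments assigned to that device; f is
   * invoked once per device with the device id, the fragments list, and the rowid
   * lookup key.
   */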
  template <typename DISPATCH_FCN>
  void assignFragsToMultiDispatch(DISPATCH_FCN f) const {
    for (const auto& device_itr : execution_kernels_per_device_) {
      const auto& execution_kernels = device_itr.second;
      CHECK_EQ(execution_kernels.size(), size_t(1));

      const auto& fragments_list = execution_kernels.front().fragments;
      f(device_itr.first, fragments_list, rowid_lookup_key_);
    }
  }

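  /**
   * Dispatches one kernel per device per round, cycling over the devices until every
   * kernel descriptor has been handed to f or terminateDispatchMaybe() reports that
   * enough tuples have been produced for the execution unit (early termination). The
   * round-robin order keeps the per-device dispatch balanced; f receives the same
   * (device id, fragments list, rowid lookup key) arguments as in
   * assignFragsToMultiDispatch().
   */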
  template <typename DISPATCH_FCN>
  void assignFragsToKernelDispatch(DISPATCH_FCN f,
                                   const RelAlgExecutionUnit& ra_exe_unit) const {
    if (execution_kernels_per_device_.empty()) {
      return;
    }

    size_t tuple_count = 0;

    std::unordered_map<int, size_t> execution_kernel_index;
    for (const auto& device_itr : execution_kernels_per_device_) {
      CHECK(execution_kernel_index.insert(std::make_pair(device_itr.first, size_t(0)))
                .second);
    }

    bool dispatch_finished = false;
    while (!dispatch_finished) {
      dispatch_finished = true;
      for (const auto& device_itr : execution_kernels_per_device_) {
        auto& kernel_idx = execution_kernel_index[device_itr.first];
        if (kernel_idx < device_itr.second.size()) {
          dispatch_finished = false;
          const auto& execution_kernel = device_itr.second[kernel_idx++];
          f(device_itr.first, execution_kernel.fragments, rowid_lookup_key_);
          if (terminateDispatchMaybe(tuple_count, ra_exe_unit, execution_kernel)) {
            return;
          }
        }
      }
    }
  }

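  /**
   * True when kernels have been scheduled and the query was not reduced to a rowid
   * lookup (rowid_lookup_key_ < 0), i.e. the work unit watchdog still has work to
   * validate before dispatch.
   */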
  bool shouldCheckWorkUnitWatchdog() const {
    return rowid_lookup_key_ < 0 && !execution_kernels_per_device_.empty();
  }

 protected:
  std::vector<size_t> allowed_outer_fragment_indices_;
  int64_t rowid_lookup_key_ = -1;

  std::map<shared::TableKey, const TableFragments*> selected_tables_fragments_;

  std::map<int, std::vector<ExecutionKernelDescriptor>> execution_kernels_per_device_;

  std::map<size_t, size_t> tuple_count_per_device_;
  std::map<size_t, size_t> available_gpu_mem_bytes_;

  void buildFragmentPerKernelMapForUnion(const RelAlgExecutionUnit& ra_exe_unit,
                                         const std::vector<uint64_t>& frag_offsets,
                                         const int device_count,
                                         const size_t num_bytes_for_row,
                                         const ExecutorDeviceType& device_type,
                                         Executor* executor);

  void buildFragmentPerKernelMap(const RelAlgExecutionUnit& ra_exe_unit,
                                 const std::vector<uint64_t>& frag_offsets,
                                 const int device_count,
                                 const size_t num_bytes_for_row,
                                 const ExecutorDeviceType& device_type,
                                 Executor* executor);

  void buildMultifragKernelMap(const RelAlgExecutionUnit& ra_exe_unit,
                               const std::vector<uint64_t>& frag_offsets,
                               const int device_count,
                               const size_t num_bytes_for_row,
                               const ExecutorDeviceType& device_type,
                               const bool enable_inner_join_fragment_skipping,
                               Executor* executor);

  void buildFragmentPerKernelForTable(const TableFragments* fragments,
                                      const RelAlgExecutionUnit& ra_exe_unit,
                                      const InputDescriptor& table_desc,
                                      const bool is_temporary_table,
                                      const std::vector<uint64_t>& frag_offsets,
                                      const int device_count,
                                      const size_t num_bytes_for_row,
                                      const ChunkMetadataVector& deleted_chunk_metadata_vec,
                                      const std::optional<size_t> table_desc_offset,
                                      const ExecutorDeviceType& device_type,
                                      Executor* executor);

  bool terminateDispatchMaybe(size_t& tuple_count,
                              const RelAlgExecutionUnit& ra_exe_unit,
                              const ExecutionKernelDescriptor& kernel) const;

  void checkDeviceMemoryUsage(const Fragmenter_Namespace::FragmentInfo& fragment,
                              const int device_id,
                              const size_t num_cols);
};

std::ostream& operator<<(std::ostream&, FragmentsPerTable const&);