OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnarResults.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef COLUMNAR_RESULTS_H
18 #define COLUMNAR_RESULTS_H
19 #include "ResultSet.h"
20 #include "Shared/SqlTypesLayout.h"
21 
22 #include "../Shared/checked_alloc.h"
23 
24 #include <memory>
25 #include <unordered_map>
26 
/**
 * Exception type derived from std::runtime_error that always carries the fixed
 * message "Columnar conversion not supported for variable length types".
 */
class ColumnarConversionNotSupported : public std::runtime_error {
 public:
  // Default constructor: there is nothing to parameterize, the message is constant.
  ColumnarConversionNotSupported()
      : std::runtime_error(
            "Columnar conversion not supported for variable length types") {}
};
33 
40 class ColumnBitmap {
41  public:
42  ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
43  : bitmaps_(num_banks, std::vector<bool>(num_elements_per_bank, false)) {}
44 
45  inline bool get(const size_t index, const size_t bank_index) const {
46  CHECK_LT(bank_index, bitmaps_.size());
47  CHECK_LT(index, bitmaps_[bank_index].size());
48  return bitmaps_[bank_index][index];
49  }
50 
51  inline void set(const size_t index, const size_t bank_index, const bool val) {
52  CHECK_LT(bank_index, bitmaps_.size());
53  CHECK_LT(index, bitmaps_[bank_index].size());
54  bitmaps_[bank_index][index] = val;
55  }
56 
57  private:
58  std::vector<std::vector<bool>> bitmaps_;
59 };
60 
62  public:
63  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64  const ResultSet& rows,
65  const size_t num_columns,
66  const std::vector<SQLTypeInfo>& target_types,
67  const size_t executor_id,
68  const size_t thread_idx,
69  const bool is_parallel_execution_enforced = false);
70 
71  ColumnarResults(const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
72  const int8_t* one_col_buffer,
73  const size_t num_rows,
74  const SQLTypeInfo& target_type,
75  const size_t executor_id,
76  const size_t thread_idx);
77 
78  static std::unique_ptr<ColumnarResults> mergeResults(
79  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
80  const std::vector<std::unique_ptr<ColumnarResults>>& sub_results);
81 
82  const std::vector<int8_t*>& getColumnBuffers() const { return column_buffers_; }
83 
84  const size_t size() const { return num_rows_; }
85 
86  const SQLTypeInfo& getColumnType(const int col_id) const {
87  CHECK_GE(col_id, 0);
88  CHECK_LT(static_cast<size_t>(col_id), target_types_.size());
89  return target_types_[col_id];
90  }
91 
92  bool isParallelConversion() const { return parallel_conversion_; }
94 
95  // functions used to read content from the result set (direct columnarization, group by
96  // queries)
97  using ReadFunction =
98  std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)>;
99 
100  // functions used to write back contents into output column buffers (direct
101  // columnarization, group by queries)
102  using WriteFunction = std::function<void(const ResultSet&,
103  const size_t,
104  const size_t,
105  const size_t,
106  const size_t,
107  const ReadFunction&)>;
108 
109  protected:
110  std::vector<int8_t*> column_buffers_;
111  size_t num_rows_;
112 
113  private:
114  ColumnarResults(const size_t num_rows,
115  const std::vector<SQLTypeInfo>& target_types,
116  const std::vector<size_t>& padded_target_sizes)
117  : num_rows_(num_rows)
118  , target_types_(target_types)
119  , padded_target_sizes_(padded_target_sizes) {}
120  inline void writeBackCell(const TargetValue& col_val,
121  const size_t row_idx,
122  const SQLTypeInfo& type_info,
123  int8_t* column_buf,
124  std::mutex* write_mutex = nullptr);
125  void materializeAllColumnsDirectly(const ResultSet& rows, const size_t num_columns);
126  void materializeAllColumnsThroughIteration(const ResultSet& rows,
127  const size_t num_columns);
128 
129  // Direct columnarization for group by queries (perfect hash or baseline hash)
130  void materializeAllColumnsGroupBy(const ResultSet& rows, const size_t num_columns);
131 
132  // Direct columnarization for Projections (only output is columnar)
133  void materializeAllColumnsProjection(const ResultSet& rows, const size_t num_columns);
134 
135  void materializeAllColumnsTableFunction(const ResultSet& rows,
136  const size_t num_columns);
137 
138  void copyAllNonLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
139  const ResultSet& rows,
140  const size_t num_columns);
141  void materializeAllLazyColumns(const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
142  const ResultSet& rows,
143  const size_t num_columns);
144 
145  void locateAndCountEntries(const ResultSet& rows,
146  ColumnBitmap& bitmap,
147  std::vector<size_t>& non_empty_per_thread,
148  const size_t entry_count,
149  const size_t num_threads,
150  const size_t size_per_thread) const;
151  void compactAndCopyEntries(const ResultSet& rows,
152  const ColumnBitmap& bitmap,
153  const std::vector<size_t>& non_empty_per_thread,
154  const size_t num_columns,
155  const size_t entry_count,
156  const size_t num_threads,
157  const size_t size_per_thread);
159  const ResultSet& rows,
160  const ColumnBitmap& bitmap,
161  const std::vector<size_t>& non_empty_per_thread,
162  const std::vector<size_t>& global_offsets,
163  const std::vector<bool>& targets_to_skip,
164  const std::vector<size_t>& slot_idx_per_target_idx,
165  const size_t num_columns,
166  const size_t entry_count,
167  const size_t num_threads,
168  const size_t size_per_thread);
170  const ResultSet& rows,
171  const ColumnBitmap& bitmap,
172  const std::vector<size_t>& non_empty_per_thread,
173  const std::vector<size_t>& global_offsets,
174  const std::vector<size_t>& slot_idx_per_target_idx,
175  const size_t num_columns,
176  const size_t entry_count,
177  const size_t num_threads,
178  const size_t size_per_thread);
179 
180  template <typename DATA_TYPE>
181  void writeBackCellDirect(const ResultSet& rows,
182  const size_t input_buffer_entry_idx,
183  const size_t output_buffer_entry_idx,
184  const size_t target_idx,
185  const size_t slot_idx,
186  const ReadFunction& read_function);
187 
188  std::vector<WriteFunction> initWriteFunctions(
189  const ResultSet& rows,
190  const std::vector<bool>& targets_to_skip = {});
191 
192  template <QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT>
193  std::vector<ReadFunction> initReadFunctions(
194  const ResultSet& rows,
195  const std::vector<size_t>& slot_idx_per_target_idx,
196  const std::vector<bool>& targets_to_skip = {});
197 
198  std::tuple<std::vector<WriteFunction>, std::vector<ReadFunction>>
199  initAllConversionFunctions(const ResultSet& rows,
200  const std::vector<size_t>& slot_idx_per_target_idx,
201  const std::vector<bool>& targets_to_skip = {});
202 
203  const std::vector<SQLTypeInfo> target_types_;
204  bool parallel_conversion_; // multi-threaded execution of columnar conversion
205  bool direct_columnar_conversion_; // whether columnar conversion might happen directly
206  // with minimal usage of result set's iterator access
207  size_t thread_idx_;
208  std::shared_ptr<Executor> executor_;
209  std::vector<size_t> padded_target_sizes_;
210 };
211 
// Two-level lookup table: the outer map is keyed by shared::TableKey, the inner map
// by an int, and each entry is a shared pointer to an immutable ColumnarResults.
// NOTE(review): the int key is presumably a column id -- confirm against the users
// of this alias.
212 using ColumnCacheMap =
213  std::unordered_map<shared::TableKey,
214  std::unordered_map<int, std::shared_ptr<const ColumnarResults>>>;
215 
216 #endif // COLUMNAR_RESULTS_H
bool isParallelConversion() const
std::vector< int8_t * > column_buffers_
void materializeAllColumnsTableFunction(const ResultSet &rows, const size_t num_columns)
static std::unique_ptr< ColumnarResults > mergeResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results)
std::vector< ReadFunction > initReadFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
ColumnarResults(const size_t num_rows, const std::vector< SQLTypeInfo > &target_types, const std::vector< size_t > &padded_target_sizes)
void locateAndCountEntries(const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const
#define CHECK_GE(x, y)
Definition: Logger.h:306
void set(const size_t index, const size_t bank_index, const bool val)
bool direct_columnar_conversion_
void compactAndCopyEntries(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
tuple rows
Definition: report.py:114
std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)> ReadFunction
std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)> WriteFunction
void materializeAllColumnsThroughIteration(const ResultSet &rows, const size_t num_columns)
const size_t size() const
std::vector< WriteFunction > initWriteFunctions(const ResultSet &rows, const std::vector< bool > &targets_to_skip={})
void materializeAllColumnsGroupBy(const ResultSet &rows, const size_t num_columns)
std::tuple< std::vector< WriteFunction >, std::vector< ReadFunction > > initAllConversionFunctions(const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={})
bool isDirectColumnarConversionPossible() const
#define CHECK_LT(x, y)
Definition: Logger.h:303
void materializeAllColumnsDirectly(const ResultSet &rows, const size_t num_columns)
void writeBackCellDirect(const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function)
void writeBackCell(const TargetValue &col_val, const size_t row_idx, const SQLTypeInfo &type_info, int8_t *column_buf, std::mutex *write_mutex=nullptr)
ColumnBitmap(const size_t num_elements_per_bank, size_t num_banks)
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::shared_ptr< Executor > executor_
std::vector< size_t > padded_target_sizes_
void copyAllNonLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
std::vector< std::vector< bool > > bitmaps_
bool g_enable_watchdog false
Definition: Execute.cpp:80
void materializeAllColumnsProjection(const ResultSet &rows, const size_t num_columns)
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
void compactAndCopyEntriesWithTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
void compactAndCopyEntriesWithoutTargetSkipping(const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread)
const std::vector< int8_t * > & getColumnBuffers() const
const std::vector< SQLTypeInfo > target_types_
void materializeAllLazyColumns(const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns)
ColumnarResults(const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const size_t executor_id, const size_t thread_idx, const bool is_parallel_execution_enforced=false)
const SQLTypeInfo & getColumnType(const int col_id) const