OmniSciDB  a5dc49c757
BoundingBoxIntersectJoinHashTable.h
/*
 * Copyright 2022 HEAVY.AI, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

// NOTE(jclay): A fixed array of size 200 is allocated on the stack.
// This is likely the largest size that is safe to use across
// all supported GPU architectures.
constexpr int32_t kMaxBBoxOverlapsCount{200};
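
// Illustration (hypothetical, not part of the original header): the constant
// above bounds a fixed-size buffer of candidate matches that the bounding-box
// probe code keeps on the stack, e.g.
//
//   int32_t bbox_candidate_offsets[kMaxBBoxOverlapsCount];  // hypothetical local buffer
//   int32_t num_candidates = 0;  // filled while probing the bucketized keys
//
// Larger values would risk exceeding per-thread stack limits on some of the
// supported GPU architectures.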

class BoundingBoxIntersectJoinHashTable : public HashJoin {
 public:
  BoundingBoxIntersectJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
                                    const JoinType join_type,
                                    const std::vector<InputTableInfo>& query_infos,
                                    const Data_Namespace::MemoryLevel memory_level,
                                    ColumnCacheMap& column_cache,
                                    Executor* executor,
                                    const std::vector<InnerOuter>& inner_outer_pairs,
                                    const int device_count,
                                    const RegisteredQueryHint& query_hints,
                                    const HashTableBuildDagMap& hashtable_build_dag_map,
                                    const TableIdToNodeMap& table_id_to_node_map)
      : condition_(condition)
      , join_type_(join_type)
      , query_infos_(query_infos)
      , memory_level_(memory_level)
      , executor_(executor)
      , column_cache_(column_cache)
      , inner_outer_pairs_(inner_outer_pairs)
      , device_count_(device_count)
      , query_hints_(query_hints)
      , hashtable_build_dag_map_(hashtable_build_dag_map)
      , table_id_to_node_map_(table_id_to_node_map) {
    CHECK_GT(device_count_, 0);
    hash_tables_for_device_.resize(std::max(device_count_, 1));
  }

  virtual ~BoundingBoxIntersectJoinHashTable() {}

  //! Make hash table from an in-flight SQL query's parse tree etc.
  static std::shared_ptr<BoundingBoxIntersectJoinHashTable> getInstance(
      const std::shared_ptr<Analyzer::BinOper> condition,
      const std::vector<InputTableInfo>& query_infos,
      const Data_Namespace::MemoryLevel memory_level,
      const JoinType join_type,
      const int device_count,
      ColumnCacheMap& column_cache,
      Executor* executor,
      const HashTableBuildDagMap& hashtable_build_dag_map,
      const RegisteredQueryHint& query_hint,
      const TableIdToNodeMap& table_id_to_node_map);

  static void invalidateCache() {
    CHECK(auto_tuner_cache_);
    auto_tuner_cache_->clearCache();

    CHECK(hash_table_cache_);
    hash_table_cache_->clearCache();
  }

  static void markCachedItemAsDirty(size_t table_key) {
    CHECK(hash_table_cache_);
    CHECK(auto_tuner_cache_);
    auto candidate_table_keys =
        hash_table_cache_->getMappedQueryPlanDagsWithTableKey(table_key);
    if (candidate_table_keys.has_value()) {
      auto_tuner_cache_->markCachedItemAsDirty(
          table_key,
          *candidate_table_keys,
          CacheItemType::BBOX_INTERSECT_AUTO_TUNER_PARAM,
          DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
      hash_table_cache_->markCachedItemAsDirty(table_key,
                                               *candidate_table_keys,
                                               CacheItemType::BBOX_INTERSECT_HT,
                                               DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    }
  }

  static HashtableRecycler* getHashTableCache() {
    CHECK(hash_table_cache_);
    return hash_table_cache_.get();
  }

  static BoundingBoxIntersectTuningParamRecycler* getBoundingBoxIntersectTuningParamCache() {
    CHECK(auto_tuner_cache_);
    return auto_tuner_cache_.get();
  }

 protected:
  void reify(const HashType preferred_layout);

  virtual void reifyWithLayout(const HashType layout);

  virtual void reifyImpl(std::vector<ColumnsForDevice>& columns_per_device,
                         const Fragmenter_Namespace::TableInfo& query_info,
                         const HashType layout,
                         const size_t shard_count,
                         const size_t entry_count,
                         const size_t emitted_keys_count,
                         const bool skip_hashtable_caching,
                         const size_t chosen_max_hashtable_size,
                         const double chosen_bucket_threshold);

  void reifyForDevice(const ColumnsForDevice& columns_for_device,
                      const HashType layout,
                      const size_t entry_count,
                      const size_t emitted_keys_count,
                      const bool skip_hashtable_caching,
                      const int device_id,
                      const logger::ThreadLocalIds parent_thread_local_ids);

  size_t calculateHashTableSize(size_t number_of_dimensions,
                                size_t emitted_keys_count,
                                size_t entry_count) const;

  ColumnsForDevice fetchColumnsForDevice(
      const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
      const int device_id,
      DeviceAllocator* dev_buff_owner);

  // returns entry_count, emitted_keys_count
  virtual std::pair<size_t, size_t> approximateTupleCount(
      const std::vector<double>& inverse_bucket_sizes_for_dimension,
      std::vector<ColumnsForDevice>&,
      const size_t chosen_max_hashtable_size,
      const double chosen_bucket_threshold);

  // returns entry_count, emitted_keys_count
  virtual std::pair<size_t, size_t> computeHashTableCounts(
      const size_t shard_count,
      const std::vector<double>& inverse_bucket_sizes_for_dimension,
      std::vector<ColumnsForDevice>& columns_per_device,
      const size_t chosen_max_hashtable_size,
      const double chosen_bucket_threshold);

  void setInverseBucketSizeInfo(const std::vector<double>& inverse_bucket_sizes,
                                std::vector<ColumnsForDevice>& columns_per_device,
                                const size_t device_count);

  size_t getKeyComponentWidth() const;

  size_t getKeyComponentCount() const;

  HashType getHashType() const noexcept override {
    if (layout_override_) {
      return *layout_override_;
    }
    auto hash_table = getHashTableForDevice(0);
    CHECK(hash_table);
    return hash_table->getLayout();
  }

  Data_Namespace::MemoryLevel getMemoryLevel() const noexcept override {
    return memory_level_;
  }

  int getDeviceCount() const noexcept override { return device_count_; };

  std::shared_ptr<BaselineHashTable> initHashTableOnCpu(
      const std::vector<JoinColumn>& join_columns,
      const std::vector<JoinColumnTypeInfo>& join_column_types,
      const std::vector<JoinBucketInfo>& join_bucket_info,
      const BaselineHashTableEntryInfo hash_table_entry_info,
      const bool skip_hashtable_caching);

#ifdef HAVE_CUDA
  std::shared_ptr<BaselineHashTable> initHashTableOnGpu(
      const std::vector<JoinColumn>& join_columns,
      const std::vector<JoinColumnTypeInfo>& join_column_types,
      const std::vector<JoinBucketInfo>& join_bucket_info,
      const BaselineHashTableEntryInfo hash_table_entry_info,
      const size_t device_id);

  std::shared_ptr<BaselineHashTable> copyCpuHashTableToGpu(
      std::shared_ptr<BaselineHashTable>& cpu_hash_table,
      const size_t device_id);
#endif // HAVE_CUDA

  HashJoinMatchingSet codegenMatchingSet(const CompilationOptions&,
                                         const size_t) override;

  std::string toString(const ExecutorDeviceType device_type,
                       const int device_id = 0,
                       bool raw = false) const override;

  DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type,
                                 const int device_id) const override;

  llvm::Value* codegenSlot(const CompilationOptions&, const size_t) override {
    UNREACHABLE();  // not applicable for bounding box intersection
    return nullptr;
  }

  const RegisteredQueryHint& getRegisteredQueryHint() {
    return query_hints_;
  }

  size_t getEntryCount() const {
    auto hash_table = getHashTableForDevice(0);
    CHECK(hash_table);
    return hash_table->getEntryCount();
  }

  size_t getEmittedKeysCount() const {
    auto hash_table = getHashTableForDevice(0);
    CHECK(hash_table);
    return hash_table->getEmittedKeysCount();
  }

  size_t getComponentBufferSize() const noexcept override {
    CHECK(!hash_tables_for_device_.empty());
    auto hash_table = hash_tables_for_device_.front();
    CHECK(hash_table);
    return hash_table->getEntryCount() * sizeof(int32_t);
  }

  size_t shardCount() const {
    if (memory_level_ != Data_Namespace::MemoryLevel::GPU_LEVEL) {
      return 0;
    }
    return BaselineJoinHashTable::getShardCountForCondition(
        condition_.get(), executor_, inner_outer_pairs_);
  }

  Data_Namespace::MemoryLevel getEffectiveMemoryLevel(
      const std::vector<InnerOuter>& inner_outer_pairs) const;

  shared::TableKey getInnerTableId() const noexcept override;

  int getInnerTableRteIdx() const noexcept override {
    CHECK(!inner_outer_pairs_.empty());
    const auto first_inner_col = inner_outer_pairs_.front().first;
    return first_inner_col->get_rte_idx();
  }
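
  // The hash table buffer is laid out as [ keys ][ offsets ][ counts ][ payload ]
  // when the layout requires the additional buffers (see
  // layoutRequiresAdditionalBuffers()); each of the offset/count/payload sections
  // is getComponentBufferSize() bytes, so the functions below return the starting
  // byte of each section. For layouts without the additional buffers, all three
  // offsets reduce to getKeyBufferSize().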

  size_t getKeyBufferSize() const noexcept {
    const auto key_component_width = getKeyComponentWidth();
    CHECK(key_component_width == 4 || key_component_width == 8);
    const auto key_component_count = getKeyComponentCount();
    if (layoutRequiresAdditionalBuffers(getHashType())) {
      return getEntryCount() * key_component_count * key_component_width;
    } else {
      return getEntryCount() * (key_component_count + 1) * key_component_width;
    }
  }

  size_t offsetBufferOff() const noexcept override {
    return getKeyBufferSize();
  }

  size_t countBufferOff() const noexcept override {
    if (layoutRequiresAdditionalBuffers(getHashType())) {
      return offsetBufferOff() + getComponentBufferSize();
    } else {
      return getKeyBufferSize();
    }
  }

  size_t payloadBufferOff() const noexcept override {
    if (layoutRequiresAdditionalBuffers(getHashType())) {
      return countBufferOff() + getComponentBufferSize();
    } else {
      return getKeyBufferSize();
    }
  }

  std::string getHashJoinType() const final {
    return "BoundingBoxIntersect";
  }

  bool isBitwiseEq() const override;

  std::shared_ptr<HashTable> initHashTableOnCpuFromCache(
      QueryPlanHash key,
      CacheItemType item_type,
      DeviceIdentifier device_identifier);

  std::optional<std::pair<size_t, size_t>> getApproximateTupleCountFromCache(
      QueryPlanHash key,
      CacheItemType item_type,
      DeviceIdentifier device_identifier);

  void putHashTableOnCpuToCache(QueryPlanHash key,
                                CacheItemType item_type,
                                std::shared_ptr<HashTable> hashtable_ptr,
                                DeviceIdentifier device_identifier,
                                size_t hashtable_building_time);

  llvm::Value* codegenKey(const CompilationOptions&);
  std::vector<llvm::Value*> codegenManyKey(const CompilationOptions&);

  std::optional<BoundingBoxIntersectMetaInfo> getBoundingBoxIntersectMetaInfo() {
    return hashtable_cache_meta_info_.bbox_intersect_meta_info;
  }

  struct AlternativeCacheKeyForBoundingBoxIntersection {
    std::vector<InnerOuter> inner_outer_pairs;
    const size_t num_elements;
    const size_t chunk_key_hash;
    const SQLOps optype;
    const size_t max_hashtable_size;
    const double bucket_threshold;
    const std::vector<double> inverse_bucket_sizes = {};
  };

  QueryPlanHash getAlternativeCacheKey(
      AlternativeCacheKeyForBoundingBoxIntersection& info) {
    auto hash = info.chunk_key_hash;
    for (InnerOuter inner_outer : info.inner_outer_pairs) {
      auto inner_col = inner_outer.first;
      auto rhs_col_var = dynamic_cast<const Analyzer::ColumnVar*>(inner_outer.second);
      auto outer_col = rhs_col_var ? rhs_col_var : inner_col;
      boost::hash_combine(hash, inner_col->toString());
      if (inner_col->get_type_info().is_string()) {
        boost::hash_combine(hash, outer_col->toString());
      }
    }
    boost::hash_combine(hash, info.num_elements);
    boost::hash_combine(hash, info.optype);
    boost::hash_combine(hash, info.max_hashtable_size);
    boost::hash_combine(hash, info.bucket_threshold);
    boost::hash_combine(hash, info.inverse_bucket_sizes);
    return hash;
  }

  void generateCacheKey(
      const size_t max_hashtable_size,
      const double bucket_threshold,
      const std::vector<double>& bucket_sizes,
      std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>>& fragments_per_device,
      int device_count) {
    for (int device_id = 0; device_id < device_count; ++device_id) {
      auto hash_val = boost::hash_value(hashtable_cache_key_[device_id]);
      boost::hash_combine(hash_val, max_hashtable_size);
      boost::hash_combine(hash_val, bucket_threshold);
      boost::hash_combine(hash_val, bucket_sizes);
      boost::hash_combine(hash_val,
                          HashJoin::collectFragmentIds(fragments_per_device[device_id]));
      hashtable_cache_key_[device_id] = hash_val;
      hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_[device_id],
                                                     table_keys_);
    }
  }

  QueryPlanHash getCacheKey(int device_id) const {
    return hashtable_cache_key_[device_id];
  }

  const std::vector<InnerOuter>& getInnerOuterPairs() const {
    return inner_outer_pairs_;
  }

  void setBoundingBoxIntersectionMetaInfo(size_t max_table_size_bytes,
                                          double bucket_threshold,
                                          std::vector<double>& bucket_sizes) {
    BoundingBoxIntersectMetaInfo bbox_intersect_meta_info;
    bbox_intersect_meta_info.bucket_sizes = bucket_sizes;
    bbox_intersect_meta_info.bbox_intersect_max_table_size_bytes = max_table_size_bytes;
    bbox_intersect_meta_info.bbox_intersect_bucket_threshold = bucket_threshold;
    HashtableCacheMetaInfo meta_info;
    meta_info.bbox_intersect_meta_info = bbox_intersect_meta_info;
    hashtable_cache_meta_info_ = meta_info;
  }

  const std::shared_ptr<Analyzer::BinOper> condition_;
  const JoinType join_type_;
  const std::vector<InputTableInfo>& query_infos_;
  const Data_Namespace::MemoryLevel memory_level_;

  Executor* executor_;
  ColumnCacheMap& column_cache_;

  std::vector<InnerOuter> inner_outer_pairs_;
  const int device_count_;
  const RegisteredQueryHint query_hints_;

  std::optional<HashType>
      layout_override_;  // allows us to use a 1:many hash table for many:many

  // We cache a hash table under a cache key C, where
  //   C = query plan DAG D + join columns J + hash table parameters P.
  // Since the bounding-box-intersect parameters P can vary, multiple (and
  // different) hash tables may be built for the same query plan DAG D. In that
  // scenario the rule is to cache everything, on the assumption that the user
  // varies P deliberately for performance, so each variant is worth keeping for
  // future recycling (see the standalone sketch after the class listing for how
  // P is folded into the key).
  static std::unique_ptr<HashtableRecycler> hash_table_cache_;
  // the auto-tuner cache is maintained separately from the hash table cache
  static std::unique_ptr<BoundingBoxIntersectTuningParamRecycler> auto_tuner_cache_;

  HashTableBuildDagMap hashtable_build_dag_map_;
  std::vector<QueryPlanHash> hashtable_cache_key_;
  HashtableCacheMetaInfo hashtable_cache_meta_info_;
  std::unordered_set<size_t> table_keys_;
  const TableIdToNodeMap table_id_to_node_map_;
};
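
A minimal standalone sketch of the caching idea described above: generateCacheKey()
folds the bounding-box tuning parameters P into each per-device cache key with
boost::hash_combine (the real method also mixes in the device's fragment ids via
HashJoin::collectFragmentIds() and registers the key with the hash table recycler).
The helper and values below are hypothetical, shown only to illustrate that two runs
differing solely in P land under distinct cache keys.

#include <boost/functional/hash.hpp>

#include <cstddef>
#include <iostream>
#include <vector>

using QueryPlanHash = size_t;  // stand-in for the alias used by the header

// Hypothetical helper mirroring the parameter-folding step of generateCacheKey().
QueryPlanHash composeCacheKey(QueryPlanHash dag_and_join_cols_hash,
                              size_t max_hashtable_size,
                              double bucket_threshold,
                              const std::vector<double>& bucket_sizes) {
  auto hash_val = boost::hash_value(dag_and_join_cols_hash);
  boost::hash_combine(hash_val, max_hashtable_size);
  boost::hash_combine(hash_val, bucket_threshold);
  boost::hash_combine(hash_val, bucket_sizes);
  return hash_val;
}

int main() {
  const QueryPlanHash base_key = 0xabcddcbaU;  // hypothetical hash of DAG D + join columns J
  // Two builds over the same plan that differ only in the bucket threshold
  // produce different cache keys, so both hash tables can be recycled later.
  const auto key_a = composeCacheKey(base_key, 2097152, 0.1, {0.5, 0.5});
  const auto key_b = composeCacheKey(base_key, 2097152, 0.2, {0.5, 0.5});
  std::cout << key_a << (key_a == key_b ? " == " : " != ") << key_b << '\n';
  return 0;
}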