OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashtableRecycler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "HashtableRecycler.h"
18 
19 extern bool g_is_test_env;
20 
// Cache-membership test. The parameter list matches the
// `hasItemInCache(...) const override` declaration shown in the index at the end of
// this page. The visible code returns true when the container obtained for
// (item_type, device_identifier) holds an entry whose key equals `key`; for
// BBOX_INTERSECT_HT a further condition over the two bbox_intersect_meta_info values
// must also hold. `lock` is not used in the visible body (presumably it only
// documents that the caller already holds the cache lock - confirm).
// NOTE(review): listing lines 21, 27-28 and 43 are absent from this extraction: the
// opening line of the definition, the condition guarding the early `return false`,
// and the start of the `if (` whose arguments appear on lines 44-45. Restore them
// from the repository before relying on this text.
22  QueryPlanHash key,
23  CacheItemType item_type,
24  DeviceIdentifier device_identifier,
25  std::lock_guard<std::mutex>& lock,
26  std::optional<HashtableCacheMetaInfo> meta_info) const {
// (the guarding condition for this early exit is missing from the listing)
29  return false;
30  }
31  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
32  // hashtable cache of the *any* device type should be properly initialized
33  CHECK(hashtable_cache);
// linear scan for the first entry stored under `key`
34  auto candidate_ht_it = std::find_if(
35  hashtable_cache->begin(), hashtable_cache->end(), [&key](const auto& cached_item) {
36  return cached_item.key == key;
37  });
38  if (candidate_ht_it != hashtable_cache->end()) {
39  if (item_type == BBOX_INTERSECT_HT) {
// both the cached entry and the request must carry bounding-box meta info
40  CHECK(candidate_ht_it->meta_info &&
41  candidate_ht_it->meta_info->bbox_intersect_meta_info);
42  CHECK(meta_info && meta_info->bbox_intersect_meta_info);
// (listing line 43, which opens this condition, is missing; presumably a
// compatibility check between the cached and requested meta info - confirm)
44  *candidate_ht_it->meta_info->bbox_intersect_meta_info,
45  *meta_info->bbox_intersect_meta_info)) {
46  return true;
47  }
48  } else {
// every other item type matches on the key alone
49  return true;
50  }
51  }
52  return false;
53 }
54 
// Looks up the hash table cached under `key` for (item_type, device_identifier) and
// returns it, or nullptr when nothing usable is found. On a hit the entry's
// reference count is incremented and the recycle is logged at VLOG(1).
// The cache lock is acquired inside this function.
// NOTE(review): listing lines 60-61, 75 and 87 are absent from this extraction: the
// condition guarding the first `return nullptr`, the start of the `if (` whose
// arguments appear on lines 76-77, and one operand of the VLOG statement.
// NOTE(review): `hashtable_cache` is dereferenced without the CHECK that
// hasItemInCache applies to the same call - confirm it can never be null here.
55 std::shared_ptr<HashTable> HashtableRecycler::getItemFromCache(
56  QueryPlanHash key,
57  CacheItemType item_type,
58  DeviceIdentifier device_identifier,
59  std::optional<HashtableCacheMetaInfo> meta_info) {
// (the guarding condition for this early exit is missing from the listing)
62  return nullptr;
63  }
64  std::lock_guard<std::mutex> lock(getCacheLock());
65  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
// key-only lookup; meta info is compared below for BBOX_INTERSECT_HT
66  auto candidate_ht = getCachedItemWithoutConsideringMetaInfo(
67  key, item_type, device_identifier, *hashtable_cache, lock);
68  if (candidate_ht) {
69  bool can_return_cached_item = false;
70  if (item_type == BBOX_INTERSECT_HT) {
71  // we have to check hashtable metainfo of join hashtable for bounding box
72  // intersection
73  CHECK(candidate_ht->meta_info && candidate_ht->meta_info->bbox_intersect_meta_info);
74  CHECK(meta_info && meta_info->bbox_intersect_meta_info);
// (listing line 75, which opens this condition, is missing; presumably a
// compatibility check between the cached and requested meta info - confirm)
76  *candidate_ht->meta_info->bbox_intersect_meta_info,
77  *meta_info->bbox_intersect_meta_info)) {
78  can_return_cached_item = true;
79  }
80  } else {
81  can_return_cached_item = true;
82  }
83  if (can_return_cached_item) {
// a dirty entry must never be handed out; this is enforced as a hard CHECK
84  CHECK(!candidate_ht->isDirty());
85  candidate_ht->item_metric->incRefCount();
86  VLOG(1) << "[" << item_type << ", "
// (listing line 87 is missing; presumably the device identifier string - confirm)
88  << "] Recycle item in a cache (key: " << key << ")";
89  return candidate_ht->cached_item;
90  }
91  }
92  return nullptr;
93 }
94 
// Inserts `item_ptr` into the cache for (item_type, device_identifier). The
// parameter list matches the `putItemToCache(...) override` declaration shown in
// the index at the end of this page. Visible flow:
//   1. take the cache lock and ask hasItemInCache whether an entry already exists;
//   2. if one exists and is dirty, drop it so that the new item replaces it;
//   3. if no usable entry remains, ask the metric tracker whether the item fits,
//      clean the cache up when required, register a metric and append the item.
// Nothing is inserted when a clean entry already exists or when the tracker reports
// CacheAvailability::UNAVAILABLE.
// NOTE(review): listing lines 95, 102, 122 and 132 are absent from this extraction:
// the opening line (including the `key` parameter), the first half of the guard
// condition, the start of the `if (` on lines 123-124, and the callee name of the
// call whose arguments appear on line 133.
96  std::shared_ptr<HashTable> item_ptr,
97  CacheItemType item_type,
98  DeviceIdentifier device_identifier,
99  size_t item_size,
100  size_t compute_time,
101  std::optional<HashtableCacheMetaInfo> meta_info) {
// (the first half of this guard condition is missing from the listing)
103  key == EMPTY_HASHED_PLAN_DAG_KEY) {
104  return;
105  }
106  std::lock_guard<std::mutex> lock(getCacheLock());
107  auto has_cached_ht = hasItemInCache(key, item_type, device_identifier, lock, meta_info);
108  if (has_cached_ht) {
109  // check to see whether the cached one is in a dirty status
110  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
111  auto candidate_it =
112  std::find_if(hashtable_cache->begin(),
113  hashtable_cache->end(),
114  [&key](const auto& cached_item) { return cached_item.key == key; });
115  bool found_candidate = false;
116  if (candidate_it != hashtable_cache->end()) {
117  if (item_type == BBOX_INTERSECT_HT) {
118  // we have to check hashtable metainfo for bounding box intersection
119  CHECK(candidate_it->meta_info &&
120  candidate_it->meta_info->bbox_intersect_meta_info);
121  CHECK(meta_info && meta_info->bbox_intersect_meta_info);
// (listing line 122, which opens this condition, is missing; presumably a
// compatibility check between the cached and requested meta info - confirm)
123  *candidate_it->meta_info->bbox_intersect_meta_info,
124  *meta_info->bbox_intersect_meta_info)) {
125  found_candidate = true;
126  }
127  } else {
128  found_candidate = true;
129  }
130  if (found_candidate && candidate_it->isDirty()) {
131  // remove the dirty item from the cache and make a room for the new one
// (listing line 132 with the callee name is missing; presumably
// removeItemFromCache - confirm)
133  key, item_type, device_identifier, lock, candidate_it->meta_info);
// fall through to the insertion path below
134  has_cached_ht = false;
135  }
136  }
137  }
138 
139  if (!has_cached_ht) {
140  // check cache's space availability
141  auto& metric_tracker = getMetricTracker(item_type);
142  auto cache_status = metric_tracker.canAddItem(device_identifier, item_size);
143  if (cache_status == CacheAvailability::UNAVAILABLE) {
144  // hashtable is too large
145  LOG(INFO) << "Caching hash table fails: hash table is too large";
146  return;
147  } else if (cache_status == CacheAvailability::AVAILABLE_AFTER_CLEANUP) {
148  // we need to clean up some cached hashtables to make a room to insert this
149  // hashtable here we try to cache the new one anyway since we don't know the
150  // importance of this hashtable yet and if it is not that frequently reused it is
151  // removed in a near future
152  auto required_size = metric_tracker.calculateRequiredSpaceForItemAddition(
153  device_identifier, item_size);
154  cleanupCacheForInsertion(item_type, device_identifier, required_size, lock);
155  }
156  // put hashtable's metric to metric tracker
157  auto new_cache_metric_ptr = metric_tracker.putNewCacheItemMetric(
158  key, device_identifier, item_size, compute_time);
// the tracker must have recorded exactly the size that was passed in
159  CHECK_EQ(item_size, new_cache_metric_ptr->getMemSize());
160  // put hashtable to cache
161  VLOG(1) << "[" << item_type << ", "
162  << DataRecyclerUtil::getDeviceIdentifierString(device_identifier)
163  << "] Put item to cache (key: " << key << ")";
164  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
165  hashtable_cache->emplace_back(key, item_ptr, new_cache_metric_ptr, meta_info);
166  }
167  // we have a cached hashtable in a clean status
168  return;
169 }
170 
// Removes the entry stored under `key` from the container for
// (item_type, device_identifier), then removes its metric and subtracts its size
// from the tracked cache size. The parameter list matches the
// `removeItemFromCache(...) override` declaration shown in the index at the end of
// this page. `lock` and `meta_info` are not used in the visible body.
// NOTE(review): listing lines 171 and 177 are absent from this extraction: the
// opening line of the definition and the first half of the guard condition.
// NOTE(review): the metric lookup is CHECKed before the container is searched, so a
// key without a registered metric aborts instead of returning quietly; when the
// metric exists but the container has no such key, the function returns without
// removing the metric. Confirm both are intended.
172  QueryPlanHash key,
173  CacheItemType item_type,
174  DeviceIdentifier device_identifier,
175  std::lock_guard<std::mutex>& lock,
176  std::optional<HashtableCacheMetaInfo> meta_info) {
// (the first half of this guard condition is missing from the listing)
178  key == EMPTY_HASHED_PLAN_DAG_KEY) {
179  return;
180  }
181  auto& cache_metrics = getMetricTracker(item_type);
182  // remove cached item from the cache
183  auto cache_metric = cache_metrics.getCacheItemMetric(key, device_identifier);
184  CHECK(cache_metric);
// remember the size now; the metric itself is removed further down
185  auto hashtable_size = cache_metric->getMemSize();
186  auto hashtable_container = getCachedItemContainer(item_type, device_identifier);
187  auto filter = [key](auto const& item) { return item.key == key; };
188  auto itr =
189  std::find_if(hashtable_container->cbegin(), hashtable_container->cend(), filter);
190  if (itr == hashtable_container->cend()) {
191  return;
192  } else {
193  VLOG(1) << "[" << item_type << ", "
194  << DataRecyclerUtil::getDeviceIdentifierString(device_identifier)
195  << "] remove cached item from cache (key: " << key << ")";
196  hashtable_container->erase(itr);
197  }
198  // remove cache metric
199  cache_metrics.removeCacheItemMetric(key, device_identifier);
200  // update current cache size
201  cache_metrics.updateCurrentCacheSize(
202  device_identifier, CacheUpdateAction::REMOVE, hashtable_size);
203  return;
204 }
205 
207  CacheItemType item_type,
208  DeviceIdentifier device_identifier,
209  size_t required_size,
210  std::lock_guard<std::mutex>& lock,
211  std::optional<HashtableCacheMetaInfo> meta_info) {
212  // sort the vector based on the importance of the cached items (by # referenced, size
213  // and compute time) and then remove unimportant cached items
214  int elimination_target_offset = 0;
215  size_t removed_size = 0;
216  auto& metric_tracker = getMetricTracker(item_type);
217  auto actual_space_to_free = metric_tracker.getTotalCacheSize() / 2;
218  if (!g_is_test_env && required_size < actual_space_to_free) {
219  // remove enough items to avoid too frequent cache cleanup
220  // we do not apply thin to test code since test scenarios are designed to
221  // specific size of items and their caches
222  required_size = actual_space_to_free;
223  }
224  metric_tracker.sortCacheInfoByQueryMetric(device_identifier);
225  auto cached_item_metrics = metric_tracker.getCacheItemMetrics(device_identifier);
226  sortCacheContainerByQueryMetric(item_type, device_identifier);
227 
228  // collect targets to eliminate
229  for (auto& metric : cached_item_metrics) {
230  auto target_size = metric->getMemSize();
231  ++elimination_target_offset;
232  removed_size += target_size;
233  if (removed_size > required_size) {
234  break;
235  }
236  }
237 
238  // eliminate targets in 1) cache container and 2) their metrics
239  removeCachedItemFromBeginning(item_type, device_identifier, elimination_target_offset);
240  metric_tracker.removeMetricFromBeginning(device_identifier, elimination_target_offset);
241 
242  // update the current cache size after this cleanup
243  metric_tracker.updateCurrentCacheSize(
244  device_identifier, CacheUpdateAction::REMOVE, removed_size);
245 }
246 
// Empties every cached-item container of every cache item type while holding the
// cache lock, logging at VLOG(1) for each non-empty container. The index at the end
// of this page lists a matching `void clearCache() override` declaration.
// NOTE(review): listing lines 247, 250, 255-256 and 262 are absent from this
// extraction: the opening line of the definition, one statement at the top of the
// outer loop, two operands of the VLOG statement, and one statement after the loop.
// Their content cannot be determined from here.
248  std::lock_guard<std::mutex> lock(getCacheLock());
249  for (auto& item_type : getCacheItemType()) {
// (listing line 250 is missing here)
// NOTE(review): the iterator returned by find() is dereferenced without comparing
// it to end(); this relies on every cache item type having an entry - confirm.
251  auto item_cache = getItemCache().find(item_type)->second;
252  for (auto& kv : *item_cache) {
253  if (!kv.second->empty()) {
254  VLOG(1) << "[" << item_type << ", "
// (listing lines 255-256 are missing here)
257  << "] clear cache (# items: " << kv.second->size() << ")";
258  kv.second->clear();
259  }
260  }
261  }
// (listing line 262 is missing here)
263 }
264 
// Marks every cached item whose key is in `key_set` as dirty for
// (item_type, device_identifier), then evicts the hash tables built from synthetic
// tables. Does nothing when the data recycler or the hashtable cache is disabled,
// or when `key_set` is empty. The cache lock is acquired inside this function.
// The index at the end of this page lists a matching declaration whose first
// parameter is `size_t table_key`.
// NOTE(review): listing lines 265 and 280 are absent from this extraction: the
// opening line of the definition (with the first parameter) and the statement that
// the comment on lines 277-279 describes (presumably the removal of the
// table_key mapping - confirm).
266  std::unordered_set<QueryPlanHash>& key_set,
267  CacheItemType item_type,
268  DeviceIdentifier device_identifier) {
269  if (!g_enable_data_recycler || !g_use_hashtable_cache || key_set.empty()) {
270  return;
271  }
272  std::lock_guard<std::mutex> lock(getCacheLock());
273  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
274  for (auto key : key_set) {
275  markCachedItemAsDirtyImpl(key, *hashtable_cache);
276  }
277  // after marking all cached hashtable having the given "table_key" as its one of input,
278  // we remove the mapping between the table_key -> hashed_query_plan_dag
279  // since we do not need to care about "already marked" item in the cache
// (listing line 280, the statement described above, is missing here)
281 
282  // hash tables built from synthetically generated tables have no chance to be cleared
283  // since we assume their table keys as {-1, -1}
284  // this means that typically we do not have a chance to invalidate cached items
285  // by calling invalidation request with table key {-1, -1}
286  // thus, we manually invalidate them here to maintain cache space based on the
287  // assumption that synthetically generated table is not frequently used as typical
288  // tables do
289  removeCachedHashtableBuiltFromSyntheticTable(item_type, device_identifier, lock);
290 }
291 
293  CacheItemType item_type,
294  DeviceIdentifier device_identifier,
295  std::lock_guard<std::mutex>& lock) {
296  auto hashtable_cache = getCachedItemContainer(item_type, device_identifier);
297  CHECK(hashtable_cache);
298  auto unitary_table_key = DataRecyclerUtil::getUnitaryTableKey();
299  auto key_set_it = table_key_to_query_plan_dag_map_.find(unitary_table_key);
300  if (key_set_it != table_key_to_query_plan_dag_map_.end()) {
301  auto& key_set = key_set_it->second;
302  for (auto key : key_set) {
303  removeItemFromCache(key, item_type, device_identifier, lock);
304  }
305  // after marking all cached hashtable having the given "table_key" as its one of
306  // input, we remove the mapping between the table_key -> hashed_query_plan_dag since
307  // we do not need to care about "already marked" item in the cache
308  removeTableKeyInfoFromQueryPlanDagMap(unitary_table_key);
309  }
310 }
311 
312 std::string HashtableRecycler::toString() const {
313  std::ostringstream oss;
314  oss << "A current status of the Hashtable Recycler:\n";
315  for (auto& item_type : getCacheItemType()) {
316  oss << "\t" << item_type;
317  auto& metric_tracker = getMetricTracker(item_type);
318  oss << "\n\t# cached hashtables:\n";
319  auto item_cache = getItemCache().find(item_type)->second;
320  for (auto& cache_container : *item_cache) {
321  oss << "\t\tDevice"
322  << DataRecyclerUtil::getDeviceIdentifierString(cache_container.first)
323  << ", # hashtables: " << cache_container.second->size() << "\n";
324  for (auto& ht : *cache_container.second) {
325  oss << "\t\t\tHT] " << ht.item_metric->toString() << "\n";
326  }
327  }
328  oss << "\t" << metric_tracker.toString() << "\n";
329  }
330  return oss.str();
331 }
332 
// Decides whether two bounding-box-intersect meta infos are compatible. The
// parameter list matches the
// `checkHashtableForBoundingBoxIntersectBucketCompatability(...) const`
// declaration shown in the index at the end of this page.
// Visible rules: the bucket_sizes vectors must have the same length, each pair of
// bucket sizes may differ by at most 1e-4, and two further checks (threshold and
// maximum table size) must both hold.
// NOTE(review): listing lines 333, 345 and 347 are absent from this extraction: the
// opening line of the definition and the right-hand sides of the two checks. The
// field compared by `threshold_check` cannot be determined from here.
334  const BoundingBoxIntersectMetaInfo& candidate,
335  const BoundingBoxIntersectMetaInfo& target) const {
336  if (candidate.bucket_sizes.size() != target.bucket_sizes.size()) {
337  return false;
338  }
// element-wise comparison with an absolute tolerance rather than exact equality
339  for (size_t i = 0; i < candidate.bucket_sizes.size(); i++) {
340  if (std::abs(target.bucket_sizes[i] - candidate.bucket_sizes[i]) > 1e-4) {
341  return false;
342  }
343  }
344  auto threshold_check =
// (listing line 345, the expression assigned above, is missing here)
346  auto hashtable_size_check = candidate.bbox_intersect_max_table_size_bytes ==
// (listing line 347, the right-hand operand, is missing here; presumably the same
// field of `target` - confirm)
348  return threshold_check && hashtable_size_check;
349 }
350 
352  std::vector<const Analyzer::ColumnVar*>& inner_cols,
353  std::vector<const Analyzer::ColumnVar*>& outer_cols,
354  Executor* executor) {
355  auto hashed_join_col_info = EMPTY_HASHED_PLAN_DAG_KEY;
356  boost::hash_combine(
357  hashed_join_col_info,
358  executor->getQueryPlanDagCache().translateColVarsToInfoHash(inner_cols, false));
359  boost::hash_combine(
360  hashed_join_col_info,
361  executor->getQueryPlanDagCache().translateColVarsToInfoHash(outer_cols, false));
362  return hashed_join_col_info;
363 }
364 
366  const TableIdToNodeMap& table_id_to_node_map,
367  bool need_dict_translation,
368  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_info_pairs,
369  const shared::TableKey& table_key) {
370  // if hashtable is built from subquery's resultset we need to check
371  // 1) whether resulset rows can have inconsistency, e.g., rows can randomly be
372  // permutated per execution and 2) whether it needs dictionary translation for hashtable
373  // building to recycle the hashtable safely
374  auto getNodeByTableId =
375  [&table_id_to_node_map](
376  const shared::TableKey& table_key_param) -> const RelAlgNode* {
377  auto it = table_id_to_node_map.find(table_key_param);
378  if (it != table_id_to_node_map.end()) {
379  return it->second;
380  }
381  return nullptr;
382  };
383  bool found_sort_node = false;
384  bool found_project_node = false;
385  if (table_key.table_id < 0) {
386  const auto origin_table_id = table_key.table_id * -1;
387  const auto inner_node = getNodeByTableId({table_key.db_id, origin_table_id});
388  if (!inner_node) {
389  // we have to keep the node info of temporary resultset
390  // so in this case we are not safe to recycle the hashtable
391  return false;
392  }
393  // it is not safe to recycle the hashtable when
394  // this resultset may have resultset ordering inconsistency and/or
395  // need dictionary translation for hashtable building
396  auto sort_node = dynamic_cast<const RelSort*>(inner_node);
397  if (sort_node) {
398  found_sort_node = true;
399  } else {
400  auto project_node = dynamic_cast<const RelProject*>(inner_node);
401  if (project_node) {
402  found_project_node = true;
403  }
404  }
405  }
406  return !(found_sort_node || (found_project_node && need_dict_translation));
407 }
408 
410  const std::vector<QueryPlanHash>& cache_keys) {
411  return cache_keys.empty() ||
412  std::any_of(cache_keys.cbegin(), cache_keys.cend(), [](QueryPlanHash key) {
413  return key == EMPTY_HASHED_PLAN_DAG_KEY;
414  });
415 }
416 
418  const std::vector<InnerOuter>& inner_outer_pairs,
419  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs,
420  const SQLOps op_type,
421  const JoinType join_type,
422  const HashTableBuildDagMap& hashtable_build_dag_map,
423  int device_count,
424  int shard_count,
425  const std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>>& frags_for_device,
426  Executor* executor) {
427  CHECK_GT(device_count, (int)0);
428  CHECK_GE(shard_count, (int)0);
429  std::vector<const Analyzer::ColumnVar*> inner_cols_vec, outer_cols_vec;
430  size_t join_qual_info = EMPTY_HASHED_PLAN_DAG_KEY;
431  for (auto& join_col_pair : inner_outer_pairs) {
432  inner_cols_vec.push_back(join_col_pair.first);
433  // extract inner join col's id
434  // b/c when the inner col comes from a subquery's resulset,
435  // table id / rte_index can be different even if we have the same
436  // subquery's semantic, i.e., project col A from table T
437  boost::hash_combine(join_qual_info,
438  executor->getQueryPlanDagCache().getJoinColumnsInfoHash(
439  join_col_pair.first, JoinColumnSide::kDirect, true));
440  boost::hash_combine(join_qual_info, op_type);
441  boost::hash_combine(join_qual_info, join_type);
442  auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(join_col_pair.second);
443  boost::hash_combine(join_qual_info, join_col_pair.first->get_type_info().toString());
444  if (outer_col_var) {
445  outer_cols_vec.push_back(outer_col_var);
446  if (join_col_pair.first->get_type_info().is_dict_encoded_string()) {
447  // add comp param for dict encoded string
448  boost::hash_combine(join_qual_info,
449  executor->getQueryPlanDagCache().getJoinColumnsInfoHash(
450  outer_col_var, JoinColumnSide::kDirect, true));
451  boost::hash_combine(join_qual_info, outer_col_var->get_type_info().toString());
452  }
453  }
454  }
455 
456  if (inner_outer_string_op_infos_pairs.size()) {
457  boost::hash_combine(join_qual_info, ::toString(inner_outer_string_op_infos_pairs));
458  }
459 
460  auto join_cols_info = getJoinColumnInfoHash(inner_cols_vec, outer_cols_vec, executor);
461  HashtableAccessPathInfo access_path_info(device_count);
462  auto it = hashtable_build_dag_map.find(join_cols_info);
463  if (it != hashtable_build_dag_map.end()) {
464  size_t hashtable_access_path = EMPTY_HASHED_PLAN_DAG_KEY;
465  boost::hash_combine(hashtable_access_path, it->second.inner_cols_access_path);
466  boost::hash_combine(hashtable_access_path, join_qual_info);
467  if (inner_cols_vec.front()->get_type_info().is_dict_encoded_string()) {
468  boost::hash_combine(hashtable_access_path, it->second.outer_cols_access_path);
469  }
470  boost::hash_combine(hashtable_access_path, shard_count);
471 
472  if (!shard_count) {
473  const auto frag_list = HashJoin::collectFragmentIds(frags_for_device[0]);
474  auto cache_key_for_device = hashtable_access_path;
475  // no sharding, so all devices have the same fragments
476  boost::hash_combine(cache_key_for_device, frag_list);
477  for (int i = 0; i < device_count; ++i) {
478  access_path_info.hashed_query_plan_dag[i] = cache_key_for_device;
479  }
480  } else {
481  // we need to retrieve specific fragments for each device
482  // and consider them to make a cache key for it
483  for (int i = 0; i < device_count; ++i) {
484  const auto frag_list_for_device =
485  HashJoin::collectFragmentIds(frags_for_device[i]);
486  auto cache_key_for_device = hashtable_access_path;
487  boost::hash_combine(cache_key_for_device, frag_list_for_device);
488  access_path_info.hashed_query_plan_dag[i] = cache_key_for_device;
489  }
490  }
491  access_path_info.table_keys = it->second.inputTableKeys;
492  }
493  return access_path_info;
494 }
495 
496 std::tuple<QueryPlanHash,
497  std::shared_ptr<HashTable>,
498  std::optional<HashtableCacheMetaInfo>>
500  CacheItemType hash_table_type,
501  DeviceIdentifier device_identifier) {
502  std::lock_guard<std::mutex> lock(getCacheLock());
503  auto hashtable_cache = getCachedItemContainer(hash_table_type, device_identifier);
504  for (auto& ht : *hashtable_cache) {
505  if (!visited.count(ht.key)) {
506  return std::make_tuple(ht.key, ht.cached_item, ht.meta_info);
507  }
508  }
509  return std::make_tuple(EMPTY_HASHED_PLAN_DAG_KEY, nullptr, std::nullopt);
510 }
511 
513  size_t hashed_query_plan_dag,
514  const std::unordered_set<size_t>& table_keys) {
515  std::lock_guard<std::mutex> lock(getCacheLock());
516  for (auto table_key : table_keys) {
517  auto itr = table_key_to_query_plan_dag_map_.try_emplace(table_key).first;
518  itr->second.insert(hashed_query_plan_dag);
519  }
520 }
521 
522 std::optional<std::unordered_set<size_t>>
524  std::lock_guard<std::mutex> lock(getCacheLock());
525  auto it = table_key_to_query_plan_dag_map_.find(table_key);
526  return it != table_key_to_query_plan_dag_map_.end() ? std::make_optional(it->second)
527  : std::nullopt;
528 }
529 
531  // this function is called when marking cached item for the given table_key as dirty
532  // and when we do that we already acquire the cache lock so we skip to lock in this func
533  table_key_to_query_plan_dag_map_.erase(table_key);
534 }
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
Definition: HashJoin.cpp:461
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool hasItemInCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) const override
size_t DeviceIdentifier
Definition: DataRecycler.h:129
static std::string getDeviceIdentifierString(DeviceIdentifier device_identifier)
Definition: DataRecycler.h:138
JoinType
Definition: sqldefs.h:238
void putItemToCache(QueryPlanHash key, std::shared_ptr< HashTable > item_ptr, CacheItemType item_type, DeviceIdentifier device_identifier, size_t item_size, size_t compute_time, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
static bool isInvalidHashTableCacheKey(const std::vector< QueryPlanHash > &cache_keys)
std::optional< CachedItem< std::shared_ptr< HashTable >, HashtableCacheMetaInfo > > getCachedItemWithoutConsideringMetaInfo(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, CachedItemContainer &m, std::lock_guard< std::mutex > &lock)
Definition: DataRecycler.h:543
CacheMetricTracker & getMetricTracker(CacheItemType item_type)
Definition: DataRecycler.h:654
#define LOG(tag)
Definition: Logger.h:285
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
SQLOps
Definition: sqldefs.h:31
static size_t getJoinColumnInfoHash(std::vector< const Analyzer::ColumnVar * > &inner_cols, std::vector< const Analyzer::ColumnVar * > &outer_cols, Executor *executor)
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::shared_ptr< CachedItemContainer > getCachedItemContainer(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:528
void markCachedItemAsDirtyImpl(QueryPlanHash key, CachedItemContainer &m) const
Definition: DataRecycler.h:504
void addQueryPlanDagForTableKeys(size_t hashed_query_plan_dag, const std::unordered_set< size_t > &table_keys)
bool g_enable_data_recycler
Definition: Execute.cpp:158
#define CHECK_GT(x, y)
Definition: Logger.h:305
void clearCacheMetricTracker()
Definition: DataRecycler.h:317
void removeCachedHashtableBuiltFromSyntheticTable(CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock)
void cleanupCacheForInsertion(CacheItemType item_type, DeviceIdentifier device_identifier, size_t required_size, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
std::unordered_set< size_t > table_keys
std::vector< QueryPlanHash > hashed_query_plan_dag
bool g_is_test_env
Definition: Execute.cpp:153
bool checkHashtableForBoundingBoxIntersectBucketCompatability(const BoundingBoxIntersectMetaInfo &candidate_bucket_dim, const BoundingBoxIntersectMetaInfo &target_bucket_dim) const
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::unordered_map< size_t, std::unordered_set< size_t > > table_key_to_query_plan_dag_map_
CacheItemType
Definition: DataRecycler.h:38
static QueryPlanHash getUnitaryTableKey()
Definition: DataRecycler.h:145
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
void removeItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
void sortCacheContainerByQueryMetric(CacheItemType item_type, DeviceIdentifier device_identifier)
Definition: DataRecycler.h:632
std::unordered_set< CacheItemType > const & getCacheItemType() const
Definition: DataRecycler.h:664
size_t QueryPlanHash
std::optional< std::unordered_set< size_t > > getMappedQueryPlanDagsWithTableKey(size_t table_key) const
std::string toString() const override
void markCachedItemAsDirty(size_t table_key, std::unordered_set< QueryPlanHash > &key_set, CacheItemType item_type, DeviceIdentifier device_identifier) override
void clearCache() override
#define CHECK(condition)
Definition: Logger.h:291
static bool isSafeToCacheHashtable(const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_info_pairs, const shared::TableKey &table_key)
void removeTableKeyInfoFromQueryPlanDagMap(size_t table_key)
std::tuple< QueryPlanHash, std::shared_ptr< HashTable >, std::optional< HashtableCacheMetaInfo > > getCachedHashtableWithoutCacheKey(std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
std::vector< double > bucket_sizes
virtual std::shared_ptr< HashTable > getItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt)=0
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
void removeCachedItemFromBeginning(CacheItemType item_type, DeviceIdentifier device_identifier, int offset)
Definition: DataRecycler.h:621
static HashtableAccessPathInfo getHashtableAccessPathInfo(const std::vector< InnerOuter > &inner_outer_pairs, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, int device_count, int shard_count, const std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &frags_for_device, Executor *executor)
bool g_use_hashtable_cache
Definition: Execute.cpp:159
#define VLOG(n)
Definition: Logger.h:388