OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryHint.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef OMNISCI_QUERYHINT_H
18 #define OMNISCI_QUERYHINT_H
19 
20 #include <algorithm>
21 #include <optional>
22 
23 #include <boost/algorithm/string.hpp>
24 
26 
27 // We expect the query hint enum values to start at zero,
28 // and let the remaining enum values be auto-incremented.
29 enum QueryHint {
30  kCpuMode = 0,
59  kHintCount, // should be at the last elem before INVALID enum value to count #
60  // supported hints correctly
61  kInvalidHint // this should be the last elem of this enum
62 };
63 
64 static const std::unordered_map<std::string, QueryHint> SupportedQueryHints = {
65  {"cpu_mode", QueryHint::kCpuMode},
66  {"columnar_output", QueryHint::kColumnarOutput},
67  {"rowwise_output", QueryHint::kRowwiseOutput},
68  {"bbox_intersect_bucket_threshold", QueryHint::kBBoxIntersectBucketThreshold},
69  {"bbox_intersect_max_size", QueryHint::kBBoxIntersectMaxSize},
70  {"bbox_intersect_allow_gpu_build", QueryHint::kBBoxIntersectAllowGpuBuild},
71  {"bbox_intersect_no_cache", QueryHint::kBBoxIntersectNoCache},
72  {"bbox_intersect_keys_per_bin", QueryHint::kBBoxIntersectKeysPerBin},
73  {"keep_result", QueryHint::kKeepResult},
74  {"keep_table_function_result", QueryHint::kKeepTableFuncResult},
75  {"aggregate_tree_fanout", QueryHint::kAggregateTreeFanout},
76  {"cuda_block_size", QueryHint::kCudaBlockSize},
77  {"cuda_grid_size_multiplier", QueryHint::kCudaGridSize},
78  {"cuda_opt_block_and_grid_sizes", kOptCudaBlockAndGridSizes},
79  {"watchdog", QueryHint::kWatchdog},
80  {"dynamic_watchdog", QueryHint::kDynamicWatchdog},
81  {"watchdog_off", QueryHint::kWatchdogOff},
82  {"dynamic_watchdog_off", QueryHint::kDynamicWatchdogOff},
83  {"query_time_limit", QueryHint::kQueryTimeLimit},
84  {"allow_loop_join", QueryHint::kAllowLoopJoin},
85  {"disable_loop_join", QueryHint::kDisableLoopJoin},
86  {"loop_join_inner_table_max_num_rows", QueryHint::kLoopJoinInnerTableMaxNumRows},
87  {"max_join_hashtable_size", QueryHint::kMaxJoinHashTableSize},
88  {"force_baseline_hash_join", QueryHint::kforceBaselineHashJoin},
89  {"force_one_to_many_hash_join", QueryHint::kforceOneToManyHashJoin},
90  {"watchdog_max_projected_rows_per_device",
92  {"preflight_count_query_threshold", QueryHint::kPreflightCountQueryThreshold},
93  {"table_reordering_off", QueryHint::kTableReorderingOff},
94  {"ndv_groups_estimator_multiplier", QueryHint::kNDVGroupsEstimatorMultiplier}};
95 
98  std::string hint_name;
99 
100  HintIdentifier(bool global_hint, const std::string& hint_name)
101  : global_hint(global_hint), hint_name(hint_name){};
102 };
103 
105  // This class represents a parsed query hint's specification.
106  // Our query AST analyzer translates the query hint string into an understandable form,
107  // which we call "ExplainedQueryHint".
108  public:
109  // default constructor used for deserialization only
112  , global_hint_{false}
113  , is_marker_{false}
114  , has_kv_type_options_{false} {}
115 
117  bool global_hint,
118  bool is_marker,
119  bool has_kv_type_options)
120  : hint_(hint)
121  , global_hint_(global_hint)
122  , is_marker_(is_marker)
123  , has_kv_type_options_(has_kv_type_options) {}
124 
126  bool global_hint,
127  bool is_marker,
128  bool has_kv_type_options,
129  std::vector<std::string>& list_options)
130  : hint_(hint)
131  , global_hint_(global_hint)
132  , is_marker_(is_marker)
133  , has_kv_type_options_(has_kv_type_options)
134  , list_options_(std::move(list_options)) {}
135 
137  bool global_hint,
138  bool is_marker,
139  bool has_kv_type_options,
140  std::unordered_map<std::string, std::string>& kv_options)
141  : hint_(hint)
142  , global_hint_(global_hint)
143  , is_marker_(is_marker)
144  , has_kv_type_options_(has_kv_type_options)
145  , kv_options_(std::move(kv_options)) {}
146 
147  void setListOptions(std::vector<std::string>& list_options) {
148  list_options_ = list_options;
149  }
150 
151  void setKVOptions(std::unordered_map<std::string, std::string>& kv_options) {
152  kv_options_ = kv_options;
153  }
154 
155  void setInheritPaths(std::vector<int>& interit_paths) {
156  inherit_paths_ = interit_paths;
157  }
158 
159  const std::vector<std::string>& getListOptions() const { return list_options_; }
160 
161  const std::vector<int>& getInteritPath() const { return inherit_paths_; }
162 
163  const std::unordered_map<std::string, std::string>& getKVOptions() const {
164  return kv_options_;
165  }
166 
167  const QueryHint getHint() const { return hint_; }
168 
169  bool isGlobalHint() const { return global_hint_; }
170 
171  bool hasOptions() const { return is_marker_; }
172 
173  bool hasKvOptions() const { return has_kv_type_options_; }
174 
175  private:
177  // Set to true if this hint applies globally.
178  // Otherwise it only affects the node in which this hint is included (aka table hint).
180  // set true if this has no extra options (neither list_options nor kv_options)
182  // Set to true if it is not a marker and has key-value type options.
183  // Otherwise (it is not a marker but has list type options), we set this to false.
185  std::vector<int> inherit_paths_; // currently not used
186  std::vector<std::string> list_options_;
187  std::unordered_map<std::string, std::string> kv_options_;
188 };
189 
191  // for each query hint, we first translate the raw query hint info
192  // to understandable form called "ExplainedQueryHint"
193  // and we get all necessary info from it and organize it into "RegisteredQueryHint"
194  // so by using "RegisteredQueryHint", we can know and access which query hint is
195  // registered and its detailed info such as the hint's parameter values given by user
196  // NOTE: after changing query hint fields, we "SHOULD" also update the corresponding
197  // "QueryHintSerializer" accordingly
199  : cpu_mode(false)
202  , keep_result(false)
204  , watchdog(std::nullopt)
205  , dynamic_watchdog(std::nullopt)
206  , query_time_limit(0)
211  , cuda_block_size(0)
215  , bbox_intersect_bucket_threshold(std::numeric_limits<double>::max())
220  , use_loop_join(std::nullopt)
222  , max_join_hash_table_size(std::numeric_limits<size_t>::max())
226 
228  CHECK_EQ(registered_hint.size(), global_hints.registered_hint.size());
229  // Apply the registered global hints to the local hints if necessary.
230  // We prioritize the global hint when both the global and the local hint are enabled.
231  RegisteredQueryHint updated_query_hints(*this);
232 
233  constexpr int num_hints = static_cast<int>(QueryHint::kHintCount);
234  for (int i = 0; i < num_hints; ++i) {
235  if (global_hints.registered_hint.at(i)) {
236  updated_query_hints.registered_hint.at(i) = true;
237  switch (static_cast<QueryHint>(i)) {
238  case QueryHint::kCpuMode:
239  updated_query_hints.cpu_mode = true;
240  break;
242  updated_query_hints.columnar_output = true;
243  break;
245  updated_query_hints.rowwise_output = true;
246  break;
248  updated_query_hints.cuda_block_size = global_hints.cuda_block_size;
249  break;
251  updated_query_hints.cuda_grid_size_multiplier =
252  global_hints.cuda_grid_size_multiplier;
253  break;
255  updated_query_hints.opt_cuda_grid_and_block_size = true;
256  break;
258  updated_query_hints.bbox_intersect_bucket_threshold =
259  global_hints.bbox_intersect_bucket_threshold;
260  break;
262  updated_query_hints.bbox_intersect_max_size =
263  global_hints.bbox_intersect_max_size;
264  break;
266  updated_query_hints.bbox_intersect_allow_gpu_build = true;
267  break;
269  updated_query_hints.bbox_intersect_no_cache = true;
270  break;
272  updated_query_hints.bbox_intersect_keys_per_bin =
273  global_hints.bbox_intersect_keys_per_bin;
274  break;
276  updated_query_hints.keep_result = global_hints.keep_result;
277  break;
279  updated_query_hints.keep_table_function_result =
280  global_hints.keep_table_function_result;
281  break;
283  updated_query_hints.aggregate_tree_fanout =
284  global_hints.aggregate_tree_fanout;
285  break;
288  updated_query_hints.watchdog = global_hints.watchdog;
289  break;
292  updated_query_hints.dynamic_watchdog = global_hints.dynamic_watchdog;
293  break;
295  updated_query_hints.query_time_limit = global_hints.query_time_limit;
296  break;
299  updated_query_hints.use_loop_join = global_hints.use_loop_join;
300  break;
302  updated_query_hints.loop_join_inner_table_max_num_rows =
304  break;
306  updated_query_hints.max_join_hash_table_size =
307  global_hints.max_join_hash_table_size;
308  break;
310  updated_query_hints.force_baseline_hash_join =
311  global_hints.force_baseline_hash_join;
312  break;
314  updated_query_hints.force_one_to_many_hash_join =
315  global_hints.force_one_to_many_hash_join;
316  break;
318  updated_query_hints.watchdog_max_projected_rows_per_device =
320  break;
322  updated_query_hints.preflight_count_query_threshold =
323  global_hints.preflight_count_query_threshold;
324  break;
326  updated_query_hints.table_reordering_off = global_hints.table_reordering_off;
327  break;
329  updated_query_hints.ndv_groups_estimator_multiplier =
330  global_hints.ndv_groups_estimator_multiplier;
331  break;
332  default:
333  UNREACHABLE();
334  }
335  }
336  }
337  return updated_query_hints;
338  }
339 
340  // general query execution
341  bool cpu_mode;
346  std::optional<bool> watchdog;
347  std::optional<bool> dynamic_watchdog;
353 
354  // control CUDA behavior
358 
359  // window function framing
361 
362  // bbox_intersect hash join
363  double bbox_intersect_bucket_threshold; // defined in
364  // "BoundingBoxIntersectJoinHashTable.h"
369 
370  // generic hash join
371  std::optional<bool> use_loop_join;
376 
377  std::vector<bool> registered_hint;
378 
380 
381  public:
382  static QueryHint translateQueryHint(const std::string& hint_name) {
383  const auto lowered_hint_name = boost::algorithm::to_lower_copy(hint_name);
384  auto it = SupportedQueryHints.find(lowered_hint_name);
385  return it == SupportedQueryHints.end() ? QueryHint::kInvalidHint : it->second;
386  }
387 
388  bool isAnyQueryHintDelivered() const {
389  const auto identity = [](const bool b) { return b; };
390  return std::any_of(registered_hint.begin(), registered_hint.end(), identity);
391  }
392 
393  void registerHint(const QueryHint hint) {
394  const auto hint_class = static_cast<int>(hint);
395  registered_hint.at(hint_class) = true;
396  }
397 
398  bool isHintRegistered(const QueryHint hint) const {
399  const auto hint_class = static_cast<int>(hint);
400  return registered_hint.at(hint_class);
401  }
402 };
403 
404 // a map from a hint type (QueryHint enum value) to its detailed info
405 using Hints = std::unordered_map<QueryHint, ExplainedQueryHint>;
406 
407 #endif // OMNISCI_QUERYHINT_H
std::unordered_map< std::string, std::string > kv_options_
Definition: QueryHint.h:187
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool isGlobalHint() const
Definition: QueryHint.h:169
double bbox_intersect_keys_per_bin
Definition: QueryHint.h:368
const std::vector< int > & getInteritPath() const
Definition: QueryHint.h:161
const std::unordered_map< std::string, std::string > & getKVOptions() const
Definition: QueryHint.h:163
#define UNREACHABLE()
Definition: Logger.h:338
size_t g_preflight_count_query_threshold
Definition: Execute.cpp:84
double g_bbox_intersect_target_entries_per_bin
Definition: Execute.cpp:115
void setListOptions(std::vector< std::string > &list_options)
Definition: QueryHint.h:147
std::optional< bool > dynamic_watchdog
Definition: QueryHint.h:347
double cuda_grid_size_multiplier
Definition: QueryHint.h:356
size_t cuda_block_size
Definition: QueryHint.h:355
ExplainedQueryHint(QueryHint hint, bool global_hint, bool is_marker, bool has_kv_type_options)
Definition: QueryHint.h:116
std::vector< bool > registered_hint
Definition: QueryHint.h:377
void setKVOptions(std::unordered_map< std::string, std::string > &kv_options)
Definition: QueryHint.h:151
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:96
ExplainedQueryHint(QueryHint hint, bool global_hint, bool is_marker, bool has_kv_type_options, std::vector< std::string > &list_options)
Definition: QueryHint.h:125
size_t max_join_hash_table_size
Definition: QueryHint.h:373
bool opt_cuda_grid_and_block_size
Definition: QueryHint.h:357
bool hasKvOptions() const
Definition: QueryHint.h:173
bool keep_table_function_result
Definition: QueryHint.h:345
size_t query_time_limit
Definition: QueryHint.h:348
static const std::unordered_map< std::string, QueryHint > SupportedQueryHints
Definition: QueryHint.h:64
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:382
std::vector< std::string > list_options_
Definition: QueryHint.h:186
double bbox_intersect_bucket_threshold
Definition: QueryHint.h:363
double ndv_groups_estimator_multiplier
Definition: QueryHint.h:352
void registerHint(const QueryHint hint)
Definition: QueryHint.h:393
size_t g_watchdog_max_projected_rows_per_device
Definition: Execute.cpp:83
bool hasOptions() const
Definition: QueryHint.h:171
HintIdentifier(bool global_hint, const std::string &hint_name)
Definition: QueryHint.h:100
size_t watchdog_max_projected_rows_per_device
Definition: QueryHint.h:349
std::optional< bool > watchdog
Definition: QueryHint.h:346
size_t preflight_count_query_threshold
Definition: QueryHint.h:350
static RegisteredQueryHint defaults()
Definition: QueryHint.h:379
bool isHintRegistered(const QueryHint hint) const
Definition: QueryHint.h:398
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:405
QueryHint
Definition: QueryHint.h:29
size_t bbox_intersect_max_size
Definition: QueryHint.h:365
bool force_baseline_hash_join
Definition: QueryHint.h:374
bool bbox_intersect_no_cache
Definition: QueryHint.h:367
std::optional< bool > use_loop_join
Definition: QueryHint.h:371
RegisteredQueryHint operator||(const RegisteredQueryHint &global_hints) const
Definition: QueryHint.h:227
bool g_enable_watchdog false
Definition: Execute.cpp:80
bool global_hint
Definition: QueryHint.h:97
size_t loop_join_inner_table_max_num_rows
Definition: QueryHint.h:372
bool bbox_intersect_allow_gpu_build
Definition: QueryHint.h:366
std::vector< int > inherit_paths_
Definition: QueryHint.h:185
bool has_kv_type_options_
Definition: QueryHint.h:184
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
QueryHint hint_
Definition: QueryHint.h:176
size_t aggregate_tree_fanout
Definition: QueryHint.h:360
const QueryHint getHint() const
Definition: QueryHint.h:167
std::string hint_name
Definition: QueryHint.h:98
const std::vector< std::string > & getListOptions() const
Definition: QueryHint.h:159
void setInheritPaths(std::vector< int > &interit_paths)
Definition: QueryHint.h:155
ExplainedQueryHint(QueryHint hint, bool global_hint, bool is_marker, bool has_kv_type_options, std::unordered_map< std::string, std::string > &kv_options)
Definition: QueryHint.h:136
bool isAnyQueryHintDelivered() const
Definition: QueryHint.h:388
bool force_one_to_many_hash_join
Definition: QueryHint.h:375
size_t g_bbox_intersect_max_table_size_bytes
Definition: Execute.cpp:114