OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagExtractor.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "QueryPlanDagExtractor.h"
18 
19 #include <boost/algorithm/cxx11/any_of.hpp>
20 
21 #include "RexVisitor.h"
22 #include "Shared/DbObjectKeys.h"
24 
25 extern bool g_is_test_env;
26 
27 namespace {
28 struct IsEquivBinOp {
29  bool operator()(std::shared_ptr<Analyzer::Expr> const& qual) {
30  if (auto oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual)) {
31  return IS_EQUIVALENCE(oper->get_optype());
32  }
33  return false;
34  }
35 };
36 } // namespace
37 
38 std::vector<InnerOuterOrLoopQual> QueryPlanDagExtractor::normalizeColumnsPair(
39  const Analyzer::BinOper* condition) {
40  std::vector<InnerOuterOrLoopQual> result;
41  const auto lhs_tuple_expr =
42  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
43  const auto rhs_tuple_expr =
44  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
45 
46  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
47  auto do_normalize_inner_outer_pair = [&result, &condition](
48  const Analyzer::Expr* lhs,
49  const Analyzer::Expr* rhs,
50  const TemporaryTables* temporary_table) {
51  try {
52  auto inner_outer_pair =
54  lhs, rhs, temporary_table, condition->is_bbox_intersect_oper())
55  .first;
56  InnerOuterOrLoopQual valid_qual{
57  std::make_pair(inner_outer_pair.first, inner_outer_pair.second), false};
58  result.push_back(valid_qual);
59  } catch (HashJoinFail& e) {
60  InnerOuterOrLoopQual invalid_qual{std::make_pair(lhs, rhs), true};
61  result.push_back(invalid_qual);
62  }
63  };
64  if (lhs_tuple_expr) {
65  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
66  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
67  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
68  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
69  do_normalize_inner_outer_pair(
70  lhs_tuple[i].get(), rhs_tuple[i].get(), executor_->getTemporaryTables());
71  }
72  } else {
73  do_normalize_inner_outer_pair(condition->get_left_operand(),
74  condition->get_right_operand(),
75  executor_->getTemporaryTables());
76  }
77  return result;
78 }
79 
80 // To extract query plan DAG, we call this function with root node of the query plan
81 // and some objects required while extracting DAG
82 // We consider a DAG representation of a query plan as a series of "unique" rel node ids
83 // We decide each rel node's node id by searching the cached plan DAG first,
84 // and assign a new id iff there exists no duplicated rel node that can reuse
86  const RelAlgNode* top_node,
87  Executor* executor) {
88  auto dag_checker_res = QueryPlanDagChecker::hasNonSupportedNodeInDag(top_node);
89  if (dag_checker_res.first) {
90  VLOG(1) << "Stop DAG extraction (" << dag_checker_res.second << ")";
91  return {EMPTY_QUERY_PLAN, true};
92  }
93  heavyai::unique_lock<heavyai::shared_mutex> lock(executor->getDataRecyclerLock());
94  auto& cached_dag = executor->getQueryPlanDagCache();
95  QueryPlanDagExtractor dag_extractor(cached_dag, {}, executor, false);
96  extractQueryPlanDagImpl(top_node, dag_extractor);
97  auto extracted_query_plan_dag = dag_extractor.getExtractedQueryPlanDagStr();
98  top_node->setQueryPlanDag(extracted_query_plan_dag);
99  if (auto sort_node = dynamic_cast<RelSort const*>(top_node)) {
100  // we evaluate sort node based on the resultset of its child node
101  // so we need to mark the extracted query plan of the child node
102  // for the resultset recycling
103  auto child_dag = dag_extractor.getExtractedQueryPlanDagStr(1);
104  sort_node->getInput(0)->setQueryPlanDag(child_dag);
105  }
106  return {extracted_query_plan_dag, dag_extractor.isDagExtractionAvailable()};
107 }
108 
110  const RelAlgNode* top_node,
111  std::optional<unsigned> left_deep_tree_id,
112  std::unordered_map<unsigned, JoinQualsPerNestingLevel> left_deep_tree_infos,
113  Executor* executor) {
114  // we already extract a query plan dag for the input query which is stored at top_node
115  if (top_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
116  return {};
117  }
118  heavyai::unique_lock<heavyai::shared_mutex> lock(executor->getDataRecyclerLock());
119  auto& cached_dag = executor->getQueryPlanDagCache();
120  QueryPlanDagExtractor dag_extractor(cached_dag, left_deep_tree_infos, executor, true);
121  extractQueryPlanDagImpl(top_node, dag_extractor);
122  return {dag_extractor.getHashTableBuildDag(), dag_extractor.getTableIdToNodeMap()};
123 }
124 
126  const RelAlgNode* top_node,
127  QueryPlanDagExtractor& dag_extractor) {
128  // add the root node of this query plan DAG
129  auto res = dag_extractor.global_dag_.addNodeIfAbsent(top_node);
130  if (!res) {
131  VLOG(1) << "Stop DAG extraction (Query plan dag cache reaches the maximum capacity)";
132  dag_extractor.contain_not_supported_rel_node_ = true;
133  return;
134  }
135  CHECK(res.has_value());
136  top_node->setRelNodeDagId(res.value());
137  dag_extractor.extracted_dag_.push_back(::toString(res.value()));
138 
139  // visit child node if necessary
140  if (auto table_func_node = dynamic_cast<const RelTableFunction*>(top_node)) {
141  for (size_t i = 0; i < table_func_node->inputCount(); ++i) {
142  dag_extractor.visit(table_func_node, table_func_node->getInput(i));
143  }
144  } else {
145  auto num_child_node = top_node->inputCount();
146  switch (num_child_node) {
147  case 1: // unary op
148  dag_extractor.visit(top_node, top_node->getInput(0));
149  break;
150  case 2: // binary op
151  if (auto trans_join_node = dynamic_cast<const RelTranslatedJoin*>(top_node)) {
152  dag_extractor.visit(trans_join_node, trans_join_node->getLHS());
153  dag_extractor.visit(trans_join_node, trans_join_node->getRHS());
154  break;
155  }
156  VLOG(1) << "Visit an invalid rel node while extracting query plan DAG: "
157  << ::toString(top_node);
158  return;
159  case 0: // leaf node
160  break;
161  default:
162  // since we replace RelLeftDeepJoin as a set of RelTranslatedJoin
163  // which is a binary op, # child nodes for every rel node should be <= 2
164  UNREACHABLE();
165  }
166  }
167 
168  // check whether extracted DAG is available to use
169  if (dag_extractor.extracted_dag_.empty() || dag_extractor.isDagExtractionAvailable()) {
170  dag_extractor.contain_not_supported_rel_node_ = true;
171  return;
172  }
173 
174  if (g_is_test_env) {
175  dag_extractor.executor_->registerExtractedQueryPlanDag(
176  dag_extractor.getExtractedQueryPlanDagStr());
177  }
178  return;
179 }
180 
182  std::ostringstream oss;
183  size_t cnt = 0;
184  if (start_pos > extracted_dag_.size()) {
185  return EMPTY_QUERY_PLAN;
186  }
187  for (auto& dag_node_id : extracted_dag_) {
188  if (cnt >= start_pos) {
189  oss << dag_node_id << "|";
190  }
191  ++cnt;
192  }
193  return oss.str();
194 }
195 
197  std::optional<RelNodeId> retrieved_node_id) {
198  if (!retrieved_node_id) {
199  VLOG(1) << "Stop DAG extraction (Detect an invalid dag id)";
201  return false;
202  }
203  CHECK(retrieved_node_id.has_value());
204  node->setRelNodeDagId(retrieved_node_id.value());
205  return true;
206 }
207 
209  const RelAlgNode* parent_node,
210  const RelAlgNode* child_node,
211  std::optional<RelNodeId> retrieved_node_id) {
212  CHECK(parent_node);
213  CHECK(child_node);
214  CHECK(retrieved_node_id.has_value());
215  auto parent_node_id = parent_node->getRelNodeDagId();
216  global_dag_.connectNodes(parent_node_id, retrieved_node_id.value());
217  extracted_dag_.push_back(::toString(retrieved_node_id.value()));
218  return true;
219 }
220 
222  const RelAlgNode* child_node) {
223  // This function takes a responsibility for all rel nodes
224  // except 1) RelLeftDeepJoinTree and 2) RelTranslatedJoin
225  auto res = global_dag_.addNodeIfAbsent(child_node);
226  if (validateNodeId(child_node, res) &&
227  registerNodeToDagCache(parent_node, child_node, res)) {
228  for (size_t i = 0; i < child_node->inputCount(); i++) {
229  visit(child_node, child_node->getInput(i));
230  }
231  }
232 }
233 
234 // we recursively visit each rel node starting from the root
235 // and collect assigned rel node ids and return them as query plan DAG
236 // for join operations we additionally generate additional information
237 // to recycle each hashtable that needs to process a given query
238 void QueryPlanDagExtractor::visit(const RelAlgNode* parent_node,
239  const RelAlgNode* child_node) {
240  if (!child_node || contain_not_supported_rel_node_) {
241  return;
242  }
243  bool child_visited = false;
244  if (analyze_join_ops_) {
245  if (auto left_deep_joins = dynamic_cast<const RelLeftDeepInnerJoin*>(child_node)) {
246  if (left_deep_tree_infos_.empty()) {
247  // we should have left_deep_tree_info for input left deep tree node
248  VLOG(1) << "Stop DAG extraction (Detect non-supported join pattern)";
250  return;
251  }
252  auto true_parent_node = parent_node;
253  std::shared_ptr<RelFilter> dummy_filter_node{nullptr};
254  const auto inner_cond = left_deep_joins->getInnerCondition();
255  // we analyze left-deep join tree as per-join qual level, so
256  // when visiting RelLeftDeepInnerJoin we decompose it into individual join node
257  // (RelTranslatedJoin).
258  // Thus, this RelLeftDeepInnerJoin object itself is useless when recycling data
259  // but sometimes it has inner condition that has to consider so we add an extra
260  // RelFilter node containing the condition to keep query semantic correctly
261  if (auto cond = dynamic_cast<const RexOperator*>(inner_cond)) {
262  RexDeepCopyVisitor copier;
263  auto copied_inner_cond = copier.visit(cond);
264  dummy_filter_node = std::make_shared<RelFilter>(copied_inner_cond);
265  register_and_visit(parent_node, dummy_filter_node.get());
266  true_parent_node = dummy_filter_node.get();
267  }
268  handleLeftDeepJoinTree(true_parent_node, left_deep_joins);
269  child_visited = true;
270  } else if (auto translated_join_node =
271  dynamic_cast<const RelTranslatedJoin*>(child_node)) {
272  handleTranslatedJoin(parent_node, translated_join_node);
273  child_visited = true;
274  }
275  }
276  if (!child_visited) {
277  register_and_visit(parent_node, child_node);
278  }
279 }
280 
282  const RelAlgNode* parent_node,
283  const RelTranslatedJoin* rel_trans_join) {
284  // when left-deep tree has multiple joins this rel_trans_join can be revisited
285  // but we need to mark the child query plan to accurately catch the query plan dag
286  // here we do not create new dag id since all rel nodes are visited already
287  CHECK(parent_node);
288  CHECK(rel_trans_join);
289 
290  auto res = global_dag_.addNodeIfAbsent(rel_trans_join);
291  if (!validateNodeId(rel_trans_join, res) ||
292  !registerNodeToDagCache(parent_node, rel_trans_join, res)) {
293  return;
294  }
295 
296  // To extract an access path (query plan DAG) for hashtable is to use a difference of
297  // two query plan DAGs 1) query plan DAG after visiting RHS node and 2) query plan DAG
298  // after visiting LHS node so by comparing 1) and 2) we can extract which query plan DAG
299  // is necessary to project join cols that are used to build a hashtable and we use it as
300  // hashtable access path
301 
302  // this lambda function deals with the case of recycled query resultset
303  // specifically, we can avoid unnecessary visiting of child tree(s) when we already have
304  // the extracted query plan DAG for the given child rel node
305  // instead, we just fill the node id vector (a sequence of node ids we visited) by using
306  // the dag of the child node
307  auto fill_node_ids_to_dag_vec = [&](const std::string& node_ids) {
308  auto node_ids_vec = split(node_ids, "|");
309  // the last elem is an empty one
310  std::for_each(node_ids_vec.begin(),
311  std::prev(node_ids_vec.end()),
312  [&](const std::string& node_id) { extracted_dag_.push_back(node_id); });
313  };
314  QueryPlanDAG current_plan_dag, after_rhs_visited, after_lhs_visited;
315  current_plan_dag = getExtractedQueryPlanDagStr();
316  auto rhs_node = rel_trans_join->getRHS();
317  std::unordered_set<size_t> rhs_input_keys, lhs_input_keys;
318  if (rhs_node) {
319  if (rhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
320  visit(rel_trans_join, rhs_node);
321  } else {
322  fill_node_ids_to_dag_vec(rhs_node->getQueryPlanDag());
323  }
324  after_rhs_visited = getExtractedQueryPlanDagStr();
325  addTableIdToNodeLink({0, int32_t(rhs_node->getId())}, rhs_node);
326  rhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(rhs_node);
327  }
328  auto lhs_node = rel_trans_join->getLHS();
329  if (rel_trans_join->getLHS()) {
330  if (lhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
331  visit(rel_trans_join, lhs_node);
332  } else {
333  fill_node_ids_to_dag_vec(lhs_node->getQueryPlanDag());
334  }
335  after_lhs_visited = getExtractedQueryPlanDagStr();
336  addTableIdToNodeLink({0, int32_t(lhs_node->getId())}, lhs_node);
337  lhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(lhs_node);
338  }
339  if (isEmptyQueryPlanDag(after_lhs_visited) || isEmptyQueryPlanDag(after_rhs_visited)) {
340  VLOG(1) << "Stop DAG extraction (Detect invalid query plan dag of join col(s))";
342  return;
343  }
344  // after visiting new node, we have added node id(s) which can be used as an access path
345  // so, we extract that node id(s) by splitting the new plan dag by the current plan dag
346  auto outer_table_identifier = split(after_rhs_visited, current_plan_dag)[1];
347  auto hash_table_identfier = split(after_lhs_visited, after_rhs_visited)[1];
348  auto join_qual_info = EMPTY_HASHED_PLAN_DAG_KEY;
349  if (!rel_trans_join->isNestedLoopQual()) {
350  auto inner_join_cols = rel_trans_join->getJoinCols(true);
351  auto inner_join_col_info =
352  global_dag_.translateColVarsToInfoHash(inner_join_cols, false);
353  boost::hash_combine(join_qual_info, inner_join_col_info);
354  auto outer_join_cols = rel_trans_join->getJoinCols(false);
355  auto outer_join_col_info =
356  global_dag_.translateColVarsToInfoHash(outer_join_cols, false);
357  boost::hash_combine(join_qual_info, outer_join_col_info);
358  // collect table keys from both rhs and lhs side
359  std::unordered_set<size_t> collected_table_keys;
360  collected_table_keys.insert(lhs_input_keys.begin(), lhs_input_keys.end());
361  if (!inner_join_cols.empty() &&
362  inner_join_cols[0]->get_type_info().is_dict_encoded_type()) {
363  collected_table_keys.insert(rhs_input_keys.begin(), rhs_input_keys.end());
364  }
365 
366  auto it = hash_table_query_plan_dag_.find(join_qual_info);
367  if (it == hash_table_query_plan_dag_.end()) {
368  VLOG(2) << "Add hashtable access path"
369  << ", inner join col info: " << inner_join_col_info
370  << " (access path: " << hash_table_identfier << ")"
371  << ", outer join col info: " << outer_join_col_info
372  << " (access path: " << outer_table_identifier << ")";
374  join_qual_info,
375  HashTableBuildDag(inner_join_col_info,
376  outer_join_col_info,
377  boost::hash_value(hash_table_identfier),
378  boost::hash_value(outer_table_identifier),
379  std::move(collected_table_keys)));
380  }
381  } else {
382  VLOG(2) << "Add loop join access path, for LHS: " << outer_table_identifier
383  << ", for RHS: " << hash_table_identfier << "\n";
384  }
385 }
386 
387 namespace {
388 struct OpInfo {
389  std::string type_;
390  std::string qualifier_;
391  std::string typeinfo_;
392 };
393 
394 // Return the input index whose tableId matches the given tbl_id.
395 // If none then return -1.
396 int get_input_idx(const RelLeftDeepInnerJoin* rel_left_deep_join,
397  const shared::TableKey& table_key) {
398  for (size_t input_idx = 0; input_idx < rel_left_deep_join->inputCount(); ++input_idx) {
399  auto const input_node = rel_left_deep_join->getInput(input_idx);
400  auto const scan_node = dynamic_cast<const RelScan*>(input_node);
401  shared::TableKey target_table_key{0, 0};
402  if (scan_node) {
403  target_table_key.db_id = scan_node->getCatalog().getDatabaseId();
404  target_table_key.table_id = scan_node->getTableDescriptor()->tableId;
405  } else {
406  target_table_key.table_id = -1 * input_node->getId(); // temporary table
407  }
408  if (target_table_key == table_key) {
409  return input_idx;
410  }
411  }
412  return -1;
413 }
414 } // namespace
415 
416 std::vector<Analyzer::ColumnVar const*> QueryPlanDagExtractor::getColVar(
417  Analyzer::Expr const* col_info) {
418  if (auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(col_info)) {
419  return {col_var};
420  } else {
421  return global_dag_.collectColVars(col_info);
422  }
423 }
424 
425 // we coalesce join quals and related filter conditions into a single RelLeftDeepInnerJoin
426 // node when converting calcite AST to logical query plan, but to recycle hashtable(s) we
427 // need to know access path of each hashtable, so we disassemble it into a set of join
428 // qual and collect hashtable info from there
430  const RelAlgNode* parent_node,
431  const RelLeftDeepInnerJoin* rel_left_deep_join) {
432  CHECK(parent_node);
433  CHECK(rel_left_deep_join);
434 
435  // RelLeftDeepInnerJoin node does not need to be added to DAG since
436  // RelLeftDeepInnerJoin is a logical node and
437  // we add all join nodes of this `RelLeftDeepInnerJoin`
438  // thus, the below `left_deep_tree_id` is not the same as its DAG id
439  // (we do not have a DAG node id for this `RelLeftDeepInnerJoin`)
440  auto left_deep_tree_id = rel_left_deep_join->getId();
441  auto left_deep_join_info = getPerNestingJoinQualInfo(left_deep_tree_id);
442  if (!left_deep_join_info) {
443  // we should have left_deep_tree_info for input left deep tree node
444  VLOG(1) << "Stop DAG extraction (Detect Non-supported join pattern)";
446  return;
447  }
448 
449  // gathering per-join qual level info to correctly recycle each hashtable (and necessary
450  // info) that we created
451  // Here we visit each join qual in bottom-up manner to distinct DAGs among join quals
452  // Let say we have three joins- #1: R.a = S.a / #2: R.b = T.b / #3. R.c = X.c
453  // When we start to visit #1, we cannot determine outer col's dag clearly
454  // since we need to visit #2 and #3 due to the current visitor's behavior
455  // In contrast, when starting from #3, we clearly retrieve both inputs' dag
456  // by skipping already visited nodes
457  // So when we visit #2 after visiting #3, we can skip to consider nodes beloning to
458  // qual #3 so we clearly retrieve DAG only corresponding to #2's
459  for (size_t level_idx = 0; level_idx < left_deep_join_info->size(); ++level_idx) {
460  const auto& current_level_join_conditions = left_deep_join_info->at(level_idx);
461  std::vector<const Analyzer::ColumnVar*> inner_join_cols;
462  std::vector<const Analyzer::ColumnVar*> outer_join_cols;
463  std::vector<std::shared_ptr<const Analyzer::Expr>> filter_ops;
464  int inner_input_idx{-1};
465  int outer_input_idx{-1};
466  OpInfo op_info{"UNDEFINED", "UNDEFINED", "UNDEFINED"};
467  std::unordered_set<std::string> visited_filter_ops;
468 
469  // we first check whether this qual needs nested loop
470  const bool found_eq_join_qual =
471  current_level_join_conditions.type != JoinType::INVALID &&
472  boost::algorithm::any_of(current_level_join_conditions.quals, IsEquivBinOp{});
473  const bool nested_loop = !found_eq_join_qual;
474  const bool is_left_join = current_level_join_conditions.type == JoinType::LEFT;
475  RexScalar const* const outer_join_cond =
476  is_left_join ? rel_left_deep_join->getOuterCondition(level_idx + 1) : nullptr;
477 
478  // collect join col, filter ops, and detailed info of join operation, i.e., op_type,
479  // qualifier, ...
480  // when we have more than one quals, i.e., current_level_join_conditions.quals.size()
481  // > 1, we consider the first qual is used as hashtable building
482  bool is_geo_join{false};
483  for (const auto& join_qual : current_level_join_conditions.quals) {
484  auto qual_bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(join_qual);
485  auto join_qual_str = ::toString(join_qual);
486  if (qual_bin_oper) {
487  is_geo_join = qual_bin_oper->is_bbox_intersect_oper();
488  if (join_qual == current_level_join_conditions.quals.front()) {
489  // set op_info based on the first qual
490  op_info = OpInfo{::toString(qual_bin_oper->get_optype()),
491  ::toString(qual_bin_oper->get_qualifier()),
492  qual_bin_oper->get_type_info().to_string()};
493  }
494  for (auto& col_pair_info : normalizeColumnsPair(qual_bin_oper.get())) {
495  // even though we fall back to loop join when left outer join has
496  // non-eq comparator so we additionally check it here to classify the qual
497  // properly todo(yoonmin): relax left join case once we have an improved logic
498  // for this
499  if (!found_eq_join_qual && (is_left_join || col_pair_info.loop_join_qual)) {
500  // we only consider that cur level's join is loop join if we have no
501  // equi-join qual and both lhs and rhs are not col_var,
502  // i.e., lhs: col_var / rhs: constant / bin_op: kGE
503  // also, currently we fallback to loop-join when left outer join
504  // has non-eq join qual
505  if (visited_filter_ops.insert(join_qual_str).second) {
506  filter_ops.push_back(join_qual);
507  }
508  } else {
509  // a qual_bin_oper becomes an inner join qual iff both lhs and rhs are col_var
510  // otherwise it becomes a filter qual
511  bool found_valid_col_vars = false;
512  std::vector<const Analyzer::ColumnVar*> lhs_cvs, rhs_cvs;
513  if (col_pair_info.inner_outer.first && col_pair_info.inner_outer.second) {
514  // we need to modify lhs and rhs if range_oper is detected
515  if (auto range_oper = dynamic_cast<const Analyzer::RangeOper*>(
516  col_pair_info.inner_outer.second)) {
517  lhs_cvs = getColVar(range_oper->get_left_operand());
518  rhs_cvs = getColVar(col_pair_info.inner_outer.first);
519  is_geo_join = true;
520  } else {
521  // this qual is valid and used for typical hash join op
522  lhs_cvs = getColVar(col_pair_info.inner_outer.first);
523  rhs_cvs = getColVar(col_pair_info.inner_outer.second);
524  }
525  if (!lhs_cvs.empty() && !rhs_cvs.empty()) {
526  found_valid_col_vars = true;
527  if (inner_input_idx == -1) {
528  inner_input_idx =
529  get_input_idx(rel_left_deep_join, lhs_cvs.front()->getTableKey());
530  }
531  if (outer_input_idx == -1) {
532  outer_input_idx =
533  get_input_idx(rel_left_deep_join, rhs_cvs.front()->getTableKey());
534  }
535  std::copy(
536  lhs_cvs.begin(), lhs_cvs.end(), std::back_inserter(inner_join_cols));
537  std::copy(
538  rhs_cvs.begin(), rhs_cvs.end(), std::back_inserter(outer_join_cols));
539  }
540  }
541  if (!found_valid_col_vars &&
542  visited_filter_ops.insert(join_qual_str).second) {
543  filter_ops.push_back(join_qual);
544  }
545  }
546  }
547  } else {
548  if (visited_filter_ops.insert(join_qual_str).second) {
549  filter_ops.push_back(join_qual);
550  }
551  }
552  }
553  if (!is_geo_join && (inner_join_cols.size() != outer_join_cols.size())) {
554  VLOG(1) << "Stop DAG extraction (Detect inner/outer col mismatch)";
556  return;
557  }
558 
559  // create RelTranslatedJoin based on the collected info from the join qual
560  // there are total seven types of join query pattern
561  // 1. INNER HASH ONLY
562  // 2. INNER LOOP ONLY (!)
563  // 3. LEFT LOOP ONLY
564  // 4. INNER HASH + INNER LOOP (!)
565  // 5. LEFT LOOP + INNER HASH
566  // 6. LEFT LOOP + INNER LOOP (!)
567  // 7. LEFT LOOP + INNER HASH + INNER LOOP (!)
568  // here, if a query contains INNER LOOP join, its qual has nothing
569  // so, some patterns do not have bin_oper at the specific join nest level
570  // if we find INNER LOOP, corresponding RelTranslatedJoin has nulled LHS and RHS
571  // to mark it as loop join
572  const RelAlgNode* lhs;
573  const RelAlgNode* rhs;
574  if (inner_input_idx != -1 && outer_input_idx != -1) {
575  lhs = rel_left_deep_join->getInput(inner_input_idx);
576  rhs = rel_left_deep_join->getInput(outer_input_idx);
577  } else {
578  if (level_idx == 0) {
579  lhs = rel_left_deep_join->getInput(0);
580  rhs = rel_left_deep_join->getInput(1);
581  } else {
582  lhs = translated_join_info_->rbegin()->get();
583  rhs = rel_left_deep_join->getInput(level_idx + 1);
584  }
585  }
586  CHECK(lhs);
587  CHECK(rhs);
588  auto cur_translated_join_node =
589  std::make_shared<RelTranslatedJoin>(lhs,
590  rhs,
591  inner_join_cols,
592  outer_join_cols,
593  filter_ops,
594  outer_join_cond,
595  nested_loop,
596  current_level_join_conditions.type,
597  op_info.type_,
598  op_info.qualifier_,
599  op_info.typeinfo_);
600  CHECK(cur_translated_join_node);
601  handleTranslatedJoin(parent_node, cur_translated_join_node.get());
602  translated_join_info_->push_back(std::move(cur_translated_join_node));
603  }
604 }
605 
607  SortInfo const& sort_info) {
608  boost::hash_combine(cache_key, sort_info.hashLimit());
609  return cache_key;
610 }
bool isNestedLoopQual() const
Definition: RelAlgDag.h:1843
#define CHECK_EQ(x, y)
Definition: Logger.h:301
QueryPlanDagCache & global_dag_
TableIdToNodeMap getTableIdToNodeMap()
static ExtractedQueryPlanDag extractQueryPlanDag(const RelAlgNode *top_node, Executor *executor)
static ExtractedJoinInfo extractJoinInfo(const RelAlgNode *top_node, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos, Executor *executor)
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:72
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
static size_t applyLimitClauseToCacheKey(size_t cache_key, SortInfo const &sort_info)
std::vector< InnerOuterOrLoopQual > normalizeColumnsPair(const Analyzer::BinOper *condition)
const RexScalar * getOuterCondition(const size_t nesting_level) const
void setQueryPlanDag(const std::string &extracted_query_plan_dag) const
Definition: RelAlgDag.h:854
std::string QueryPlanDAG
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
const Expr * get_right_operand() const
Definition: Analyzer.h:456
void setRelNodeDagId(const size_t id) const
Definition: RelAlgDag.h:919
#define UNREACHABLE()
Definition: Logger.h:338
static std::pair< bool, std::string > hasNonSupportedNodeInDag(const RelAlgNode *rel_alg_node)
std::vector< std::string > extracted_dag_
std::vector< const Analyzer::ColumnVar * > getJoinCols(bool lhs) const
Definition: RelAlgDag.h:1837
static std::unordered_set< size_t > getScanNodeTableKey(RelAlgNode const *rel_alg_node)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
size_t getQueryPlanDagHash() const
Definition: RelAlgDag.h:863
void register_and_visit(const RelAlgNode *parent_node, const RelAlgNode *child_node)
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
bool g_is_test_env
Definition: Execute.cpp:153
const RelAlgNode * getRHS() const
Definition: RelAlgDag.h:1808
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
unsigned getId() const
Definition: RelAlgDag.h:869
size_t translateColVarsToInfoHash(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
bool isEmptyQueryPlanDag(const std::string &dag)
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
bool is_bbox_intersect_oper() const
Definition: Analyzer.h:453
std::unique_lock< T > unique_lock
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:877
HashTableBuildDagMap getHashTableBuildDag()
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
static void extractQueryPlanDagImpl(const RelAlgNode *top_npde, QueryPlanDagExtractor &dag_extractor)
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos_
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
HashTableBuildDagMap hash_table_query_plan_dag_
void handleLeftDeepJoinTree(const RelAlgNode *, const RelLeftDeepInnerJoin *)
const RelAlgNode * getLHS() const
Definition: RelAlgDag.h:1807
void visit(const RelAlgNode *, const RelAlgNode *)
std::shared_ptr< TranslatedJoinInfo > translated_join_info_
size_t hashLimit() const
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
constexpr char const * EMPTY_QUERY_PLAN
#define CHECK(condition)
Definition: Logger.h:291
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_bbox_intersect=false)
Definition: HashJoin.cpp:822
const Expr * get_left_operand() const
Definition: Analyzer.h:455
std::string getExtractedQueryPlanDagStr(size_t start_pos=0)
std::vector< Analyzer::ColumnVar const * > getColVar(const Analyzer::Expr *col_info)
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
const size_t inputCount() const
Definition: RelAlgDag.h:875
size_t getRelNodeDagId() const
Definition: RelAlgDag.h:921
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:973
#define VLOG(n)
Definition: Logger.h:388
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)
const JoinQualsPerNestingLevel * getPerNestingJoinQualInfo(std::optional< unsigned > left_deep_join_tree_id)
bool operator()(std::shared_ptr< Analyzer::Expr > const &qual)
int get_input_idx(RelAlgExecutionUnit const &ra_exe_unit, const shared::TableKey &outer_table_key)
void addTableIdToNodeLink(const shared::TableKey &table_id, const RelAlgNode *node)