OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryPlanDagExtractor Class Reference

#include <QueryPlanDagExtractor.h>

+ Collaboration diagram for QueryPlanDagExtractor:

Public Member Functions

 QueryPlanDagExtractor (QueryPlanDagCache &global_dag, std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos, Executor *executor, bool analyze_join_ops)
 
HashTableBuildDagMap getHashTableBuildDag ()
 
const JoinQualsPerNestingLevelgetPerNestingJoinQualInfo (std::optional< unsigned > left_deep_join_tree_id)
 
bool isDagExtractionAvailable ()
 
std::string getExtractedQueryPlanDagStr (size_t start_pos=0)
 
std::vector< InnerOuterOrLoopQualnormalizeColumnsPair (const Analyzer::BinOper *condition)
 
bool isEmptyQueryPlanDag (const std::string &dag)
 
TableIdToNodeMap getTableIdToNodeMap ()
 
void addTableIdToNodeLink (const shared::TableKey &table_id, const RelAlgNode *node)
 
void clearInternalStatus ()
 

Static Public Member Functions

static ExtractedJoinInfo extractJoinInfo (const RelAlgNode *top_node, std::optional< unsigned > left_deep_tree_id, std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos, Executor *executor)
 
static ExtractedQueryPlanDag extractQueryPlanDag (const RelAlgNode *top_node, Executor *executor)
 
static size_t applyLimitClauseToCacheKey (size_t cache_key, SortInfo const &sort_info)
 

Private Member Functions

void visit (const RelAlgNode *, const RelAlgNode *)
 
std::vector
< Analyzer::ColumnVar const * > 
getColVar (const Analyzer::Expr *col_info)
 
void register_and_visit (const RelAlgNode *parent_node, const RelAlgNode *child_node)
 
void handleLeftDeepJoinTree (const RelAlgNode *, const RelLeftDeepInnerJoin *)
 
void handleTranslatedJoin (const RelAlgNode *, const RelTranslatedJoin *)
 
bool validateNodeId (const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
 
bool registerNodeToDagCache (const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
 

Static Private Member Functions

static void extractQueryPlanDagImpl (const RelAlgNode *top_npde, QueryPlanDagExtractor &dag_extractor)
 

Private Attributes

QueryPlanDagCacheglobal_dag_
 
bool contain_not_supported_rel_node_
 
std::unordered_map< unsigned,
JoinQualsPerNestingLevel
left_deep_tree_infos_
 
Executorexecutor_
 
bool analyze_join_ops_
 
std::shared_ptr
< TranslatedJoinInfo
translated_join_info_
 
HashTableBuildDagMap hash_table_query_plan_dag_
 
TableIdToNodeMap table_id_to_node_map_
 
std::vector< std::string > extracted_dag_
 

Detailed Description

Definition at line 52 of file QueryPlanDagExtractor.h.

Constructor & Destructor Documentation

QueryPlanDagExtractor::QueryPlanDagExtractor ( QueryPlanDagCache global_dag,
std::unordered_map< unsigned, JoinQualsPerNestingLevel left_deep_tree_infos,
Executor executor,
bool  analyze_join_ops 
)
inline

Definition at line 54 of file QueryPlanDagExtractor.h.

References analyze_join_ops_, and translated_join_info_.

59  : global_dag_(global_dag)
61  , left_deep_tree_infos_(left_deep_tree_infos)
62  , executor_(executor)
63  , analyze_join_ops_(analyze_join_ops) {
64  if (analyze_join_ops_) {
65  translated_join_info_ = std::make_shared<TranslatedJoinInfo>();
66  }
67  }
QueryPlanDagCache & global_dag_
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos_
std::shared_ptr< TranslatedJoinInfo > translated_join_info_

Member Function Documentation

void QueryPlanDagExtractor::addTableIdToNodeLink ( const shared::TableKey table_id,
const RelAlgNode node 
)
inline

Definition at line 104 of file QueryPlanDagExtractor.h.

References table_id_to_node_map_.

Referenced by handleTranslatedJoin().

104  {
105  auto it = table_id_to_node_map_.find(table_id);
106  if (it == table_id_to_node_map_.end()) {
107  table_id_to_node_map_.emplace(table_id, node);
108  }
109  }
TableIdToNodeMap table_id_to_node_map_

+ Here is the caller graph for this function:

size_t QueryPlanDagExtractor::applyLimitClauseToCacheKey ( size_t  cache_key,
SortInfo const &  sort_info 
)
static

Definition at line 606 of file QueryPlanDagExtractor.cpp.

References SortInfo::hashLimit().

Referenced by RelAlgExecutor::createAggregateWorkUnit(), RelAlgExecutor::createCompoundWorkUnit(), RelAlgExecutor::createFilterWorkUnit(), RelAlgExecutor::createProjectWorkUnit(), RelAlgExecutor::createWindowFunctionContext(), RelAlgExecutor::executeRelAlgStep(), RelAlgExecutor::executeSort(), and RaExecutionSequence::next().

607  {
608  boost::hash_combine(cache_key, sort_info.hashLimit());
609  return cache_key;
610 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::clearInternalStatus ( )
inline

Definition at line 111 of file QueryPlanDagExtractor.h.

References contain_not_supported_rel_node_, extracted_dag_, and table_id_to_node_map_.

Referenced by handleLeftDeepJoinTree(), handleTranslatedJoin(), validateNodeId(), and visit().

111  {
113  extracted_dag_.clear();
114  table_id_to_node_map_.clear();
115  }
TableIdToNodeMap table_id_to_node_map_
std::vector< std::string > extracted_dag_

+ Here is the caller graph for this function:

ExtractedJoinInfo QueryPlanDagExtractor::extractJoinInfo ( const RelAlgNode top_node,
std::optional< unsigned >  left_deep_tree_id,
std::unordered_map< unsigned, JoinQualsPerNestingLevel left_deep_tree_infos,
Executor executor 
)
static

Definition at line 109 of file QueryPlanDagExtractor.cpp.

References EMPTY_HASHED_PLAN_DAG_KEY, extractQueryPlanDagImpl(), getHashTableBuildDag(), RelAlgNode::getQueryPlanDagHash(), and getTableIdToNodeMap().

Referenced by RelAlgExecutor::createAggregateWorkUnit(), RelAlgExecutor::createCompoundWorkUnit(), RelAlgExecutor::createFilterWorkUnit(), and RelAlgExecutor::createProjectWorkUnit().

113  {
114  // we already extract a query plan dag for the input query which is stored at top_node
115  if (top_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
116  return {};
117  }
118  heavyai::unique_lock<heavyai::shared_mutex> lock(executor->getDataRecyclerLock());
119  auto& cached_dag = executor->getQueryPlanDagCache();
120  QueryPlanDagExtractor dag_extractor(cached_dag, left_deep_tree_infos, executor, true);
121  extractQueryPlanDagImpl(top_node, dag_extractor);
122  return {dag_extractor.getHashTableBuildDag(), dag_extractor.getTableIdToNodeMap()};
123 }
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
size_t getQueryPlanDagHash() const
Definition: RelAlgDag.h:863
std::unique_lock< T > unique_lock
static void extractQueryPlanDagImpl(const RelAlgNode *top_npde, QueryPlanDagExtractor &dag_extractor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ExtractedQueryPlanDag QueryPlanDagExtractor::extractQueryPlanDag ( const RelAlgNode top_node,
Executor executor 
)
static

Definition at line 85 of file QueryPlanDagExtractor.cpp.

References EMPTY_QUERY_PLAN, extractQueryPlanDagImpl(), QueryPlanDagChecker::hasNonSupportedNodeInDag(), RelAlgNode::setQueryPlanDag(), and VLOG.

Referenced by QueryRunner::QueryRunner::extractQueryPlanDag(), and RaExecutionSequence::next().

87  {
88  auto dag_checker_res = QueryPlanDagChecker::hasNonSupportedNodeInDag(top_node);
89  if (dag_checker_res.first) {
90  VLOG(1) << "Stop DAG extraction (" << dag_checker_res.second << ")";
91  return {EMPTY_QUERY_PLAN, true};
92  }
93  heavyai::unique_lock<heavyai::shared_mutex> lock(executor->getDataRecyclerLock());
94  auto& cached_dag = executor->getQueryPlanDagCache();
95  QueryPlanDagExtractor dag_extractor(cached_dag, {}, executor, false);
96  extractQueryPlanDagImpl(top_node, dag_extractor);
97  auto extracted_query_plan_dag = dag_extractor.getExtractedQueryPlanDagStr();
98  top_node->setQueryPlanDag(extracted_query_plan_dag);
99  if (auto sort_node = dynamic_cast<RelSort const*>(top_node)) {
100  // we evaluate sort node based on the resultset of its child node
101  // so we need to mark the extracted query plan of the child node
102  // for the resultset recycling
103  auto child_dag = dag_extractor.getExtractedQueryPlanDagStr(1);
104  sort_node->getInput(0)->setQueryPlanDag(child_dag);
105  }
106  return {extracted_query_plan_dag, dag_extractor.isDagExtractionAvailable()};
107 }
void setQueryPlanDag(const std::string &extracted_query_plan_dag) const
Definition: RelAlgDag.h:854
static std::pair< bool, std::string > hasNonSupportedNodeInDag(const RelAlgNode *rel_alg_node)
std::unique_lock< T > unique_lock
static void extractQueryPlanDagImpl(const RelAlgNode *top_npde, QueryPlanDagExtractor &dag_extractor)
constexpr char const * EMPTY_QUERY_PLAN
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::extractQueryPlanDagImpl ( const RelAlgNode top_npde,
QueryPlanDagExtractor dag_extractor 
)
staticprivate

Definition at line 125 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::addNodeIfAbsent(), CHECK, contain_not_supported_rel_node_, executor_, extracted_dag_, g_is_test_env, getExtractedQueryPlanDagStr(), RelAlgNode::getInput(), global_dag_, RelAlgNode::inputCount(), isDagExtractionAvailable(), run_benchmark_import::res, RelAlgNode::setRelNodeDagId(), toString(), UNREACHABLE, visit(), and VLOG.

Referenced by extractJoinInfo(), and extractQueryPlanDag().

127  {
128  // add the root node of this query plan DAG
129  auto res = dag_extractor.global_dag_.addNodeIfAbsent(top_node);
130  if (!res) {
131  VLOG(1) << "Stop DAG extraction (Query plan dag cache reaches the maximum capacity)";
132  dag_extractor.contain_not_supported_rel_node_ = true;
133  return;
134  }
135  CHECK(res.has_value());
136  top_node->setRelNodeDagId(res.value());
137  dag_extractor.extracted_dag_.push_back(::toString(res.value()));
138 
139  // visit child node if necessary
140  if (auto table_func_node = dynamic_cast<const RelTableFunction*>(top_node)) {
141  for (size_t i = 0; i < table_func_node->inputCount(); ++i) {
142  dag_extractor.visit(table_func_node, table_func_node->getInput(i));
143  }
144  } else {
145  auto num_child_node = top_node->inputCount();
146  switch (num_child_node) {
147  case 1: // unary op
148  dag_extractor.visit(top_node, top_node->getInput(0));
149  break;
150  case 2: // binary op
151  if (auto trans_join_node = dynamic_cast<const RelTranslatedJoin*>(top_node)) {
152  dag_extractor.visit(trans_join_node, trans_join_node->getLHS());
153  dag_extractor.visit(trans_join_node, trans_join_node->getRHS());
154  break;
155  }
156  VLOG(1) << "Visit an invalid rel node while extracting query plan DAG: "
157  << ::toString(top_node);
158  return;
159  case 0: // leaf node
160  break;
161  default:
162  // since we replace RelLeftDeepJoin as a set of RelTranslatedJoin
163  // which is a binary op, # child nodes for every rel node should be <= 2
164  UNREACHABLE();
165  }
166  }
167 
168  // check whether extracted DAG is available to use
169  if (dag_extractor.extracted_dag_.empty() || dag_extractor.isDagExtractionAvailable()) {
170  dag_extractor.contain_not_supported_rel_node_ = true;
171  return;
172  }
173 
174  if (g_is_test_env) {
175  dag_extractor.executor_->registerExtractedQueryPlanDag(
176  dag_extractor.getExtractedQueryPlanDagStr());
177  }
178  return;
179 }
QueryPlanDagCache & global_dag_
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
#define UNREACHABLE()
Definition: Logger.h:338
std::vector< std::string > extracted_dag_
bool g_is_test_env
Definition: Execute.cpp:153
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
void visit(const RelAlgNode *, const RelAlgNode *)
#define CHECK(condition)
Definition: Logger.h:291
std::string getExtractedQueryPlanDagStr(size_t start_pos=0)
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< Analyzer::ColumnVar const * > QueryPlanDagExtractor::getColVar ( const Analyzer::Expr col_info)
private

Definition at line 416 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::collectColVars(), and global_dag_.

Referenced by handleLeftDeepJoinTree().

417  {
418  if (auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(col_info)) {
419  return {col_var};
420  } else {
421  return global_dag_.collectColVars(col_info);
422  }
423 }
QueryPlanDagCache & global_dag_
std::vector< const Analyzer::ColumnVar * > collectColVars(const Analyzer::Expr *target)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string QueryPlanDagExtractor::getExtractedQueryPlanDagStr ( size_t  start_pos = 0)

Definition at line 181 of file QueryPlanDagExtractor.cpp.

References EMPTY_QUERY_PLAN, and extracted_dag_.

Referenced by extractQueryPlanDagImpl(), and handleTranslatedJoin().

181  {
182  std::ostringstream oss;
183  size_t cnt = 0;
184  if (start_pos > extracted_dag_.size()) {
185  return EMPTY_QUERY_PLAN;
186  }
187  for (auto& dag_node_id : extracted_dag_) {
188  if (cnt >= start_pos) {
189  oss << dag_node_id << "|";
190  }
191  ++cnt;
192  }
193  return oss.str();
194 }
std::vector< std::string > extracted_dag_
constexpr char const * EMPTY_QUERY_PLAN

+ Here is the caller graph for this function:

HashTableBuildDagMap QueryPlanDagExtractor::getHashTableBuildDag ( )
inline

Definition at line 78 of file QueryPlanDagExtractor.h.

References hash_table_query_plan_dag_.

Referenced by extractJoinInfo().

HashTableBuildDagMap hash_table_query_plan_dag_

+ Here is the caller graph for this function:

const JoinQualsPerNestingLevel* QueryPlanDagExtractor::getPerNestingJoinQualInfo ( std::optional< unsigned >  left_deep_join_tree_id)
inline

Definition at line 80 of file QueryPlanDagExtractor.h.

References CHECK, and left_deep_tree_infos_.

Referenced by handleLeftDeepJoinTree().

81  {
82  if (left_deep_tree_infos_.empty() || !left_deep_join_tree_id) {
83  return nullptr;
84  }
85  CHECK(left_deep_join_tree_id.has_value());
86  auto it = left_deep_tree_infos_.find(left_deep_join_tree_id.value());
87  if (it == left_deep_tree_infos_.end()) {
88  return nullptr;
89  }
90  return &it->second;
91  }
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos_
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

TableIdToNodeMap QueryPlanDagExtractor::getTableIdToNodeMap ( )
inline

Definition at line 102 of file QueryPlanDagExtractor.h.

References table_id_to_node_map_.

Referenced by extractJoinInfo().

102 { return table_id_to_node_map_; }
TableIdToNodeMap table_id_to_node_map_

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::handleLeftDeepJoinTree ( const RelAlgNode parent_node,
const RelLeftDeepInnerJoin rel_left_deep_join 
)
private

Definition at line 429 of file QueryPlanDagExtractor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), CHECK, clearInternalStatus(), gpu_enabled::copy(), anonymous_namespace{QueryMemoryInitializer.cpp}::get_input_idx(), getColVar(), RelAlgNode::getId(), RelAlgNode::getInput(), RelLeftDeepInnerJoin::getOuterCondition(), getPerNestingJoinQualInfo(), handleTranslatedJoin(), INVALID, LEFT, normalizeColumnsPair(), toString(), translated_join_info_, and VLOG.

Referenced by visit().

431  {
432  CHECK(parent_node);
433  CHECK(rel_left_deep_join);
434 
435  // RelLeftDeepInnerJoin node does not need to be added to DAG since
436  // RelLeftDeepInnerJoin is a logical node and
437  // we add all join nodes of this `RelLeftDeepInnerJoin`
438  // thus, the below `left_deep_tree_id` is not the same as its DAG id
439  // (we do not have a DAG node id for this `RelLeftDeepInnerJoin`)
440  auto left_deep_tree_id = rel_left_deep_join->getId();
441  auto left_deep_join_info = getPerNestingJoinQualInfo(left_deep_tree_id);
442  if (!left_deep_join_info) {
443  // we should have left_deep_tree_info for input left deep tree node
444  VLOG(1) << "Stop DAG extraction (Detect Non-supported join pattern)";
446  return;
447  }
448 
449  // gathering per-join qual level info to correctly recycle each hashtable (and necessary
450  // info) that we created
451  // Here we visit each join qual in bottom-up manner to distinct DAGs among join quals
452  // Let say we have three joins- #1: R.a = S.a / #2: R.b = T.b / #3. R.c = X.c
453  // When we start to visit #1, we cannot determine outer col's dag clearly
454  // since we need to visit #2 and #3 due to the current visitor's behavior
455  // In contrast, when starting from #3, we clearly retrieve both inputs' dag
456  // by skipping already visited nodes
457  // So when we visit #2 after visiting #3, we can skip to consider nodes beloning to
458  // qual #3 so we clearly retrieve DAG only corresponding to #2's
459  for (size_t level_idx = 0; level_idx < left_deep_join_info->size(); ++level_idx) {
460  const auto& current_level_join_conditions = left_deep_join_info->at(level_idx);
461  std::vector<const Analyzer::ColumnVar*> inner_join_cols;
462  std::vector<const Analyzer::ColumnVar*> outer_join_cols;
463  std::vector<std::shared_ptr<const Analyzer::Expr>> filter_ops;
464  int inner_input_idx{-1};
465  int outer_input_idx{-1};
466  OpInfo op_info{"UNDEFINED", "UNDEFINED", "UNDEFINED"};
467  std::unordered_set<std::string> visited_filter_ops;
468 
469  // we first check whether this qual needs nested loop
470  const bool found_eq_join_qual =
471  current_level_join_conditions.type != JoinType::INVALID &&
472  boost::algorithm::any_of(current_level_join_conditions.quals, IsEquivBinOp{});
473  const bool nested_loop = !found_eq_join_qual;
474  const bool is_left_join = current_level_join_conditions.type == JoinType::LEFT;
475  RexScalar const* const outer_join_cond =
476  is_left_join ? rel_left_deep_join->getOuterCondition(level_idx + 1) : nullptr;
477 
478  // collect join col, filter ops, and detailed info of join operation, i.e., op_type,
479  // qualifier, ...
480  // when we have more than one quals, i.e., current_level_join_conditions.quals.size()
481  // > 1, we consider the first qual is used as hashtable building
482  bool is_geo_join{false};
483  for (const auto& join_qual : current_level_join_conditions.quals) {
484  auto qual_bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(join_qual);
485  auto join_qual_str = ::toString(join_qual);
486  if (qual_bin_oper) {
487  is_geo_join = qual_bin_oper->is_bbox_intersect_oper();
488  if (join_qual == current_level_join_conditions.quals.front()) {
489  // set op_info based on the first qual
490  op_info = OpInfo{::toString(qual_bin_oper->get_optype()),
491  ::toString(qual_bin_oper->get_qualifier()),
492  qual_bin_oper->get_type_info().to_string()};
493  }
494  for (auto& col_pair_info : normalizeColumnsPair(qual_bin_oper.get())) {
495  // even though we fall back to loop join when left outer join has
496  // non-eq comparator so we additionally check it here to classify the qual
497  // properly todo(yoonmin): relax left join case once we have an improved logic
498  // for this
499  if (!found_eq_join_qual && (is_left_join || col_pair_info.loop_join_qual)) {
500  // we only consider that cur level's join is loop join if we have no
501  // equi-join qual and both lhs and rhs are not col_var,
502  // i.e., lhs: col_var / rhs: constant / bin_op: kGE
503  // also, currently we fallback to loop-join when left outer join
504  // has non-eq join qual
505  if (visited_filter_ops.insert(join_qual_str).second) {
506  filter_ops.push_back(join_qual);
507  }
508  } else {
509  // a qual_bin_oper becomes an inner join qual iff both lhs and rhs are col_var
510  // otherwise it becomes a filter qual
511  bool found_valid_col_vars = false;
512  std::vector<const Analyzer::ColumnVar*> lhs_cvs, rhs_cvs;
513  if (col_pair_info.inner_outer.first && col_pair_info.inner_outer.second) {
514  // we need to modify lhs and rhs if range_oper is detected
515  if (auto range_oper = dynamic_cast<const Analyzer::RangeOper*>(
516  col_pair_info.inner_outer.second)) {
517  lhs_cvs = getColVar(range_oper->get_left_operand());
518  rhs_cvs = getColVar(col_pair_info.inner_outer.first);
519  is_geo_join = true;
520  } else {
521  // this qual is valid and used for typical hash join op
522  lhs_cvs = getColVar(col_pair_info.inner_outer.first);
523  rhs_cvs = getColVar(col_pair_info.inner_outer.second);
524  }
525  if (!lhs_cvs.empty() && !rhs_cvs.empty()) {
526  found_valid_col_vars = true;
527  if (inner_input_idx == -1) {
528  inner_input_idx =
529  get_input_idx(rel_left_deep_join, lhs_cvs.front()->getTableKey());
530  }
531  if (outer_input_idx == -1) {
532  outer_input_idx =
533  get_input_idx(rel_left_deep_join, rhs_cvs.front()->getTableKey());
534  }
535  std::copy(
536  lhs_cvs.begin(), lhs_cvs.end(), std::back_inserter(inner_join_cols));
537  std::copy(
538  rhs_cvs.begin(), rhs_cvs.end(), std::back_inserter(outer_join_cols));
539  }
540  }
541  if (!found_valid_col_vars &&
542  visited_filter_ops.insert(join_qual_str).second) {
543  filter_ops.push_back(join_qual);
544  }
545  }
546  }
547  } else {
548  if (visited_filter_ops.insert(join_qual_str).second) {
549  filter_ops.push_back(join_qual);
550  }
551  }
552  }
553  if (!is_geo_join && (inner_join_cols.size() != outer_join_cols.size())) {
554  VLOG(1) << "Stop DAG extraction (Detect inner/outer col mismatch)";
556  return;
557  }
558 
559  // create RelTranslatedJoin based on the collected info from the join qual
560  // there are total seven types of join query pattern
561  // 1. INNER HASH ONLY
562  // 2. INNER LOOP ONLY (!)
563  // 3. LEFT LOOP ONLY
564  // 4. INNER HASH + INNER LOOP (!)
565  // 5. LEFT LOOP + INNER HASH
566  // 6. LEFT LOOP + INNER LOOP (!)
567  // 7. LEFT LOOP + INNER HASH + INNER LOOP (!)
568  // here, if a query contains INNER LOOP join, its qual has nothing
569  // so, some patterns do not have bin_oper at the specific join nest level
570  // if we find INNER LOOP, corresponding RelTranslatedJoin has nulled LHS and RHS
571  // to mark it as loop join
572  const RelAlgNode* lhs;
573  const RelAlgNode* rhs;
574  if (inner_input_idx != -1 && outer_input_idx != -1) {
575  lhs = rel_left_deep_join->getInput(inner_input_idx);
576  rhs = rel_left_deep_join->getInput(outer_input_idx);
577  } else {
578  if (level_idx == 0) {
579  lhs = rel_left_deep_join->getInput(0);
580  rhs = rel_left_deep_join->getInput(1);
581  } else {
582  lhs = translated_join_info_->rbegin()->get();
583  rhs = rel_left_deep_join->getInput(level_idx + 1);
584  }
585  }
586  CHECK(lhs);
587  CHECK(rhs);
588  auto cur_translated_join_node =
589  std::make_shared<RelTranslatedJoin>(lhs,
590  rhs,
591  inner_join_cols,
592  outer_join_cols,
593  filter_ops,
594  outer_join_cond,
595  nested_loop,
596  current_level_join_conditions.type,
597  op_info.type_,
598  op_info.qualifier_,
599  op_info.typeinfo_);
600  CHECK(cur_translated_join_node);
601  handleTranslatedJoin(parent_node, cur_translated_join_node.get());
602  translated_join_info_->push_back(std::move(cur_translated_join_node));
603  }
604 }
std::vector< InnerOuterOrLoopQual > normalizeColumnsPair(const Analyzer::BinOper *condition)
const RexScalar * getOuterCondition(const size_t nesting_level) const
unsigned getId() const
Definition: RelAlgDag.h:869
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:877
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
std::shared_ptr< TranslatedJoinInfo > translated_join_info_
#define CHECK(condition)
Definition: Logger.h:291
std::vector< Analyzer::ColumnVar const * > getColVar(const Analyzer::Expr *col_info)
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
#define VLOG(n)
Definition: Logger.h:388
const JoinQualsPerNestingLevel * getPerNestingJoinQualInfo(std::optional< unsigned > left_deep_join_tree_id)
int get_input_idx(RelAlgExecutionUnit const &ra_exe_unit, const shared::TableKey &outer_table_key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::handleTranslatedJoin ( const RelAlgNode parent_node,
const RelTranslatedJoin rel_trans_join 
)
private

Definition at line 281 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::addNodeIfAbsent(), addTableIdToNodeLink(), CHECK, clearInternalStatus(), EMPTY_HASHED_PLAN_DAG_KEY, extracted_dag_, getExtractedQueryPlanDagStr(), RelTranslatedJoin::getJoinCols(), RelTranslatedJoin::getLHS(), RelTranslatedJoin::getRHS(), ScanNodeTableKeyCollector::getScanNodeTableKey(), global_dag_, hash_table_query_plan_dag_, hash_value(), isEmptyQueryPlanDag(), RelTranslatedJoin::isNestedLoopQual(), anonymous_namespace{RelAlgDag.cpp}::node_id(), registerNodeToDagCache(), run_benchmark_import::res, split(), QueryPlanDagCache::translateColVarsToInfoHash(), validateNodeId(), visit(), and VLOG.

Referenced by handleLeftDeepJoinTree(), and visit().

283  {
284  // when left-deep tree has multiple joins this rel_trans_join can be revisited
285  // but we need to mark the child query plan to accurately catch the query plan dag
286  // here we do not create new dag id since all rel nodes are visited already
287  CHECK(parent_node);
288  CHECK(rel_trans_join);
289 
290  auto res = global_dag_.addNodeIfAbsent(rel_trans_join);
291  if (!validateNodeId(rel_trans_join, res) ||
292  !registerNodeToDagCache(parent_node, rel_trans_join, res)) {
293  return;
294  }
295 
296  // To extract an access path (query plan DAG) for hashtable is to use a difference of
297  // two query plan DAGs 1) query plan DAG after visiting RHS node and 2) query plan DAG
298  // after visiting LHS node so by comparing 1) and 2) we can extract which query plan DAG
299  // is necessary to project join cols that are used to build a hashtable and we use it as
300  // hashtable access path
301 
302  // this lambda function deals with the case of recycled query resultset
303  // specifically, we can avoid unnecessary visiting of child tree(s) when we already have
304  // the extracted query plan DAG for the given child rel node
305  // instead, we just fill the node id vector (a sequence of node ids we visited) by using
306  // the dag of the child node
307  auto fill_node_ids_to_dag_vec = [&](const std::string& node_ids) {
308  auto node_ids_vec = split(node_ids, "|");
309  // the last elem is an empty one
310  std::for_each(node_ids_vec.begin(),
311  std::prev(node_ids_vec.end()),
312  [&](const std::string& node_id) { extracted_dag_.push_back(node_id); });
313  };
314  QueryPlanDAG current_plan_dag, after_rhs_visited, after_lhs_visited;
315  current_plan_dag = getExtractedQueryPlanDagStr();
316  auto rhs_node = rel_trans_join->getRHS();
317  std::unordered_set<size_t> rhs_input_keys, lhs_input_keys;
318  if (rhs_node) {
319  if (rhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
320  visit(rel_trans_join, rhs_node);
321  } else {
322  fill_node_ids_to_dag_vec(rhs_node->getQueryPlanDag());
323  }
324  after_rhs_visited = getExtractedQueryPlanDagStr();
325  addTableIdToNodeLink({0, int32_t(rhs_node->getId())}, rhs_node);
326  rhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(rhs_node);
327  }
328  auto lhs_node = rel_trans_join->getLHS();
329  if (rel_trans_join->getLHS()) {
330  if (lhs_node->getQueryPlanDagHash() == EMPTY_HASHED_PLAN_DAG_KEY) {
331  visit(rel_trans_join, lhs_node);
332  } else {
333  fill_node_ids_to_dag_vec(lhs_node->getQueryPlanDag());
334  }
335  after_lhs_visited = getExtractedQueryPlanDagStr();
336  addTableIdToNodeLink({0, int32_t(lhs_node->getId())}, lhs_node);
337  lhs_input_keys = ScanNodeTableKeyCollector::getScanNodeTableKey(lhs_node);
338  }
339  if (isEmptyQueryPlanDag(after_lhs_visited) || isEmptyQueryPlanDag(after_rhs_visited)) {
340  VLOG(1) << "Stop DAG extraction (Detect invalid query plan dag of join col(s))";
342  return;
343  }
344  // after visiting new node, we have added node id(s) which can be used as an access path
345  // so, we extract that node id(s) by splitting the new plan dag by the current plan dag
346  auto outer_table_identifier = split(after_rhs_visited, current_plan_dag)[1];
347  auto hash_table_identfier = split(after_lhs_visited, after_rhs_visited)[1];
348  auto join_qual_info = EMPTY_HASHED_PLAN_DAG_KEY;
349  if (!rel_trans_join->isNestedLoopQual()) {
350  auto inner_join_cols = rel_trans_join->getJoinCols(true);
351  auto inner_join_col_info =
352  global_dag_.translateColVarsToInfoHash(inner_join_cols, false);
353  boost::hash_combine(join_qual_info, inner_join_col_info);
354  auto outer_join_cols = rel_trans_join->getJoinCols(false);
355  auto outer_join_col_info =
356  global_dag_.translateColVarsToInfoHash(outer_join_cols, false);
357  boost::hash_combine(join_qual_info, outer_join_col_info);
358  // collect table keys from both rhs and lhs side
359  std::unordered_set<size_t> collected_table_keys;
360  collected_table_keys.insert(lhs_input_keys.begin(), lhs_input_keys.end());
361  if (!inner_join_cols.empty() &&
362  inner_join_cols[0]->get_type_info().is_dict_encoded_type()) {
363  collected_table_keys.insert(rhs_input_keys.begin(), rhs_input_keys.end());
364  }
365 
366  auto it = hash_table_query_plan_dag_.find(join_qual_info);
367  if (it == hash_table_query_plan_dag_.end()) {
368  VLOG(2) << "Add hashtable access path"
369  << ", inner join col info: " << inner_join_col_info
370  << " (access path: " << hash_table_identfier << ")"
371  << ", outer join col info: " << outer_join_col_info
372  << " (access path: " << outer_table_identifier << ")";
374  join_qual_info,
375  HashTableBuildDag(inner_join_col_info,
376  outer_join_col_info,
377  boost::hash_value(hash_table_identfier),
378  boost::hash_value(outer_table_identifier),
379  std::move(collected_table_keys)));
380  }
381  } else {
382  VLOG(2) << "Add loop join access path, for LHS: " << outer_table_identifier
383  << ", for RHS: " << hash_table_identfier << "\n";
384  }
385 }
bool isNestedLoopQual() const
Definition: RelAlgDag.h:1843
QueryPlanDagCache & global_dag_
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
std::string QueryPlanDAG
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
std::vector< std::string > extracted_dag_
std::vector< const Analyzer::ColumnVar * > getJoinCols(bool lhs) const
Definition: RelAlgDag.h:1837
static std::unordered_set< size_t > getScanNodeTableKey(RelAlgNode const *rel_alg_node)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
const RelAlgNode * getRHS() const
Definition: RelAlgDag.h:1808
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
size_t translateColVarsToInfoHash(std::vector< const Analyzer::ColumnVar * > &col_vars, bool col_id_only) const
bool isEmptyQueryPlanDag(const std::string &dag)
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
HashTableBuildDagMap hash_table_query_plan_dag_
const RelAlgNode * getLHS() const
Definition: RelAlgDag.h:1807
void visit(const RelAlgNode *, const RelAlgNode *)
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
#define CHECK(condition)
Definition: Logger.h:291
std::string getExtractedQueryPlanDagStr(size_t start_pos=0)
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:973
#define VLOG(n)
Definition: Logger.h:388
void addTableIdToNodeLink(const shared::TableKey &table_id, const RelAlgNode *node)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::isDagExtractionAvailable ( )
inline

Definition at line 93 of file QueryPlanDagExtractor.h.

References contain_not_supported_rel_node_.

Referenced by extractQueryPlanDagImpl().

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::isEmptyQueryPlanDag ( const std::string &  dag)
inline

Definition at line 100 of file QueryPlanDagExtractor.h.

Referenced by handleTranslatedJoin().

100 { return dag.compare("N/A") == 0; }

+ Here is the caller graph for this function:

std::vector< InnerOuterOrLoopQual > QueryPlanDagExtractor::normalizeColumnsPair ( const Analyzer::BinOper condition)

Definition at line 38 of file QueryPlanDagExtractor.cpp.

References CHECK_EQ, executor_, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), Analyzer::BinOper::is_bbox_intersect_oper(), HashJoin::normalizeColumnPair(), and run_benchmark_import::result.

Referenced by handleLeftDeepJoinTree().

39  {
40  std::vector<InnerOuterOrLoopQual> result;
41  const auto lhs_tuple_expr =
42  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
43  const auto rhs_tuple_expr =
44  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
45 
46  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
47  auto do_normalize_inner_outer_pair = [&result, &condition](
48  const Analyzer::Expr* lhs,
49  const Analyzer::Expr* rhs,
50  const TemporaryTables* temporary_table) {
51  try {
52  auto inner_outer_pair =
54  lhs, rhs, temporary_table, condition->is_bbox_intersect_oper())
55  .first;
56  InnerOuterOrLoopQual valid_qual{
57  std::make_pair(inner_outer_pair.first, inner_outer_pair.second), false};
58  result.push_back(valid_qual);
59  } catch (HashJoinFail& e) {
60  InnerOuterOrLoopQual invalid_qual{std::make_pair(lhs, rhs), true};
61  result.push_back(invalid_qual);
62  }
63  };
64  if (lhs_tuple_expr) {
65  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
66  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
67  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
68  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
69  do_normalize_inner_outer_pair(
70  lhs_tuple[i].get(), rhs_tuple[i].get(), executor_->getTemporaryTables());
71  }
72  } else {
73  do_normalize_inner_outer_pair(condition->get_left_operand(),
74  condition->get_right_operand(),
75  executor_->getTemporaryTables());
76  }
77  return result;
78 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const Expr * get_right_operand() const
Definition: Analyzer.h:456
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
bool is_bbox_intersect_oper() const
Definition: Analyzer.h:453
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_bbox_intersect=false)
Definition: HashJoin.cpp:822
const Expr * get_left_operand() const
Definition: Analyzer.h:455

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::register_and_visit ( const RelAlgNode parent_node,
const RelAlgNode child_node 
)
private

Definition at line 221 of file QueryPlanDagExtractor.cpp.

References QueryPlanDagCache::addNodeIfAbsent(), RelAlgNode::getInput(), global_dag_, RelAlgNode::inputCount(), registerNodeToDagCache(), run_benchmark_import::res, validateNodeId(), and visit().

Referenced by visit().

222  {
223  // This function takes a responsibility for all rel nodes
224  // except 1) RelLeftDeepJoinTree and 2) RelTranslatedJoin
225  auto res = global_dag_.addNodeIfAbsent(child_node);
226  if (validateNodeId(child_node, res) &&
227  registerNodeToDagCache(parent_node, child_node, res)) {
228  for (size_t i = 0; i < child_node->inputCount(); i++) {
229  visit(child_node, child_node->getInput(i));
230  }
231  }
232 }
QueryPlanDagCache & global_dag_
std::optional< RelNodeId > addNodeIfAbsent(const RelAlgNode *)
bool validateNodeId(const RelAlgNode *node, std::optional< RelNodeId > retrieved_node_id)
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:877
bool registerNodeToDagCache(const RelAlgNode *parent_node, const RelAlgNode *child_node, std::optional< RelNodeId > retrieved_node_id)
void visit(const RelAlgNode *, const RelAlgNode *)
const size_t inputCount() const
Definition: RelAlgDag.h:875

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::registerNodeToDagCache ( const RelAlgNode parent_node,
const RelAlgNode child_node,
std::optional< RelNodeId retrieved_node_id 
)
private

Definition at line 208 of file QueryPlanDagExtractor.cpp.

References CHECK, QueryPlanDagCache::connectNodes(), extracted_dag_, RelAlgNode::getRelNodeDagId(), global_dag_, and toString().

Referenced by handleTranslatedJoin(), and register_and_visit().

211  {
212  CHECK(parent_node);
213  CHECK(child_node);
214  CHECK(retrieved_node_id.has_value());
215  auto parent_node_id = parent_node->getRelNodeDagId();
216  global_dag_.connectNodes(parent_node_id, retrieved_node_id.value());
217  extracted_dag_.push_back(::toString(retrieved_node_id.value()));
218  return true;
219 }
QueryPlanDagCache & global_dag_
void connectNodes(const RelNodeId parent_id, const RelNodeId child_id)
std::vector< std::string > extracted_dag_
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
#define CHECK(condition)
Definition: Logger.h:291
size_t getRelNodeDagId() const
Definition: RelAlgDag.h:921

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryPlanDagExtractor::validateNodeId ( const RelAlgNode node,
std::optional< RelNodeId retrieved_node_id 
)
private

Definition at line 196 of file QueryPlanDagExtractor.cpp.

References CHECK, clearInternalStatus(), RelAlgNode::setRelNodeDagId(), and VLOG.

Referenced by handleTranslatedJoin(), and register_and_visit().

197  {
198  if (!retrieved_node_id) {
199  VLOG(1) << "Stop DAG extraction (Detect an invalid dag id)";
201  return false;
202  }
203  CHECK(retrieved_node_id.has_value());
204  node->setRelNodeDagId(retrieved_node_id.value());
205  return true;
206 }
void setRelNodeDagId(const size_t id) const
Definition: RelAlgDag.h:919
#define CHECK(condition)
Definition: Logger.h:291
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryPlanDagExtractor::visit ( const RelAlgNode parent_node,
const RelAlgNode child_node 
)
private

Definition at line 238 of file QueryPlanDagExtractor.cpp.

References analyze_join_ops_, clearInternalStatus(), contain_not_supported_rel_node_, handleLeftDeepJoinTree(), handleTranslatedJoin(), left_deep_tree_infos_, register_and_visit(), RexVisitorBase< T >::visit(), and VLOG.

Referenced by extractQueryPlanDagImpl(), handleTranslatedJoin(), and register_and_visit().

239  {
240  if (!child_node || contain_not_supported_rel_node_) {
241  return;
242  }
243  bool child_visited = false;
244  if (analyze_join_ops_) {
245  if (auto left_deep_joins = dynamic_cast<const RelLeftDeepInnerJoin*>(child_node)) {
246  if (left_deep_tree_infos_.empty()) {
247  // we should have left_deep_tree_info for input left deep tree node
248  VLOG(1) << "Stop DAG extraction (Detect non-supported join pattern)";
250  return;
251  }
252  auto true_parent_node = parent_node;
253  std::shared_ptr<RelFilter> dummy_filter_node{nullptr};
254  const auto inner_cond = left_deep_joins->getInnerCondition();
255  // we analyze left-deep join tree as per-join qual level, so
256  // when visiting RelLeftDeepInnerJoin we decompose it into individual join node
257  // (RelTranslatedJoin).
258  // Thus, this RelLeftDeepInnerJoin object itself is useless when recycling data
259  // but sometimes it has inner condition that has to consider so we add an extra
260  // RelFilter node containing the condition to keep query semantic correctly
261  if (auto cond = dynamic_cast<const RexOperator*>(inner_cond)) {
262  RexDeepCopyVisitor copier;
263  auto copied_inner_cond = copier.visit(cond);
264  dummy_filter_node = std::make_shared<RelFilter>(copied_inner_cond);
265  register_and_visit(parent_node, dummy_filter_node.get());
266  true_parent_node = dummy_filter_node.get();
267  }
268  handleLeftDeepJoinTree(true_parent_node, left_deep_joins);
269  child_visited = true;
270  } else if (auto translated_join_node =
271  dynamic_cast<const RelTranslatedJoin*>(child_node)) {
272  handleTranslatedJoin(parent_node, translated_join_node);
273  child_visited = true;
274  }
275  }
276  if (!child_visited) {
277  register_and_visit(parent_node, child_node);
278  }
279 }
void register_and_visit(const RelAlgNode *parent_node, const RelAlgNode *child_node)
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
void handleTranslatedJoin(const RelAlgNode *, const RelTranslatedJoin *)
std::unordered_map< unsigned, JoinQualsPerNestingLevel > left_deep_tree_infos_
void handleLeftDeepJoinTree(const RelAlgNode *, const RelLeftDeepInnerJoin *)
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

bool QueryPlanDagExtractor::analyze_join_ops_
private

Definition at line 136 of file QueryPlanDagExtractor.h.

Referenced by QueryPlanDagExtractor(), and visit().

bool QueryPlanDagExtractor::contain_not_supported_rel_node_
private
Executor* QueryPlanDagExtractor::executor_
private

Definition at line 135 of file QueryPlanDagExtractor.h.

Referenced by extractQueryPlanDagImpl(), and normalizeColumnsPair().

std::vector<std::string> QueryPlanDagExtractor::extracted_dag_
private
QueryPlanDagCache& QueryPlanDagExtractor::global_dag_
private
HashTableBuildDagMap QueryPlanDagExtractor::hash_table_query_plan_dag_
private

Definition at line 138 of file QueryPlanDagExtractor.h.

Referenced by getHashTableBuildDag(), and handleTranslatedJoin().

std::unordered_map<unsigned, JoinQualsPerNestingLevel> QueryPlanDagExtractor::left_deep_tree_infos_
private

Definition at line 134 of file QueryPlanDagExtractor.h.

Referenced by getPerNestingJoinQualInfo(), and visit().

TableIdToNodeMap QueryPlanDagExtractor::table_id_to_node_map_
private
std::shared_ptr<TranslatedJoinInfo> QueryPlanDagExtractor::translated_join_info_
private

Definition at line 137 of file QueryPlanDagExtractor.h.

Referenced by handleLeftDeepJoinTree(), and QueryPlanDagExtractor().


The documentation for this class was generated from the following files: