OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgExecutionUnit.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #pragma once
25 
27 #include "QueryHint.h"
28 #include "RelAlgDag.h"
29 #include "Shared/DbObjectKeys.h"
30 #include "Shared/sqldefs.h"
31 #include "Shared/toString.h"
35 
36 #include <boost/graph/adjacency_list.hpp>
37 
38 #include <functional>
39 #include <list>
40 #include <memory>
41 #include <optional>
42 #include <vector>
43 
44 using AdjacentList = boost::adjacency_list<boost::setS, boost::vecS, boost::directedS>;
45 // node ID used when extracting query plan DAG
46 // note this ID is different from RelNode's id since the query plan DAG extractor assigns
47 // a unique node ID only to a rel node which is included in the extracted DAG (if we
48 // cannot extract a DAG from the query plan, the extractor skips assigning unique IDs to
49 // the rel nodes in that query plan)
50 using RelNodeId = size_t;
51 // hash value of explained rel node
52 using RelNodeExplainedHash = size_t;
53 // a string representation of a query plan that is collected by visiting the query plan
54 // DAG from root to leaf and concatenating each rel node's id,
55 // where two adjacent rel nodes in a QueryPlanDAG are connected via the '|' delimiter,
56 // e.g., 1|2|3|4|
57 using QueryPlanDAG = std::string;
58 // hashed value of a QueryPlanDAG
59 using QueryPlanHash = size_t;
60 // holds the query plan DAG and column info of join columns;
61 // used to identify the correct cached hashtable
63  public:
64  HashTableBuildDag(size_t in_inner_cols_info,
65  size_t in_outer_cols_info,
66  QueryPlanHash in_inner_cols_access_path,
67  QueryPlanHash in_outer_cols_access_path,
68  std::unordered_set<size_t>&& inputTableKeys)
69  : inner_cols_info(in_inner_cols_info)
70  , outer_cols_info(in_outer_cols_info)
71  , inner_cols_access_path(in_inner_cols_access_path)
72  , outer_cols_access_path(in_outer_cols_access_path)
73  , inputTableKeys(std::move(inputTableKeys)) {}
78  std::unordered_set<size_t>
79  inputTableKeys; // table keys of input(s), e.g., scan node or subquery's DAG
80 };
81 // A map between a join qual's column info and its hashtable access path, expressed as a
82 // query plan DAG, e.g., for A.a = B.b with the hashtable built on B.b: <(A.a = B.b) -->
83 // query plan DAG of projecting B.b>. This two-level mapping (join qual -> inner join col
84 // -> hashtable access plan DAG) is required because the query plan must be extracted
85 // (which needs rel alg metadata) before we know which join col becomes inner; that
86 // decision is only made at the time of building the hashtable
87 using HashTableBuildDagMap = std::unordered_map<size_t, HashTableBuildDag>;
88 // A map from a join column's input table id to its corresponding rel node.
89 // For each hash join operation, we can determine whether its input source
90 // has an inconsistency in its source data, e.g., row ordering,
91 // by looking at the type of the input node, e.g., RelSort.
92 // Note that disabling DAG extraction whenever we find a sort node in a join's input
93 // is too restrictive when a query becomes complex (and so has multiple joins)
94 // since it eliminates a chance of data recycling
95 using TableIdToNodeMap = std::unordered_map<shared::TableKey, const RelAlgNode*>;
96 
100  kQual, // INNER + OUTER
101  kDirect // set target directly (i.e., put Analyzer::Expr* instead of
102  // Analyzer::BinOper*)
103 };
104 constexpr char const* EMPTY_QUERY_PLAN = "";
106 
108 
109 namespace Analyzer {
110 class Expr;
111 class ColumnVar;
112 class Estimator;
113 struct OrderEntry;
114 
115 } // namespace Analyzer
116 
117 struct SortInfo {  // Sort specification: order entries, an optional row limit, and a row offset.
119  : order_entries({})  // NOTE(review): the constructor header (listing lines 118/120) is absent from this extract
121  , limit(std::nullopt)  // no limit set
122  , offset(0) {}
123 
124  SortInfo(const std::list<Analyzer::OrderEntry>& oe,  // member-wise constructor: stores oe, sa, l and o as given
125  const SortAlgorithm sa,
126  std::optional<size_t> l,
127  size_t o)
128  : order_entries(oe), algorithm(sa), limit(l), offset(o) {}
129 
130  SortInfo& operator=(const SortInfo& other) {  // NOTE(review): listing line 131 is missing here; presumably it copies order_entries -- confirm
132  algorithm = other.algorithm;
133  limit = other.limit;
134  offset = other.offset;
135  return *this;
136  }
137 
138  static SortInfo createFromSortNode(const RelSort* sort_node) {  // builds a SortInfo from the values reported by sort_node
139  return {sort_node->getOrderEntries(),  // NOTE(review): listing line 140 (second initializer) is missing from this extract
141  sort_node->getLimit(),
142  sort_node->getOffset()};
143  }
144 
145  size_t hashLimit() const {  // hash over the limit only; order entries and offset are not included
146  size_t hash{0};
147  boost::hash_combine(hash, limit.has_value());  // hash presence separately from the value ...
148  boost::hash_combine(hash, limit.value_or(0));  // ... because an unset limit contributes 0 here, same as limit == 0
149  return hash;
150  }
151 
152  std::list<Analyzer::OrderEntry> order_entries;
154  std::optional<size_t> limit;  // std::nullopt when no limit is set
155  size_t offset;
156 };
157 
159  std::list<std::shared_ptr<Analyzer::Expr>> quals;
161 };
162 
163 using JoinQualsPerNestingLevel = std::vector<JoinCondition>;
164 
166  std::vector<InputDescriptor> input_descs;
167  std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
168  std::list<std::shared_ptr<Analyzer::Expr>> simple_quals;
169  std::list<std::shared_ptr<Analyzer::Expr>> quals;
171  const std::list<std::shared_ptr<Analyzer::Expr>> groupby_exprs;
172  std::vector<Analyzer::Expr*> target_exprs;
173  std::unordered_map<size_t, SQLTypeInfo> target_exprs_original_type_infos;
174  const std::shared_ptr<Analyzer::Estimator> estimator;
176  size_t scan_limit;
181  bool use_bump_allocator{false};
182  // empty if not a UNION, true if UNION ALL, false if regular UNION
183  const std::optional<bool> union_all;
184  std::shared_ptr<const query_state::QueryState> query_state;
185  std::vector<Analyzer::Expr*> target_exprs_union; // targets in second subquery of UNION
186  mutable std::vector<std::pair<std::vector<size_t>, size_t>> per_device_cardinality;
187 
188  RelAlgExecutionUnit createNdvExecutionUnit(const int64_t range) const;
190  Analyzer::Expr* replacement_target) const;
191 
192  // Call lambda() for each aggregate target_expr of SQLAgg type AggType.
193  template <SQLAgg AggType>
195  std::function<void(Analyzer::AggExpr const*, size_t target_idx)> lambda) const {
196  for (size_t target_idx = 0; target_idx < target_exprs.size(); ++target_idx) {
197  Analyzer::Expr const* target_expr = target_exprs[target_idx];
198  if (auto const* agg_expr = dynamic_cast<Analyzer::AggExpr const*>(target_expr)) {
199  if (agg_expr->get_aggtype() == AggType) {
200  lambda(agg_expr, target_idx);
201  }
202  }
203  }
204  }
205 };
206 
207 std::ostream& operator<<(std::ostream& os, const RelAlgExecutionUnit& ra_exe_unit);
208 
210  const std::vector<InputDescriptor> input_descs;
211  std::list<std::shared_ptr<const InputColDescriptor>> input_col_descs;
212  std::vector<Analyzer::Expr*> input_exprs;
213  std::vector<Analyzer::ColumnVar*> table_func_inputs;
214  std::vector<Analyzer::Expr*> target_exprs;
215  mutable size_t output_buffer_size_param;
218 
219  public:
220  std::string toString() const {  // returns typeName(this) followed by a parenthesized, comma-separated list of name=value pairs
221  return typeName(this) + "(" + "input_exprs=" + ::toString(input_exprs) +
222  ", table_func_inputs=" + ::toString(table_func_inputs) +
223  ", target_exprs=" + ::toString(target_exprs) +
224  ", output_buffer_size_param=" + ::toString(output_buffer_size_param) +
225  ", table_func=" + ::toString(table_func) +
226  ", query_plan_dag=" + ::toString(query_plan_dag_hash) + ")";  // NOTE(review): the value printed under "query_plan_dag=" is query_plan_dag_hash
227  }
228 };
229 
230 class ResultSet;
231 using ResultSetPtr = std::shared_ptr<ResultSet>;
std::vector< Analyzer::Expr * > target_exprs
size_t getOffset() const
Definition: RelAlgDag.h:2228
JoinType
Definition: sqldefs.h:238
std::list< Analyzer::OrderEntry > getOrderEntries() const
Definition: RelAlgDag.h:2264
std::vector< Analyzer::Expr * > input_exprs
std::vector< Analyzer::ColumnVar * > table_func_inputs
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
const table_functions::TableFunction table_func
std::string QueryPlanDAG
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
QueryPlanHash outer_cols_access_path
JoinColumnSide
const std::vector< InputDescriptor > input_descs
std::vector< InputDescriptor > input_descs
boost::adjacency_list< boost::setS, boost::vecS, boost::directedS > AdjacentList
std::vector< JoinCondition > JoinQualsPerNestingLevel
std::shared_ptr< ResultSet > ResultSetPtr
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
static SortInfo createFromSortNode(const RelSort *sort_node)
SortAlgorithm algorithm
std::unordered_set< size_t > inputTableKeys
std::vector< Analyzer::Expr * > target_exprs_union
void eachAggTarget(std::function< void(Analyzer::AggExpr const *, size_t target_idx)> lambda) const
std::vector< std::pair< std::vector< size_t >, size_t > > per_device_cardinality
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
SortInfo & operator=(const SortInfo &other)
const JoinQualsPerNestingLevel join_quals
std::optional< size_t > limit
TableIdToNodeMap table_id_to_node_map
std::list< Analyzer::OrderEntry > order_entries
RelAlgExecutionUnit createCountAllExecutionUnit(Analyzer::Expr *replacement_target) const
const std::shared_ptr< Analyzer::Estimator > estimator
std::string toString() const
HashTableBuildDag(size_t in_inner_cols_info, size_t in_outer_cols_info, QueryPlanHash in_inner_cols_access_path, QueryPlanHash in_outer_cols_access_path, std::unordered_set< size_t > &&inputTableKeys)
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
QueryPlanHash inner_cols_access_path
SortInfo(const std::list< Analyzer::OrderEntry > &oe, const SortAlgorithm sa, std::optional< size_t > l, size_t o)
size_t RelNodeExplainedHash
std::unordered_map< size_t, SQLTypeInfo > target_exprs_original_type_infos
RelAlgExecutionUnit createNdvExecutionUnit(const int64_t range) const
size_t hashLimit() const
size_t QueryPlanHash
std::string typeName(const T *v)
Definition: toString.h:106
std::list< std::shared_ptr< Analyzer::Expr > > quals
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
constexpr char const * EMPTY_QUERY_PLAN
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::shared_ptr< const query_state::QueryState > query_state
Common Enum definitions for SQL processing.
std::vector< Analyzer::Expr * > target_exprs
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
std::optional< size_t > getLimit() const
Definition: RelAlgDag.h:2226
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:64
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
size_t RelNodeId
HashTableBuildDagMap hash_table_build_plan_dag