OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CardinalityEstimator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CardinalityEstimator.h"
18 #include "ErrorHandling.h"
19 #include "ExpressionRewrite.h"
20 #include "RelAlgExecutor.h"
21 
22 int64_t g_large_ndv_threshold = 10000000;
24 
25 namespace Analyzer {
26 
28  return 1024 * 1024 * g_large_ndv_multiplier;
29 }
30 
31 } // namespace Analyzer
32 
34  CHECK(dynamic_cast<const Analyzer::NDVEstimator*>(estimator_.get()));
35  CHECK(host_estimator_buffer_);
36  auto bits_set = bitmap_set_size(host_estimator_buffer_, estimator_->getBufferSize());
37  if (bits_set == 0) {
38  // empty result set, return 1 for a groups buffer size of 1
39  return 1;
40  }
41  const auto total_bits = estimator_->getBufferSize() * 8;
42  CHECK_LE(bits_set, total_bits);
43  const auto unset_bits = total_bits - bits_set;
44  const auto ratio = static_cast<double>(unset_bits) / total_bits;
45  if (ratio == 0.) {
46  LOG(WARNING)
47  << "Failed to get a high quality cardinality estimation, falling back to "
48  "approximate group by buffer size guess.";
49  return 0;
50  }
51  return -static_cast<double>(total_bits) * log(ratio);
52 }
53 
55  const int64_t range,
56  const bool is_agg,
57  const CompilationOptions& co,
58  const ExecutionOptions& eo) {
59  const auto estimator_exe_unit = work_unit.exe_unit.createNdvExecutionUnit(range);
60  size_t one{1};
61  ColumnCacheMap column_cache;
62  try {
63  const auto estimator_result =
64  executor_->executeWorkUnit(one,
65  is_agg,
67  estimator_exe_unit,
68  co,
69  eo,
70  nullptr,
71  false,
72  column_cache);
73  if (!estimator_result) {
74  return 1; // empty row set, only needs one slot
75  }
76  return estimator_result->getNDVEstimator();
77  } catch (const QueryExecutionError& e) {
78  if (e.hasErrorCode(ErrorCode::OUT_OF_TIME)) {
79  throw std::runtime_error("Cardinality estimation query ran out of time");
80  }
81  if (e.hasErrorCode(ErrorCode::INTERRUPTED)) {
82  throw std::runtime_error("Cardinality estimation query has been interrupted");
83  }
84  throw std::runtime_error("Failed to run the cardinality estimation query: " +
86  }
87  UNREACHABLE();
88  return 0;
89 }
90 
92  const int64_t range) const {
93  const bool use_large_estimator =
94  range > g_large_ndv_threshold || groupby_exprs.size() > 1;
95  return {input_descs,
98  quals,
99  join_quals,
100  {},
101  {},
102  {},
103  use_large_estimator ? makeExpr<Analyzer::LargeNDVEstimator>(groupby_exprs)
104  : makeExpr<Analyzer::NDVEstimator>(groupby_exprs),
105  SortInfo(),
106  0,
107  query_hint,
111  false,
112  union_all,
113  query_state,
114  {}};
115 }
116 
118  Analyzer::Expr* replacement_target) const {
119  return {input_descs,
121  simple_quals,
123  join_quals,
124  {},
125  {replacement_target},
126  {},
127  nullptr,
128  SortInfo(),
129  0,
130  query_hint,
134  false,
135  union_all,
136  query_state,
137  {replacement_target},
138  /*per_device_cardinality=*/{}};
139 }
140 
142  const RelAlgExecutionUnit& ra_exe_unit,
143  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device) {
144  if (results_per_device.empty()) {
145  return nullptr;
146  }
147  CHECK(dynamic_cast<const Analyzer::NDVEstimator*>(ra_exe_unit.estimator.get()));
148  const auto& result_set = results_per_device.front().first;
149  CHECK(result_set);
150  auto estimator_buffer = result_set->getHostEstimatorBuffer();
151  CHECK(estimator_buffer);
152  for (size_t i = 1; i < results_per_device.size(); ++i) {
153  const auto& next_result_set = results_per_device[i].first;
154  const auto other_estimator_buffer = next_result_set->getHostEstimatorBuffer();
155  for (size_t off = 0; off < ra_exe_unit.estimator->getBufferSize(); ++off) {
156  estimator_buffer[off] |= other_estimator_buffer[off];
157  }
158  }
159  return std::move(result_set);
160 }
size_t getBufferSize() const final
bool is_agg(const Analyzer::Expr *expr)
int64_t g_large_ndv_threshold
int32_t getErrorCode() const
Definition: ErrorHandling.h:63
QueryPlanHash query_plan_dag_hash
const std::optional< bool > union_all
#define LOG(tag)
Definition: Logger.h:285
RelAlgExecutionUnit exe_unit
std::vector< InputDescriptor > input_descs
#define UNREACHABLE()
Definition: Logger.h:338
std::shared_ptr< ResultSet > ResultSetPtr
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::list< std::shared_ptr< Analyzer::Expr > > strip_join_covered_filter_quals(const std::list< std::shared_ptr< Analyzer::Expr >> &quals, const JoinQualsPerNestingLevel &join_quals)
const JoinQualsPerNestingLevel join_quals
TableIdToNodeMap table_id_to_node_map
bool hasErrorCode(ErrorCode const ec) const
Definition: ErrorHandling.h:65
RelAlgExecutionUnit createCountAllExecutionUnit(Analyzer::Expr *replacement_target) const
size_t getNDVEstimation(const WorkUnit &work_unit, const int64_t range, const bool is_agg, const CompilationOptions &co, const ExecutionOptions &eo)
const std::shared_ptr< Analyzer::Estimator > estimator
#define CHECK_LE(x, y)
Definition: Logger.h:304
RelAlgExecutionUnit createNdvExecutionUnit(const int64_t range) const
size_t getNDVEstimator() const
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
std::list< std::shared_ptr< Analyzer::Expr > > quals
RegisteredQueryHint query_hint
#define CHECK(condition)
Definition: Logger.h:291
std::vector< InputTableInfo > get_table_infos(const std::vector< InputDescriptor > &input_descs, Executor *executor)
Estimators to be used when precise cardinality isn&#39;t useful.
static std::string getErrorMessageFromCode(const int32_t error_code)
std::shared_ptr< const query_state::QueryState > query_state
ResultSetPtr reduce_estimator_results(const RelAlgExecutionUnit &ra_exe_unit, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device)
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
Executor * executor_
size_t bitmap_set_size(const int8_t *bitmap, const size_t bitmap_byte_sz)
Definition: CountDistinct.h:37
std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
size_t g_large_ndv_multiplier
HashTableBuildDagMap hash_table_build_plan_dag