OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDag.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgDag.h"
19 #include "Catalog/Catalog.h"
21 #include "JsonAccessors.h"
23 #include "RelAlgOptimizer.h"
24 #include "RelLeftDeepInnerJoin.h"
25 #include "RexVisitor.h"
26 #include "Shared/sqldefs.h"
27 
28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
32 
33 #include <string>
34 #include <unordered_set>
35 
36 extern bool g_cluster;
37 extern bool g_enable_union;
38 
39 namespace {
40 
41 const unsigned FIRST_RA_NODE_ID = 1;
42 
43 } // namespace
44 
45 thread_local unsigned RelAlgNode::crt_id_ = FIRST_RA_NODE_ID;
46 
49 }
50 
52  const std::shared_ptr<const ExecutionResult> result) {
53  auto row_set = result->getRows();
54  CHECK(row_set);
55  CHECK_EQ(size_t(1), row_set->colCount());
56  *(type_.get()) = row_set->getColType(0);
57  (*(result_.get())) = result;
58 }
59 
60 std::unique_ptr<RexSubQuery> RexSubQuery::deepCopy() const {
61  return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
62 }
63 
64 unsigned RexSubQuery::getId() const {
65  return ra_->getId();
66 }
67 
68 namespace {
69 
70 class RexRebindInputsVisitor : public RexVisitor<void*> {
71  public:
72  RexRebindInputsVisitor(const RelAlgNode* old_input, const RelAlgNode* new_input)
73  : old_input_(old_input), new_input_(new_input) {}
74 
75  virtual ~RexRebindInputsVisitor() = default;
76 
77  void* visitInput(const RexInput* rex_input) const override {
78  const auto old_source = rex_input->getSourceNode();
79  if (old_source == old_input_) {
80  const auto left_deep_join = dynamic_cast<const RelLeftDeepInnerJoin*>(new_input_);
81  if (left_deep_join) {
82  rebind_inputs_from_left_deep_join(rex_input, left_deep_join);
83  return nullptr;
84  }
85  rex_input->setSourceNode(new_input_);
86  }
87  return nullptr;
88  };
89 
90  private:
91  const RelAlgNode* old_input_;
93 };
94 
95 // Creates an output with n columns.
96 std::vector<RexInput> n_outputs(const RelAlgNode* node, const size_t n) {
97  std::vector<RexInput> outputs;
98  outputs.reserve(n);
99  for (size_t i = 0; i < n; ++i) {
100  outputs.emplace_back(node, i);
101  }
102  return outputs;
103 }
104 
106  public:
108  const RelAlgNode* old_input,
109  const RelAlgNode* new_input,
110  std::unordered_map<unsigned, unsigned> old_to_new_index_map)
111  : RexRebindInputsVisitor(old_input, new_input), mapping_(old_to_new_index_map) {}
112 
113  void* visitInput(const RexInput* rex_input) const override {
114  RexRebindInputsVisitor::visitInput(rex_input);
115  auto mapping_itr = mapping_.find(rex_input->getIndex());
116  CHECK(mapping_itr != mapping_.end());
117  rex_input->setIndex(mapping_itr->second);
118  return nullptr;
119  }
120 
121  private:
122  const std::unordered_map<unsigned, unsigned> mapping_;
123 };
124 
126  : public RexVisitorBase<std::unique_ptr<const RexScalar>> {
127  public:
128  enum class WindowExprType { PARTITION_KEY, ORDER_KEY };
130  std::shared_ptr<RelProject> new_project,
131  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project,
132  std::vector<std::string>& fields_for_new_project,
133  std::unordered_map<size_t, size_t>& expr_offset_cache)
134  : new_project_(new_project)
135  , scalar_exprs_for_new_project_(scalar_exprs_for_new_project)
136  , fields_for_new_project_(fields_for_new_project)
137  , expr_offset_cache_(expr_offset_cache)
138  , found_case_expr_window_operand_(false)
139  , has_partition_expr_(false) {}
140 
141  size_t pushDownExpressionImpl(const RexScalar* expr) const {
142  auto hash = expr->toHash();
143  auto it = expr_offset_cache_.find(hash);
144  auto new_offset = -1;
145  if (it == expr_offset_cache_.end()) {
146  CHECK(
147  expr_offset_cache_.emplace(hash, scalar_exprs_for_new_project_.size()).second);
148  new_offset = scalar_exprs_for_new_project_.size();
149  fields_for_new_project_.emplace_back("");
150  scalar_exprs_for_new_project_.emplace_back(deep_copier_.visit(expr));
151  } else {
152  // we already pushed down the same expression, so reuse it
153  new_offset = it->second;
154  }
155  return new_offset;
156  }
157 
159  size_t expr_offset) const {
160  // given window expr offset and inner expr's offset,
161  // return a (push-downed) expression's offset from the new projection node
162  switch (type) {
163  case WindowExprType::PARTITION_KEY: {
164  auto it = pushed_down_partition_key_offset_.find(expr_offset);
165  CHECK(it != pushed_down_partition_key_offset_.end());
166  return it->second;
167  }
168  case WindowExprType::ORDER_KEY: {
169  auto it = pushed_down_order_key_offset_.find(expr_offset);
170  CHECK(it != pushed_down_order_key_offset_.end());
171  return it->second;
172  }
173  default:
174  UNREACHABLE();
175  return std::nullopt;
176  }
177  }
178 
180  const RexWindowFunctionOperator* window_expr) const {
181  // step 1. push "all" target expressions of the window_func_project_node down to the
182  // new child projection
183  // each window expr is a separate target expression of the projection node
184  // and they have their own inner expression related to partition / order clauses
185  // so we capture their offsets to correctly rebind their input
186  pushed_down_window_operands_offset_.clear();
187  pushed_down_partition_key_offset_.clear();
188  pushed_down_order_key_offset_.clear();
189  for (size_t offset = 0; offset < window_expr->size(); ++offset) {
190  auto expr = window_expr->getOperand(offset);
191  auto literal_expr = dynamic_cast<const RexLiteral*>(expr);
192  auto case_expr = dynamic_cast<const RexCase*>(expr);
193  if (case_expr) {
194  // when columnar output is enabled, pushdown case expr can incur an issue
195  // during columnarization, so we record this and try to force rowwise-output
196  // until we fix the issue
197  // todo (yoonmin) : relax this
198  found_case_expr_window_operand_ = true;
199  }
200  if (!literal_expr) {
201  auto new_offset = pushDownExpressionImpl(expr);
202  pushed_down_window_operands_offset_.emplace(offset, new_offset);
203  }
204  }
205  size_t offset = 0;
206  for (const auto& partition_key : window_expr->getPartitionKeys()) {
207  auto new_offset = pushDownExpressionImpl(partition_key.get());
208  pushed_down_partition_key_offset_.emplace(offset, new_offset);
209  ++offset;
210  }
211  has_partition_expr_ = !window_expr->getPartitionKeys().empty();
212  offset = 0;
213  for (const auto& order_key : window_expr->getOrderKeys()) {
214  auto new_offset = pushDownExpressionImpl(order_key.get());
215  pushed_down_order_key_offset_.emplace(offset, new_offset);
216  ++offset;
217  }
218 
219  // step 2. rebind projected targets of the window_func_project_node with the new
220  // project node
221  std::vector<std::unique_ptr<const RexScalar>> window_operands;
222  auto deconst_window_expr = const_cast<RexWindowFunctionOperator*>(window_expr);
223  for (size_t idx = 0; idx < window_expr->size(); ++idx) {
224  auto it = pushed_down_window_operands_offset_.find(idx);
225  if (it != pushed_down_window_operands_offset_.end()) {
226  auto new_input = std::make_unique<const RexInput>(new_project_.get(), it->second);
227  CHECK(new_input);
228  window_operands.emplace_back(std::move(new_input));
229  } else {
230  auto copied_expr = deep_copier_.visit(window_expr->getOperand(idx));
231  window_operands.emplace_back(std::move(copied_expr));
232  }
233  }
234  deconst_window_expr->replaceOperands(std::move(window_operands));
235 
236  for (size_t idx = 0; idx < window_expr->getPartitionKeys().size(); ++idx) {
237  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::PARTITION_KEY, idx);
238  CHECK(new_offset);
239  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
240  CHECK(new_input);
241  deconst_window_expr->replacePartitionKey(idx, std::move(new_input));
242  }
243 
244  for (size_t idx = 0; idx < window_expr->getOrderKeys().size(); ++idx) {
245  auto new_offset = getOffsetForPushedDownExpr(WindowExprType::ORDER_KEY, idx);
246  CHECK(new_offset);
247  auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
248  CHECK(new_input);
249  deconst_window_expr->replaceOrderKey(idx, std::move(new_input));
250  }
251  }
252 
253  std::unique_ptr<const RexScalar> visitInput(const RexInput* rex_input) const override {
254  auto new_offset = pushDownExpressionImpl(rex_input);
255  CHECK_LT(new_offset, scalar_exprs_for_new_project_.size());
256  auto hash = rex_input->toHash();
257  auto it = expr_offset_cache_.find(hash);
258  CHECK(it != expr_offset_cache_.end());
259  CHECK_EQ(new_offset, it->second);
260  auto new_input = std::make_unique<const RexInput>(new_project_.get(), new_offset);
261  CHECK(new_input);
262  return new_input;
263  }
264 
265  std::unique_ptr<const RexScalar> visitLiteral(
266  const RexLiteral* rex_literal) const override {
267  return deep_copier_.visit(rex_literal);
268  }
269 
270  std::unique_ptr<const RexScalar> visitRef(const RexRef* rex_ref) const override {
271  return deep_copier_.visit(rex_ref);
272  }
273 
274  std::unique_ptr<const RexScalar> visitSubQuery(
275  const RexSubQuery* rex_subquery) const override {
276  return deep_copier_.visit(rex_subquery);
277  }
278 
279  std::unique_ptr<const RexScalar> visitCase(const RexCase* rex_case) const override {
280  std::vector<
281  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
282  new_expr_pair_list;
283  std::unique_ptr<const RexScalar> new_else_expr;
284  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
285  const auto when = rex_case->getWhen(i);
286  auto new_when = PushDownGenericExpressionInWindowFunction::visit(when);
287  const auto then = rex_case->getThen(i);
288  auto new_then = PushDownGenericExpressionInWindowFunction::visit(then);
289  new_expr_pair_list.emplace_back(std::move(new_when), std::move(new_then));
290  }
291  if (rex_case->getElse()) {
292  new_else_expr = deep_copier_.visit(rex_case->getElse());
293  }
294  auto new_case = std::make_unique<const RexCase>(new_expr_pair_list, new_else_expr);
295  return new_case;
296  }
297 
298  std::unique_ptr<const RexScalar> visitOperator(
299  const RexOperator* rex_operator) const override {
300  const auto rex_window_func_operator =
301  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
302  if (rex_window_func_operator) {
303  pushDownExpressionInWindowFunction(rex_window_func_operator);
304  return deep_copier_.visit(rex_operator);
305  } else {
306  std::unique_ptr<const RexOperator> new_operator{nullptr};
307  std::vector<std::unique_ptr<const RexScalar>> new_operands;
308  for (size_t i = 0; i < rex_operator->size(); ++i) {
309  const auto operand = rex_operator->getOperand(i);
310  auto new_operand = PushDownGenericExpressionInWindowFunction::visit(operand);
311  new_operands.emplace_back(std::move(new_operand));
312  }
313  if (auto function_op = dynamic_cast<const RexFunctionOperator*>(rex_operator)) {
314  new_operator = std::make_unique<const RexFunctionOperator>(
315  function_op->getName(), new_operands, rex_operator->getType());
316  } else {
317  new_operator = std::make_unique<const RexOperator>(
318  rex_operator->getOperator(), new_operands, rex_operator->getType());
319  }
320  CHECK(new_operator);
321  return new_operator;
322  }
323  }
324 
325  bool hasCaseExprAsWindowOperand() { return found_case_expr_window_operand_; }
326 
327  bool hasPartitionExpression() { return has_partition_expr_; }
328 
329  private:
330  std::unique_ptr<const RexScalar> defaultResult() const override { return nullptr; }
331 
332  std::shared_ptr<RelProject> new_project_;
333  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project_;
334  std::vector<std::string>& fields_for_new_project_;
335  std::unordered_map<size_t, size_t>& expr_offset_cache_;
337  mutable bool has_partition_expr_;
338  mutable std::unordered_map<size_t, size_t> pushed_down_window_operands_offset_;
339  mutable std::unordered_map<size_t, size_t> pushed_down_partition_key_offset_;
340  mutable std::unordered_map<size_t, size_t> pushed_down_order_key_offset_;
342 };
343 
344 } // namespace
345 
347  std::shared_ptr<const RelAlgNode> old_input,
348  std::shared_ptr<const RelAlgNode> input,
349  std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
350  RelAlgNode::replaceInput(old_input, input);
351  std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
352  if (old_to_new_index_map) {
353  rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
354  old_input.get(), input.get(), *old_to_new_index_map);
355  } else {
356  rebind_inputs =
357  std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
358  }
359  CHECK(rebind_inputs);
360  for (const auto& scalar_expr : scalar_exprs_) {
361  rebind_inputs->visit(scalar_expr.get());
362  }
363 }
364 
365 void RelProject::appendInput(std::string new_field_name,
366  std::unique_ptr<const RexScalar> new_input) {
367  fields_.emplace_back(std::move(new_field_name));
368  scalar_exprs_.emplace_back(std::move(new_input));
369 }
370 
372  const auto scan_node = dynamic_cast<const RelScan*>(ra_node);
373  if (scan_node) {
374  // Scan node has no inputs, output contains all columns in the table.
375  CHECK_EQ(size_t(0), scan_node->inputCount());
376  return n_outputs(scan_node, scan_node->size());
377  }
378  const auto project_node = dynamic_cast<const RelProject*>(ra_node);
379  if (project_node) {
380  // Project output count doesn't depend on the input
381  CHECK_EQ(size_t(1), project_node->inputCount());
382  return n_outputs(project_node, project_node->size());
383  }
384  const auto filter_node = dynamic_cast<const RelFilter*>(ra_node);
385  if (filter_node) {
386  // Filter preserves shape
387  CHECK_EQ(size_t(1), filter_node->inputCount());
388  const auto prev_out = get_node_output(filter_node->getInput(0));
389  return n_outputs(filter_node, prev_out.size());
390  }
391  const auto aggregate_node = dynamic_cast<const RelAggregate*>(ra_node);
392  if (aggregate_node) {
393  // Aggregate output count doesn't depend on the input
394  CHECK_EQ(size_t(1), aggregate_node->inputCount());
395  return n_outputs(aggregate_node, aggregate_node->size());
396  }
397  const auto compound_node = dynamic_cast<const RelCompound*>(ra_node);
398  if (compound_node) {
399  // Compound output count doesn't depend on the input
400  CHECK_EQ(size_t(1), compound_node->inputCount());
401  return n_outputs(compound_node, compound_node->size());
402  }
403  const auto join_node = dynamic_cast<const RelJoin*>(ra_node);
404  if (join_node) {
405  // Join concatenates the outputs from the inputs and the output
406  // directly references the nodes in the input.
407  CHECK_EQ(size_t(2), join_node->inputCount());
408  auto lhs_out =
409  n_outputs(join_node->getInput(0), get_node_output(join_node->getInput(0)).size());
410  const auto rhs_out =
411  n_outputs(join_node->getInput(1), get_node_output(join_node->getInput(1)).size());
412  lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
413  return lhs_out;
414  }
415  const auto table_func_node = dynamic_cast<const RelTableFunction*>(ra_node);
416  if (table_func_node) {
417  // Table Function output count doesn't depend on the input
418  return n_outputs(table_func_node, table_func_node->size());
419  }
420  const auto sort_node = dynamic_cast<const RelSort*>(ra_node);
421  if (sort_node) {
422  // Sort preserves shape
423  CHECK_EQ(size_t(1), sort_node->inputCount());
424  const auto prev_out = get_node_output(sort_node->getInput(0));
425  return n_outputs(sort_node, prev_out.size());
426  }
427  const auto logical_values_node = dynamic_cast<const RelLogicalValues*>(ra_node);
428  if (logical_values_node) {
429  CHECK_EQ(size_t(0), logical_values_node->inputCount());
430  return n_outputs(logical_values_node, logical_values_node->size());
431  }
432  const auto logical_union_node = dynamic_cast<const RelLogicalUnion*>(ra_node);
433  if (logical_union_node) {
434  return n_outputs(logical_union_node, logical_union_node->size());
435  }
436  LOG(FATAL) << "Unhandled ra_node type: " << ::toString(ra_node);
437  return {};
438 }
439 
441  if (!isSimple()) {
442  return false;
443  }
444  CHECK_EQ(size_t(1), inputCount());
445  const auto source = getInput(0);
446  if (dynamic_cast<const RelJoin*>(source)) {
447  return false;
448  }
449  const auto source_shape = get_node_output(source);
450  if (source_shape.size() != scalar_exprs_.size()) {
451  return false;
452  }
453  for (size_t i = 0; i < scalar_exprs_.size(); ++i) {
454  const auto& scalar_expr = scalar_exprs_[i];
455  const auto input = dynamic_cast<const RexInput*>(scalar_expr.get());
456  CHECK(input);
457  CHECK_EQ(source, input->getSourceNode());
458  // We should add the additional check that input->getIndex() !=
459  // source_shape[i].getIndex(), but Calcite doesn't generate the right
460  // Sort-Project-Sort sequence when joins are involved.
461  if (input->getSourceNode() != source_shape[i].getSourceNode()) {
462  return false;
463  }
464  }
465  return true;
466 }
467 
468 namespace {
469 
470 bool isRenamedInput(const RelAlgNode* node,
471  const size_t index,
472  const std::string& new_name) {
473  CHECK_LT(index, node->size());
474  if (auto join = dynamic_cast<const RelJoin*>(node)) {
475  CHECK_EQ(size_t(2), join->inputCount());
476  const auto lhs_size = join->getInput(0)->size();
477  if (index < lhs_size) {
478  return isRenamedInput(join->getInput(0), index, new_name);
479  }
480  CHECK_GE(index, lhs_size);
481  return isRenamedInput(join->getInput(1), index - lhs_size, new_name);
482  }
483 
484  if (auto scan = dynamic_cast<const RelScan*>(node)) {
485  return new_name != scan->getFieldName(index);
486  }
487 
488  if (auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
489  return new_name != aggregate->getFieldName(index);
490  }
491 
492  if (auto project = dynamic_cast<const RelProject*>(node)) {
493  return new_name != project->getFieldName(index);
494  }
495 
496  if (auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
497  return new_name != table_func->getFieldName(index);
498  }
499 
500  if (auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
501  const auto& tuple_type = logical_values->getTupleType();
502  CHECK_LT(index, tuple_type.size());
503  return new_name != tuple_type[index].get_resname();
504  }
505 
506  CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
507  dynamic_cast<const RelLogicalUnion*>(node));
508  return isRenamedInput(node->getInput(0), index, new_name);
509 }
510 
511 } // namespace
512 
514  if (!isSimple()) {
515  return false;
516  }
517  CHECK_EQ(scalar_exprs_.size(), fields_.size());
518  for (size_t i = 0; i < fields_.size(); ++i) {
519  auto rex_in = dynamic_cast<const RexInput*>(scalar_exprs_[i].get());
520  CHECK(rex_in);
521  if (isRenamedInput(rex_in->getSourceNode(), rex_in->getIndex(), fields_[i])) {
522  return true;
523  }
524  }
525  return false;
526 }
527 
528 void RelJoin::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
529  std::shared_ptr<const RelAlgNode> input) {
530  RelAlgNode::replaceInput(old_input, input);
531  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
532  if (condition_) {
533  rebind_inputs.visit(condition_.get());
534  }
535 }
536 
537 void RelFilter::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
538  std::shared_ptr<const RelAlgNode> input) {
539  RelAlgNode::replaceInput(old_input, input);
540  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
541  rebind_inputs.visit(filter_.get());
542 }
543 
544 void RelCompound::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
545  std::shared_ptr<const RelAlgNode> input) {
546  RelAlgNode::replaceInput(old_input, input);
547  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
548  for (const auto& scalar_source : scalar_sources_) {
549  rebind_inputs.visit(scalar_source.get());
550  }
551  if (filter_expr_) {
552  rebind_inputs.visit(filter_expr_.get());
553  }
554 }
555 
557  : RelAlgNode(rhs)
559  , fields_(rhs.fields_)
560  , hint_applied_(false)
561  , hints_(std::make_unique<Hints>())
562  , has_pushed_down_window_expr_(rhs.has_pushed_down_window_expr_) {
563  RexDeepCopyVisitor copier;
564  for (auto const& expr : rhs.scalar_exprs_) {
565  scalar_exprs_.push_back(copier.visit(expr.get()));
566  }
567  if (rhs.hint_applied_) {
568  for (auto const& kv : *rhs.hints_) {
569  addHint(kv.second);
570  }
571  }
572 }
573 
575  : RelAlgNode(rhs)
576  , tuple_type_(rhs.tuple_type_)
577  , values_(RexDeepCopyVisitor::copy(rhs.values_)) {}
578 
580  RexDeepCopyVisitor copier;
581  filter_ = copier.visit(rhs.filter_.get());
582 }
583 
585  : RelAlgNode(rhs)
586  , groupby_count_(rhs.groupby_count_)
587  , fields_(rhs.fields_)
588  , hint_applied_(false)
589  , hints_(std::make_unique<Hints>()) {
590  agg_exprs_.reserve(rhs.agg_exprs_.size());
591  for (auto const& agg : rhs.agg_exprs_) {
592  agg_exprs_.push_back(agg->deepCopy());
593  }
594  if (rhs.hint_applied_) {
595  for (auto const& kv : *rhs.hints_) {
596  addHint(kv.second);
597  }
598  }
599 }
600 
602  : RelAlgNode(rhs)
603  , join_type_(rhs.join_type_)
604  , hint_applied_(false)
605  , hints_(std::make_unique<Hints>()) {
606  RexDeepCopyVisitor copier;
607  condition_ = copier.visit(rhs.condition_.get());
608  if (rhs.hint_applied_) {
609  for (auto const& kv : *rhs.hints_) {
610  addHint(kv.second);
611  }
612  }
613 }
614 
615 namespace {
616 
617 std::vector<std::unique_ptr<const RexAgg>> copyAggExprs(
618  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs) {
619  std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
620  agg_exprs_copy.reserve(agg_exprs.size());
621  for (auto const& agg_expr : agg_exprs) {
622  agg_exprs_copy.push_back(agg_expr->deepCopy());
623  }
624  return agg_exprs_copy;
625 }
626 
627 std::vector<std::unique_ptr<const RexScalar>> copyRexScalars(
628  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources) {
629  std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
630  scalar_sources_copy.reserve(scalar_sources.size());
631  RexDeepCopyVisitor copier;
632  for (auto const& scalar_source : scalar_sources) {
633  scalar_sources_copy.push_back(copier.visit(scalar_source.get()));
634  }
635  return scalar_sources_copy;
636 }
637 
638 std::vector<const Rex*> remapTargetPointers(
639  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_new,
640  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_new,
641  std::vector<std::unique_ptr<const RexAgg>> const& agg_exprs_old,
642  std::vector<std::unique_ptr<const RexScalar>> const& scalar_sources_old,
643  std::vector<const Rex*> const& target_exprs_old) {
644  std::vector<const Rex*> target_exprs(target_exprs_old);
645  std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
646  for (size_t i = 0; i < agg_exprs_new.size(); ++i) {
647  old_to_new_target.emplace(agg_exprs_old[i].get(), agg_exprs_new[i].get());
648  }
649  for (size_t i = 0; i < scalar_sources_new.size(); ++i) {
650  old_to_new_target.emplace(scalar_sources_old[i].get(), scalar_sources_new[i].get());
651  }
652  for (auto& target : target_exprs) {
653  auto target_it = old_to_new_target.find(target);
654  CHECK(target_it != old_to_new_target.end());
655  target = target_it->second;
656  }
657  return target_exprs;
658 }
659 
660 } // namespace
661 
663  : RelAlgNode(rhs)
665  , groupby_count_(rhs.groupby_count_)
666  , agg_exprs_(copyAggExprs(rhs.agg_exprs_))
667  , fields_(rhs.fields_)
668  , is_agg_(rhs.is_agg_)
669  , scalar_sources_(copyRexScalars(rhs.scalar_sources_))
670  , target_exprs_(remapTargetPointers(agg_exprs_,
671  scalar_sources_,
672  rhs.agg_exprs_,
673  rhs.scalar_sources_,
674  rhs.target_exprs_))
675  , hint_applied_(false)
676  , hints_(std::make_unique<Hints>()) {
677  RexDeepCopyVisitor copier;
678  filter_expr_ = rhs.filter_expr_ ? copier.visit(rhs.filter_expr_.get()) : nullptr;
679  if (rhs.hint_applied_) {
680  for (auto const& kv : *rhs.hints_) {
681  addHint(kv.second);
682  }
683  }
684 }
685 
686 void RelTableFunction::replaceInput(std::shared_ptr<const RelAlgNode> old_input,
687  std::shared_ptr<const RelAlgNode> input) {
688  RelAlgNode::replaceInput(old_input, input);
689  RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
690  for (const auto& target_expr : target_exprs_) {
691  rebind_inputs.visit(target_expr.get());
692  }
693  for (const auto& func_input : table_func_inputs_) {
694  rebind_inputs.visit(func_input.get());
695  }
696 }
697 
699  int32_t literal_args = 0;
700  for (const auto& arg : table_func_inputs_) {
701  const auto rex_literal = dynamic_cast<const RexLiteral*>(arg.get());
702  if (rex_literal) {
703  literal_args += 1;
704  }
705  }
706  return literal_args;
707 }
708 
709 namespace {
710 
712  std::vector<const Rex*>& column_inputs,
713  const std::vector<std::unique_ptr<const RexScalar>>& old_table_func_inputs,
714  const std::vector<std::unique_ptr<const RexScalar>>& new_table_func_inputs) {
715  CHECK_EQ(old_table_func_inputs.size(), new_table_func_inputs.size());
716  std::unordered_map<const Rex*, const Rex*> old_to_new_input;
717  for (size_t i = 0; i < old_table_func_inputs.size(); ++i) {
718  old_to_new_input.emplace(old_table_func_inputs[i].get(),
719  new_table_func_inputs[i].get());
720  }
721  for (auto& target : column_inputs) {
722  auto target_it = old_to_new_input.find(target);
723  CHECK(target_it != old_to_new_input.end());
724  target = target_it->second;
725  }
726 }
727 
728 } // namespace
729 
731  std::vector<std::unique_ptr<const RexScalar>>&& exprs) {
732  // this should only be called in the event of disambiguating inputs, which means the
733  // RexScalar types in the exprs argument should be the exact same as those previously.
734  // So we can then assume all col_inputs_ would be in the same place. So just re-adjust
735  // the pointers.
737  table_func_inputs_ = std::move(exprs);
738 }
739 
741  : RelAlgNode(rhs)
742  , function_name_(rhs.function_name_)
743  , fields_(rhs.fields_)
744  , col_inputs_(rhs.col_inputs_)
745  , table_func_inputs_(copyRexScalars(rhs.table_func_inputs_))
746  , target_exprs_(copyRexScalars(rhs.target_exprs_)) {
748 }
749 
750 namespace std {
751 template <>
752 struct hash<std::pair<const RelAlgNode*, int>> {
753  size_t operator()(const std::pair<const RelAlgNode*, int>& input_col) const {
754  auto ptr_val = reinterpret_cast<const int64_t*>(&input_col.first);
755  auto h = static_cast<size_t>(*ptr_val);
756  boost::hash_combine(h, input_col.second);
757  return h;
758  }
759 };
760 } // namespace std
761 
762 namespace {
763 
764 std::set<std::pair<const RelAlgNode*, int>> get_equiv_cols(const RelAlgNode* node,
765  const size_t which_col) {
766  std::set<std::pair<const RelAlgNode*, int>> work_set;
767  auto walker = node;
768  auto curr_col = which_col;
769  while (true) {
770  work_set.insert(std::make_pair(walker, curr_col));
771  if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
772  break;
773  }
774  CHECK_EQ(size_t(1), walker->inputCount());
775  auto only_source = walker->getInput(0);
776  if (auto project = dynamic_cast<const RelProject*>(walker)) {
777  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
778  const auto join_source = dynamic_cast<const RelJoin*>(only_source);
779  if (join_source) {
780  CHECK_EQ(size_t(2), join_source->inputCount());
781  auto lhs = join_source->getInput(0);
782  CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
783  join_source->getInput(1) == input->getSourceNode());
784  } else {
785  CHECK_EQ(input->getSourceNode(), only_source);
786  }
787  curr_col = input->getIndex();
788  } else {
789  break;
790  }
791  } else if (auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
792  if (curr_col >= aggregate->getGroupByCount()) {
793  break;
794  }
795  }
796  walker = only_source;
797  }
798  return work_set;
799 }
800 
801 } // namespace
802 
803 bool RelSort::hasEquivCollationOf(const RelSort& that) const {
804  if (collation_.size() != that.collation_.size()) {
805  return false;
806  }
807 
808  for (size_t i = 0, e = collation_.size(); i < e; ++i) {
809  auto this_sort_key = collation_[i];
810  auto that_sort_key = that.collation_[i];
811  if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
812  return false;
813  }
814  if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
815  return false;
816  }
817  auto this_equiv_keys = get_equiv_cols(this, this_sort_key.getField());
818  auto that_equiv_keys = get_equiv_cols(&that, that_sort_key.getField());
819  std::vector<std::pair<const RelAlgNode*, int>> intersect;
820  std::set_intersection(this_equiv_keys.begin(),
821  this_equiv_keys.end(),
822  that_equiv_keys.begin(),
823  that_equiv_keys.end(),
824  std::back_inserter(intersect));
825  if (intersect.empty()) {
826  return false;
827  }
828  }
829  return true;
830 }
831 
832 // class RelLogicalUnion methods
833 
835  : RelAlgNode(std::move(inputs)), is_all_(is_all) {
836  if (!g_enable_union) {
837  throw QueryNotSupported(
838  "The DEPRECATED enable-union option is set to off. Please remove this option as "
839  "it may be disabled in the future.");
840  }
841  CHECK_LE(2u, inputs_.size());
842  if (!is_all_) {
843  throw QueryNotSupported("UNION without ALL is not supported yet.");
844  }
846  throw QueryNotSupported(
847  "Unsupported CAST in UNION: Currently, we only allow casting text type to "
848  "dictionary-encoded strings.");
849  }
850 }
851 
852 size_t RelLogicalUnion::size() const {
853  return inputs_.front()->size();
854 }
855 
857  return cat(::typeName(this), "(is_all(", is_all_, "))");
858 }
859 
860 std::string RelLogicalUnion::getFieldName(const size_t i) const {
861  if (auto const* compound = dynamic_cast<RelCompound const*>(inputs_[0].get())) {
862  return compound->getFieldName(i);
863  } else if (auto const* project = dynamic_cast<RelProject const*>(inputs_[0].get())) {
864  return project->getFieldName(i);
865  } else if (auto const* logical_union =
866  dynamic_cast<RelLogicalUnion const*>(inputs_[0].get())) {
867  return logical_union->getFieldName(i);
868  } else if (auto const* aggregate =
869  dynamic_cast<RelAggregate const*>(inputs_[0].get())) {
870  return aggregate->getFieldName(i);
871  } else if (auto const* scan = dynamic_cast<RelScan const*>(inputs_[0].get())) {
872  return scan->getFieldName(i);
873  } else if (auto const* table_func =
874  dynamic_cast<RelTableFunction const*>(inputs_[0].get())) {
875  return table_func->getFieldName(i);
876  }
877  UNREACHABLE() << "Unhandled input type: " << ::toString(inputs_.front());
878  return {};
879 }
880 
881 namespace {
882 std::vector<bool> get_notnulls(std::vector<TargetMetaInfo> const& tmis0) {
883  std::vector<bool> notnulls(tmis0.size());
884  for (size_t j = 0; j < tmis0.size(); ++j) {
885  notnulls[j] = tmis0[j].get_type_info().get_notnull();
886  }
887  return notnulls;
888 }
889 
891  ti0.set_notnull({}); // Actual value doesn't matter
892  ti1.set_notnull({}); // as long as they are the same.
893  return ti0 == ti1;
894 }
895 
896 void set_notnulls(std::vector<TargetMetaInfo>* tmis0, std::vector<bool> const& notnulls) {
897  for (size_t j = 0; j < tmis0->size(); ++j) {
898  SQLTypeInfo ti = (*tmis0)[j].get_type_info();
899  SQLTypeInfo physical_ti = (*tmis0)[j].get_physical_type_info();
900  ti.set_notnull(notnulls[j]);
901  physical_ti.set_notnull(notnulls[j]);
902  (*tmis0)[j] = TargetMetaInfo((*tmis0)[j].get_resname(), ti, physical_ti);
903  }
904 }
905 } // namespace
906 
907 // The returned std::vector<TargetMetaInfo> is identical to
908 // inputs_[0]->getOutputMetainfo() except for its SQLTypeInfo::notnull values, which is
909 // the logical AND over each input. In other words, the returned columns are notnull iff
910 // all corresponding input columns are notnull.
911 std::vector<TargetMetaInfo> RelLogicalUnion::getCompatibleMetainfoTypes() const {
912  std::vector<TargetMetaInfo> tmis0 = inputs_[0]->getOutputMetainfo();
913  std::vector<bool> notnulls = get_notnulls(tmis0);
914  for (size_t i = 1; i < inputs_.size(); ++i) {
915  std::vector<TargetMetaInfo> const& tmisi = inputs_[i]->getOutputMetainfo();
916  if (tmis0.size() != tmisi.size()) {
917  LOG(INFO) << "tmis0.size()=" << tmis0.size() << " != " << tmisi.size()
918  << "=tmisi.size() for i=" << i;
919  throw std::runtime_error("Subqueries of a UNION must have matching data types.");
920  }
921  for (size_t j = 0; j < tmis0.size(); ++j) {
922  SQLTypeInfo const& ti0 = tmis0[j].get_type_info();
923  SQLTypeInfo const& ti1 = tmisi[j].get_type_info();
924  // Allow types of different nullability to be UNIONed.
925  if (!same_ignoring_notnull(ti0, ti1)) {
926  LOG(INFO) << "Types do not match for UNION:\n tmis0[" << j
927  << "].get_type_info().to_string() = " << ti0.to_string() << "\n tmis"
928  << i << '[' << j
929  << "].get_type_info().to_string() = " << ti1.to_string();
930  // The only permitted difference is when both columns are dictionary-encoded.
931  if (!(ti0.is_dict_encoded_string() && ti1.is_dict_encoded_string())) {
932  throw std::runtime_error(
933  "Subqueries of a UNION must have the exact same data types.");
934  }
935  }
936  notnulls[j] = notnulls[j] && ti1.get_notnull();
937  }
938  }
939  set_notnulls(&tmis0, notnulls); // Set each SQLTypeInfo::notnull to compatible values.
940  return tmis0;
941 }
942 
943 // Rest of code requires a raw pointer, but RexInput object needs to live somewhere.
945  size_t input_idx) const {
946  if (auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
947  RexInput rex_input(*rex_input_ptr);
948  rex_input.setSourceNode(getInput(input_idx));
949  scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
950  return scalar_exprs_.back().get();
951  }
952  return rex_scalar;
953 }
954 
956  for (auto const& input : inputs_) {
957  if (auto* proj_node = dynamic_cast<RelProject const*>(input.get())) {
958  for (size_t i = 0; i < proj_node->size(); i++) {
959  if (auto* oper = dynamic_cast<RexOperator const*>(proj_node->getProjectAt(i))) {
960  if (oper->getOperator() == SQLOps::kCAST && oper->getType().is_string() &&
961  !oper->getType().is_dict_encoded_string()) {
962  return false;
963  }
964  }
965  }
966  }
967  }
968  return true;
969 }
970 
971 namespace {
972 
973 unsigned node_id(const rapidjson::Value& ra_node) noexcept {
974  const auto& id = field(ra_node, "id");
975  return std::stoi(json_str(id));
976 }
977 
978 std::string json_node_to_string(const rapidjson::Value& node) noexcept {
979  rapidjson::StringBuffer buffer;
980  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
981  node.Accept(writer);
982  return buffer.GetString();
983 }
984 
985 // The parse_* functions below de-serialize expressions as they come from Calcite.
986 // RelAlgDagBuilder will take care of making the representation easy to
987 // navigate for lower layers, for example by replacing RexAbstractInput with RexInput.
988 
989 std::unique_ptr<RexAbstractInput> parse_abstract_input(
990  const rapidjson::Value& expr) noexcept {
991  const auto& input = field(expr, "input");
992  return std::unique_ptr<RexAbstractInput>(new RexAbstractInput(json_i64(input)));
993 }
994 
995 std::unique_ptr<RexLiteral> parse_literal(const rapidjson::Value& expr) {
996  CHECK(expr.IsObject());
997  const auto& literal = field(expr, "literal");
998  const auto type = to_sql_type(json_str(field(expr, "type")));
999  const auto target_type = to_sql_type(json_str(field(expr, "target_type")));
1000  const auto scale = json_i64(field(expr, "scale"));
1001  const auto precision = json_i64(field(expr, "precision"));
1002  const auto type_scale = json_i64(field(expr, "type_scale"));
1003  const auto type_precision = json_i64(field(expr, "type_precision"));
1004  if (literal.IsNull()) {
1005  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1006  }
1007  switch (type) {
1008  case kINT:
1009  case kBIGINT:
1010  case kDECIMAL:
1011  case kINTERVAL_DAY_TIME:
1012  case kINTERVAL_YEAR_MONTH:
1013  case kTIME:
1014  case kTIMESTAMP:
1015  case kDATE:
1016  return std::unique_ptr<RexLiteral>(new RexLiteral(json_i64(literal),
1017  type,
1018  target_type,
1019  scale,
1020  precision,
1021  type_scale,
1022  type_precision));
1023  case kDOUBLE: {
1024  if (literal.IsDouble()) {
1025  return std::unique_ptr<RexLiteral>(new RexLiteral(json_double(literal),
1026  type,
1027  target_type,
1028  scale,
1029  precision,
1030  type_scale,
1031  type_precision));
1032  } else if (literal.IsInt64()) {
1033  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetInt64()),
1034  type,
1035  target_type,
1036  scale,
1037  precision,
1038  type_scale,
1039  type_precision);
1040 
1041  } else if (literal.IsUint64()) {
1042  return std::make_unique<RexLiteral>(static_cast<double>(literal.GetUint64()),
1043  type,
1044  target_type,
1045  scale,
1046  precision,
1047  type_scale,
1048  type_precision);
1049  }
1050  UNREACHABLE() << "Unhandled type: " << literal.GetType();
1051  }
1052  case kTEXT:
1053  return std::unique_ptr<RexLiteral>(new RexLiteral(json_str(literal),
1054  type,
1055  target_type,
1056  scale,
1057  precision,
1058  type_scale,
1059  type_precision));
1060  case kBOOLEAN:
1061  return std::unique_ptr<RexLiteral>(new RexLiteral(json_bool(literal),
1062  type,
1063  target_type,
1064  scale,
1065  precision,
1066  type_scale,
1067  type_precision));
1068  case kNULLT:
1069  return std::unique_ptr<RexLiteral>(new RexLiteral(target_type));
1070  default:
1071  CHECK(false);
1072  }
1073  CHECK(false);
1074  return nullptr;
1075 }
1076 
1077 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1078  RelAlgDag& root_dag);
1079 
1080 SQLTypeInfo parse_type(const rapidjson::Value& type_obj) {
1081  if (type_obj.IsArray()) {
1082  throw QueryNotSupported("Composite types are not currently supported.");
1083  }
1084  CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
1085  << json_node_to_string(type_obj);
1086  const auto type = to_sql_type(json_str(field(type_obj, "type")));
1087  const auto nullable = json_bool(field(type_obj, "nullable"));
1088  const auto precision_it = type_obj.FindMember("precision");
1089  const int precision =
1090  precision_it != type_obj.MemberEnd() ? json_i64(precision_it->value) : 0;
1091  const auto scale_it = type_obj.FindMember("scale");
1092  const int scale = scale_it != type_obj.MemberEnd() ? json_i64(scale_it->value) : 0;
1093  SQLTypeInfo ti(type, !nullable);
1094  ti.set_precision(precision);
1095  ti.set_scale(scale);
1096  return ti;
1097 }
1098 
1099 std::vector<std::unique_ptr<const RexScalar>> parse_expr_array(
1100  const rapidjson::Value& arr,
1101  RelAlgDag& root_dag) {
1102  std::vector<std::unique_ptr<const RexScalar>> exprs;
1103  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1104  exprs.emplace_back(parse_scalar_expr(*it, root_dag));
1105  }
1106  return exprs;
1107 }
1108 
1110  if (name == "ROW_NUMBER") {
1112  }
1113  if (name == "RANK") {
1115  }
1116  if (name == "DENSE_RANK") {
1118  }
1119  if (name == "PERCENT_RANK") {
1121  }
1122  if (name == "CUME_DIST") {
1124  }
1125  if (name == "NTILE") {
1127  }
1128  if (name == "LAG") {
1130  }
1131  if (name == "LAG_IN_FRAME") {
1133  }
1134  if (name == "LEAD") {
1136  }
1137  if (name == "LEAD_IN_FRAME") {
1139  }
1140  if (name == "FIRST_VALUE") {
1142  }
1143  if (name == "LAST_VALUE") {
1145  }
1146  if (name == "NTH_VALUE") {
1148  }
1149  if (name == "NTH_VALUE_IN_FRAME") {
1151  }
1152  if (name == "FIRST_VALUE_IN_FRAME") {
1154  }
1155  if (name == "LAST_VALUE_IN_FRAME") {
1157  }
1158  if (name == "AVG") {
1160  }
1161  if (name == "MIN") {
1163  }
1164  if (name == "MAX") {
1166  }
1167  if (name == "SUM") {
1169  }
1170  if (name == "COUNT") {
1172  }
1173  if (name == "COUNT_IF") {
1175  }
1176  if (name == "SUM_IF") {
1178  }
1179  if (name == "$SUM0") {
1181  }
1182  if (name == "FORWARD_FILL") {
1184  }
1185  if (name == "BACKWARD_FILL") {
1187  }
1188  if (name == "CONDITIONAL_CHANGE_EVENT") {
1190  }
1191  throw std::runtime_error("Unsupported window function: " + name);
1192 }
1193 
1194 std::vector<std::unique_ptr<const RexScalar>> parse_window_order_exprs(
1195  const rapidjson::Value& arr,
1196  RelAlgDag& root_dag) {
1197  std::vector<std::unique_ptr<const RexScalar>> exprs;
1198  for (auto it = arr.Begin(); it != arr.End(); ++it) {
1199  exprs.emplace_back(parse_scalar_expr(field(*it, "field"), root_dag));
1200  }
1201  return exprs;
1202 }
1203 
1204 SortDirection parse_sort_direction(const rapidjson::Value& collation) {
1205  return json_str(field(collation, "direction")) == std::string("DESCENDING")
1208 }
1209 
1210 NullSortedPosition parse_nulls_position(const rapidjson::Value& collation) {
1211  return json_str(field(collation, "nulls")) == std::string("FIRST")
1214 }
1215 
1216 std::vector<SortField> parse_window_order_collation(const rapidjson::Value& arr,
1217  RelAlgDag& root_dag) {
1218  std::vector<SortField> collation;
1219  size_t field_idx = 0;
1220  for (auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
1221  const auto sort_dir = parse_sort_direction(*it);
1222  const auto null_pos = parse_nulls_position(*it);
1223  collation.emplace_back(field_idx, sort_dir, null_pos);
1224  }
1225  return collation;
1226 }
1227 
1229  const rapidjson::Value& window_bound_obj,
1230  RelAlgDag& root_dag) {
1231  CHECK(window_bound_obj.IsObject());
1233  window_bound.unbounded = json_bool(field(window_bound_obj, "unbounded"));
1234  window_bound.preceding = json_bool(field(window_bound_obj, "preceding"));
1235  window_bound.following = json_bool(field(window_bound_obj, "following"));
1236  window_bound.is_current_row = json_bool(field(window_bound_obj, "is_current_row"));
1237  const auto& offset_field = field(window_bound_obj, "offset");
1238  if (offset_field.IsObject()) {
1239  window_bound.bound_expr = parse_scalar_expr(offset_field, root_dag);
1240  } else {
1241  CHECK(offset_field.IsNull());
1242  }
1243  window_bound.order_key = json_i64(field(window_bound_obj, "order_key"));
1244  return window_bound;
1245 }
1246 
1247 std::unique_ptr<const RexSubQuery> parse_subquery(const rapidjson::Value& expr,
1248  RelAlgDag& root_dag) {
1249  const auto& operands = field(expr, "operands");
1250  CHECK(operands.IsArray());
1251  CHECK_GE(operands.Size(), unsigned(0));
1252  const auto& subquery_ast = field(expr, "subquery");
1253 
1254  auto subquery_dag = RelAlgDagBuilder::buildDagForSubquery(root_dag, subquery_ast);
1255  const auto subquery_root_node = subquery_dag->getRootNodeShPtr();
1256  auto subquery = std::make_shared<RexSubQuery>(subquery_root_node);
1257  auto query_hint = subquery_dag->getQueryHint(subquery_dag->getRootNodeShPtr().get());
1258  root_dag.registerSubquery(subquery);
1259  const auto subquery_global_hint = subquery_dag->getGlobalHints();
1260  if (subquery_global_hint.isAnyQueryHintDelivered()) {
1261  // we need to propagate global query hint found in this subquery to its parent
1262  const auto new_global_hint = root_dag.getGlobalHints() || subquery_global_hint;
1263  root_dag.setGlobalQueryHints(new_global_hint);
1264  }
1265  const auto subquery_local_hint = subquery_dag->getQueryHint(subquery_root_node.get());
1266  if (subquery_local_hint) {
1267  // register local query hint of this subquery to its parent to correctly
1268  // enables them when executing this subquery
1269  root_dag.registerQueryHint(subquery_root_node.get(), *subquery_local_hint);
1270  }
1271  return subquery->deepCopy();
1272 }
1273 
1274 std::unique_ptr<RexOperator> parse_operator(const rapidjson::Value& expr,
1275  RelAlgDag& root_dag) {
1276  const auto op_name = json_str(field(expr, "op"));
1277  const bool is_quantifier =
1278  op_name == std::string("PG_ANY") || op_name == std::string("PG_ALL");
1279  const auto op = is_quantifier ? kFUNCTION : to_sql_op(op_name);
1280  const auto& operators_json_arr = field(expr, "operands");
1281  CHECK(operators_json_arr.IsArray());
1282  auto operands = parse_expr_array(operators_json_arr, root_dag);
1283  const auto type_it = expr.FindMember("type");
1284  CHECK(type_it != expr.MemberEnd());
1285  auto ti = parse_type(type_it->value);
1286  if (op == kIN && expr.HasMember("subquery")) {
1287  auto subquery = parse_subquery(expr, root_dag);
1288  operands.emplace_back(std::move(subquery));
1289  }
1290  if (expr.FindMember("partition_keys") != expr.MemberEnd()) {
1291  const auto& partition_keys_arr = field(expr, "partition_keys");
1292  auto partition_keys = parse_expr_array(partition_keys_arr, root_dag);
1293  const auto& order_keys_arr = field(expr, "order_keys");
1294  auto order_keys = parse_window_order_exprs(order_keys_arr, root_dag);
1295  const auto collation = parse_window_order_collation(order_keys_arr, root_dag);
1296  const auto kind = parse_window_function_kind(op_name);
1297  const auto lower_bound = parse_window_bound(field(expr, "lower_bound"), root_dag);
1298  const auto upper_bound = parse_window_bound(field(expr, "upper_bound"), root_dag);
1299  bool is_rows = json_bool(field(expr, "is_rows"));
1300  ti.set_notnull(false);
1301  return std::make_unique<RexWindowFunctionOperator>(kind,
1302  operands,
1303  partition_keys,
1304  order_keys,
1305  collation,
1306  lower_bound,
1307  upper_bound,
1308  is_rows,
1309  ti);
1310  }
1311  return std::unique_ptr<RexOperator>(op == kFUNCTION
1312  ? new RexFunctionOperator(op_name, operands, ti)
1313  : new RexOperator(op, operands, ti));
1314 }
1315 
1316 std::unique_ptr<RexCase> parse_case(const rapidjson::Value& expr, RelAlgDag& root_dag) {
1317  const auto& operands = field(expr, "operands");
1318  CHECK(operands.IsArray());
1319  CHECK_GE(operands.Size(), unsigned(2));
1320  std::unique_ptr<const RexScalar> else_expr;
1321  std::vector<
1322  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1323  expr_pair_list;
1324  for (auto operands_it = operands.Begin(); operands_it != operands.End();) {
1325  auto when_expr = parse_scalar_expr(*operands_it++, root_dag);
1326  if (operands_it == operands.End()) {
1327  else_expr = std::move(when_expr);
1328  break;
1329  }
1330  auto then_expr = parse_scalar_expr(*operands_it++, root_dag);
1331  expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1332  }
1333  return std::unique_ptr<RexCase>(new RexCase(expr_pair_list, else_expr));
1334 }
1335 
1336 std::vector<std::string> strings_from_json_array(
1337  const rapidjson::Value& json_str_arr) noexcept {
1338  CHECK(json_str_arr.IsArray());
1339  std::vector<std::string> fields;
1340  for (auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1341  ++json_str_arr_it) {
1342  CHECK(json_str_arr_it->IsString());
1343  fields.emplace_back(json_str_arr_it->GetString());
1344  }
1345  return fields;
1346 }
1347 
1348 std::vector<size_t> indices_from_json_array(
1349  const rapidjson::Value& json_idx_arr) noexcept {
1350  CHECK(json_idx_arr.IsArray());
1351  std::vector<size_t> indices;
1352  for (auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1353  ++json_idx_arr_it) {
1354  CHECK(json_idx_arr_it->IsInt());
1355  CHECK_GE(json_idx_arr_it->GetInt(), 0);
1356  indices.emplace_back(json_idx_arr_it->GetInt());
1357  }
1358  return indices;
1359 }
1360 
1361 std::unique_ptr<const RexAgg> parse_aggregate_expr(const rapidjson::Value& expr) {
1362  const auto agg_str = json_str(field(expr, "agg"));
1363  if (agg_str == "APPROX_QUANTILE") {
1364  LOG(INFO) << "APPROX_QUANTILE is deprecated. Please use APPROX_PERCENTILE instead.";
1365  }
1366  const auto agg = to_agg_kind(agg_str);
1367  const auto distinct = json_bool(field(expr, "distinct"));
1368  const auto agg_ti = parse_type(field(expr, "type"));
1369  const auto operands = indices_from_json_array(field(expr, "operands"));
1370  bool const allow_multiple_args =
1371  shared::is_any<kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kSUM_IF>(agg);
1372  if (operands.size() > 1 && (operands.size() != 2 || !allow_multiple_args)) {
1373  throw QueryNotSupported("Multiple arguments for aggregates aren't supported");
1374  }
1375  return std::unique_ptr<const RexAgg>(new RexAgg(agg, distinct, agg_ti, operands));
1376 }
1377 
1378 std::unique_ptr<const RexScalar> parse_scalar_expr(const rapidjson::Value& expr,
1379  RelAlgDag& root_dag) {
1380  CHECK(expr.IsObject());
1381  if (expr.IsObject() && expr.HasMember("input")) {
1382  return std::unique_ptr<const RexScalar>(parse_abstract_input(expr));
1383  }
1384  if (expr.IsObject() && expr.HasMember("literal")) {
1385  return std::unique_ptr<const RexScalar>(parse_literal(expr));
1386  }
1387  if (expr.IsObject() && expr.HasMember("op")) {
1388  const auto op_str = json_str(field(expr, "op"));
1389  if (op_str == std::string("CASE")) {
1390  return std::unique_ptr<const RexScalar>(parse_case(expr, root_dag));
1391  }
1392  if (op_str == std::string("$SCALAR_QUERY")) {
1393  return std::unique_ptr<const RexScalar>(parse_subquery(expr, root_dag));
1394  }
1395  return std::unique_ptr<const RexScalar>(parse_operator(expr, root_dag));
1396  }
1397  std::string const node_str = json_node_to_string(expr);
1398  if (node_str.find("\"correl\":\"$cor") != std::string::npos) {
1399  throw QueryNotSupported("Unable to decorrelate one of the correlated subqueries.");
1400  }
1401  throw QueryNotSupported("Expression node " + node_str + " not supported");
1402 }
1403 
1404 JoinType to_join_type(const std::string& join_type_name) {
1405  if (join_type_name == "inner") {
1406  return JoinType::INNER;
1407  }
1408  if (join_type_name == "left") {
1409  return JoinType::LEFT;
1410  }
1411  if (join_type_name == "semi") {
1412  return JoinType::SEMI;
1413  }
1414  if (join_type_name == "anti") {
1415  return JoinType::ANTI;
1416  }
1417  throw QueryNotSupported("Join type (" + join_type_name + ") not supported");
1418 }
1419 
1420 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar*, const RANodeOutput&);
1421 
1422 std::unique_ptr<const RexOperator> disambiguate_operator(
1423  const RexOperator* rex_operator,
1424  const RANodeOutput& ra_output) noexcept {
1425  std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1426  for (size_t i = 0; i < rex_operator->size(); ++i) {
1427  auto operand = rex_operator->getOperand(i);
1428  if (dynamic_cast<const RexSubQuery*>(operand)) {
1429  disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1430  } else {
1431  disambiguated_operands.emplace_back(disambiguate_rex(operand, ra_output));
1432  }
1433  }
1434  const auto rex_window_function_operator =
1435  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
1436  if (rex_window_function_operator) {
1437  const auto& partition_keys = rex_window_function_operator->getPartitionKeys();
1438  std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1439  for (const auto& partition_key : partition_keys) {
1440  disambiguated_partition_keys.emplace_back(
1441  disambiguate_rex(partition_key.get(), ra_output));
1442  }
1443  std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1444  const auto& order_keys = rex_window_function_operator->getOrderKeys();
1445  for (const auto& order_key : order_keys) {
1446  disambiguated_order_keys.emplace_back(disambiguate_rex(order_key.get(), ra_output));
1447  }
1448  return rex_window_function_operator->disambiguatedOperands(
1449  disambiguated_operands,
1450  disambiguated_partition_keys,
1451  disambiguated_order_keys,
1452  rex_window_function_operator->getCollation());
1453  }
1454  return rex_operator->getDisambiguated(disambiguated_operands);
1455 }
1456 
1457 std::unique_ptr<const RexCase> disambiguate_case(const RexCase* rex_case,
1458  const RANodeOutput& ra_output) {
1459  std::vector<
1460  std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1461  disambiguated_expr_pair_list;
1462  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1463  auto disambiguated_when = disambiguate_rex(rex_case->getWhen(i), ra_output);
1464  auto disambiguated_then = disambiguate_rex(rex_case->getThen(i), ra_output);
1465  disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1466  std::move(disambiguated_then));
1467  }
1468  std::unique_ptr<const RexScalar> disambiguated_else{
1469  disambiguate_rex(rex_case->getElse(), ra_output)};
1470  return std::unique_ptr<const RexCase>(
1471  new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1472 }
1473 
1474 // The inputs used by scalar expressions are given as indices in the serialized
1475 // representation of the query. This is hard to navigate; make the relationship
1476 // explicit by creating RexInput expressions which hold a pointer to the source
1477 // relational algebra node and the index relative to the output of that node.
1478 std::unique_ptr<const RexScalar> disambiguate_rex(const RexScalar* rex_scalar,
1479  const RANodeOutput& ra_output) {
1480  const auto rex_abstract_input = dynamic_cast<const RexAbstractInput*>(rex_scalar);
1481  if (rex_abstract_input) {
1482  CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1483  return std::unique_ptr<const RexInput>(
1484  new RexInput(ra_output[rex_abstract_input->getIndex()]));
1485  }
1486  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
1487  if (rex_operator) {
1488  return disambiguate_operator(rex_operator, ra_output);
1489  }
1490  const auto rex_case = dynamic_cast<const RexCase*>(rex_scalar);
1491  if (rex_case) {
1492  return disambiguate_case(rex_case, ra_output);
1493  }
1494  if (auto const rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar)) {
1495  return rex_literal->deepCopy();
1496  } else if (auto const rex_subquery = dynamic_cast<const RexSubQuery*>(rex_scalar)) {
1497  return rex_subquery->deepCopy();
1498  } else {
1499  throw QueryNotSupported("Unable to disambiguate expression of type " +
1500  std::string(typeid(*rex_scalar).name()));
1501  }
1502 }
1503 
1504 void bind_project_to_input(RelProject* project_node, const RANodeOutput& input) noexcept {
1505  CHECK_EQ(size_t(1), project_node->inputCount());
1506  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1507  for (size_t i = 0; i < project_node->size(); ++i) {
1508  const auto projected_expr = project_node->getProjectAt(i);
1509  if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1510  disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1511  } else {
1512  disambiguated_exprs.emplace_back(disambiguate_rex(projected_expr, input));
1513  }
1514  }
1515  project_node->setExpressions(disambiguated_exprs);
1516 }
1517 
1519  const RANodeOutput& input) noexcept {
1520  std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1521  for (size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1522  const auto target_expr = table_func_node->getTableFuncInputAt(i);
1523  if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1524  disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1525  } else {
1526  disambiguated_exprs.emplace_back(disambiguate_rex(target_expr, input));
1527  }
1528  }
1529  table_func_node->setTableFuncInputs(std::move(disambiguated_exprs));
1530 }
1531 
1532 void bind_inputs(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1533  for (auto ra_node : nodes) {
1534  const auto filter_node = std::dynamic_pointer_cast<RelFilter>(ra_node);
1535  if (filter_node) {
1536  CHECK_EQ(size_t(1), filter_node->inputCount());
1537  auto disambiguated_condition = disambiguate_rex(
1538  filter_node->getCondition(), get_node_output(filter_node->getInput(0)));
1539  filter_node->setCondition(disambiguated_condition);
1540  continue;
1541  }
1542  const auto join_node = std::dynamic_pointer_cast<RelJoin>(ra_node);
1543  if (join_node) {
1544  CHECK_EQ(size_t(2), join_node->inputCount());
1545  auto disambiguated_condition =
1546  disambiguate_rex(join_node->getCondition(), get_node_output(join_node.get()));
1547  join_node->setCondition(disambiguated_condition);
1548  continue;
1549  }
1550  const auto project_node = std::dynamic_pointer_cast<RelProject>(ra_node);
1551  if (project_node) {
1552  bind_project_to_input(project_node.get(),
1553  get_node_output(project_node->getInput(0)));
1554  continue;
1555  }
1556  const auto table_func_node = std::dynamic_pointer_cast<RelTableFunction>(ra_node);
1557  if (table_func_node) {
1558  /*
1559  Collect all inputs from table function input (non-literal)
1560  arguments.
1561  */
1562  RANodeOutput input;
1563  input.reserve(table_func_node->inputCount());
1564  for (size_t i = 0; i < table_func_node->inputCount(); i++) {
1565  auto node_output = get_node_output(table_func_node->getInput(i));
1566  input.insert(input.end(), node_output.begin(), node_output.end());
1567  }
1568  bind_table_func_to_input(table_func_node.get(), input);
1569  }
1570  }
1571 }
1572 
1573 void handle_query_hint(const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1574  RelAlgDag& rel_alg_dag) noexcept {
1575  // query hint is delivered by the above three nodes
1576  // when a query block has top-sort node, a hint is registered to
1577  // one of the node which locates at the nearest from the sort node
1578  RegisteredQueryHint global_query_hint;
1579  for (auto node : nodes) {
1580  Hints* hint_delivered = nullptr;
1581  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1582  if (agg_node) {
1583  if (agg_node->hasDeliveredHint()) {
1584  hint_delivered = agg_node->getDeliveredHints();
1585  }
1586  }
1587  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
1588  if (project_node) {
1589  if (project_node->hasDeliveredHint()) {
1590  hint_delivered = project_node->getDeliveredHints();
1591  }
1592  }
1593  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
1594  if (compound_node) {
1595  if (compound_node->hasDeliveredHint()) {
1596  hint_delivered = compound_node->getDeliveredHints();
1597  }
1598  }
1599  if (hint_delivered && !hint_delivered->empty()) {
1600  rel_alg_dag.registerQueryHints(node, hint_delivered, global_query_hint);
1601  }
1602  }
1603  // the current rel_alg_dag may contain global query hints from the subquery
1604  // so we combine the current global hint we collected with the original one together
1605  // to propagate global query hints correctly
1606  const auto existing_global_query_hints = rel_alg_dag.getGlobalHints();
1607  const auto new_global_query_hints = existing_global_query_hints || global_query_hint;
1608  rel_alg_dag.setGlobalQueryHints(new_global_query_hints);
1609 }
1610 
1611 void compute_node_hash(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
1612  // compute each rel node's hash value in advance to avoid inconsistency of their hash
1613  // values depending on the toHash's caller
1614  // specifically, we manipulate our logical query plan before retrieving query step
1615  // sequence but once we compute a hash value we cached it so there is no way to update
1616  // it after the plan has been changed starting from the top node, we compute the hash
1617  // value (top-down manner)
1618  std::for_each(
1619  nodes.rbegin(), nodes.rend(), [](const std::shared_ptr<RelAlgNode>& node) {
1620  auto node_hash = node->toHash();
1621  CHECK_NE(node_hash, static_cast<size_t>(0));
1622  });
1623 }
1624 
1625 void mark_nops(const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1626  for (auto node : nodes) {
1627  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
1628  if (!agg_node || agg_node->getAggExprsCount()) {
1629  continue;
1630  }
1631  CHECK_EQ(size_t(1), node->inputCount());
1632  const auto agg_input_node = dynamic_cast<const RelAggregate*>(node->getInput(0));
1633  if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1634  agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
1635  agg_node->markAsNop();
1636  }
1637  }
1638 }
1639 
1640 namespace {
1641 
1642 std::vector<const Rex*> reproject_targets(
1643  const RelProject* simple_project,
1644  const std::vector<const Rex*>& target_exprs) noexcept {
1645  std::vector<const Rex*> result;
1646  for (size_t i = 0; i < simple_project->size(); ++i) {
1647  const auto input_rex = dynamic_cast<const RexInput*>(simple_project->getProjectAt(i));
1648  CHECK(input_rex);
1649  CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1650  result.push_back(target_exprs[input_rex->getIndex()]);
1651  }
1652  return result;
1653 }
1654 
1661  public:
1663  const RelAlgNode* node_to_keep,
1664  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1665  : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
1666 
1667  // Reproject the RexInput from its current RA Node to the RA Node we intend to keep
1668  RetType visitInput(const RexInput* input) const final {
1669  if (input->getSourceNode() == node_to_keep_) {
1670  const auto index = input->getIndex();
1671  CHECK_LT(index, scalar_sources_.size());
1672  return visit(scalar_sources_[index].get());
1673  } else {
1674  return input->deepCopy();
1675  }
1676  }
1677 
1678  private:
1680  const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources_;
1681 };
1682 
1683 } // namespace
1684 
1686  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1687  const std::vector<size_t>& pattern,
1688  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
1689  query_hints) noexcept {
1690  CHECK_GE(pattern.size(), size_t(2));
1691  CHECK_LE(pattern.size(), size_t(4));
1692 
1693  std::unique_ptr<const RexScalar> filter_rex;
1694  std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1695  size_t groupby_count{0};
1696  std::vector<std::string> fields;
1697  std::vector<const RexAgg*> agg_exprs;
1698  std::vector<const Rex*> target_exprs;
1699  bool first_project{true};
1700  bool is_agg{false};
1701  RelAlgNode* last_node{nullptr};
1702 
1703  std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1704  size_t node_hash{0};
1705  unsigned node_id{0};
1706  bool hint_registered{false};
1707  RegisteredQueryHint registered_query_hint = RegisteredQueryHint::defaults();
1708  for (const auto node_idx : pattern) {
1709  const auto ra_node = nodes[node_idx];
1710  auto registered_query_hint_map_it = query_hints.find(ra_node->toHash());
1711  if (registered_query_hint_map_it != query_hints.end()) {
1712  auto& registered_query_hint_map = registered_query_hint_map_it->second;
1713  auto registered_query_hint_it = registered_query_hint_map.find(ra_node->getId());
1714  if (registered_query_hint_it != registered_query_hint_map.end()) {
1715  hint_registered = true;
1716  node_hash = registered_query_hint_map_it->first;
1717  node_id = registered_query_hint_it->first;
1718  registered_query_hint = registered_query_hint_it->second;
1719  }
1720  }
1721  const auto ra_filter = std::dynamic_pointer_cast<RelFilter>(ra_node);
1722  if (ra_filter) {
1723  CHECK(!filter_rex);
1724  filter_rex.reset(ra_filter->getAndReleaseCondition());
1725  CHECK(filter_rex);
1726  last_node = ra_node.get();
1727  continue;
1728  }
1729  const auto ra_project = std::dynamic_pointer_cast<RelProject>(ra_node);
1730  if (ra_project) {
1731  fields = ra_project->getFields();
1732  manipulation_target = ra_project;
1733 
1734  if (first_project) {
1735  CHECK_EQ(size_t(1), ra_project->inputCount());
1736  // Rebind the input of the project to the input of the filter itself
1737  // since we know that we'll evaluate the filter on the fly, with no
1738  // intermediate buffer.
1739  const auto filter_input = dynamic_cast<const RelFilter*>(ra_project->getInput(0));
1740  if (filter_input) {
1741  CHECK_EQ(size_t(1), filter_input->inputCount());
1742  bind_project_to_input(ra_project.get(),
1743  get_node_output(filter_input->getInput(0)));
1744  }
1745  scalar_sources = ra_project->getExpressionsAndRelease();
1746  for (const auto& scalar_expr : scalar_sources) {
1747  target_exprs.push_back(scalar_expr.get());
1748  }
1749  first_project = false;
1750  } else {
1751  if (ra_project->isSimple()) {
1752  target_exprs = reproject_targets(ra_project.get(), target_exprs);
1753  } else {
1754  // TODO(adb): This is essentially a more general case of simple project, we
1755  // could likely merge the two
1756  std::vector<const Rex*> result;
1757  RexInputReplacementVisitor visitor(last_node, scalar_sources);
1758  for (size_t i = 0; i < ra_project->size(); ++i) {
1759  const auto rex = ra_project->getProjectAt(i);
1760  if (auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1761  const auto index = rex_input->getIndex();
1762  CHECK_LT(index, target_exprs.size());
1763  result.push_back(target_exprs[index]);
1764  } else {
1765  scalar_sources.push_back(visitor.visit(rex));
1766  result.push_back(scalar_sources.back().get());
1767  }
1768  }
1769  target_exprs = result;
1770  }
1771  }
1772  last_node = ra_node.get();
1773  continue;
1774  }
1775  const auto ra_aggregate = std::dynamic_pointer_cast<RelAggregate>(ra_node);
1776  if (ra_aggregate) {
1777  is_agg = true;
1778  fields = ra_aggregate->getFields();
1779  agg_exprs = ra_aggregate->getAggregatesAndRelease();
1780  groupby_count = ra_aggregate->getGroupByCount();
1781  decltype(target_exprs){}.swap(target_exprs);
1782  CHECK_LE(groupby_count, scalar_sources.size());
1783  for (size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
1784  const auto rex_ref = new RexRef(group_idx + 1);
1785  target_exprs.push_back(rex_ref);
1786  scalar_sources.emplace_back(rex_ref);
1787  }
1788  for (const auto rex_agg : agg_exprs) {
1789  target_exprs.push_back(rex_agg);
1790  }
1791  last_node = ra_node.get();
1792  continue;
1793  }
1794  }
1795 
1796  auto compound_node =
1797  std::make_shared<RelCompound>(filter_rex,
1798  target_exprs,
1799  groupby_count,
1800  agg_exprs,
1801  fields,
1802  scalar_sources,
1803  is_agg,
1804  manipulation_target->isUpdateViaSelect(),
1805  manipulation_target->isDeleteViaSelect(),
1806  manipulation_target->isVarlenUpdateRequired(),
1807  manipulation_target->getModifiedTableDescriptor(),
1808  manipulation_target->getTargetColumns(),
1809  manipulation_target->getModifiedTableCatalog());
1810  auto old_node = nodes[pattern.back()];
1811  nodes[pattern.back()] = compound_node;
1812  auto first_node = nodes[pattern.front()];
1813  CHECK_EQ(size_t(1), first_node->inputCount());
1814  compound_node->addManagedInput(first_node->getAndOwnInput(0));
1815  if (hint_registered) {
1816  // pass the registered hint from the origin node to newly created compound node
1817  // where it is coalesced
1818  auto registered_query_hint_map_it = query_hints.find(node_hash);
1819  CHECK(registered_query_hint_map_it != query_hints.end());
1820  auto registered_query_hint_map = registered_query_hint_map_it->second;
1821  if (registered_query_hint_map.size() > 1) {
1822  registered_query_hint_map.erase(node_id);
1823  } else {
1824  CHECK_EQ(registered_query_hint_map.size(), static_cast<size_t>(1));
1825  query_hints.erase(node_hash);
1826  }
1827  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
1828  hint_map.emplace(compound_node->getId(), registered_query_hint);
1829  query_hints.emplace(compound_node->toHash(), hint_map);
1830  }
1831  for (size_t i = 0; i < pattern.size() - 1; ++i) {
1832  nodes[pattern[i]].reset();
1833  }
1834  for (auto node : nodes) {
1835  if (!node) {
1836  continue;
1837  }
1838  node->replaceInput(old_node, compound_node);
1839  }
1840 }
1841 
1842 class RANodeIterator : public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1843  using ElementType = std::shared_ptr<RelAlgNode>;
1844  using Super = std::vector<ElementType>::const_iterator;
1845  using Container = std::vector<ElementType>;
1846 
1847  public:
1848  enum class AdvancingMode { DUChain, InOrder };
1849 
1850  explicit RANodeIterator(const Container& nodes)
1851  : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1852  size_t non_zero_count = 0;
1853  for (const auto& node : nodes) {
1854  if (node) {
1855  ++non_zero_count;
1856  }
1857  }
1859  }()) {}
1860 
1861  explicit operator size_t() {
1862  return std::distance(owner_.begin(), *static_cast<Super*>(this));
1863  }
1864 
1865  RANodeIterator operator++() = delete;
1866 
1867  void advance(AdvancingMode mode) {
1868  Super& super = *this;
1869  switch (mode) {
1870  case AdvancingMode::DUChain: {
1871  size_t use_count = 0;
1872  Super only_use = owner_.end();
1873  for (Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1874  if (!*nodeIt) {
1875  continue;
1876  }
1877  for (size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1878  if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1879  ++use_count;
1880  if (1 == use_count) {
1881  only_use = nodeIt;
1882  } else {
1883  super = owner_.end();
1884  return;
1885  }
1886  }
1887  }
1888  }
1889  super = only_use;
1890  break;
1891  }
1892  case AdvancingMode::InOrder:
1893  for (size_t i = 0; i != owner_.size(); ++i) {
1894  if (!visited_.count(i)) {
1895  super = owner_.begin();
1896  std::advance(super, i);
1897  return;
1898  }
1899  }
1900  super = owner_.end();
1901  break;
1902  default:
1903  CHECK(false);
1904  }
1905  }
1906 
1907  bool allVisited() { return visited_.size() == nodeCount_; }
1908 
1910  visited_.insert(size_t(*this));
1911  Super& super = *this;
1912  return *super;
1913  }
1914 
1915  const ElementType* operator->() { return &(operator*()); }
1916 
1917  private:
1919  const size_t nodeCount_;
1920  std::unordered_set<size_t> visited_;
1921 };
1922 
1923 namespace {
1924 
1925 bool input_can_be_coalesced(const RelAlgNode* parent_node,
1926  const size_t index,
1927  const bool first_rex_is_input) {
1928  if (auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1929  if (index == 0 && agg_node->getGroupByCount() > 0) {
1930  return true;
1931  } else {
1932  // Is an aggregated target, only allow the project to be elided if the aggregate
1933  // target is simply passed through (i.e. if the top level expression attached to
1934  // the project node is a RexInput expression)
1935  return first_rex_is_input;
1936  }
1937  }
1938  return first_rex_is_input;
1939 }
1940 
1946 class CoalesceSecondaryProjectVisitor : public RexVisitor<bool> {
1947  public:
1948  bool visitInput(const RexInput* input) const final {
1949  // The top level expression node is checked before we apply the visitor. If we get
1950  // here, this input rex is a child of another rex node, and we handle the can be
1951  // coalesced check slightly differently
1952  return input_can_be_coalesced(input->getSourceNode(), input->getIndex(), false);
1953  }
1954 
1955  bool visitLiteral(const RexLiteral*) const final { return false; }
1956 
1957  bool visitSubQuery(const RexSubQuery*) const final { return false; }
1958 
1959  bool visitRef(const RexRef*) const final { return false; }
1960 
1961  protected:
1962  bool aggregateResult(const bool& aggregate, const bool& next_result) const final {
1963  return aggregate && next_result;
1964  }
1965 
1966  bool defaultResult() const final { return true; }
1967 };
1968 
1969 // Detect the window function SUM pattern: CASE WHEN COUNT() > 0 THEN SUM ELSE 0
1971  const auto case_operator = dynamic_cast<const RexCase*>(rex);
1972  if (case_operator && case_operator->branchCount() == 1) {
1973  const auto then_window =
1974  dynamic_cast<const RexWindowFunctionOperator*>(case_operator->getThen(0));
1975  if (then_window && then_window->getKind() == SqlWindowFunctionKind::SUM_INTERNAL) {
1976  return true;
1977  }
1978  }
1979  return false;
1980 }
1981 
1982 // Check for Window Function AVG:
1983 // (CASE WHEN count > 0 THEN sum ELSE 0) / COUNT
1985  const RexOperator* divide_operator = dynamic_cast<const RexOperator*>(rex);
1986  if (divide_operator && divide_operator->getOperator() == kDIVIDE) {
1987  CHECK_EQ(divide_operator->size(), size_t(2));
1988  const auto case_operator =
1989  dynamic_cast<const RexCase*>(divide_operator->getOperand(0));
1990  const auto second_window =
1991  dynamic_cast<const RexWindowFunctionOperator*>(divide_operator->getOperand(1));
1992  if (case_operator && second_window &&
1993  second_window->getKind() == SqlWindowFunctionKind::COUNT) {
1994  if (is_window_function_sum(case_operator)) {
1995  return true;
1996  }
1997  }
1998  }
1999  return false;
2000 }
2001 
2002 // Detect both window function operators and window function operators embedded in case
2003 // statements (for null handling)
2005  return dynamic_cast<const RexWindowFunctionOperator*>(rex) ||
2007 }
2008 
2009 } // namespace
2010 
2012  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2013  const std::vector<const RelAlgNode*>& left_deep_joins,
2014  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2015  query_hints) {
2016  enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
2017  std::vector<size_t> crt_pattern;
2018  CoalesceState crt_state{CoalesceState::Initial};
2019 
2020  auto reset_state = [&crt_pattern, &crt_state]() {
2021  crt_state = CoalesceState::Initial;
2022  std::vector<size_t>().swap(crt_pattern);
2023  };
2024 
2025  for (RANodeIterator nodeIt(nodes); !nodeIt.allVisited();) {
2026  const auto ra_node = nodeIt != nodes.end() ? *nodeIt : nullptr;
2027  switch (crt_state) {
2028  case CoalesceState::Initial: {
2029  if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
2030  std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
2031  left_deep_joins.end()) {
2032  crt_pattern.push_back(size_t(nodeIt));
2033  crt_state = CoalesceState::Filter;
2034  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2035  } else if (auto project_node =
2036  std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2037  if (project_node->hasWindowFunctionExpr()) {
2038  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2039  } else {
2040  crt_pattern.push_back(size_t(nodeIt));
2041  crt_state = CoalesceState::FirstProject;
2042  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2043  }
2044  } else {
2045  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2046  }
2047  break;
2048  }
2049  case CoalesceState::Filter: {
2050  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2051  // Given we now add preceding projects for all window functions following
2052  // RelFilter nodes, the following should never occur
2053  CHECK(!project_node->hasWindowFunctionExpr());
2054  crt_pattern.push_back(size_t(nodeIt));
2055  crt_state = CoalesceState::FirstProject;
2056  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2057  } else {
2058  reset_state();
2059  }
2060  break;
2061  }
2062  case CoalesceState::FirstProject: {
2063  if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
2064  crt_pattern.push_back(size_t(nodeIt));
2065  crt_state = CoalesceState::Aggregate;
2066  nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2067  } else {
2068  if (crt_pattern.size() >= 2) {
2069  create_compound(nodes, crt_pattern, query_hints);
2070  }
2071  reset_state();
2072  }
2073  break;
2074  }
2075  case CoalesceState::Aggregate: {
2076  if (auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2077  if (!project_node->hasWindowFunctionExpr()) {
2078  // TODO(adb): overloading the simple project terminology again here
2079  bool is_simple_project{true};
2080  for (size_t i = 0; i < project_node->size(); i++) {
2081  const auto scalar_rex = project_node->getProjectAt(i);
2082  // If the top level scalar rex is an input node, we can bypass the visitor
2083  if (auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
2085  input_rex->getSourceNode(), input_rex->getIndex(), true)) {
2086  is_simple_project = false;
2087  break;
2088  }
2089  continue;
2090  }
2091  CoalesceSecondaryProjectVisitor visitor;
2092  if (!visitor.visit(project_node->getProjectAt(i))) {
2093  is_simple_project = false;
2094  break;
2095  }
2096  }
2097  if (is_simple_project) {
2098  crt_pattern.push_back(size_t(nodeIt));
2099  nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2100  }
2101  }
2102  }
2103  CHECK_GE(crt_pattern.size(), size_t(2));
2104  create_compound(nodes, crt_pattern, query_hints);
2105  reset_state();
2106  break;
2107  }
2108  default:
2109  CHECK(false);
2110  }
2111  }
2112  if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
2113  if (crt_pattern.size() >= 2) {
2114  create_compound(nodes, crt_pattern, query_hints);
2115  }
2116  CHECK(!crt_pattern.empty());
2117  }
2118 }
2119 
2120 namespace {
2122  RelAlgNode const* node,
2123  std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs,
2124  std::vector<std::string>& fields) {
2125  for (size_t i = 0; i < node->size(); i++) {
2126  auto new_rex_input = std::make_unique<RexInput>(node, i);
2127  scalar_exprs.emplace_back(std::move(new_rex_input));
2128  fields.emplace_back("");
2129  }
2130 }
2131 } // namespace
2132 
2133 // Handle a query plan pattern "Aggregate-Join" which is not supported b/c Join node
2134 // (i.e., RelLeftDeepJoin) has multiple inputs (at least two if it has a single binary
2135 // join) We expect that the Aggregate node has a single input node, so we inject a new
2136 // Project node to satisfy the condition After the transformation, we have
2137 // "Aggregate-Project-Join" and can continue the execution w/o any crash or exception
2139  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2140  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2141  query_hints) {
2142  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2143  bool replace_nodes = false;
2144  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2145  auto node = *node_itr;
2146  if (auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node)) {
2147  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2148  std::vector<std::string> fields;
2149  std::shared_ptr<RelProject> new_project;
2150  CHECK_EQ(agg_node->getInputs().size(), size_t(1));
2151  CHECK_NE(*node_itr, *node_list.begin());
2152  const auto prev_node = *std::prev(node_itr);
2153  CHECK(prev_node);
2154  auto const input_node_ptr = agg_node->getAndOwnInput(0);
2155  if (auto join_node =
2156  std::dynamic_pointer_cast<RelLeftDeepInnerJoin const>(input_node_ptr)) {
2157  for (auto const* join_input_node : join_node->getInputs()) {
2158  create_rex_input_for_new_project_node(join_input_node, scalar_exprs, fields);
2159  }
2160  if (!scalar_exprs.empty()) {
2161  replace_nodes = true;
2162  new_project = std::make_shared<RelProject>(scalar_exprs, fields, join_node);
2163  agg_node->replaceInput(join_node, new_project);
2164  node_list.insert(node_itr, new_project);
2165  }
2166  }
2167  }
2168  }
2169  if (replace_nodes) {
2170  nodes.assign(node_list.begin(), node_list.end());
2171  }
2172 }
2173 
2174 void eliminate_redundant_projection(std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
2175  for (auto& node : nodes) {
2176  if (auto* proj_node = dynamic_cast<RelProject*>(node.get())) {
2177  if (proj_node->isSimple()) {
2178  if (auto child_proj_node =
2179  dynamic_cast<RelProject const*>(proj_node->getInput(0))) {
2180  std::vector<std::unique_ptr<RexScalar const>> scalar_exprs;
2181  RexDeepCopyVisitor copier;
2182  for (size_t i = 0; i < proj_node->size(); i++) {
2183  auto rex_abs_input =
2184  dynamic_cast<RexAbstractInput const*>(proj_node->getProjectAt(i));
2185  scalar_exprs.push_back(
2186  copier.visit(child_proj_node->getProjectAt(rex_abs_input->getIndex())));
2187  }
2188  CHECK_EQ(scalar_exprs.size(), proj_node->getFields().size());
2189  proj_node->setExpressions(scalar_exprs);
2190  proj_node->replaceInput(proj_node->getAndOwnInput(0),
2191  child_proj_node->getAndOwnInput(0));
2192  }
2193  }
2194  }
2195  }
2196 }
2197 
2198 class WindowFunctionCollector : public RexVisitor<void*> {
2199  public:
2201  std::unordered_map<size_t, const RexScalar*>& collected_window_func,
2202  bool only_add_window_expr)
2203  : collected_window_func_(collected_window_func)
2204  , only_add_window_expr_(only_add_window_expr) {}
2205 
2206  protected:
2207  // Detect embedded window function expressions in operators
2208  void* visitOperator(const RexOperator* rex_operator) const final {
2209  if (is_window_function_operator(rex_operator)) {
2210  tryAddWindowExpr(rex_operator);
2211  }
2212  const size_t operand_count = rex_operator->size();
2213  for (size_t i = 0; i < operand_count; ++i) {
2214  const auto operand = rex_operator->getOperand(i);
2215  if (is_window_function_operator(operand)) {
2216  // Handle both RexWindowFunctionOperators and window functions built up from
2217  // multiple RexScalar objects (e.g. AVG)
2218  tryAddWindowExpr(operand);
2219  } else {
2220  visit(operand);
2221  }
2222  }
2223  return defaultResult();
2224  }
2225 
2226  // Detect embedded window function expressions in case statements. Note that this may
2227  // manifest as a nested case statement inside a top level case statement, as some
2228  // window functions (sum, avg) are represented as a case statement. Use the
2229  // is_window_function_operator helper to detect complete window function expressions.
2230  void* visitCase(const RexCase* rex_case) const final {
2231  if (is_window_function_operator(rex_case)) {
2232  tryAddWindowExpr(rex_case);
2233  if (!only_add_window_expr_) {
2234  return nullptr;
2235  }
2236  }
2237 
2238  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2239  const auto when = rex_case->getWhen(i);
2240  if (is_window_function_operator(when)) {
2241  tryAddWindowExpr(when);
2242  } else {
2243  visit(when);
2244  }
2245  const auto then = rex_case->getThen(i);
2246  if (is_window_function_operator(then)) {
2247  tryAddWindowExpr(then);
2248  } else {
2249  visit(then);
2250  }
2251  }
2252  if (rex_case->getElse()) {
2253  auto else_expr = rex_case->getElse();
2254  if (is_window_function_operator(else_expr)) {
2255  tryAddWindowExpr(else_expr);
2256  } else {
2257  visit(else_expr);
2258  }
2259  }
2260  return defaultResult();
2261  }
2262 
2263  void tryAddWindowExpr(RexScalar const* expr) const {
2264  if (!only_add_window_expr_) {
2265  collected_window_func_.emplace(expr->toHash(), expr);
2266  } else {
2267  if (auto window_expr = dynamic_cast<RexWindowFunctionOperator const*>(expr)) {
2268  collected_window_func_.emplace(window_expr->toHash(), window_expr);
2269  }
2270  }
2271  }
2272 
2273  void* defaultResult() const final { return nullptr; }
2274 
2275  private:
2276  std::unordered_map<size_t, const RexScalar*>& collected_window_func_;
2278 };
2279 
2281  public:
2283  std::unordered_set<size_t>& collected_window_func_hash,
2284  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func,
2285  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map,
2286  RelProject* new_project,
2287  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2288  new_rex_input_from_child_node)
2289  : collected_window_func_hash_(collected_window_func_hash)
2290  , new_rex_input_for_window_func_(new_rex_input_for_window_func)
2291  , window_func_to_new_rex_input_idx_map_(window_func_to_new_rex_input_idx_map)
2292  , new_project_(new_project)
2293  , new_rex_input_from_child_node_(new_rex_input_from_child_node) {
2294  CHECK_EQ(collected_window_func_hash_.size(),
2295  window_func_to_new_rex_input_idx_map_.size());
2296  for (auto hash : collected_window_func_hash_) {
2297  auto rex_it = window_func_to_new_rex_input_idx_map_.find(hash);
2298  CHECK(rex_it != window_func_to_new_rex_input_idx_map_.end());
2299  CHECK_LT(rex_it->second, new_rex_input_for_window_func_.size());
2300  }
2301  CHECK(new_project_);
2302  }
2303 
2304  protected:
2305  RetType visitInput(const RexInput* rex_input) const final {
2306  if (rex_input->getSourceNode() != new_project_) {
2307  const auto cur_index = rex_input->getIndex();
2308  auto cur_source_node = rex_input->getSourceNode();
2309  std::string field_name = "";
2310  if (auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
2311  field_name = cur_project_node->getFieldName(cur_index);
2312  }
2313  auto rex_input_hash = rex_input->toHash();
2314  auto rex_input_it = new_rex_input_from_child_node_.find(rex_input_hash);
2315  if (rex_input_it == new_rex_input_from_child_node_.end()) {
2316  auto new_rex_input =
2317  std::make_unique<RexInput>(new_project_, new_project_->size());
2318  new_project_->appendInput(field_name, rex_input->deepCopy());
2319  new_rex_input_from_child_node_.emplace(rex_input_hash, new_rex_input->deepCopy());
2320  return new_rex_input;
2321  } else {
2322  return rex_input_it->second->deepCopy();
2323  }
2324  } else {
2325  return rex_input->deepCopy();
2326  }
2327  }
2328 
2329  RetType visitOperator(const RexOperator* rex_operator) const final {
2330  auto new_rex_idx = is_collected_window_function(rex_operator->toHash());
2331  if (new_rex_idx) {
2332  return get_new_rex_input(*new_rex_idx);
2333  }
2334 
2335  const auto rex_window_function_operator =
2336  dynamic_cast<const RexWindowFunctionOperator*>(rex_operator);
2337  if (rex_window_function_operator) {
2338  // Deep copy the embedded window function operator
2339  return visitWindowFunctionOperator(rex_window_function_operator);
2340  }
2341 
2342  const size_t operand_count = rex_operator->size();
2343  std::vector<RetType> new_opnds;
2344  for (size_t i = 0; i < operand_count; ++i) {
2345  const auto operand = rex_operator->getOperand(i);
2346  auto new_rex_idx_for_operand = is_collected_window_function(operand->toHash());
2347  if (new_rex_idx_for_operand) {
2348  new_opnds.push_back(get_new_rex_input(*new_rex_idx_for_operand));
2349  } else {
2350  new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
2351  }
2352  }
2353  return rex_operator->getDisambiguated(new_opnds);
2354  }
2355 
2356  RetType visitCase(const RexCase* rex_case) const final {
2357  auto new_rex_idx = is_collected_window_function(rex_case->toHash());
2358  if (new_rex_idx) {
2359  return get_new_rex_input(*new_rex_idx);
2360  }
2361 
2362  std::vector<std::pair<RetType, RetType>> new_pair_list;
2363  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
2364  auto when_operand = rex_case->getWhen(i);
2365  auto new_rex_idx_for_when_operand =
2366  is_collected_window_function(when_operand->toHash());
2367 
2368  auto then_operand = rex_case->getThen(i);
2369  auto new_rex_idx_for_then_operand =
2370  is_collected_window_function(then_operand->toHash());
2371 
2372  new_pair_list.emplace_back(
2373  new_rex_idx_for_when_operand ? get_new_rex_input(*new_rex_idx_for_when_operand)
2374  : visit(when_operand),
2375  new_rex_idx_for_then_operand ? get_new_rex_input(*new_rex_idx_for_then_operand)
2376  : visit(then_operand));
2377  }
2378  auto new_rex_idx_for_else_operand =
2379  is_collected_window_function(rex_case->getElse()->toHash());
2380  auto new_else = new_rex_idx_for_else_operand
2381  ? get_new_rex_input(*new_rex_idx_for_else_operand)
2382  : visit(rex_case->getElse());
2383  return std::make_unique<RexCase>(new_pair_list, new_else);
2384  }
2385 
2386  private:
2387  std::optional<size_t> is_collected_window_function(size_t rex_hash) const {
2388  auto rex_it = window_func_to_new_rex_input_idx_map_.find(rex_hash);
2389  if (rex_it != window_func_to_new_rex_input_idx_map_.end()) {
2390  return rex_it->second;
2391  }
2392  return std::nullopt;
2393  }
2394 
2395  std::unique_ptr<const RexScalar> get_new_rex_input(size_t rex_idx) const {
2396  CHECK_GE(rex_idx, 0UL);
2397  CHECK_LT(rex_idx, new_rex_input_for_window_func_.size());
2398  auto& new_rex_input = new_rex_input_for_window_func_.at(rex_idx);
2399  CHECK(new_rex_input);
2400  auto copied_rex_input = copier_.visit(new_rex_input.get());
2401  return copied_rex_input;
2402  }
2403 
2404  std::unordered_set<size_t>& collected_window_func_hash_;
2405  // we should have new rex_input for each window function collected
2406  std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func_;
2407  // an index to get a new rex_input for the collected window function
2408  std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map_;
2410  std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
2413 };
2414 
2416  std::shared_ptr<RelProject> prev_node,
2417  std::shared_ptr<RelProject> new_node,
2418  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2419  query_hints) {
2420  auto delivered_hints = prev_node->getDeliveredHints();
2421  bool needs_propagate_hints = !delivered_hints->empty();
2422  if (needs_propagate_hints) {
2423  for (auto& kv : *delivered_hints) {
2424  new_node->addHint(kv.second);
2425  }
2426  auto prev_it = query_hints.find(prev_node->toHash());
2427  // query hint for the prev projection node should be registered
2428  CHECK(prev_it != query_hints.end());
2429  auto prev_hint_it = prev_it->second.find(prev_node->getId());
2430  CHECK(prev_hint_it != prev_it->second.end());
2431  std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
2432  hint_map.emplace(new_node->getId(), prev_hint_it->second);
2433  query_hints.emplace(new_node->toHash(), hint_map);
2434  }
2435 }
2436 
2460  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2461  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2462  query_hints) {
2463  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2464  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2465  const auto node = *node_itr;
2466  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2467  if (!window_func_project_node) {
2468  continue;
2469  }
2470 
2471  const auto prev_node_itr = std::prev(node_itr);
2472  const auto prev_node = *prev_node_itr;
2473  CHECK(prev_node);
2474 
2475  // map scalar expression index in the project node to window function ptr
2476  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2477  WindowFunctionCollector collector(collected_window_func, false);
2478  // Iterate the target exprs of the project node and check for window function
2479  // expressions. If an embedded expression exists, collect it
2480  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2481  const auto scalar_rex = window_func_project_node->getProjectAt(i);
2482  if (is_window_function_operator(scalar_rex)) {
2483  // top level window function exprs are fine
2484  continue;
2485  }
2486  collector.visit(scalar_rex);
2487  }
2488 
2489  if (!collected_window_func.empty()) {
2490  // we have a nested window function expression
2491  std::unordered_set<size_t> collected_window_func_hash;
2492  // the current window function needs a set of new rex input which references
2493  // expressions in the newly introduced projection node
2494  std::vector<std::unique_ptr<const RexScalar>> new_rex_input_for_window_func;
2495  // a target projection expression of the newly created projection node
2496  std::vector<std::unique_ptr<const RexScalar>> new_scalar_expr_for_window_project;
2497  // a map between nested window function (hash val) and
2498  // its rex index stored in the `new_rex_input_for_window_func`
2499  std::unordered_map<size_t, size_t> window_func_to_new_rex_input_idx_map;
2500  // a map between RexInput of the current window function projection node (hash
2501  // val) and its corresponding new RexInput which is pushed down to the new
2502  // projection node
2503  std::unordered_map<size_t, std::unique_ptr<const RexInput>>
2504  new_rex_input_from_child_node;
2505  RexDeepCopyVisitor copier;
2506 
2507  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2508  std::vector<std::string> dummy_fields;
2509  std::vector<std::string> new_project_field_names;
2510  // create a new project node, it will contain window function expressions
2511  auto new_project =
2512  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2513  // insert this new project node between the current window project node and its
2514  // child node
2515  node_list.insert(node_itr, new_project);
2516 
2517  // retrieve various information to replace expressions in the current window
2518  // function project node w/ considering scalar expressions in the new project node
2519  std::for_each(collected_window_func.begin(),
2520  collected_window_func.end(),
2521  [&new_project_field_names,
2522  &collected_window_func_hash,
2523  &new_rex_input_for_window_func,
2524  &new_scalar_expr_for_window_project,
2525  &copier,
2526  &new_project,
2527  &window_func_to_new_rex_input_idx_map](const auto& kv) {
2528  // compute window function expr's hash, and create a new rex_input
2529  // for it
2530  collected_window_func_hash.insert(kv.first);
2531 
2532  // map an old expression in the window function project node
2533  // to an index of the corresponding new RexInput
2534  const auto rex_idx = new_rex_input_for_window_func.size();
2535  window_func_to_new_rex_input_idx_map.emplace(kv.first, rex_idx);
2536 
2537  // create a new RexInput and make it as one of new expression of
2538  // the newly created project node
2539  new_rex_input_for_window_func.emplace_back(
2540  std::make_unique<const RexInput>(new_project.get(), rex_idx));
2541  new_scalar_expr_for_window_project.push_back(
2542  std::move(copier.visit(kv.second)));
2543  new_project_field_names.emplace_back("");
2544  });
2545  new_project->setExpressions(new_scalar_expr_for_window_project);
2546  new_project->setFields(std::move(new_project_field_names));
2547 
2548  auto window_func_scalar_exprs =
2549  window_func_project_node->getExpressionsAndRelease();
2550  RexWindowFuncReplacementVisitor replacer(collected_window_func_hash,
2551  new_rex_input_for_window_func,
2552  window_func_to_new_rex_input_idx_map,
2553  new_project.get(),
2554  new_rex_input_from_child_node);
2555  size_t rex_idx = 0;
2556  for (auto& scalar_expr : window_func_scalar_exprs) {
2557  // try to replace the old expressions in the window function project node
2558  // with expressions of the newly created project node
2559  auto new_parent_rex = replacer.visit(scalar_expr.get());
2560  window_func_scalar_exprs[rex_idx] = std::move(new_parent_rex);
2561  rex_idx++;
2562  }
2563  // Update the previous window project node
2564  window_func_project_node->setExpressions(window_func_scalar_exprs);
2565  window_func_project_node->replaceInput(prev_node, new_project);
2566  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2567  new_project->setPushedDownWindowExpr();
2568  }
2569  }
2570  nodes.assign(node_list.begin(), node_list.end());
2571 }
2572 
2573 using RexInputSet = std::unordered_set<RexInput>;
2574 
2575 class RexInputCollector : public RexVisitor<RexInputSet> {
2576  public:
2577  RexInputSet visitInput(const RexInput* input) const override {
2578  return RexInputSet{*input};
2579  }
2580 
2581  protected:
2583  const RexInputSet& next_result) const override {
2584  auto result = aggregate;
2585  result.insert(next_result.begin(), next_result.end());
2586  return result;
2587  }
2588 };
2589 
2590 namespace {
2592  bool& has_generic_expr_in_window_func) {
2593  for (auto const& partition_key : window_expr->getPartitionKeys()) {
2594  auto partition_input = dynamic_cast<RexInput const*>(partition_key.get());
2595  if (!partition_input) {
2596  return true;
2597  }
2598  }
2599  for (auto const& order_key : window_expr->getOrderKeys()) {
2600  auto order_input = dynamic_cast<RexInput const*>(order_key.get());
2601  if (!order_input) {
2602  return true;
2603  }
2604  }
2605  for (size_t k = 0; k < window_expr->size(); k++) {
2606  if (!shared::dynamic_castable_to_any<RexInput, RexLiteral>(
2607  window_expr->getOperand(k))) {
2608  has_generic_expr_in_window_func = true;
2609  return true;
2610  }
2611  }
2612  return false;
2613 }
2614 
2615 std::pair<bool, bool> need_pushdown_generic_expr(
2616  RelProject const* window_func_project_node) {
2617  bool has_generic_expr_in_window_func = false;
2618  bool res = false;
2619  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2620  auto const projected_target = window_func_project_node->getProjectAt(i);
2621  if (auto const* window_expr =
2622  dynamic_cast<RexWindowFunctionOperator const*>(projected_target)) {
2623  res =
2624  find_generic_expr_in_window_func(window_expr, has_generic_expr_in_window_func);
2625  } else if (auto const* case_expr = dynamic_cast<RexCase const*>(projected_target)) {
2626  std::unordered_map<size_t, const RexScalar*> collected_window_func;
2627  WindowFunctionCollector collector(collected_window_func, true);
2628  collector.visit(case_expr);
2629  for (auto const& kv : collected_window_func) {
2630  auto const* candidate_window_expr =
2631  dynamic_cast<RexWindowFunctionOperator const*>(kv.second);
2632  CHECK(candidate_window_expr);
2633  res = find_generic_expr_in_window_func(candidate_window_expr,
2634  has_generic_expr_in_window_func);
2635  }
2636  }
2637  }
2638  return std::make_pair(has_generic_expr_in_window_func, res);
2639 }
2640 }; // namespace
2654  std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2655  const bool always_add_project_if_first_project_is_window_expr,
2656  std::unordered_map<size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2657  query_hints) {
2658  std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2659  size_t project_node_counter{0};
2660  for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2661  const auto node = *node_itr;
2662 
2663  auto window_func_project_node = std::dynamic_pointer_cast<RelProject>(node);
2664  if (!window_func_project_node) {
2665  continue;
2666  }
2667  project_node_counter++;
2668  if (!window_func_project_node->hasWindowFunctionExpr()) {
2669  // this projection node does not have a window function
2670  // expression -- skip to the next node in the DAG.
2671  continue;
2672  }
2673 
2674  const auto prev_node_itr = std::prev(node_itr);
2675  const auto prev_node = *prev_node_itr;
2676  CHECK(prev_node);
2677 
2678  auto filter_node = std::dynamic_pointer_cast<RelFilter>(prev_node);
2679  auto join_node = std::dynamic_pointer_cast<RelJoin>(prev_node);
2680 
2681  auto scan_node = std::dynamic_pointer_cast<RelScan>(prev_node);
2682  const bool has_multi_fragment_scan_input =
2683  (scan_node &&
2684  (scan_node->getNumShards() > 0 || scan_node->getNumFragments() > 1));
2685  auto const [has_generic_expr_in_window_func, needs_expr_pushdown] =
2686  need_pushdown_generic_expr(window_func_project_node.get());
2687 
2688  // We currently add a preceding project node in one of two conditions:
2689  // 1. always_add_project_if_first_project_is_window_expr = true, which
2690  // we currently only set for distributed, but could also be set to support
2691  // multi-frag window function inputs, either if we can detect that an input table
2692  // is multi-frag up front, or using a retry mechanism like we do for join filter
2693  // push down.
2694  // TODO(todd): Investigate a viable approach for the above.
2695  // 2. Regardless of #1, if the window function project node is preceded by a
2696  // filter node. This is required both for correctness and to avoid pulling
2697  // all source input columns into memory since non-coalesced filter node
2698  // inputs are currently not pruned or eliminated via dead column elimination.
2699  // Note that we expect any filter node followed by a project node to be coalesced
2700  // into a single compound node in RelAlgDag::coalesce_nodes, and that action
2701  // prunes unused inputs.
2702  // TODO(todd): Investigate whether the shotgun filter node issue affects other
2703  // query plans, i.e. filters before joins, and whether there is a more general
2704  // approach to solving this (will still need the preceding project node for
2705  // window functions preceded by filter nodes for correctness though)
2706  // 3. Similar to the above, when the window function project node is preceded
2707  // by a join node.
2708  // 4. when partition by / order by clauses have a general expression instead of
2709  // referencing column
2710 
2711  if (!((always_add_project_if_first_project_is_window_expr &&
2712  project_node_counter == 1) ||
2713  filter_node || join_node || has_multi_fragment_scan_input ||
2714  needs_expr_pushdown)) {
2715  continue;
2716  }
2717 
2718  if (needs_expr_pushdown || join_node) {
2719  // previous logic cannot cover join_node case well, so use the newly introduced
2720  // push-down expression logic to safely add pre_project node before processing
2721  // window function
2722  std::unordered_map<size_t, size_t> expr_offset_cache;
2723  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_new_project;
2724  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_window_project;
2725  std::vector<std::string> fields_for_window_project;
2726  std::vector<std::string> fields_for_new_project;
2727 
2728  // step 0. create new project node with an empty scalar expr to rebind target
2729  // exprs
2730  std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2731  std::vector<std::string> dummy_fields;
2732  auto new_project =
2733  std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2734 
2735  // step 1 - 2
2736  PushDownGenericExpressionInWindowFunction visitor(new_project,
2737  scalar_exprs_for_new_project,
2738  fields_for_new_project,
2739  expr_offset_cache);
2740  for (size_t i = 0; i < window_func_project_node->size(); ++i) {
2741  auto projected_target = window_func_project_node->getProjectAt(i);
2742  auto new_projection_target = visitor.visit(projected_target);
2743  scalar_exprs_for_window_project.emplace_back(
2744  std::move(new_projection_target.release()));
2745  }
2746  new_project->setExpressions(scalar_exprs_for_new_project);
2747  new_project->setFields(std::move(fields_for_new_project));
2748  bool has_groupby = false;
2749  auto aggregate = std::dynamic_pointer_cast<RelAggregate>(prev_node);
2750  if (aggregate) {
2751  has_groupby = aggregate->getGroupByCount() > 0;
2752  }
2753  // force rowwise output to prevent computing incorrect query result
2754  if (has_groupby && visitor.hasPartitionExpression()) {
2755  // we currently may compute incorrect result with columnar output when
2756  // 1) the window function has partition expression, and
2757  // 2) a parent node of the window function projection node has group by
2758  // expression todo (yoonmin) : relax this
2759  VLOG(1)
2760  << "Query output overridden to row-wise format due to presence of a window "
2761  "function with partition expression and group-by expression.";
2762  new_project->forceRowwiseOutput();
2763  } else if (has_generic_expr_in_window_func) {
2764  VLOG(1) << "Query output overridden to row-wise format due to presence of a "
2765  "generic expression as an input expression of the window "
2766  "function.";
2767  new_project->forceRowwiseOutput();
2768  } else if (visitor.hasCaseExprAsWindowOperand()) {
2769  VLOG(1)
2770  << "Query output overridden to row-wise format due to presence of a window "
2771  "function with a case statement as its operand.";
2772  new_project->forceRowwiseOutput();
2773  }
2774 
2775  // step 3. finalize
2776  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2777  new_project->setPushedDownWindowExpr();
2778  node_list.insert(node_itr, new_project);
2779  window_func_project_node->replaceInput(prev_node, new_project);
2780  window_func_project_node->setExpressions(scalar_exprs_for_window_project);
2781  } else {
2782  // try to push rex_inputs listed in the projection target exprs down to a new
2783  // project node
2784  RexInputSet inputs;
2785  RexInputCollector input_collector;
2786  for (size_t i = 0; i < window_func_project_node->size(); i++) {
2787  auto new_inputs =
2788  input_collector.visit(window_func_project_node->getProjectAt(i));
2789  inputs.insert(new_inputs.begin(), new_inputs.end());
2790  }
2791  std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2792  std::vector<std::string> fields;
2793  std::unordered_map<unsigned, unsigned> old_index_to_new_index;
2794 
2795  if (inputs.empty()) {
2796  // this case only happens when the input is multi-fragmented but has no expr(s)
2797  // to push down in the window_func_project_node's target exprs such as
2798  // SELECT SUM(1), ROW_NUMBER() over () FROM test where test is multi-fragmented
2799  // to handle this, we push an artificial literal down to the child project node
2800  // to make an input of window_func_project_node a single-fragmented table
2801  CHECK(has_multi_fragment_scan_input);
2802  CHECK(!needs_expr_pushdown);
2803  auto const bool_scale = std::numeric_limits<int32_t>::min();
2804  scalar_exprs.push_back(std::make_unique<RexLiteral>(
2805  true, SQLTypes::kBOOLEAN, SQLTypes::kBOOLEAN, bool_scale, 1, bool_scale, 1));
2806  old_index_to_new_index.insert(std::make_pair(0, 0));
2807  fields.emplace_back("");
2808  } else {
2809  // we have at least one rex_input to pushdown
2810  // let's make sure we have the correct # of exprs to pushdown
2811  std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2812  std::sort(
2813  sorted_inputs.begin(), sorted_inputs.end(), [](const auto& a, const auto& b) {
2814  return a.getIndex() < b.getIndex();
2815  });
2816 
2817  for (auto& input : sorted_inputs) {
2818  CHECK_EQ(input.getSourceNode(), prev_node.get());
2819  CHECK(old_index_to_new_index
2820  .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2821  .second);
2822  scalar_exprs.emplace_back(input.deepCopy());
2823  fields.emplace_back("");
2824  }
2825  }
2826  // modify window_func_project_node's target exprs to refer to push-downed expr in
2827  // the new projection node
2828  CHECK_GT(scalar_exprs.size(), 0UL);
2829  CHECK_EQ(scalar_exprs.size(), fields.size());
2830  auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2831  propagate_hints_to_new_project(window_func_project_node, new_project, query_hints);
2832  new_project->setPushedDownWindowExpr();
2833  node_list.insert(node_itr, new_project);
2834  window_func_project_node->replaceInput(
2835  prev_node, new_project, old_index_to_new_index);
2836  }
2837  }
2838  nodes.assign(node_list.begin(), node_list.end());
2839 }
2840 
2841 int64_t get_int_literal_field(const rapidjson::Value& obj,
2842  const char field[],
2843  const int64_t default_val) noexcept {
2844  const auto it = obj.FindMember(field);
2845  if (it == obj.MemberEnd()) {
2846  return default_val;
2847  }
2848  std::unique_ptr<RexLiteral> lit(parse_literal(it->value));
2849  CHECK_EQ(kDECIMAL, lit->getType());
2850  CHECK_EQ(unsigned(0), lit->getScale());
2851  CHECK_EQ(unsigned(0), lit->getTargetScale());
2852  return lit->getVal<int64_t>();
2853 }
2854 
2855 void check_empty_inputs_field(const rapidjson::Value& node) noexcept {
2856  const auto& inputs_json = field(node, "inputs");
2857  CHECK(inputs_json.IsArray() && !inputs_json.Size());
2858 }
2859 
2860 const std::pair<const Catalog_Namespace::Catalog*, const TableDescriptor*>
2861 getCatalogAndTableFromScanNode(const rapidjson::Value& scan_ra) {
2862  const auto& table_json = field(scan_ra, "table");
2863  CHECK(table_json.IsArray());
2864  CHECK_EQ(unsigned(2), table_json.Size());
2865  const auto cat =
2866  Catalog_Namespace::SysCatalog::instance().getCatalog(table_json[0].GetString());
2867  CHECK(cat);
2868  const auto td = cat->getMetadataForTable(table_json[1].GetString());
2869  CHECK(td);
2870  return {cat.get(), td};
2871 }
2872 
2873 std::vector<std::string> getFieldNamesFromScanNode(const rapidjson::Value& scan_ra) {
2874  const auto& fields_json = field(scan_ra, "fieldNames");
2875  return strings_from_json_array(fields_json);
2876 }
2877 
2878 } // namespace
2879 
2881  for (const auto& expr : scalar_exprs_) {
2882  if (is_window_function_operator(expr.get())) {
2883  return true;
2884  }
2885  }
2886  return false;
2887 }
2888 namespace details {
2889 
2891  public:
2893 
2894  std::vector<std::shared_ptr<RelAlgNode>> run(const rapidjson::Value& rels,
2895  RelAlgDag& root_dag) {
2896  for (auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2897  const auto& crt_node = *rels_it;
2898  const auto id = node_id(crt_node);
2899  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
2900  CHECK(crt_node.IsObject());
2901  std::shared_ptr<RelAlgNode> ra_node = nullptr;
2902  const auto rel_op = json_str(field(crt_node, "relOp"));
2903  if (rel_op == std::string("EnumerableTableScan") ||
2904  rel_op == std::string("LogicalTableScan")) {
2905  ra_node = dispatchTableScan(crt_node);
2906  } else if (rel_op == std::string("LogicalProject")) {
2907  ra_node = dispatchProject(crt_node, root_dag);
2908  } else if (rel_op == std::string("LogicalFilter")) {
2909  ra_node = dispatchFilter(crt_node, root_dag);
2910  } else if (rel_op == std::string("LogicalAggregate")) {
2911  ra_node = dispatchAggregate(crt_node);
2912  } else if (rel_op == std::string("LogicalJoin")) {
2913  ra_node = dispatchJoin(crt_node, root_dag);
2914  } else if (rel_op == std::string("LogicalSort")) {
2915  ra_node = dispatchSort(crt_node);
2916  } else if (rel_op == std::string("LogicalValues")) {
2917  ra_node = dispatchLogicalValues(crt_node);
2918  } else if (rel_op == std::string("LogicalTableModify")) {
2919  ra_node = dispatchModify(crt_node);
2920  } else if (rel_op == std::string("LogicalTableFunctionScan")) {
2921  ra_node = dispatchTableFunction(crt_node, root_dag);
2922  } else if (rel_op == std::string("LogicalUnion")) {
2923  ra_node = dispatchUnion(crt_node);
2924  } else {
2925  throw QueryNotSupported(std::string("Node ") + rel_op + " not supported yet");
2926  }
2927  nodes_.push_back(ra_node);
2928  }
2929 
2930  return std::move(nodes_);
2931  }
2932 
2933  private:
2934  std::shared_ptr<RelScan> dispatchTableScan(const rapidjson::Value& scan_ra) {
2935  check_empty_inputs_field(scan_ra);
2936  CHECK(scan_ra.IsObject());
2937  const auto [cat, td] = getCatalogAndTableFromScanNode(scan_ra);
2938  const auto field_names = getFieldNamesFromScanNode(scan_ra);
2939  if (scan_ra.HasMember("hints")) {
2940  auto scan_node = std::make_shared<RelScan>(td, field_names, *cat);
2941  getRelAlgHints(scan_ra, scan_node);
2942  return scan_node;
2943  }
2944  return std::make_shared<RelScan>(td, field_names, *cat);
2945  }
2946 
2947  std::shared_ptr<RelProject> dispatchProject(const rapidjson::Value& proj_ra,
2948  RelAlgDag& root_dag) {
2949  const auto inputs = getRelAlgInputs(proj_ra);
2950  CHECK_EQ(size_t(1), inputs.size());
2951  const auto& exprs_json = field(proj_ra, "exprs");
2952  CHECK(exprs_json.IsArray());
2953  std::vector<std::unique_ptr<const RexScalar>> exprs;
2954  for (auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2955  ++exprs_json_it) {
2956  exprs.emplace_back(parse_scalar_expr(*exprs_json_it, root_dag));
2957  }
2958  const auto& fields = field(proj_ra, "fields");
2959  if (proj_ra.HasMember("hints")) {
2960  auto project_node = std::make_shared<RelProject>(
2961  exprs, strings_from_json_array(fields), inputs.front());
2962  getRelAlgHints(proj_ra, project_node);
2963  return project_node;
2964  }
2965  return std::make_shared<RelProject>(
2966  exprs, strings_from_json_array(fields), inputs.front());
2967  }
2968 
2969  std::shared_ptr<RelFilter> dispatchFilter(const rapidjson::Value& filter_ra,
2970  RelAlgDag& root_dag) {
2971  const auto inputs = getRelAlgInputs(filter_ra);
2972  CHECK_EQ(size_t(1), inputs.size());
2973  const auto id = node_id(filter_ra);
2974  CHECK(id);
2975  auto condition = parse_scalar_expr(field(filter_ra, "condition"), root_dag);
2976  return std::make_shared<RelFilter>(condition, inputs.front());
2977  }
2978 
2979  std::shared_ptr<RelAggregate> dispatchAggregate(const rapidjson::Value& agg_ra) {
2980  const auto inputs = getRelAlgInputs(agg_ra);
2981  CHECK_EQ(size_t(1), inputs.size());
2982  const auto fields = strings_from_json_array(field(agg_ra, "fields"));
2983  const auto group = indices_from_json_array(field(agg_ra, "group"));
2984  for (size_t i = 0; i < group.size(); ++i) {
2985  CHECK_EQ(i, group[i]);
2986  }
2987  if (agg_ra.HasMember("groups") || agg_ra.HasMember("indicator")) {
2988  throw QueryNotSupported("GROUP BY extensions not supported");
2989  }
2990  const auto& aggs_json_arr = field(agg_ra, "aggs");
2991  CHECK(aggs_json_arr.IsArray());
2992  std::vector<std::unique_ptr<const RexAgg>> aggs;
2993  for (auto aggs_json_arr_it = aggs_json_arr.Begin();
2994  aggs_json_arr_it != aggs_json_arr.End();
2995  ++aggs_json_arr_it) {
2996  aggs.emplace_back(parse_aggregate_expr(*aggs_json_arr_it));
2997  }
2998  if (agg_ra.HasMember("hints")) {
2999  auto agg_node =
3000  std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
3001  getRelAlgHints(agg_ra, agg_node);
3002  return agg_node;
3003  }
3004  return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
3005  }
3006 
3007  std::shared_ptr<RelJoin> dispatchJoin(const rapidjson::Value& join_ra,
3008  RelAlgDag& root_dag) {
3009  const auto inputs = getRelAlgInputs(join_ra);
3010  CHECK_EQ(size_t(2), inputs.size());
3011  const auto join_type = to_join_type(json_str(field(join_ra, "joinType")));
3012  auto filter_rex = parse_scalar_expr(field(join_ra, "condition"), root_dag);
3013  if (join_ra.HasMember("hints")) {
3014  auto join_node =
3015  std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
3016  getRelAlgHints(join_ra, join_node);
3017  return join_node;
3018  }
3019  return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
3020  }
3021 
3022  std::shared_ptr<RelSort> dispatchSort(const rapidjson::Value& sort_ra) {
3023  const auto inputs = getRelAlgInputs(sort_ra);
3024  CHECK_EQ(size_t(1), inputs.size());
3025  std::vector<SortField> collation;
3026  const auto& collation_arr = field(sort_ra, "collation");
3027  CHECK(collation_arr.IsArray());
3028  for (auto collation_arr_it = collation_arr.Begin();
3029  collation_arr_it != collation_arr.End();
3030  ++collation_arr_it) {
3031  const size_t field_idx = json_i64(field(*collation_arr_it, "field"));
3032  const auto sort_dir = parse_sort_direction(*collation_arr_it);
3033  const auto null_pos = parse_nulls_position(*collation_arr_it);
3034  collation.emplace_back(field_idx, sort_dir, null_pos);
3035  }
3036  auto limit = get_int_literal_field(sort_ra, "fetch", -1);
3037  const auto offset = get_int_literal_field(sort_ra, "offset", 0);
3038  auto ret = std::make_shared<RelSort>(
3039  collation,
3040  limit >= 0 ? std::make_optional<size_t>(limit) : std::nullopt,
3041  offset,
3042  inputs.front());
3043  return ret;
3044  }
3045 
3046  std::shared_ptr<RelModify> dispatchModify(const rapidjson::Value& logical_modify_ra) {
3047  const auto inputs = getRelAlgInputs(logical_modify_ra);
3048  CHECK_EQ(size_t(1), inputs.size());
3049 
3050  const auto [cat, table_descriptor] =
3051  getCatalogAndTableFromScanNode(logical_modify_ra);
3052  if (table_descriptor->isView) {
3053  throw std::runtime_error("UPDATE of a view is unsupported.");
3054  }
3055 
3056  bool flattened = json_bool(field(logical_modify_ra, "flattened"));
3057  std::string op = json_str(field(logical_modify_ra, "operation"));
3058  RelModify::TargetColumnList target_column_list;
3059 
3060  if (op == "UPDATE") {
3061  const auto& update_columns = field(logical_modify_ra, "updateColumnList");
3062  CHECK(update_columns.IsArray());
3063 
3064  for (auto column_arr_it = update_columns.Begin();
3065  column_arr_it != update_columns.End();
3066  ++column_arr_it) {
3067  target_column_list.push_back(column_arr_it->GetString());
3068  }
3069  }
3070 
3071  auto modify_node = std::make_shared<RelModify>(
3072  *cat, table_descriptor, flattened, op, target_column_list, inputs[0]);
3073  switch (modify_node->getOperation()) {
3075  modify_node->applyDeleteModificationsToInputNode();
3076  break;
3077  }
3079  modify_node->applyUpdateModificationsToInputNode();
3080  break;
3081  }
3082  default:
3083  throw std::runtime_error("Unsupported RelModify operation: " +
3084  json_node_to_string(logical_modify_ra));
3085  }
3086 
3087  return modify_node;
3088  }
3089 
3090  std::shared_ptr<RelTableFunction> dispatchTableFunction(
3091  const rapidjson::Value& table_func_ra,
3092  RelAlgDag& root_dag) {
3093  const auto inputs = getRelAlgInputs(table_func_ra);
3094  const auto& invocation = field(table_func_ra, "invocation");
3095  CHECK(invocation.IsObject());
3096 
3097  const auto& operands = field(invocation, "operands");
3098  CHECK(operands.IsArray());
3099  CHECK_GE(operands.Size(), unsigned(0));
3100 
3101  std::vector<const Rex*> col_inputs;
3102  std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
3103  std::vector<std::string> fields;
3104 
3105  for (auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
3106  ++exprs_json_it) {
3107  const auto& expr_json = *exprs_json_it;
3108  CHECK(expr_json.IsObject());
3109  if (expr_json.HasMember("op")) {
3110  const auto op_str = json_str(field(expr_json, "op"));
3111  if (op_str == "CAST" && expr_json.HasMember("type")) {
3112  const auto& expr_type = field(expr_json, "type");
3113  CHECK(expr_type.IsObject());
3114  CHECK(expr_type.HasMember("type"));
3115  const auto& expr_type_name = json_str(field(expr_type, "type"));
3116  if (expr_type_name == "CURSOR") {
3117  CHECK(expr_json.HasMember("operands"));
3118  const auto& expr_operands = field(expr_json, "operands");
3119  CHECK(expr_operands.IsArray());
3120  if (expr_operands.Size() != 1) {
3121  throw std::runtime_error(
3122  "Table functions currently only support one ResultSet input");
3123  }
3124  auto pos = field(expr_operands[0], "input").GetInt();
3125  CHECK_LT(pos, inputs.size());
3126  for (size_t i = inputs[pos]->size(); i > 0; i--) {
3127  table_func_inputs.emplace_back(
3128  std::make_unique<RexAbstractInput>(col_inputs.size()));
3129  col_inputs.emplace_back(table_func_inputs.back().get());
3130  }
3131  continue;
3132  }
3133  }
3134  }
3135  table_func_inputs.emplace_back(parse_scalar_expr(*exprs_json_it, root_dag));
3136  }
3137 
3138  const auto& op_name = field(invocation, "op");
3139  CHECK(op_name.IsString());
3140 
3141  std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
3142  const auto& row_types = field(table_func_ra, "rowType");
3143  CHECK(row_types.IsArray());
3144  CHECK_GE(row_types.Size(), unsigned(0));
3145  const auto& row_types_array = row_types.GetArray();
3146  for (size_t i = 0; i < row_types_array.Size(); i++) {
3147  // We don't care about the type information in rowType -- replace each output with
3148  // a reference to be resolved later in the translator
3149  table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
3150  fields.emplace_back("");
3151  }
3152  return std::make_shared<RelTableFunction>(op_name.GetString(),
3153  inputs,
3154  fields,
3155  col_inputs,
3156  table_func_inputs,
3157  table_function_projected_outputs);
3158  }
3159 
3160  std::shared_ptr<RelLogicalValues> dispatchLogicalValues(
3161  const rapidjson::Value& logical_values_ra) {
3162  const auto& tuple_type_arr = field(logical_values_ra, "type");
3163  CHECK(tuple_type_arr.IsArray());
3164  std::vector<TargetMetaInfo> tuple_type;
3165  for (auto tuple_type_arr_it = tuple_type_arr.Begin();
3166  tuple_type_arr_it != tuple_type_arr.End();
3167  ++tuple_type_arr_it) {
3168  auto component_type = parse_type(*tuple_type_arr_it);
3169  const auto component_name = json_str(field(*tuple_type_arr_it, "name"));
3170  if (component_type.is_none_encoded_string()) {
3171  component_type.set_compression(kENCODING_DICT);
3172  component_type.set_comp_param(TRANSIENT_DICT_ID);
3173  component_type.setStringDictKey({TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID});
3174  component_type.set_size(4);
3175  }
3176  tuple_type.emplace_back(component_name, component_type);
3177  }
3178  const auto& inputs_arr = field(logical_values_ra, "inputs");
3179  CHECK(inputs_arr.IsArray());
3180  const auto& tuples_arr = field(logical_values_ra, "tuples");
3181  CHECK(tuples_arr.IsArray());
3182 
3183  if (inputs_arr.Size()) {
3184  throw QueryNotSupported("Inputs not supported in logical values yet.");
3185  }
3186 
3187  std::vector<RelLogicalValues::RowValues> values;
3188  if (tuples_arr.Size()) {
3189  for (const auto& row : tuples_arr.GetArray()) {
3190  CHECK(row.IsArray());
3191  const auto values_json = row.GetArray();
3192  if (!values.empty()) {
3193  CHECK_EQ(values[0].size(), values_json.Size());
3194  }
3195  values.emplace_back(RelLogicalValues::RowValues{});
3196  for (const auto& value : values_json) {
3197  CHECK(value.IsObject());
3198  CHECK(value.HasMember("literal"));
3199  values.back().emplace_back(parse_literal(value));
3200  }
3201  }
3202  }
3203 
3204  return std::make_shared<RelLogicalValues>(tuple_type, values);
3205  }
3206 
3207  std::shared_ptr<RelLogicalUnion> dispatchUnion(
3208  const rapidjson::Value& logical_union_ra) {
3209  auto inputs = getRelAlgInputs(logical_union_ra);
3210  auto const& all_type_bool = field(logical_union_ra, "all");
3211  CHECK(all_type_bool.IsBool());
3212  return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
3213  }
3214 
3215  RelAlgInputs getRelAlgInputs(const rapidjson::Value& node) {
3216  if (node.HasMember("inputs")) {
3217  const auto str_input_ids = strings_from_json_array(field(node, "inputs"));
3218  RelAlgInputs ra_inputs;
3219  for (const auto& str_id : str_input_ids) {
3220  ra_inputs.push_back(nodes_[std::stoi(str_id)]);
3221  }
3222  return ra_inputs;
3223  }
3224  return {prev(node)};
3225  }
3226 
3227  std::pair<std::string, std::string> getKVOptionPair(std::string& str, size_t& pos) {
3228  auto option = str.substr(0, pos);
3229  std::string delim = "=";
3230  size_t delim_pos = option.find(delim);
3231  auto key = option.substr(0, delim_pos);
3232  auto val = option.substr(delim_pos + 1, option.length());
3233  str.erase(0, pos + delim.length() + 1);
3234  return {key, val};
3235  }
3236 
3237  ExplainedQueryHint parseHintString(std::string& hint_string) {
3238  std::string white_space_delim = " ";
3239  int l = hint_string.length();
3240  hint_string = hint_string.erase(0, 1).substr(0, l - 2);
3241  size_t pos = 0;
3242  auto global_hint_checker = [&](const std::string& input_hint_name) -> HintIdentifier {
3243  bool global_hint = false;
3244  std::string hint_name = input_hint_name;
3245  auto global_hint_identifier = hint_name.substr(0, 2);
3246  if (global_hint_identifier.compare("g_") == 0) {
3247  global_hint = true;
3248  hint_name = hint_name.substr(2, hint_string.length());
3249  }
3250  return {global_hint, hint_name};
3251  };
3252  auto parsed_hint =
3253  global_hint_checker(hint_string.substr(0, hint_string.find(white_space_delim)));
3254  auto hint_type = RegisteredQueryHint::translateQueryHint(parsed_hint.hint_name);
3255  if ((pos = hint_string.find("options:")) != std::string::npos) {
3256  // need to parse hint options
3257  std::vector<std::string> tokens;
3258  bool kv_list_op = false;
3259  std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
3260  if (raw_options.find('{') != std::string::npos) {
3261  kv_list_op = true;
3262  } else {
3263  CHECK(raw_options.find('[') != std::string::npos);
3264  }
3265  auto t1 = raw_options.erase(0, 1);
3266  raw_options = t1.substr(0, t1.length() - 1);
3267  std::string op_delim = ", ";
3268  if (kv_list_op) {
3269  // kv options
3270  std::unordered_map<std::string, std::string> kv_options;
3271  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3272  auto kv_pair = getKVOptionPair(raw_options, pos);
3273  kv_options.emplace(kv_pair.first, kv_pair.second);
3274  }
3275  // handle the last kv pair
3276  auto kv_pair = getKVOptionPair(raw_options, pos);
3277  kv_options.emplace(kv_pair.first, kv_pair.second);
3278  return {hint_type, parsed_hint.global_hint, false, true, kv_options};
3279  } else {
3280  std::vector<std::string> list_options;
3281  while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3282  list_options.emplace_back(raw_options.substr(0, pos));
3283  raw_options.erase(0, pos + white_space_delim.length() + 1);
3284  }
3285  // handle the last option
3286  list_options.emplace_back(raw_options.substr(0, pos));
3287  return {hint_type, parsed_hint.global_hint, false, false, list_options};
3288  }
3289  } else {
3290  // marker hint: no extra option for this hint
3291  return {hint_type, parsed_hint.global_hint, true, false};
3292  }
3293  }
3294 
3295  void getRelAlgHints(const rapidjson::Value& json_node,
3296  std::shared_ptr<RelAlgNode> node) {
3297  std::string hint_explained = json_str(field(json_node, "hints"));
3298  size_t pos = 0;
3299  std::string delim = "|";
3300  std::vector<std::string> hint_list;
3301  while ((pos = hint_explained.find(delim)) != std::string::npos) {
3302  hint_list.emplace_back(hint_explained.substr(0, pos));
3303  hint_explained.erase(0, pos + delim.length());
3304  }
3305  // handling the last one
3306  hint_list.emplace_back(hint_explained.substr(0, pos));
3307 
3308  const auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node);
3309  if (agg_node) {
3310  for (std::string& hint : hint_list) {
3311  auto parsed_hint = parseHintString(hint);
3312  agg_node->addHint(parsed_hint);
3313  }
3314  }
3315  const auto project_node = std::dynamic_pointer_cast<RelProject>(node);
3316  if (project_node) {
3317  for (std::string& hint : hint_list) {
3318  auto parsed_hint = parseHintString(hint);
3319  project_node->addHint(parsed_hint);
3320  }
3321  }
3322  const auto scan_node = std::dynamic_pointer_cast<RelScan>(node);
3323  if (scan_node) {
3324  for (std::string& hint : hint_list) {
3325  auto parsed_hint = parseHintString(hint);
3326  scan_node->addHint(parsed_hint);
3327  }
3328  }
3329  const auto join_node = std::dynamic_pointer_cast<RelJoin>(node);
3330  if (join_node) {
3331  for (std::string& hint : hint_list) {
3332  auto parsed_hint = parseHintString(hint);
3333  join_node->addHint(parsed_hint);
3334  }
3335  }
3336 
3337  const auto compound_node = std::dynamic_pointer_cast<RelCompound>(node);
3338  if (compound_node) {
3339  for (std::string& hint : hint_list) {
3340  auto parsed_hint = parseHintString(hint);
3341  compound_node->addHint(parsed_hint);
3342  }
3343  }
3344  }
3345 
3346  std::shared_ptr<const RelAlgNode> prev(const rapidjson::Value& crt_node) {
3347  const auto id = node_id(crt_node);
3348  CHECK(id);
3349  CHECK_EQ(static_cast<size_t>(id), nodes_.size());
3350  return nodes_.back();
3351  }
3352 
3353  std::vector<std::shared_ptr<RelAlgNode>> nodes_;
3354 };
3355 
3356 } // namespace details
3357 
3358 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDag(const std::string& query_ra,
3359  const bool optimize_dag) {
3360  rapidjson::Document query_ast;
3361  query_ast.Parse(query_ra.c_str());
3362  VLOG(2) << "Parsing query RA JSON: " << query_ra;
3363  if (query_ast.HasParseError()) {
3364  query_ast.GetParseError();
3365  LOG(ERROR) << "Failed to parse RA tree from Calcite (offset "
3366  << query_ast.GetErrorOffset() << "):\n"
3367  << rapidjson::GetParseError_En(query_ast.GetParseError());
3368  VLOG(1) << "Failed to parse query RA: " << query_ra;
3369  throw std::runtime_error(
3370  "Failed to parse relational algebra tree. Possible query syntax error.");
3371  }
3372  CHECK(query_ast.IsObject());
3374 
3375  return build(query_ast, nullptr, optimize_dag);
3376 }
3377 
3378 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::buildDagForSubquery(
3379  RelAlgDag& root_dag,
3380  const rapidjson::Value& query_ast) {
3381  return build(query_ast, &root_dag, true);
3382 }
3383 
3384 std::unique_ptr<RelAlgDag> RelAlgDagBuilder::build(const rapidjson::Value& query_ast,
3385  RelAlgDag* root_dag,
3386  const bool optimize_dag) {
3387  const auto& rels = field(query_ast, "rels");
3388  CHECK(rels.IsArray());
3389 
3390  auto rel_alg_dag_ptr = std::make_unique<RelAlgDag>();
3391  auto& rel_alg_dag = *rel_alg_dag_ptr;
3392  auto& nodes = getNodes(rel_alg_dag);
3393 
3394  try {
3395  nodes = details::RelAlgDispatcher().run(rels, root_dag ? *root_dag : rel_alg_dag);
3396  } catch (const QueryNotSupported&) {
3397  throw;
3398  }
3399  CHECK(!nodes.empty());
3400  bind_inputs(nodes);
3401 
3403 
3404  if (optimize_dag) {
3405  optimizeDag(rel_alg_dag);
3406  }
3407 
3408  return rel_alg_dag_ptr;
3409 }
3410 
3412  auto const build_state = rel_alg_dag.getBuildState();
3413  if (build_state == RelAlgDag::BuildState::kBuiltOptimized) {
3414  return;
3415  }
3416 
3418  << static_cast<int>(build_state);
3419 
3420  auto& nodes = getNodes(rel_alg_dag);
3421  auto& subqueries = getSubqueries(rel_alg_dag);
3422  auto& query_hints = getQueryHints(rel_alg_dag);
3423 
3424  compute_node_hash(nodes);
3425  handle_query_hint(nodes, rel_alg_dag);
3426  mark_nops(nodes);
3427  simplify_sort(nodes);
3429  eliminate_identical_copy(nodes);
3430  fold_filters(nodes);
3431  std::vector<const RelAlgNode*> filtered_left_deep_joins;
3432  std::vector<const RelAlgNode*> left_deep_joins;
3433  for (const auto& node : nodes) {
3434  const auto left_deep_join_root = get_left_deep_join_root(node);
3435  // The filter which starts a left-deep join pattern must not be coalesced
3436  // since it contains (part of) the join condition.
3437  if (left_deep_join_root) {
3438  left_deep_joins.push_back(left_deep_join_root.get());
3439  if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
3440  filtered_left_deep_joins.push_back(left_deep_join_root.get());
3441  }
3442  }
3443  }
3444  if (filtered_left_deep_joins.empty()) {
3446  }
3447  eliminate_dead_columns(nodes);
3448  eliminate_dead_subqueries(subqueries, nodes.back().get());
3449  separate_window_function_expressions(nodes, query_hints);
3451  nodes,
3452  g_cluster /* always_add_project_if_first_project_is_window_expr */,
3453  query_hints);
3454  coalesce_nodes(nodes, left_deep_joins, query_hints);
3455  CHECK(nodes.back().use_count() == 1);
3456  create_left_deep_join(nodes);
3457  handle_agg_over_join(nodes, query_hints);
3459 
3461 }
3462 
3463 void RelAlgDag::eachNode(std::function<void(RelAlgNode const*)> const& callback) const {
3464  for (auto const& node : nodes_) {
3465  if (node) {
3466  callback(node.get());
3467  }
3468  }
3469 }
3470 
3472  for (auto& node : nodes_) {
3473  if (node) {
3474  node->resetQueryExecutionState();
3475  }
3476  }
3477 }
3478 
3479 // Return tree with depth represented by indentations.
3480 std::string tree_string(const RelAlgNode* ra, const size_t depth) {
3481  std::string result = std::string(2 * depth, ' ') + ::toString(ra) + '\n';
3482  for (size_t i = 0; i < ra->inputCount(); ++i) {
3483  result += tree_string(ra->getInput(i), depth + 1);
3484  }
3485  return result;
3486 }
3487 
3488 std::string RexSubQuery::toString(RelRexToStringConfig config) const {
3489  if (!config.attributes_only) {
3490  return cat(::typeName(this), "(", ra_->toString(config), ")");
3491  } else {
3492  return cat(::typeName(this), "()");
3493  }
3494 }
3495 
3496 std::string RexInput::toString(RelRexToStringConfig config) const {
3497  const auto scan_node = dynamic_cast<const RelScan*>(node_);
3498  if (scan_node) {
3499  auto field_name = scan_node->getFieldName(getIndex());
3500  auto table_name = scan_node->getTableDescriptor()->tableName;
3501  return ::typeName(this) + "(" + table_name + "." + field_name + ")";
3502  }
3503  auto node_id_in_plan = node_->getIdInPlanTree();
3504  auto node_id_str =
3505  node_id_in_plan ? std::to_string(*node_id_in_plan) : std::to_string(node_->getId());
3506  auto node_str = config.skip_input_nodes ? "(input_node_id=" + node_id_str
3507  : "(input_node=" + node_->toString(config);
3508  return cat(::typeName(this), node_str, ", in_index=", std::to_string(getIndex()), ")");
3509 }
3510 
3511 std::string RelCompound::toString(RelRexToStringConfig config) const {
3512  if (!config.attributes_only) {
3513  auto ret = cat(::typeName(this),
3514  ", filter_expr=",
3515  (filter_expr_ ? filter_expr_->toString(config) : "null"),
3516  ", target_exprs=");
3517  for (auto& expr : target_exprs_) {
3518  ret += expr->toString(config) + " ";
3519  }
3520  ret += ", agg_exps=";
3521  for (auto& expr : agg_exprs_) {
3522  ret += expr->toString(config) + " ";
3523  }
3524  ret += ", scalar_sources=";
3525  for (auto& expr : scalar_sources_) {
3526  ret += expr->toString(config) + " ";
3527  }
3528  return cat(ret,
3529  ", ",
3531  ", ",
3532  ", fields=",
3533  ::toString(fields_),
3534  ", is_agg=",
3536  } else {
3537  return cat(::typeName(this),
3538  "(",
3540  ", fields=",
3541  ::toString(fields_),
3542  ", is_agg=",
3544  ")");
3545  }
3546 }
3547 
3548 std::size_t hash_value(RexAbstractInput const& rex_ab_input) {
3549  if (rex_ab_input.hash_) {
3550  return *rex_ab_input.hash_;
3551  }
3552  rex_ab_input.hash_ = typeid(RexAbstractInput).hash_code();
3553  boost::hash_combine(*rex_ab_input.hash_, rex_ab_input.in_index_);
3554  return *rex_ab_input.hash_;
3555 }
3556 
3557 std::size_t hash_value(RexLiteral const& rex_literal) {
3558  if (rex_literal.hash_) {
3559  return *rex_literal.hash_;
3560  }
3561  rex_literal.hash_ = typeid(RexLiteral).hash_code();
3562  boost::apply_visitor(
3563  [&rex_literal](auto&& current_val) {
3564  using T = std::decay_t<decltype(current_val)>;
3565  if constexpr (!std::is_same_v<boost::blank, T>) {
3566  static_assert(std::is_same_v<int64_t, T> || std::is_same_v<double, T> ||
3567  std::is_same_v<std::string, T> || std::is_same_v<bool, T>);
3568  boost::hash_combine(*rex_literal.hash_, current_val);
3569  }
3570  },
3571  rex_literal.literal_);
3572  boost::hash_combine(*rex_literal.hash_, rex_literal.type_);
3573  boost::hash_combine(*rex_literal.hash_, rex_literal.target_type_);
3574  boost::hash_combine(*rex_literal.hash_, rex_literal.scale_);
3575  boost::hash_combine(*rex_literal.hash_, rex_literal.precision_);
3576  boost::hash_combine(*rex_literal.hash_, rex_literal.target_scale_);
3577  boost::hash_combine(*rex_literal.hash_, rex_literal.target_precision_);
3578  return *rex_literal.hash_;
3579 }
3580 
3581 std::size_t hash_value(RexOperator const& rex_op) {
3582  if (rex_op.hash_) {
3583  return *rex_op.hash_;
3584  }
3585  rex_op.hash_ = typeid(RexOperator).hash_code();
3586  boost::hash_combine(*rex_op.hash_, rex_op.op_);
3587  boost::hash_combine(*rex_op.hash_, rex_op.operands_);
3588  boost::hash_combine(*rex_op.hash_, rex_op.getType().get_type_name());
3589  return *rex_op.hash_;
3590 }
3591 
3592 std::size_t hash_value(RexCase const& rex_case) {
3593  if (rex_case.hash_) {
3594  return *rex_case.hash_;
3595  }
3596  rex_case.hash_ = typeid(RexCase).hash_code();
3597  boost::hash_combine(*rex_case.hash_, rex_case.expr_pair_list_);
3598  boost::hash_combine(*rex_case.hash_, rex_case.else_expr_);
3599  return *rex_case.hash_;
3600 }
3601 
3602 std::size_t hash_value(RexFunctionOperator const& rex_op) {
3603  if (rex_op.hash_) {
3604  return *rex_op.hash_;
3605  }
3606  rex_op.hash_ = typeid(RexFunctionOperator).hash_code();
3607  boost::hash_combine(*rex_op.hash_, ::toString(rex_op.op_));
3608  boost::hash_combine(*rex_op.hash_, rex_op.getType().get_type_name());
3609  boost::hash_combine(*rex_op.hash_, rex_op.operands_);
3610  boost::hash_combine(*rex_op.hash_, rex_op.name_);
3611  return *rex_op.hash_;
3612 }
3613 
3614 std::size_t hash_value(SortField const& sort_field) {
3615  auto hash = boost::hash_value(sort_field.field_);
3616  boost::hash_combine(hash, sort_field.sort_dir_ == SortDirection::Ascending ? "a" : "d");
3617  boost::hash_combine(hash,
3618  sort_field.nulls_pos_ == NullSortedPosition::First ? "f" : "l");
3619  return hash;
3620 }
3621 
3622 std::size_t hash_value(RexWindowFunctionOperator const& rex_window) {
3623  if (rex_window.hash_) {
3624  return *rex_window.hash_;
3625  }
3626  rex_window.hash_ = typeid(RexWindowFunctionOperator).hash_code();
3627  boost::hash_combine(*rex_window.hash_, rex_window.getType().get_type_name());
3628  boost::hash_combine(*rex_window.hash_, rex_window.getName());
3629  boost::hash_combine(*rex_window.hash_, rex_window.is_rows_);
3630  boost::hash_combine(*rex_window.hash_, rex_window.collation_);
3631  boost::hash_combine(*rex_window.hash_, rex_window.operands_);
3632  boost::hash_combine(*rex_window.hash_, rex_window.partition_keys_);
3633  boost::hash_combine(*rex_window.hash_, rex_window.order_keys_);
3634  auto get_window_bound_hash =
3636  auto h = boost::hash_value(bound.bound_expr);
3637  boost::hash_combine(h, bound.unbounded);
3638  boost::hash_combine(h, bound.preceding);
3639  boost::hash_combine(h, bound.following);
3640  boost::hash_combine(h, bound.is_current_row);
3641  boost::hash_combine(h, bound.order_key);
3642  return h;
3643  };
3644  boost::hash_combine(*rex_window.hash_,
3645  get_window_bound_hash(rex_window.frame_start_bound_));
3646  boost::hash_combine(*rex_window.hash_,
3647  get_window_bound_hash(rex_window.frame_end_bound_));
3648  return *rex_window.hash_;
3649 }
3650 
3651 std::size_t hash_value(RexRef const& rex_ref) {
3652  if (rex_ref.hash_) {
3653  return *rex_ref.hash_;
3654  }
3655  rex_ref.hash_ = typeid(RexRef).hash_code();
3656  boost::hash_combine(*rex_ref.hash_, rex_ref.index_);
3657  return *rex_ref.hash_;
3658 }
3659 
3660 std::size_t hash_value(RexAgg const& rex_agg) {
3661  if (rex_agg.hash_) {
3662  return *rex_agg.hash_;
3663  }
3664  rex_agg.hash_ = typeid(RexAgg).hash_code();
3665  boost::hash_combine(*rex_agg.hash_, rex_agg.operands_);
3666  boost::hash_combine(*rex_agg.hash_, rex_agg.agg_);
3667  boost::hash_combine(*rex_agg.hash_, rex_agg.distinct_);
3668  boost::hash_combine(*rex_agg.hash_, rex_agg.type_.get_type_name());
3669  return *rex_agg.hash_;
3670 }
3671 
3672 std::size_t hash_value(RexSubQuery const& rex_subq) {
3673  if (rex_subq.hash_) {
3674  return *rex_subq.hash_;
3675  }
3676  rex_subq.hash_ = typeid(RexSubQuery).hash_code();
3677  boost::hash_combine(*rex_subq.hash_, rex_subq.ra_);
3678  return *rex_subq.hash_;
3679 }
3680 
3681 std::size_t hash_value(RexInput const& rex_input) {
3682  if (rex_input.hash_) {
3683  return *rex_input.hash_;
3684  }
3685  rex_input.hash_ = typeid(RexInput).hash_code();
3686  boost::hash_combine(*rex_input.hash_, rex_input.node_);
3687  boost::hash_combine(*rex_input.hash_, rex_input.getIndex());
3688  return *rex_input.hash_;
3689 }
3690 
3691 std::size_t hash_value(RelScan const& rel_scan) {
3692  if (rel_scan.hash_) {
3693  return *rel_scan.hash_;
3694  }
3695  rel_scan.hash_ = typeid(RelScan).hash_code();
3696  boost::hash_combine(*rel_scan.hash_, rel_scan.td_->tableId);
3697  boost::hash_combine(*rel_scan.hash_, rel_scan.td_->tableName);
3698  boost::hash_combine(*rel_scan.hash_, ::toString(rel_scan.field_names_));
3699  return *rel_scan.hash_;
3700 }
3701 
3702 std::size_t hash_value(RelProject const& rel_project) {
3703  if (rel_project.hash_) {
3704  return *rel_project.hash_;
3705  }
3706  rel_project.hash_ = typeid(RelProject).hash_code();
3707  boost::hash_combine(*rel_project.hash_, rel_project.scalar_exprs_);
3708  boost::hash_combine(*rel_project.hash_, rel_project.fields_);
3709  boost::hash_combine(*rel_project.hash_, rel_project.inputs_);
3710  return *rel_project.hash_;
3711 }
3712 
3713 std::size_t hash_value(RelAggregate const& rel_agg) {
3714  if (rel_agg.hash_) {
3715  return *rel_agg.hash_;
3716  }
3717  rel_agg.hash_ = typeid(RelAggregate).hash_code();
3718  boost::hash_combine(*rel_agg.hash_, rel_agg.groupby_count_);
3719  boost::hash_combine(*rel_agg.hash_, rel_agg.agg_exprs_);
3720  boost::hash_combine(*rel_agg.hash_, rel_agg.fields_);
3721  boost::hash_combine(*rel_agg.hash_, rel_agg.inputs_);
3722  return *rel_agg.hash_;
3723 }
3724 
3725 std::size_t hash_value(RelJoin const& rel_join) {
3726  if (rel_join.hash_) {
3727  return *rel_join.hash_;
3728  }
3729  rel_join.hash_ = typeid(RelJoin).hash_code();
3730  boost::hash_combine(*rel_join.hash_, rel_join.condition_);
3731  boost::hash_combine(*rel_join.hash_, rel_join.inputs_);
3732  boost::hash_combine(*rel_join.hash_, ::toString(rel_join.getJoinType()));
3733  return *rel_join.hash_;
3734 }
3735 
3736 std::size_t hash_value(RelTranslatedJoin const& rel_tr_join) {
3737  if (rel_tr_join.hash_) {
3738  return *rel_tr_join.hash_;
3739  }
3740  rel_tr_join.hash_ = typeid(RelTranslatedJoin).hash_code();
3741  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.lhs_);
3742  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.rhs_);
3743  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.outer_join_cond_);
3744  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.nested_loop_);
3745  boost::hash_combine(*rel_tr_join.hash_, ::toString(rel_tr_join.join_type_));
3746  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.op_type_);
3747  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.qualifier_);
3748  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.op_typeinfo_);
3749  boost::hash_combine(*rel_tr_join.hash_, rel_tr_join.filter_ops_);
3750  return *rel_tr_join.hash_;
3751 }
3752 
3753 std::size_t hash_value(RelFilter const& rel_filter) {
3754  if (rel_filter.hash_) {
3755  return *rel_filter.hash_;
3756  }
3757  rel_filter.hash_ = typeid(RelFilter).hash_code();
3758  boost::hash_combine(*rel_filter.hash_, rel_filter.filter_);
3759  boost::hash_combine(*rel_filter.hash_, rel_filter.inputs_);
3760  return *rel_filter.hash_;
3761 }
3762 
3763 std::size_t hash_value(RelLeftDeepInnerJoin const& rel_join) {
3764  if (rel_join.hash_) {
3765  return *rel_join.hash_;
3766  }
3767  rel_join.hash_ = typeid(RelLeftDeepInnerJoin).hash_code();
3768  boost::hash_combine(*rel_join.hash_, rel_join.condition_);
3769  boost::hash_combine(*rel_join.hash_, rel_join.outer_conditions_per_level_);
3770  boost::hash_combine(*rel_join.hash_, rel_join.original_filter_);
3771  boost::hash_combine(*rel_join.hash_, rel_join.inputs_);
3772  return *rel_join.hash_;
3773 }
3774 
3775 std::size_t hash_value(RelCompound const& rel_compound) {
3776  if (rel_compound.hash_) {
3777  return *rel_compound.hash_;
3778  }
3779  rel_compound.hash_ = typeid(RelCompound).hash_code();
3780  boost::hash_combine(*rel_compound.hash_, rel_compound.filter_expr_);
3781  boost::hash_combine(*rel_compound.hash_, rel_compound.is_agg_);
3782  boost::hash_combine(*rel_compound.hash_, rel_compound.target_exprs_);
3783  boost::hash_combine(*rel_compound.hash_, rel_compound.agg_exprs_);
3784  boost::hash_combine(*rel_compound.hash_, rel_compound.scalar_sources_);
3785  boost::hash_combine(*rel_compound.hash_, rel_compound.groupby_count_);
3786  boost::hash_combine(*rel_compound.hash_, rel_compound.fields_);
3787  boost::hash_combine(*rel_compound.hash_, rel_compound.inputs_);
3788  return *rel_compound.hash_;
3789 }
3790 
3791 std::size_t hash_value(RelSort const& rel_sort) {
3792  if (rel_sort.hash_) {
3793  return *rel_sort.hash_;
3794  }
3795  rel_sort.hash_ = typeid(RelSort).hash_code();
3796  boost::hash_combine(*rel_sort.hash_, rel_sort.collation_);
3797  boost::hash_combine(*rel_sort.hash_, rel_sort.limit_.has_value());
3798  boost::hash_combine(*rel_sort.hash_, rel_sort.limit_.value_or(0));
3799  boost::hash_combine(*rel_sort.hash_, rel_sort.offset_);
3800  boost::hash_combine(*rel_sort.hash_, rel_sort.inputs_);
3801  return *rel_sort.hash_;
3802 }
3803 
3804 std::size_t hash_value(RelModify const& rel_modify) {
3805  if (rel_modify.hash_) {
3806  return *rel_modify.hash_;
3807  }
3808  rel_modify.hash_ = typeid(RelModify).hash_code();
3809  boost::hash_combine(*rel_modify.hash_, rel_modify.table_descriptor_->tableName);
3810  boost::hash_combine(*rel_modify.hash_, rel_modify.table_descriptor_->tableId);
3811  boost::hash_combine(*rel_modify.hash_, rel_modify.flattened_);
3812  boost::hash_combine(*rel_modify.hash_,
3814  boost::hash_combine(*rel_modify.hash_, rel_modify.target_column_list_);
3815  boost::hash_combine(*rel_modify.hash_, rel_modify.inputs_);
3816  return *rel_modify.hash_;
3817 }
3818 
3819 std::size_t hash_value(RelTableFunction const& rel_tf) {
3820  if (rel_tf.hash_) {
3821  return *rel_tf.hash_;
3822  }
3823  rel_tf.hash_ = typeid(RelTableFunction).hash_code();
3824  boost::hash_combine(*rel_tf.hash_, rel_tf.function_name_);
3825  boost::hash_combine(*rel_tf.hash_, rel_tf.table_func_inputs_);
3826  boost::hash_combine(*rel_tf.hash_, rel_tf.target_exprs_);
3827  boost::hash_combine(*rel_tf.hash_, rel_tf.fields_);
3828  boost::hash_combine(*rel_tf.hash_, rel_tf.inputs_);
3829  return *rel_tf.hash_;
3830 }
3831 
3832 std::size_t hash_value(RelLogicalValues const& rel_lv) {
3833  if (rel_lv.hash_) {
3834  return *rel_lv.hash_;
3835  }
3836  rel_lv.hash_ = typeid(RelLogicalValues).hash_code();
3837  for (auto& target_meta_info : rel_lv.tuple_type_) {
3838  boost::hash_combine(*rel_lv.hash_, target_meta_info.get_resname());
3839  boost::hash_combine(*rel_lv.hash_, target_meta_info.get_type_info().get_type_name());
3840  }
3841  return *rel_lv.hash_;
3842 }
3843 
3844 std::size_t hash_value(RelLogicalUnion const& rel_lv) {
3845  if (rel_lv.hash_) {
3846  return *rel_lv.hash_;
3847  }
3848  rel_lv.hash_ = typeid(RelLogicalUnion).hash_code();
3849  boost::hash_combine(*rel_lv.hash_, rel_lv.is_all_);
3850  boost::hash_combine(*rel_lv.hash_, rel_lv.inputs_);
3851  return *rel_lv.hash_;
3852 }
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:2776
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
const size_t getGroupByCount() const
Definition: RelAlgDag.h:1508
SQLTypes to_sql_type(const std::string &type_name)
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
Definition: RelAlgDag.h:3369
std::optional< size_t > is_collected_window_function(size_t rex_hash) const
Definition: RelAlgDag.cpp:2387
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1210
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
Definition: RelAlgDag.h:1731
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
Definition: RelAlgDag.cpp:1422
RexWindowBound frame_start_bound_
Definition: RelAlgDag.h:725
RelCompound(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
Definition: RelAlgDag.h:2021
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:440
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
Definition: RelAlgDag.cpp:2979
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2969
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:113
RexRebindReindexInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input, std::unordered_map< unsigned, unsigned > old_to_new_index_map)
Definition: RelAlgDag.cpp:107
void set_notnulls(std::vector< TargetMetaInfo > *tmis0, std::vector< bool > const &notnulls)
Definition: RelAlgDag.cpp:896
std::vector< std::unique_ptr< const RexScalar > > outer_conditions_per_level_
Definition: RelAlgDag.h:2007
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1625
std::unique_ptr< RexSubQuery > deepCopy() const
Definition: RelAlgDag.cpp:60
unsigned in_index_
Definition: RelAlgDag.h:188
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.h:1374
JoinType
Definition: sqldefs.h:238
static std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > & getQueryHints(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3414
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
Definition: RelAlgDag.h:2645
std::string cat(Ts &&...args)
std::optional< size_t > getOffsetForPushedDownExpr(WindowExprType type, size_t expr_offset) const
Definition: RelAlgDag.cpp:158
RexWindowFuncReplacementVisitor(std::unordered_set< size_t > &collected_window_func_hash, std::vector< std::unique_ptr< const RexScalar >> &new_rex_input_for_window_func, std::unordered_map< size_t, size_t > &window_func_to_new_rex_input_idx_map, RelProject *new_project, std::unordered_map< size_t, std::unique_ptr< const RexInput >> &new_rex_input_from_child_node)
Definition: RelAlgDag.cpp:2282
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1194
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< bool > get_notnulls(std::vector< TargetMetaInfo > const &tmis0)
Definition: RelAlgDag.cpp:882
Definition: sqltypes.h:76
std::vector< std::unique_ptr< const RexScalar > > & scalar_exprs_for_new_project_
Definition: RelAlgDag.cpp:333
size_t size() const override
Definition: RelAlgDag.h:1320
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:2136
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
std::string tableName
const bool nested_loop_
Definition: RelAlgDag.h:1856
size_t groupby_count_
Definition: RelAlgDag.h:1606
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
Definition: RelAlgDag.cpp:1925
void eliminate_redundant_projection(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: RelAlgDag.cpp:2174
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3511
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *rex_input) const final
Definition: RelAlgDag.cpp:2305
std::vector< RexInput > RANodeOutput
Definition: RelAlgDag.h:3461
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
Definition: RelAlgDag.cpp:1457
const RexScalar * getElse() const
Definition: RelAlgDag.h:445
static thread_local unsigned crt_id_
Definition: RelAlgDag.h:953
NullSortedPosition nulls_pos_
Definition: RelAlgDag.h:571
std::unique_ptr< const RexScalar > visitOperator(const RexOperator *rex_operator) const override
Definition: RelAlgDag.cpp:298
std::string function_name_
Definition: RelAlgDag.h:2639
const RexScalar * outer_join_cond_
Definition: RelAlgDag.h:1855
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
Definition: RelAlgDag.cpp:1109
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2934
RelProject(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
Definition: RelAlgDag.h:1261
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
Definition: RelAlgDag.cpp:944
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:686
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
Definition: RelAlgDag.cpp:3207
#define LOG(tag)
Definition: Logger.h:285
std::vector< std::string > TargetColumnList
Definition: RelAlgDag.h:2291
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1316
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:378
std::unique_ptr< const RexScalar > get_new_rex_input(size_t rex_idx) const
Definition: RelAlgDag.cpp:2395
size_t size() const
Definition: RelAlgDag.h:364
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1436
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2947
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:51
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:366
std::vector< const Rex * > col_inputs_
Definition: RelAlgDag.h:2642
const JoinType join_type_
Definition: RelAlgDag.h:1857
std::string json_node_to_string(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:978
bool hasEquivCollationOf(const RelSort &that) const
Definition: RelAlgDag.cpp:803
JoinType to_join_type(const std::string &join_type_name)
Definition: RelAlgDag.cpp:1404
void resetQueryExecutionState()
Definition: RelAlgDag.cpp:3471
std::shared_ptr< RelFilter > original_filter_
Definition: RelAlgDag.h:2008
std::pair< bool, bool > need_pushdown_generic_expr(RelProject const *window_func_project_node)
Definition: RelAlgDag.cpp:2615
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:46
std::unique_ptr< const RexScalar > else_expr_
Definition: RelAlgDag.h:469
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:3381
std::string join(T const &container, std::string const &delim)
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1247
#define UNREACHABLE()
Definition: Logger.h:338
void handle_query_hint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
Definition: RelAlgDag.cpp:1573
bool hint_applied_
Definition: RelAlgDag.h:1733
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
#define CHECK_GE(x, y)
Definition: Logger.h:306
SortDirection
Definition: RelAlgDag.h:531
void registerQueryHint(const RelAlgNode *node, const RegisteredQueryHint &query_hint)
Definition: RelAlgDag.h:3327
std::vector< std::string > fields_
Definition: RelAlgDag.h:1460
const std::pair< const Catalog_Namespace::Catalog *, const TableDescriptor * > getCatalogAndTableFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2861
void pushDownExpressionInWindowFunction(const RexWindowFunctionOperator *window_expr) const
Definition: RelAlgDag.cpp:179
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1701
Definition: sqldefs.h:51
std::unique_ptr< const RexScalar > visitCase(const RexCase *rex_case) const override
Definition: RelAlgDag.cpp:279
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:435
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
Definition: RelAlgDag.cpp:1348
const RegisteredQueryHint & getGlobalHints() const
Definition: RelAlgDag.h:3367
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
Definition: RelAlgDag.cpp:365
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2415
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
Definition: RelAlgDag.cpp:470
std::unique_ptr< const RexScalar > defaultResult() const override
Definition: RelAlgDag.cpp:330
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1576
const RelAlgNode * rhs_
Definition: RelAlgDag.h:1851
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:1361
const TableDescriptor * td_
Definition: RelAlgDag.h:1174
std::optional< size_t > getIdInPlanTree() const
Definition: RelAlgDag.h:134
std::unordered_map< size_t, const RexScalar * > & collected_window_func_
Definition: RelAlgDag.cpp:2276
const std::string op_type_
Definition: RelAlgDag.h:1858
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
Definition: RelAlgDag.h:2172
#define CHECK_GT(x, y)
Definition: Logger.h:305
void * visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:77
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
Definition: RelAlgDag.cpp:989
static std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3408
RexInputSet aggregateResult(const RexInputSet &aggregate, const RexInputSet &next_result) const override
Definition: RelAlgDag.cpp:2582
std::vector< std::string > field_names_
Definition: RelAlgDag.h:1175
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
Definition: RelAlgDag.cpp:1478
std::unique_ptr< const RexScalar > visitLiteral(const RexLiteral *rex_literal) const override
Definition: RelAlgDag.cpp:265
bool flattened_
Definition: RelAlgDag.h:2495
std::string to_string(char const *&&v)
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const bool always_add_project_if_first_project_is_window_expr, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2653
const std::string getFieldName(const size_t i) const
Definition: RelAlgDag.h:1127
std::unique_ptr< const RexScalar > visitSubQuery(const RexSubQuery *rex_subquery) const override
Definition: RelAlgDag.cpp:274
const std::string qualifier_
Definition: RelAlgDag.h:1859
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
Definition: RelAlgDag.h:2278
constexpr double a
Definition: Utm.h:32
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:3007
RelLogicalValues()=default
std::vector< std::unique_ptr< const RexScalar > > & new_rex_input_for_window_func_
Definition: RelAlgDag.cpp:2406
std::unordered_set< RexInput > RexInputSet
Definition: RelAlgDag.cpp:2573
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
Definition: RexVisitor.h:27
std::string to_string() const
Definition: sqltypes.h:528
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:33
unsigned getIndex() const
Definition: RelAlgDag.h:174
size_t field_
Definition: RelAlgDag.h:569
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2459
void markAsNop()
Definition: RelAlgDag.h:925
static SysCatalog & instance()
Definition: SysCatalog.h:343
bool aggregateResult(const bool &aggregate, const bool &next_result) const final
Definition: RelAlgDag.cpp:1962
SQLOps getOperator() const
Definition: RelAlgDag.h:376
void setExecutionResult(const ExecutionResultShPtr result)
Definition: RelAlgDag.cpp:51
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:3090
std::unordered_map< size_t, std::unique_ptr< const RexInput > > & new_rex_input_from_child_node_
Definition: RelAlgDag.cpp:2411
virtual size_t toHash() const override
Definition: RelAlgDag.h:461
unsigned getId() const
Definition: RelAlgDag.h:869
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
Definition: RelAlgDag.cpp:764
void create_rex_input_for_new_project_node(RelAlgNode const *node, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, std::vector< std::string > &fields)
Definition: RelAlgDag.cpp:2121
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1274
static std::unique_ptr< RelAlgDag > buildDagForSubquery(RelAlgDag &root_dag, const rapidjson::Value &query_ast)
Definition: RelAlgDag.cpp:3378
std::unique_ptr< const RexScalar > visitInput(const RexInput *rex_input) const override
Definition: RelAlgDag.cpp:253
static QueryHint translateQueryHint(const std::string &hint_name)
Definition: QueryHint.h:382
size_t index_
Definition: RelAlgDag.h:762
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
#define CHECK_NE(x, y)
Definition: Logger.h:302
bool isRenaming() const
Definition: RelAlgDag.cpp:513
void setIndex(const unsigned in_index) const
Definition: RelAlgDag.h:176
std::vector< size_t > operands_
Definition: RelAlgDag.h:821
Hints * getDeliveredHints()
Definition: RelAlgDag.h:1599
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2011
std::vector< std::string > fields_
Definition: RelAlgDag.h:2169
SQLOps to_sql_op(const std::string &op_str)
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1610
void set_scale(int s)
Definition: sqltypes.h:475
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:41
const TableDescriptor * table_descriptor_
Definition: RelAlgDag.h:2494
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1734
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
Definition: RelAlgDag.cpp:627
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
Definition: RelAlgDag.h:71
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
Definition: RelAlgDag.h:1459
RetType visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2329
const RelAlgNode * lhs_
Definition: RelAlgDag.h:1850
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:56
void addHint(const ExplainedQueryHint &hint_explained)
Definition: RelAlgDag.h:1413
size_t branchCount() const
Definition: RelAlgDag.h:433
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:877
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Definition: RelAlgDag.cpp:1080
Checked json field retrieval.
const std::vector< std::shared_ptr< const Analyzer::Expr > > filter_ops_
Definition: RelAlgDag.h:1854
void * visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2230
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.cpp:3353
std::unique_ptr< const RexScalar > filter_
Definition: RelAlgDag.h:1945
bool isSimple() const
Definition: RelAlgDag.h:1307
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
Definition: RelAlgDag.cpp:638
std::vector< std::unique_ptr< const RexScalar > > operands_
Definition: RelAlgDag.h:400
std::optional< size_t > hash_
Definition: RelAlgDag.h:151
std::vector< std::string > fields_
Definition: RelAlgDag.h:2640
std::vector< const Rex * > target_exprs_
Definition: RelAlgDag.h:2174
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
Definition: RelAlgDag.cpp:1532
std::optional< size_t > hash_
Definition: RelAlgDag.h:947
unsigned getId() const
Definition: RelAlgDag.cpp:64
const RelAlgNode * node_
Definition: RelAlgDag.h:1079
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
Definition: RelAlgDag.h:908
bool find_generic_expr_in_window_func(RexWindowFunctionOperator const *window_expr, bool &has_generic_expr_in_window_func)
Definition: RelAlgDag.cpp:2591
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1504
RexInputSet visitInput(const RexInput *input) const override
Definition: RelAlgDag.cpp:2577
static std::string yieldModifyOperationString(ModifyOperation const op)
Definition: RelAlgDag.h:2293
bool distinct_
Definition: RelAlgDag.h:819
std::vector< TargetMetaInfo > getCompatibleMetainfoTypes() const
Definition: RelAlgDag.cpp:911
virtual std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const =0
static std::unique_ptr< RelAlgDag > buildDag(const std::string &query_ra, const bool optimize_dag)
Definition: RelAlgDag.cpp:3358
std::string tree_string(const RelAlgNode *ra, const size_t depth)
Definition: RelAlgDag.cpp:3480
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:1607
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1216
void compute_node_hash(const std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Definition: RelAlgDag.cpp:1611
Hints * getDeliveredHints()
Definition: RelAlgDag.h:2159
SQLAgg agg_
Definition: RelAlgDag.h:818
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &&exprs)
Definition: RelAlgDag.cpp:730
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:537
PushDownGenericExpressionInWindowFunction(std::shared_ptr< RelProject > new_project, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs_for_new_project, std::vector< std::string > &fields_for_new_project, std::unordered_map< size_t, size_t > &expr_offset_cache)
Definition: RelAlgDag.cpp:129
const RexScalar * getProjectAt(const size_t idx) const
Definition: RelAlgDag.h:1352
bool hint_applied_
Definition: RelAlgDag.h:1609
size_t offset_
Definition: RelAlgDag.h:2280
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:79
Definition: sqltypes.h:80
static RegisteredQueryHint defaults()
Definition: QueryHint.h:379
static std::unique_ptr< RelAlgDag > build(const rapidjson::Value &query_ast, RelAlgDag *root_dag, const bool optimize_dag)
Definition: RelAlgDag.cpp:3384
int32_t countRexLiteralArgs() const
Definition: RelAlgDag.cpp:698
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:2176
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
Definition: RelAlgDag.cpp:1642
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:643
bool allStringCastsAreToDictionaryEncodedStrings() const
Definition: RelAlgDag.cpp:955
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:2894
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:304
const std::unordered_map< unsigned, unsigned > mapping_
Definition: RelAlgDag.cpp:122
std::unique_ptr< Hints > hints_
Definition: RelAlgDag.h:1462
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
Definition: RelAlgDag.cpp:2841
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:544
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
Definition: RelAlgDag.h:2830
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
Definition: RelAlgDag.h:2168
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1099
std::unique_ptr< const RexScalar > filter_expr_
Definition: RelAlgDag.h:2166
static std::vector< std::shared_ptr< RelAlgNode > > & getNodes(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.h:3404
void setSourceNode(const RelAlgNode *node) const
Definition: RelAlgDag.h:1061
bool hasWindowFunctionExpr() const
Definition: RelAlgDag.cpp:2880
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
Definition: RelAlgDag.cpp:3046
std::vector< ElementType >::const_iterator Super
Definition: RelAlgDag.cpp:1844
RelFilter()=default
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
Definition: RelAlgDag.cpp:617
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
Definition: RelAlgDag.cpp:995
ConstRexScalarPtrVector order_keys_
Definition: RelAlgDag.h:723
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
Definition: RelAlgDag.cpp:1336
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
Definition: QueryHint.h:405
virtual size_t size() const =0
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:1056
std::vector< SortField > collation_
Definition: RelAlgDag.h:724
RexWindowBound frame_end_bound_
Definition: RelAlgDag.h:726
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:856
SQLTypeInfo type_
Definition: RelAlgDag.h:820
std::string get_type_name() const
Definition: sqltypes.h:484
virtual size_t toHash() const override
Definition: RelAlgDag.h:1074
std::vector< std::pair< std::unique_ptr< const RexScalar >, std::unique_ptr< const RexScalar > > > expr_pair_list_
Definition: RelAlgDag.h:468
std::string typeName(const T *v)
Definition: toString.h:106
ExplainedQueryHint parseHintString(std::string &hint_string)
Definition: RelAlgDag.cpp:3237
SqlWindowFunctionKind
Definition: sqldefs.h:129
void * visitOperator(const RexOperator *rex_operator) const final
Definition: RelAlgDag.cpp:2208
std::unique_ptr< const RexScalar > condition_
Definition: RelAlgDag.h:2006
SortDirection sort_dir_
Definition: RelAlgDag.h:570
Definition: sqldefs.h:55
void eachNode(std::function< void(RelAlgNode const *)> const &) const
Definition: RelAlgDag.cpp:3463
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3488
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
Definition: RelAlgDag.cpp:3022
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
const std::vector< std::string > & getFields() const
Definition: RelAlgDag.h:1366
std::unique_ptr< const RexScalar > visitRef(const RexRef *rex_ref) const override
Definition: RelAlgDag.cpp:270
std::string getFieldName(const size_t i) const
Definition: RelAlgDag.cpp:860
static void optimizeDag(RelAlgDag &rel_alg_dag)
Definition: RelAlgDag.cpp:3411
void handle_agg_over_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
Definition: RelAlgDag.cpp:2138
bool g_enable_watchdog false
Definition: Execute.cpp:80
void set_notnull(bool n)
Definition: sqltypes.h:477
ModifyOperation operation_
Definition: RelAlgDag.h:2496
#define CHECK(condition)
Definition: Logger.h:291
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:653
const std::string op_typeinfo_
Definition: RelAlgDag.h:1860
std::string name_
Definition: RelAlgDag.h:526
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1378
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
Definition: RelAlgDag.cpp:1662
bool g_enable_union
ConstRexScalarPtrVector partition_keys_
Definition: RelAlgDag.h:722
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
Definition: RelAlgDag.cpp:1685
bool g_cluster
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
Definition: RelAlgDag.cpp:96
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
Definition: RelAlgDag.cpp:3346
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
Definition: RelAlgDag.cpp:528
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
Definition: RelAlgDag.cpp:3295
SortDirection parse_sort_direction(const rapidjson::Value &collation)
Definition: RelAlgDag.cpp:1204
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, RelAlgDag &root_dag)
Definition: RelAlgDag.cpp:1228
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
Definition: sqltypes.h:643
Definition: sqltypes.h:72
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
Definition: RelAlgDag.cpp:3496
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RexRebindInputsVisitor(const RelAlgNode *old_input, const RelAlgNode *new_input)
Definition: RelAlgDag.cpp:72
std::optional< size_t > limit_
Definition: RelAlgDag.h:2279
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2656
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
Definition: RelAlgDag.cpp:1518
const std::string & getName() const
Definition: RelAlgDag.h:506
RetType visitCase(const RexCase *rex_case) const final
Definition: RelAlgDag.cpp:2356
const size_t inputCount() const
Definition: RelAlgDag.h:875
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
Definition: RelAlgDag.cpp:2855
WindowFunctionCollector(std::unordered_map< size_t, const RexScalar * > &collected_window_func, bool only_add_window_expr)
Definition: RelAlgDag.cpp:2200
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
unsigned node_id(const rapidjson::Value &ra_node) noexcept
Definition: RelAlgDag.cpp:973
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
Definition: RelAlgDag.cpp:852
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
Definition: RelAlgDag.cpp:753
std::unordered_map< size_t, size_t > & window_func_to_new_rex_input_idx_map_
Definition: RelAlgDag.cpp:2408
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
Definition: RelAlgDag.cpp:3215
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
Definition: RelAlgDag.cpp:2873
SQLOps op_
Definition: RelAlgDag.h:399
RelTableFunction()=default
TargetColumnList target_column_list_
Definition: RelAlgDag.h:2497
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
Definition: RelAlgDag.cpp:3160
std::vector< std::string > fields_
Definition: RelAlgDag.h:1608
JoinType getJoinType() const
Definition: RelAlgDag.h:1650
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
std::unique_ptr< const RexScalar > RetType
Definition: RexVisitor.h:140
NullSortedPosition
Definition: RelAlgDag.h:533
RANodeOutput get_node_output(const RelAlgNode *ra_node)
Definition: RelAlgDag.cpp:371
virtual size_t toHash() const =0
#define VLOG(n)
Definition: Logger.h:388
BuildState getBuildState() const
Definition: RelAlgDag.h:2805
RelAlgInputs inputs_
Definition: RelAlgDag.h:945
void reset_table_function_inputs(std::vector< const Rex * > &column_inputs, const std::vector< std::unique_ptr< const RexScalar >> &old_table_func_inputs, const std::vector< std::unique_ptr< const RexScalar >> &new_table_func_inputs)
Definition: RelAlgDag.cpp:711
void set_precision(int d)
Definition: sqltypes.h:473
std::vector< TargetMetaInfo > tuple_type_
Definition: RelAlgDag.h:2735
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
Definition: RelAlgDag.cpp:3227
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool isIdentity() const
Definition: RelAlgDag.cpp:440
bool same_ignoring_notnull(SQLTypeInfo ti0, SQLTypeInfo ti1)
Definition: RelAlgDag.cpp:890
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
Definition: RelAlgDag.h:2648
static void setBuildState(RelAlgDag &rel_alg_dag, const RelAlgDag::BuildState build_state)
Definition: RelAlgDag.h:3418
static void resetRelAlgFirstId() noexcept
Definition: RelAlgDag.cpp:47