28 #include <rapidjson/error/en.h>
29 #include <rapidjson/error/error.h>
30 #include <rapidjson/stringbuffer.h>
31 #include <rapidjson/writer.h>
34 #include <unordered_set>
// Tail of a routine that receives a shared ExecutionResult (`result`); the
// routine's name and the start of its signature are not visible here.
// Presumably a RexSubQuery member, given the make_unique<RexSubQuery> below
// uses the same type_/result_ members — TODO confirm.
52 const std::shared_ptr<const ExecutionResult>
result) {
53 auto row_set = result->getRows();
// The row set must expose exactly one column.
55 CHECK_EQ(
size_t(1), row_set->colCount());
// Write the type of column 0 into the object that type_ points to.
56 *(type_.get()) = row_set->getColType(0);
// Write the incoming result pointer into the object that result_ points to.
57 (*(result_.get())) =
result;
// Builds a new RexSubQuery from the existing type_ and result_ members and a
// deep copy of ra_ (the enclosing function header is elided).
61 return std::make_unique<RexSubQuery>(type_, result_, ra_->deepCopy());
73 : old_input_(old_input), new_input_(new_input) {}
79 if (old_source == old_input_) {
97 std::vector<RexInput> outputs;
99 for (
size_t i = 0; i <
n; ++i) {
100 outputs.emplace_back(node, i);
// Last parameter of a constructor/function that takes an old-index -> new-index
// map by value (the rest of the signature is elided).
110 std::unordered_map<unsigned, unsigned> old_to_new_index_map)
// Run the base-class RexRebindInputsVisitor handling for this input first.
114 RexRebindInputsVisitor::visitInput(rex_input);
// Look up the input's current index in mapping_.
115 auto mapping_itr = mapping_.find(rex_input->
getIndex());
// Every visited index is required to be present in the map.
116 CHECK(mapping_itr != mapping_.end());
// Replace the input's index with the mapped value.
117 rex_input->
setIndex(mapping_itr->second);
// Immutable index translation table consulted above.
122 const std::unordered_map<unsigned, unsigned>
mapping_;
130 std::shared_ptr<RelProject> new_project,
131 std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs_for_new_project,
132 std::vector<std::string>& fields_for_new_project,
133 std::unordered_map<size_t, size_t>& expr_offset_cache)
134 : new_project_(new_project)
135 , scalar_exprs_for_new_project_(scalar_exprs_for_new_project)
136 , fields_for_new_project_(fields_for_new_project)
137 , expr_offset_cache_(expr_offset_cache)
138 , found_case_expr_window_operand_(
false)
139 , has_partition_expr_(
false) {}
// Body fragment that maps `expr` to an offset in the new projection, reusing a
// previously assigned offset when the expression's hash is already cached.
// Presumably the body of pushDownExpressionImpl (header elided) — TODO confirm.
142 auto hash = expr->
toHash();
143 auto it = expr_offset_cache_.find(hash);
// NOTE(review): `auto` deduces int from -1, while the values assigned below
// come from size() and from the cache entry — confirm the narrowing is intended.
144 auto new_offset = -1;
145 if (it == expr_offset_cache_.end()) {
// Cache miss: register hash -> (current number of pushed-down expressions).
// The unmatched ')' indicates an enclosing call opened on an elided line
// (possibly a CHECK on the insertion flag) — TODO confirm.
147 expr_offset_cache_.emplace(hash, scalar_exprs_for_new_project_.size()).second);
148 new_offset = scalar_exprs_for_new_project_.size();
// Append an empty field name and a deep copy of the expression.
149 fields_for_new_project_.emplace_back(
"");
150 scalar_exprs_for_new_project_.emplace_back(deep_copier_.visit(expr));
// Cache hit path (the `else` line is elided): reuse the stored offset.
153 new_offset = it->second;
159 size_t expr_offset)
const {
163 case WindowExprType::PARTITION_KEY: {
164 auto it = pushed_down_partition_key_offset_.find(expr_offset);
165 CHECK(it != pushed_down_partition_key_offset_.end());
168 case WindowExprType::ORDER_KEY: {
169 auto it = pushed_down_order_key_offset_.find(expr_offset);
170 CHECK(it != pushed_down_order_key_offset_.end());
186 pushed_down_window_operands_offset_.clear();
187 pushed_down_partition_key_offset_.clear();
188 pushed_down_order_key_offset_.clear();
189 for (
size_t offset = 0; offset < window_expr->
size(); ++offset) {
191 auto literal_expr =
dynamic_cast<const RexLiteral*
>(expr);
192 auto case_expr =
dynamic_cast<const RexCase*
>(expr);
198 found_case_expr_window_operand_ =
true;
201 auto new_offset = pushDownExpressionImpl(expr);
202 pushed_down_window_operands_offset_.emplace(offset, new_offset);
207 auto new_offset = pushDownExpressionImpl(partition_key.get());
208 pushed_down_partition_key_offset_.emplace(offset, new_offset);
213 for (
const auto& order_key : window_expr->
getOrderKeys()) {
214 auto new_offset = pushDownExpressionImpl(order_key.get());
215 pushed_down_order_key_offset_.emplace(offset, new_offset);
221 std::vector<std::unique_ptr<const RexScalar>> window_operands;
223 for (
size_t idx = 0; idx < window_expr->
size(); ++idx) {
224 auto it = pushed_down_window_operands_offset_.find(idx);
225 if (it != pushed_down_window_operands_offset_.end()) {
226 auto new_input = std::make_unique<const RexInput>(new_project_.get(), it->second);
228 window_operands.emplace_back(std::move(new_input));
230 auto copied_expr = deep_copier_.visit(window_expr->
getOperand(idx));
231 window_operands.emplace_back(std::move(copied_expr));
234 deconst_window_expr->replaceOperands(std::move(window_operands));
237 auto new_offset = getOffsetForPushedDownExpr(WindowExprType::PARTITION_KEY, idx);
239 auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
241 deconst_window_expr->replacePartitionKey(idx, std::move(new_input));
244 for (
size_t idx = 0; idx < window_expr->
getOrderKeys().size(); ++idx) {
245 auto new_offset = getOffsetForPushedDownExpr(WindowExprType::ORDER_KEY, idx);
247 auto new_input = std::make_unique<const RexInput>(new_project_.get(), *new_offset);
249 deconst_window_expr->replaceOrderKey(idx, std::move(new_input));
254 auto new_offset = pushDownExpressionImpl(rex_input);
255 CHECK_LT(new_offset, scalar_exprs_for_new_project_.size());
256 auto hash = rex_input->
toHash();
257 auto it = expr_offset_cache_.find(hash);
258 CHECK(it != expr_offset_cache_.end());
260 auto new_input = std::make_unique<const RexInput>(new_project_.get(), new_offset);
266 const RexLiteral* rex_literal)
const override {
267 return deep_copier_.visit(rex_literal);
// Visitor hook for RexRef nodes: delegates to deep_copier_ and returns
// whatever it produces for rex_ref.
270 std::unique_ptr<const RexScalar>
visitRef(
const RexRef* rex_ref)
const override {
271 return deep_copier_.visit(rex_ref);
275 const RexSubQuery* rex_subquery)
const override {
276 return deep_copier_.visit(rex_subquery);
281 std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
283 std::unique_ptr<const RexScalar> new_else_expr;
284 for (
size_t i = 0; i < rex_case->
branchCount(); ++i) {
285 const auto when = rex_case->
getWhen(i);
286 auto new_when = PushDownGenericExpressionInWindowFunction::visit(when);
287 const auto then = rex_case->
getThen(i);
288 auto new_then = PushDownGenericExpressionInWindowFunction::visit(then);
289 new_expr_pair_list.emplace_back(std::move(new_when), std::move(new_then));
292 new_else_expr = deep_copier_.visit(rex_case->
getElse());
294 auto new_case = std::make_unique<const RexCase>(new_expr_pair_list, new_else_expr);
300 const auto rex_window_func_operator =
302 if (rex_window_func_operator) {
303 pushDownExpressionInWindowFunction(rex_window_func_operator);
304 return deep_copier_.visit(rex_operator);
306 std::unique_ptr<const RexOperator> new_operator{
nullptr};
307 std::vector<std::unique_ptr<const RexScalar>> new_operands;
308 for (
size_t i = 0; i < rex_operator->
size(); ++i) {
309 const auto operand = rex_operator->
getOperand(i);
310 auto new_operand = PushDownGenericExpressionInWindowFunction::visit(operand);
311 new_operands.emplace_back(std::move(new_operand));
313 if (
auto function_op = dynamic_cast<const RexFunctionOperator*>(rex_operator)) {
314 new_operator = std::make_unique<const RexFunctionOperator>(
315 function_op->getName(), new_operands, rex_operator->
getType());
317 new_operator = std::make_unique<const RexOperator>(
// Default visitor result: a null unique_ptr.
330 std::unique_ptr<const RexScalar>
defaultResult()
const override {
return nullptr; }
347 std::shared_ptr<const RelAlgNode> old_input,
348 std::shared_ptr<const RelAlgNode> input,
349 std::optional<std::unordered_map<unsigned, unsigned>> old_to_new_index_map) {
351 std::unique_ptr<RexRebindInputsVisitor> rebind_inputs;
352 if (old_to_new_index_map) {
353 rebind_inputs = std::make_unique<RexRebindReindexInputsVisitor>(
354 old_input.get(), input.get(), *old_to_new_index_map);
357 std::make_unique<RexRebindInputsVisitor>(old_input.get(), input.get());
359 CHECK(rebind_inputs);
361 rebind_inputs->visit(scalar_expr.get());
366 std::unique_ptr<const RexScalar> new_input) {
367 fields_.emplace_back(std::move(new_field_name));
372 const auto scan_node =
dynamic_cast<const RelScan*
>(ra_node);
375 CHECK_EQ(
size_t(0), scan_node->inputCount());
376 return n_outputs(scan_node, scan_node->size());
378 const auto project_node =
dynamic_cast<const RelProject*
>(ra_node);
381 CHECK_EQ(
size_t(1), project_node->inputCount());
382 return n_outputs(project_node, project_node->size());
384 const auto filter_node =
dynamic_cast<const RelFilter*
>(ra_node);
387 CHECK_EQ(
size_t(1), filter_node->inputCount());
389 return n_outputs(filter_node, prev_out.size());
391 const auto aggregate_node =
dynamic_cast<const RelAggregate*
>(ra_node);
392 if (aggregate_node) {
394 CHECK_EQ(
size_t(1), aggregate_node->inputCount());
395 return n_outputs(aggregate_node, aggregate_node->size());
397 const auto compound_node =
dynamic_cast<const RelCompound*
>(ra_node);
400 CHECK_EQ(
size_t(1), compound_node->inputCount());
401 return n_outputs(compound_node, compound_node->size());
403 const auto join_node =
dynamic_cast<const RelJoin*
>(ra_node);
407 CHECK_EQ(
size_t(2), join_node->inputCount());
412 lhs_out.insert(lhs_out.end(), rhs_out.begin(), rhs_out.end());
415 const auto table_func_node =
dynamic_cast<const RelTableFunction*
>(ra_node);
416 if (table_func_node) {
418 return n_outputs(table_func_node, table_func_node->size());
420 const auto sort_node =
dynamic_cast<const RelSort*
>(ra_node);
423 CHECK_EQ(
size_t(1), sort_node->inputCount());
425 return n_outputs(sort_node, prev_out.size());
427 const auto logical_values_node =
dynamic_cast<const RelLogicalValues*
>(ra_node);
428 if (logical_values_node) {
429 CHECK_EQ(
size_t(0), logical_values_node->inputCount());
430 return n_outputs(logical_values_node, logical_values_node->size());
432 const auto logical_union_node =
dynamic_cast<const RelLogicalUnion*
>(ra_node);
433 if (logical_union_node) {
434 return n_outputs(logical_union_node, logical_union_node->size());
446 if (dynamic_cast<const RelJoin*>(source)) {
455 const auto input =
dynamic_cast<const RexInput*
>(scalar_expr.get());
457 CHECK_EQ(source, input->getSourceNode());
461 if (input->getSourceNode() != source_shape[i].getSourceNode()) {
472 const std::string& new_name) {
474 if (
auto join = dynamic_cast<const RelJoin*>(node)) {
476 const auto lhs_size =
join->getInput(0)->size();
477 if (index < lhs_size) {
484 if (
auto scan = dynamic_cast<const RelScan*>(node)) {
485 return new_name != scan->getFieldName(index);
488 if (
auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
489 return new_name != aggregate->getFieldName(index);
492 if (
auto project = dynamic_cast<const RelProject*>(node)) {
493 return new_name != project->getFieldName(index);
496 if (
auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
497 return new_name != table_func->getFieldName(index);
500 if (
auto logical_values = dynamic_cast<const RelLogicalValues*>(node)) {
501 const auto& tuple_type = logical_values->getTupleType();
503 return new_name != tuple_type[index].get_resname();
506 CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node) ||
507 dynamic_cast<const RelLogicalUnion*>(node));
518 for (
size_t i = 0; i <
fields_.size(); ++i) {
529 std::shared_ptr<const RelAlgNode> input) {
531 RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
538 std::shared_ptr<const RelAlgNode> input) {
540 RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
541 rebind_inputs.visit(
filter_.get());
545 std::shared_ptr<const RelAlgNode> input) {
547 RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
549 rebind_inputs.visit(scalar_source.get());
559 , fields_(rhs.fields_)
560 , hint_applied_(
false)
561 , hints_(std::make_unique<
Hints>())
562 , has_pushed_down_window_expr_(rhs.has_pushed_down_window_expr_) {
568 for (
auto const& kv : *rhs.
hints_) {
576 , tuple_type_(rhs.tuple_type_)
586 , groupby_count_(rhs.groupby_count_)
587 , fields_(rhs.fields_)
588 , hint_applied_(
false)
589 , hints_(std::make_unique<
Hints>()) {
595 for (
auto const& kv : *rhs.
hints_) {
603 , join_type_(rhs.join_type_)
604 , hint_applied_(
false)
605 , hints_(std::make_unique<
Hints>()) {
609 for (
auto const& kv : *rhs.
hints_) {
// Helper body (name and return type are on an elided line) that produces a
// vector holding a deepCopy() of every aggregate expression in agg_exprs.
618 std::vector<std::unique_ptr<const RexAgg>>
const& agg_exprs) {
619 std::vector<std::unique_ptr<const RexAgg>> agg_exprs_copy;
// Reserve up front so the pushes below do not reallocate.
620 agg_exprs_copy.reserve(agg_exprs.size());
621 for (
auto const& agg_expr : agg_exprs) {
622 agg_exprs_copy.push_back(agg_expr->deepCopy());
624 return agg_exprs_copy;
628 std::vector<std::unique_ptr<const RexScalar>>
const& scalar_sources) {
629 std::vector<std::unique_ptr<const RexScalar>> scalar_sources_copy;
630 scalar_sources_copy.reserve(scalar_sources.size());
632 for (
auto const& scalar_source : scalar_sources) {
633 scalar_sources_copy.push_back(copier.
visit(scalar_source.get()));
635 return scalar_sources_copy;
639 std::vector<std::unique_ptr<const RexAgg>>
const& agg_exprs_new,
640 std::vector<std::unique_ptr<const RexScalar>>
const& scalar_sources_new,
641 std::vector<std::unique_ptr<const RexAgg>>
const& agg_exprs_old,
642 std::vector<std::unique_ptr<const RexScalar>>
const& scalar_sources_old,
643 std::vector<const Rex*>
const& target_exprs_old) {
644 std::vector<const Rex*> target_exprs(target_exprs_old);
645 std::unordered_map<const Rex*, const Rex*> old_to_new_target(target_exprs.size());
646 for (
size_t i = 0; i < agg_exprs_new.size(); ++i) {
647 old_to_new_target.emplace(agg_exprs_old[i].
get(), agg_exprs_new[i].
get());
649 for (
size_t i = 0; i < scalar_sources_new.size(); ++i) {
650 old_to_new_target.emplace(scalar_sources_old[i].
get(), scalar_sources_new[i].
get());
652 for (
auto& target : target_exprs) {
653 auto target_it = old_to_new_target.find(target);
654 CHECK(target_it != old_to_new_target.end());
655 target = target_it->second;
665 , groupby_count_(rhs.groupby_count_)
667 , fields_(rhs.fields_)
668 , is_agg_(rhs.is_agg_)
675 , hint_applied_(
false)
676 , hints_(std::make_unique<
Hints>()) {
680 for (
auto const& kv : *rhs.
hints_) {
687 std::shared_ptr<const RelAlgNode> input) {
689 RexRebindInputsVisitor rebind_inputs(old_input.get(), input.get());
691 rebind_inputs.visit(target_expr.get());
694 rebind_inputs.visit(func_input.get());
699 int32_t literal_args = 0;
701 const auto rex_literal =
dynamic_cast<const RexLiteral*
>(arg.get());
// Helper body (name elided) that rewrites each pointer in column_inputs from an
// element of old_table_func_inputs to the element at the same position in
// new_table_func_inputs.
712 std::vector<const Rex*>& column_inputs,
713 const std::vector<std::unique_ptr<const RexScalar>>& old_table_func_inputs,
714 const std::vector<std::unique_ptr<const RexScalar>>& new_table_func_inputs) {
// Positional pairing below requires both vectors to have the same length.
715 CHECK_EQ(old_table_func_inputs.size(), new_table_func_inputs.size());
716 std::unordered_map<const Rex*, const Rex*> old_to_new_input;
// Build the old-pointer -> new-pointer table.
717 for (
size_t i = 0; i < old_table_func_inputs.size(); ++i) {
718 old_to_new_input.emplace(old_table_func_inputs[i].
get(),
719 new_table_func_inputs[i].
get());
// Replace every column input in place; each one must be found in the table.
721 for (
auto& target : column_inputs) {
722 auto target_it = old_to_new_input.find(target);
723 CHECK(target_it != old_to_new_input.end());
724 target = target_it->second;
731 std::vector<std::unique_ptr<const RexScalar>>&& exprs) {
742 , function_name_(rhs.function_name_)
743 , fields_(rhs.fields_)
744 , col_inputs_(rhs.col_inputs_)
// Hash specialization for (node pointer, int) pairs.
752 struct hash<std::pair<const RelAlgNode*, int>> {
753 size_t operator()(
const std::pair<const RelAlgNode*, int>& input_col)
const {
// Reinterprets the storage of the pointer member as an int64_t and reads it.
// NOTE(review): this assumes the pointer object is 8 bytes wide — confirm for
// all supported targets.
754 auto ptr_val =
reinterpret_cast<const int64_t*
>(&input_col.first);
// Seed the hash with the pointer's numeric value ...
755 auto h =
static_cast<size_t>(*ptr_val);
// ... then mix in the int member.
756 boost::hash_combine(h, input_col.second);
765 const size_t which_col) {
766 std::set<std::pair<const RelAlgNode*, int>> work_set;
768 auto curr_col = which_col;
770 work_set.insert(std::make_pair(walker, curr_col));
771 if (dynamic_cast<const RelScan*>(walker) || dynamic_cast<const RelJoin*>(walker)) {
774 CHECK_EQ(
size_t(1), walker->inputCount());
775 auto only_source = walker->getInput(0);
776 if (
auto project = dynamic_cast<const RelProject*>(walker)) {
777 if (
auto input = dynamic_cast<const RexInput*>(project->getProjectAt(curr_col))) {
778 const auto join_source =
dynamic_cast<const RelJoin*
>(only_source);
780 CHECK_EQ(
size_t(2), join_source->inputCount());
781 auto lhs = join_source->getInput(0);
782 CHECK((input->getIndex() < lhs->size() && lhs == input->getSourceNode()) ||
783 join_source->getInput(1) == input->getSourceNode());
785 CHECK_EQ(input->getSourceNode(), only_source);
787 curr_col = input->getIndex();
791 }
else if (
auto aggregate = dynamic_cast<const RelAggregate*>(walker)) {
792 if (curr_col >= aggregate->getGroupByCount()) {
796 walker = only_source;
808 for (
size_t i = 0, e =
collation_.size(); i < e; ++i) {
811 if (this_sort_key.getSortDir() != that_sort_key.getSortDir()) {
814 if (this_sort_key.getNullsPosition() != that_sort_key.getNullsPosition()) {
817 auto this_equiv_keys =
get_equiv_cols(
this, this_sort_key.getField());
818 auto that_equiv_keys =
get_equiv_cols(&that, that_sort_key.getField());
819 std::vector<std::pair<const RelAlgNode*, int>> intersect;
820 std::set_intersection(this_equiv_keys.begin(),
821 this_equiv_keys.end(),
822 that_equiv_keys.begin(),
823 that_equiv_keys.end(),
824 std::back_inserter(intersect));
825 if (intersect.empty()) {
835 :
RelAlgNode(std::move(inputs)), is_all_(is_all) {
838 "The DEPRECATED enable-union option is set to off. Please remove this option as "
839 "it may be disabled in the future.");
847 "Unsupported CAST in UNION: Currently, we only allow casting text type to "
848 "dictionary-encoded strings.");
853 return inputs_.front()->size();
861 if (
auto const* compound = dynamic_cast<RelCompound const*>(
inputs_[0].
get())) {
862 return compound->getFieldName(i);
863 }
else if (
auto const* project = dynamic_cast<RelProject const*>(
inputs_[0].
get())) {
864 return project->getFieldName(i);
865 }
else if (
auto const* logical_union =
866 dynamic_cast<RelLogicalUnion const*>(
inputs_[0].
get())) {
867 return logical_union->getFieldName(i);
868 }
else if (
auto const* aggregate =
869 dynamic_cast<RelAggregate const*>(
inputs_[0].
get())) {
870 return aggregate->getFieldName(i);
871 }
else if (
auto const* scan = dynamic_cast<RelScan const*>(
inputs_[0].
get())) {
872 return scan->getFieldName(i);
873 }
else if (
auto const* table_func =
874 dynamic_cast<RelTableFunction const*>(
inputs_[0].
get())) {
875 return table_func->getFieldName(i);
// Collects, for each entry of tmis0, the not-null flag of its type info into a
// vector<bool> of the same length (the return statement is on an elided line).
882 std::vector<bool>
get_notnulls(std::vector<TargetMetaInfo>
const& tmis0) {
883 std::vector<bool> notnulls(tmis0.size());
884 for (
size_t j = 0; j < tmis0.size(); ++j) {
885 notnulls[j] = tmis0[j].get_type_info().get_notnull();
// Rebuilds every entry of *tmis0 in place, keeping its result name and pairing
// it with `ti` and the entry's physical type info.
// `ti` is defined on an elided line; presumably it is the entry's type info
// with the flag from notnulls[j] applied — TODO confirm.
896 void set_notnulls(std::vector<TargetMetaInfo>* tmis0, std::vector<bool>
const& notnulls) {
897 for (
size_t j = 0; j < tmis0->size(); ++j) {
899 SQLTypeInfo physical_ti = (*tmis0)[j].get_physical_type_info();
// Replace the entry with a freshly constructed TargetMetaInfo.
902 (*tmis0)[j] =
TargetMetaInfo((*tmis0)[j].get_resname(), ti, physical_ti);
912 std::vector<TargetMetaInfo> tmis0 =
inputs_[0]->getOutputMetainfo();
914 for (
size_t i = 1; i <
inputs_.size(); ++i) {
915 std::vector<TargetMetaInfo>
const& tmisi =
inputs_[i]->getOutputMetainfo();
916 if (tmis0.size() != tmisi.size()) {
917 LOG(
INFO) <<
"tmis0.size()=" << tmis0.size() <<
" != " << tmisi.size()
918 <<
"=tmisi.size() for i=" << i;
919 throw std::runtime_error(
"Subqueries of a UNION must have matching data types.");
921 for (
size_t j = 0; j < tmis0.size(); ++j) {
926 LOG(
INFO) <<
"Types do not match for UNION:\n tmis0[" << j
927 <<
"].get_type_info().to_string() = " << ti0.
to_string() <<
"\n tmis"
929 <<
"].get_type_info().to_string() = " << ti1.
to_string();
932 throw std::runtime_error(
933 "Subqueries of a UNION must have the exact same data types.");
945 size_t input_idx)
const {
946 if (
auto const* rex_input_ptr = dynamic_cast<RexInput const*>(rex_scalar)) {
949 scalar_exprs_.emplace_back(std::make_shared<RexInput const>(std::move(rex_input)));
956 for (
auto const& input :
inputs_) {
957 if (
auto* proj_node = dynamic_cast<RelProject const*>(input.get())) {
958 for (
size_t i = 0; i < proj_node->size(); i++) {
959 if (
auto* oper = dynamic_cast<RexOperator const*>(proj_node->getProjectAt(i))) {
960 if (oper->getOperator() ==
SQLOps::kCAST && oper->getType().is_string() &&
961 !oper->getType().is_dict_encoded_string()) {
// Reads the "id" member of a JSON relational-algebra node; the conversion of
// that member to the unsigned return value is on elided lines.
973 unsigned node_id(
const rapidjson::Value& ra_node) noexcept {
974 const auto&
id =
field(ra_node,
"id");
979 rapidjson::StringBuffer buffer;
980 rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
982 return buffer.GetString();
990 const rapidjson::Value& expr) noexcept {
991 const auto& input =
field(expr,
"input");
996 CHECK(expr.IsObject());
997 const auto& literal =
field(expr,
"literal");
1002 const auto type_scale =
json_i64(
field(expr,
"type_scale"));
1003 const auto type_precision =
json_i64(
field(expr,
"type_precision"));
1004 if (literal.IsNull()) {
1005 return std::unique_ptr<RexLiteral>(
new RexLiteral(target_type));
1016 return std::unique_ptr<RexLiteral>(
new RexLiteral(
json_i64(literal),
1024 if (literal.IsDouble()) {
1025 return std::unique_ptr<RexLiteral>(
new RexLiteral(
json_double(literal),
1032 }
else if (literal.IsInt64()) {
1033 return std::make_unique<RexLiteral>(
static_cast<double>(literal.GetInt64()),
1041 }
else if (literal.IsUint64()) {
1042 return std::make_unique<RexLiteral>(
static_cast<double>(literal.GetUint64()),
1050 UNREACHABLE() <<
"Unhandled type: " << literal.GetType();
1053 return std::unique_ptr<RexLiteral>(
new RexLiteral(
json_str(literal),
1061 return std::unique_ptr<RexLiteral>(
new RexLiteral(
json_bool(literal),
1069 return std::unique_ptr<RexLiteral>(
new RexLiteral(target_type));
1077 std::unique_ptr<const RexScalar>
parse_scalar_expr(
const rapidjson::Value& expr,
1081 if (type_obj.IsArray()) {
1084 CHECK(type_obj.IsObject() && type_obj.MemberCount() >= 2)
1088 const auto precision_it = type_obj.FindMember(
"precision");
1089 const int precision =
1090 precision_it != type_obj.MemberEnd() ?
json_i64(precision_it->value) : 0;
1091 const auto scale_it = type_obj.FindMember(
"scale");
1092 const int scale = scale_it != type_obj.MemberEnd() ?
json_i64(scale_it->value) : 0;
1100 const rapidjson::Value& arr,
1102 std::vector<std::unique_ptr<const RexScalar>> exprs;
1103 for (
auto it = arr.Begin(); it != arr.End(); ++it) {
1110 if (name ==
"ROW_NUMBER") {
1113 if (name ==
"RANK") {
1116 if (name ==
"DENSE_RANK") {
1119 if (name ==
"PERCENT_RANK") {
1122 if (name ==
"CUME_DIST") {
1125 if (name ==
"NTILE") {
1128 if (name ==
"LAG") {
1131 if (name ==
"LAG_IN_FRAME") {
1134 if (name ==
"LEAD") {
1137 if (name ==
"LEAD_IN_FRAME") {
1140 if (name ==
"FIRST_VALUE") {
1143 if (name ==
"LAST_VALUE") {
1146 if (name ==
"NTH_VALUE") {
1149 if (name ==
"NTH_VALUE_IN_FRAME") {
1152 if (name ==
"FIRST_VALUE_IN_FRAME") {
1155 if (name ==
"LAST_VALUE_IN_FRAME") {
1158 if (name ==
"AVG") {
1161 if (name ==
"MIN") {
1164 if (name ==
"MAX") {
1167 if (name ==
"SUM") {
1170 if (name ==
"COUNT") {
1173 if (name ==
"COUNT_IF") {
1176 if (name ==
"SUM_IF") {
1179 if (name ==
"$SUM0") {
1182 if (name ==
"FORWARD_FILL") {
1185 if (name ==
"BACKWARD_FILL") {
1188 if (name ==
"CONDITIONAL_CHANGE_EVENT") {
1191 throw std::runtime_error(
"Unsupported window function: " + name);
1195 const rapidjson::Value& arr,
1197 std::vector<std::unique_ptr<const RexScalar>> exprs;
1198 for (
auto it = arr.Begin(); it != arr.End(); ++it) {
1205 return json_str(
field(collation,
"direction")) == std::string(
"DESCENDING")
1211 return json_str(
field(collation,
"nulls")) == std::string(
"FIRST")
1218 std::vector<SortField> collation;
1219 size_t field_idx = 0;
1220 for (
auto it = arr.Begin(); it != arr.End(); ++it, ++field_idx) {
1223 collation.emplace_back(field_idx, sort_dir, null_pos);
1229 const rapidjson::Value& window_bound_obj,
1231 CHECK(window_bound_obj.IsObject());
1234 window_bound.preceding =
json_bool(
field(window_bound_obj,
"preceding"));
1235 window_bound.following =
json_bool(
field(window_bound_obj,
"following"));
1236 window_bound.is_current_row =
json_bool(
field(window_bound_obj,
"is_current_row"));
1237 const auto& offset_field =
field(window_bound_obj,
"offset");
1238 if (offset_field.IsObject()) {
1241 CHECK(offset_field.IsNull());
1243 window_bound.order_key =
json_i64(
field(window_bound_obj,
"order_key"));
1244 return window_bound;
// Fragment that turns a JSON sub-query expression into a RexSubQuery and merges
// the sub-query DAG's hints into root_dag. Presumably the body of
// parse_subquery (header elided) — TODO confirm.
1249 const auto& operands =
field(expr,
"operands");
1250 CHECK(operands.IsArray());
// NOTE(review): if Size() returns an unsigned type this check can never fail.
1251 CHECK_GE(operands.Size(), unsigned(0));
1252 const auto& subquery_ast =
field(expr,
"subquery");
// subquery_dag is created on an elided line, presumably from subquery_ast.
1255 const auto subquery_root_node = subquery_dag->getRootNodeShPtr();
1256 auto subquery = std::make_shared<RexSubQuery>(subquery_root_node);
1257 auto query_hint = subquery_dag->getQueryHint(subquery_dag->getRootNodeShPtr().get());
// Global hints: when the sub-query delivered any, combine them with root_dag's
// existing global hints using operator||.
1259 const auto subquery_global_hint = subquery_dag->getGlobalHints();
1260 if (subquery_global_hint.isAnyQueryHintDelivered()) {
1262 const auto new_global_hint = root_dag.
getGlobalHints() || subquery_global_hint;
// Local hints registered for the sub-query's root node (handling is elided).
1265 const auto subquery_local_hint = subquery_dag->getQueryHint(subquery_root_node.get());
1266 if (subquery_local_hint) {
// Hand back a deep copy of the freshly built sub-query expression.
1271 return subquery->deepCopy();
1277 const bool is_quantifier =
1278 op_name == std::string(
"PG_ANY") || op_name == std::string(
"PG_ALL");
1280 const auto& operators_json_arr =
field(expr,
"operands");
1281 CHECK(operators_json_arr.IsArray());
1283 const auto type_it = expr.FindMember(
"type");
1284 CHECK(type_it != expr.MemberEnd());
1286 if (op ==
kIN && expr.HasMember(
"subquery")) {
1288 operands.emplace_back(std::move(subquery));
1290 if (expr.FindMember(
"partition_keys") != expr.MemberEnd()) {
1291 const auto& partition_keys_arr =
field(expr,
"partition_keys");
1293 const auto& order_keys_arr =
field(expr,
"order_keys");
1300 ti.set_notnull(
false);
1301 return std::make_unique<RexWindowFunctionOperator>(kind,
1311 return std::unique_ptr<RexOperator>(op ==
kFUNCTION
1317 const auto& operands =
field(expr,
"operands");
1318 CHECK(operands.IsArray());
1319 CHECK_GE(operands.Size(), unsigned(2));
1320 std::unique_ptr<const RexScalar> else_expr;
1322 std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1324 for (
auto operands_it = operands.Begin(); operands_it != operands.End();) {
1326 if (operands_it == operands.End()) {
1327 else_expr = std::move(when_expr);
1331 expr_pair_list.emplace_back(std::move(when_expr), std::move(then_expr));
1333 return std::unique_ptr<RexCase>(
new RexCase(expr_pair_list, else_expr));
// Helper body (name and return type elided) that copies every element of a
// JSON array of strings into a std::vector<std::string>.
1337 const rapidjson::Value& json_str_arr) noexcept {
1338 CHECK(json_str_arr.IsArray());
1339 std::vector<std::string> fields;
1340 for (
auto json_str_arr_it = json_str_arr.Begin(); json_str_arr_it != json_str_arr.End();
1341 ++json_str_arr_it) {
// Each element must be a JSON string.
1342 CHECK(json_str_arr_it->IsString());
1343 fields.emplace_back(json_str_arr_it->GetString());
// Helper body (name and return type elided) that copies every element of a
// JSON array of integers into a std::vector<size_t>.
1349 const rapidjson::Value& json_idx_arr) noexcept {
1350 CHECK(json_idx_arr.IsArray());
1351 std::vector<size_t> indices;
1352 for (
auto json_idx_arr_it = json_idx_arr.Begin(); json_idx_arr_it != json_idx_arr.End();
1353 ++json_idx_arr_it) {
// Each element must be an int and must not be negative.
1354 CHECK(json_idx_arr_it->IsInt());
1355 CHECK_GE(json_idx_arr_it->GetInt(), 0);
1356 indices.emplace_back(json_idx_arr_it->GetInt());
1363 if (agg_str ==
"APPROX_QUANTILE") {
1364 LOG(
INFO) <<
"APPROX_QUANTILE is deprecated. Please use APPROX_PERCENTILE instead.";
1370 bool const allow_multiple_args =
1371 shared::is_any<kAPPROX_COUNT_DISTINCT, kAPPROX_QUANTILE, kSUM_IF>(agg);
1372 if (operands.size() > 1 && (operands.size() != 2 || !allow_multiple_args)) {
1375 return std::unique_ptr<const RexAgg>(
new RexAgg(agg, distinct, agg_ti, operands));
1380 CHECK(expr.IsObject());
1381 if (expr.IsObject() && expr.HasMember(
"input")) {
1384 if (expr.IsObject() && expr.HasMember(
"literal")) {
1385 return std::unique_ptr<const RexScalar>(
parse_literal(expr));
1387 if (expr.IsObject() && expr.HasMember(
"op")) {
1389 if (op_str == std::string(
"CASE")) {
1390 return std::unique_ptr<const RexScalar>(
parse_case(expr, root_dag));
1392 if (op_str == std::string(
"$SCALAR_QUERY")) {
1393 return std::unique_ptr<const RexScalar>(
parse_subquery(expr, root_dag));
1395 return std::unique_ptr<const RexScalar>(
parse_operator(expr, root_dag));
1398 if (node_str.find(
"\"correl\":\"$cor") != std::string::npos) {
1399 throw QueryNotSupported(
"Unable to decorrelate one of the correlated subqueries.");
1405 if (join_type_name ==
"inner") {
1408 if (join_type_name ==
"left") {
1411 if (join_type_name ==
"semi") {
1414 if (join_type_name ==
"anti") {
1425 std::vector<std::unique_ptr<const RexScalar>> disambiguated_operands;
1426 for (
size_t i = 0; i < rex_operator->size(); ++i) {
1427 auto operand = rex_operator->getOperand(i);
1428 if (dynamic_cast<const RexSubQuery*>(operand)) {
1429 disambiguated_operands.emplace_back(rex_operator->getOperandAndRelease(i));
1434 const auto rex_window_function_operator =
1436 if (rex_window_function_operator) {
1437 const auto& partition_keys = rex_window_function_operator->
getPartitionKeys();
1438 std::vector<std::unique_ptr<const RexScalar>> disambiguated_partition_keys;
1439 for (
const auto& partition_key : partition_keys) {
1440 disambiguated_partition_keys.emplace_back(
1443 std::vector<std::unique_ptr<const RexScalar>> disambiguated_order_keys;
1444 const auto& order_keys = rex_window_function_operator->getOrderKeys();
1445 for (
const auto& order_key : order_keys) {
1446 disambiguated_order_keys.emplace_back(
disambiguate_rex(order_key.get(), ra_output));
1448 return rex_window_function_operator->disambiguatedOperands(
1449 disambiguated_operands,
1450 disambiguated_partition_keys,
1451 disambiguated_order_keys,
1452 rex_window_function_operator->getCollation());
1454 return rex_operator->getDisambiguated(disambiguated_operands);
1460 std::pair<std::unique_ptr<const RexScalar>, std::unique_ptr<const RexScalar>>>
1461 disambiguated_expr_pair_list;
1462 for (
size_t i = 0; i < rex_case->
branchCount(); ++i) {
1465 disambiguated_expr_pair_list.emplace_back(std::move(disambiguated_when),
1466 std::move(disambiguated_then));
1468 std::unique_ptr<const RexScalar> disambiguated_else{
1470 return std::unique_ptr<const RexCase>(
1471 new RexCase(disambiguated_expr_pair_list, disambiguated_else));
1480 const auto rex_abstract_input =
dynamic_cast<const RexAbstractInput*
>(rex_scalar);
1481 if (rex_abstract_input) {
1482 CHECK_LT(static_cast<size_t>(rex_abstract_input->getIndex()), ra_output.size());
1483 return std::unique_ptr<const RexInput>(
1484 new RexInput(ra_output[rex_abstract_input->getIndex()]));
1486 const auto rex_operator =
dynamic_cast<const RexOperator*
>(rex_scalar);
1490 const auto rex_case =
dynamic_cast<const RexCase*
>(rex_scalar);
1494 if (
auto const rex_literal = dynamic_cast<const RexLiteral*>(rex_scalar)) {
1495 return rex_literal->deepCopy();
1496 }
else if (
auto const rex_subquery = dynamic_cast<const RexSubQuery*>(rex_scalar)) {
1497 return rex_subquery->deepCopy();
1500 std::string(
typeid(*rex_scalar).name()));
1505 CHECK_EQ(
size_t(1), project_node->inputCount());
1506 std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1507 for (
size_t i = 0; i < project_node->size(); ++i) {
1508 const auto projected_expr = project_node->getProjectAt(i);
1509 if (dynamic_cast<const RexSubQuery*>(projected_expr)) {
1510 disambiguated_exprs.emplace_back(project_node->getProjectAtAndRelease(i));
1515 project_node->setExpressions(disambiguated_exprs);
1520 std::vector<std::unique_ptr<const RexScalar>> disambiguated_exprs;
1521 for (
size_t i = 0; i < table_func_node->getTableFuncInputsSize(); ++i) {
1522 const auto target_expr = table_func_node->getTableFuncInputAt(i);
1523 if (dynamic_cast<const RexSubQuery*>(target_expr)) {
1524 disambiguated_exprs.emplace_back(table_func_node->getTableFuncInputAtAndRelease(i));
1529 table_func_node->setTableFuncInputs(std::move(disambiguated_exprs));
// Walks the node list and, per node kind, replaces conditions/expressions with
// "disambiguated" versions computed against the outputs of the node's inputs.
// Several statements and all closing braces are elided from this listing.
1532 void bind_inputs(
const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
// Iterates by value, so each iteration copies the shared_ptr.
1533 for (
auto ra_node : nodes) {
// Filter nodes: exactly one input; the condition is rebuilt from the current
// condition and the output of input 0, then stored back.
1534 const auto filter_node = std::dynamic_pointer_cast<
RelFilter>(ra_node);
1536 CHECK_EQ(
size_t(1), filter_node->inputCount());
1538 filter_node->getCondition(),
get_node_output(filter_node->getInput(0)));
1539 filter_node->setCondition(disambiguated_condition);
// Join nodes: exactly two inputs; the rebuilt condition is stored back (its
// computation is on an elided line).
1542 const auto join_node = std::dynamic_pointer_cast<
RelJoin>(ra_node);
1544 CHECK_EQ(
size_t(2), join_node->inputCount());
1545 auto disambiguated_condition =
1547 join_node->setCondition(disambiguated_condition);
// Project nodes: handling is elided.
1550 const auto project_node = std::dynamic_pointer_cast<
RelProject>(ra_node);
// Table-function nodes: the outputs of all inputs are concatenated into `input`.
1556 const auto table_func_node = std::dynamic_pointer_cast<
RelTableFunction>(ra_node);
1557 if (table_func_node) {
// NOTE(review): reserves one slot per input node, while the loop appends every
// output of every input — confirm the reserve size is intended.
1563 input.reserve(table_func_node->inputCount());
1564 for (
size_t i = 0; i < table_func_node->inputCount(); i++) {
1566 input.insert(input.end(), node_output.begin(), node_output.end());
// For each node that can carry hints (aggregate, project, compound), pick up its
// delivered hints and register them on the DAG when there are any.
1579 for (
auto node : nodes) {
// Stays null when the node has no delivered hints.
1580 Hints* hint_delivered =
nullptr;
1581 const auto agg_node = std::dynamic_pointer_cast<
RelAggregate>(node);
1583 if (agg_node->hasDeliveredHint()) {
1587 const auto project_node = std::dynamic_pointer_cast<
RelProject>(node);
1589 if (project_node->hasDeliveredHint()) {
1593 const auto compound_node = std::dynamic_pointer_cast<
RelCompound>(node);
1594 if (compound_node) {
1595 if (compound_node->hasDeliveredHint()) {
// Only non-empty hint sets are registered for this node.
1599 if (hint_delivered && !hint_delivered->empty()) {
1600 rel_alg_dag.registerQueryHints(node, hint_delivered, global_query_hint);
// Combine the DAG's existing global hints with the ones collected here (via the
// hint type's operator||) and store the result back on the DAG.
1606 const auto existing_global_query_hints = rel_alg_dag.getGlobalHints();
1607 const auto new_global_query_hints = existing_global_query_hints || global_query_hint;
1608 rel_alg_dag.setGlobalQueryHints(new_global_query_hints);
1619 nodes.rbegin(), nodes.rend(), [](
const std::shared_ptr<RelAlgNode>& node) {
1620 auto node_hash = node->toHash();
1621 CHECK_NE(node_hash, static_cast<size_t>(0));
// Scans `nodes` for aggregate nodes that have no aggregate expressions and whose
// single input is itself an aggregate without aggregate expressions and with the
// same group-by count.
// NOTE(review): the action taken on a match is on lines not shown here; presumably
// the node is marked as a no-op, as the function name suggests -- confirm.
1625 void mark_nops(
const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1626 for (
auto node : nodes) {
1627 const auto agg_node = std::dynamic_pointer_cast<
RelAggregate>(node);
// Only aggregates with zero aggregate expressions are candidates.
1628 if (!agg_node || agg_node->getAggExprsCount()) {
1631 CHECK_EQ(
size_t(1), node->inputCount());
1632 const auto agg_input_node =
dynamic_cast<const RelAggregate*
>(node->getInput(0));
// Match: the input is also a group-by-only aggregate with the same number of keys.
1633 if (agg_input_node && !agg_input_node->getAggExprsCount() &&
1634 agg_node->getGroupByCount() == agg_input_node->getGroupByCount()) {
// `target_exprs` is the list the projection's input references index into.
1644 const std::vector<const Rex*>& target_exprs) noexcept {
1645 std::vector<const Rex*>
result;
// Each projected expression is looked at as a RexInput; its index selects the
// matching entry of `target_exprs` (bounds-checked) for the reordered result.
1646 for (
size_t i = 0; i < simple_project->size(); ++i) {
1647 const auto input_rex =
dynamic_cast<const RexInput*
>(simple_project->getProjectAt(i));
1649 CHECK_LT(static_cast<size_t>(input_rex->getIndex()), target_exprs.size());
1650 result.push_back(target_exprs[input_rex->getIndex()]);
// `scalar_sources` holds the expressions that inputs referring to `node_to_keep`
// are replaced with; both are stored as members without copying the expressions.
1664 const std::vector<std::unique_ptr<const RexScalar>>& scalar_sources)
1665 : node_to_keep_(node_to_keep), scalar_sources_(scalar_sources) {}
// An input that refers to node_to_keep_ is replaced by the result of visiting the
// scalar source at the same index (bounds-checked); any other input is deep-copied.
1669 if (input->getSourceNode() == node_to_keep_) {
1670 const auto index = input->getIndex();
1671 CHECK_LT(index, scalar_sources_.size());
1672 return visit(scalar_sources_[index].
get());
1674 return input->deepCopy();
// Collapses the nodes selected by `pattern` (indices into `nodes`) into a single
// RelCompound node. The compound replaces the last node of the pattern, the other
// pattern slots are reset, and remaining nodes are re-pointed at the compound.
// Query hints registered for a coalesced node are re-registered under the
// compound node's hash and id.
1686 std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1687 const std::vector<size_t>& pattern,
1688 std::unordered_map<
size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
1689 query_hints) noexcept {
// A pattern has between two and four nodes.
1690 CHECK_GE(pattern.size(), size_t(2));
1691 CHECK_LE(pattern.size(), size_t(4));
// State accumulated while walking the pattern.
1693 std::unique_ptr<const RexScalar> filter_rex;
1694 std::vector<std::unique_ptr<const RexScalar>> scalar_sources;
1695 size_t groupby_count{0};
1696 std::vector<std::string> fields;
1697 std::vector<const RexAgg*> agg_exprs;
1698 std::vector<const Rex*> target_exprs;
1699 bool first_project{
true};
1703 std::shared_ptr<ModifyManipulationTarget> manipulation_target;
1704 size_t node_hash{0};
1706 bool hint_registered{
false};
1708 for (
const auto node_idx : pattern) {
1709 const auto ra_node = nodes[node_idx];
// Remember the hint registered for this node (keyed by node hash, then node id) so
// it can be moved to the compound node later.
1710 auto registered_query_hint_map_it = query_hints.find(ra_node->toHash());
1711 if (registered_query_hint_map_it != query_hints.end()) {
1712 auto& registered_query_hint_map = registered_query_hint_map_it->second;
1713 auto registered_query_hint_it = registered_query_hint_map.find(ra_node->getId());
1714 if (registered_query_hint_it != registered_query_hint_map.end()) {
1715 hint_registered =
true;
1716 node_hash = registered_query_hint_map_it->first;
1717 node_id = registered_query_hint_it->first;
1718 registered_query_hint = registered_query_hint_it->second;
// Filter node: its condition is taken over as the compound's filter expression.
1721 const auto ra_filter = std::dynamic_pointer_cast<
RelFilter>(ra_node);
1724 filter_rex.reset(ra_filter->getAndReleaseCondition());
1726 last_node = ra_node.get();
// Project node: also the source of the update/delete metadata passed to the compound.
1729 const auto ra_project = std::dynamic_pointer_cast<
RelProject>(ra_node);
1732 manipulation_target = ra_project;
// The first projection supplies the scalar sources; targets initially alias them.
1734 if (first_project) {
1735 CHECK_EQ(
size_t(1), ra_project->inputCount());
1739 const auto filter_input =
dynamic_cast<const RelFilter*
>(ra_project->getInput(0));
1741 CHECK_EQ(
size_t(1), filter_input->inputCount());
1745 scalar_sources = ra_project->getExpressionsAndRelease();
1746 for (
const auto& scalar_expr : scalar_sources) {
1747 target_exprs.push_back(scalar_expr.get());
1749 first_project =
false;
// A later projection is expressed in terms of the targets collected so far: plain
// input references pick an existing target; any other expression is rewritten by
// the replacement visitor and appended as a new scalar source.
1751 if (ra_project->isSimple()) {
1756 std::vector<const Rex*>
result;
1757 RexInputReplacementVisitor visitor(last_node, scalar_sources);
1758 for (
size_t i = 0; i < ra_project->size(); ++i) {
1759 const auto rex = ra_project->getProjectAt(i);
1760 if (
auto rex_input = dynamic_cast<const RexInput*>(rex)) {
1761 const auto index = rex_input->getIndex();
1762 CHECK_LT(index, target_exprs.size());
1763 result.push_back(target_exprs[index]);
1765 scalar_sources.push_back(visitor.visit(rex));
1766 result.push_back(scalar_sources.back().get());
1772 last_node = ra_node.get();
// Aggregate node: targets are rebuilt as one RexRef per group-by key (1-based
// index) followed by the aggregate expressions.
1775 const auto ra_aggregate = std::dynamic_pointer_cast<
RelAggregate>(ra_node);
1778 fields = ra_aggregate->getFields();
1779 agg_exprs = ra_aggregate->getAggregatesAndRelease();
1780 groupby_count = ra_aggregate->getGroupByCount();
// Swap with an empty temporary to clear target_exprs.
1781 decltype(target_exprs){}.swap(target_exprs);
1782 CHECK_LE(groupby_count, scalar_sources.size());
1783 for (
size_t group_idx = 0; group_idx < groupby_count; ++group_idx) {
// The raw RexRef is owned by scalar_sources (emplace_back below); target_exprs
// only keeps a non-owning pointer to it.
1784 const auto rex_ref =
new RexRef(group_idx + 1);
1785 target_exprs.push_back(rex_ref);
1786 scalar_sources.emplace_back(rex_ref);
1788 for (
const auto rex_agg : agg_exprs) {
1789 target_exprs.push_back(rex_agg);
1791 last_node = ra_node.get();
1796 auto compound_node =
1797 std::make_shared<RelCompound>(filter_rex,
1804 manipulation_target->isUpdateViaSelect(),
1805 manipulation_target->isDeleteViaSelect(),
1806 manipulation_target->isVarlenUpdateRequired(),
1807 manipulation_target->getModifiedTableDescriptor(),
1808 manipulation_target->getTargetColumns(),
1809 manipulation_target->getModifiedTableCatalog());
// The compound takes the slot of the pattern's last node and inherits the input
// of the pattern's first node.
1810 auto old_node = nodes[pattern.back()];
1811 nodes[pattern.back()] = compound_node;
1812 auto first_node = nodes[pattern.front()];
1813 CHECK_EQ(
size_t(1), first_node->inputCount());
1814 compound_node->addManagedInput(first_node->getAndOwnInput(0));
1815 if (hint_registered) {
// Move the remembered hint from the original node to the compound node.
1818 auto registered_query_hint_map_it = query_hints.find(node_hash);
1819 CHECK(registered_query_hint_map_it != query_hints.end());
// NOTE(review): plain `auto` makes this a copy of the inner map, so the erase()
// below only changes the local copy and the entry for node_id stays in
// query_hints. The lookup earlier in this function binds with `auto&`; confirm
// whether a reference was intended here as well.
1820 auto registered_query_hint_map = registered_query_hint_map_it->second;
1821 if (registered_query_hint_map.size() > 1) {
1822 registered_query_hint_map.erase(
node_id);
// Single entry under this hash: drop the whole bucket from query_hints.
1824 CHECK_EQ(registered_query_hint_map.size(),
static_cast<size_t>(1));
1825 query_hints.erase(node_hash);
1827 std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
1828 hint_map.emplace(compound_node->getId(), registered_query_hint);
1829 query_hints.emplace(compound_node->toHash(), hint_map);
// Clear every pattern slot except the last one, which now holds the compound.
1831 for (
size_t i = 0; i < pattern.size() - 1; ++i) {
1832 nodes[pattern[i]].reset();
// Re-point users of the replaced node at the compound node.
1834 for (
auto node : nodes) {
1838 node->replaceInput(old_node, compound_node);
// Iterator over the DAG's node vector, built on top of the vector's const_iterator.
// It keeps a reference to the owning vector (owner_) and a set of visited indices
// (visited_), and supports two advancing modes: DUChain and InOrder.
1842 class RANodeIterator :
public std::vector<std::shared_ptr<RelAlgNode>>::const_iterator {
1844 using Super = std::vector<ElementType>::const_iterator;
// Starts at the first node; nodeCount_ is computed by the lambda below.
1851 : Super(nodes.begin()), owner_(nodes), nodeCount_([&nodes]() -> size_t {
1852 size_t non_zero_count = 0;
1853 for (
const auto& node : nodes) {
// Index of the current position within owner_.
1861 explicit operator size_t() {
1862 return std::distance(owner_.begin(), *
static_cast<Super*
>(
this));
// Plain pre-increment is disabled.
1865 RANodeIterator operator++() =
delete;
// `super` aliases the underlying vector iterator so it can be reassigned in place.
1868 Super& super = *
this;
// DUChain: scan the nodes after the current one and count those that take the
// current node as an input.
// NOTE(review): presumably the iterator moves to the single user when there is
// exactly one, and to end() otherwise -- the connecting lines are not shown.
1870 case AdvancingMode::DUChain: {
1871 size_t use_count = 0;
1872 Super only_use = owner_.end();
1873 for (
Super nodeIt = std::next(super); nodeIt != owner_.end(); ++nodeIt) {
1877 for (
size_t i = 0; i < (*nodeIt)->inputCount(); ++i) {
1878 if ((*super) == (*nodeIt)->getAndOwnInput(i)) {
1880 if (1 == use_count) {
1883 super = owner_.end();
// InOrder: reposition at the lowest index that is not in visited_, or at end()
// when there is none.
1892 case AdvancingMode::InOrder:
1893 for (
size_t i = 0; i != owner_.size(); ++i) {
1894 if (!visited_.count(i)) {
1895 super = owner_.begin();
1896 std::advance(super, i);
1900 super = owner_.end();
// Record the current index as visited.
1910 visited_.insert(
size_t(*
this));
1911 Super& super = *
this;
// `first_rex_is_input` is the value returned on the fall-through paths below.
1927 const bool first_rex_is_input) {
// Special case: index 0 of an aggregate parent that has group-by keys.
1928 if (
auto agg_node = dynamic_cast<const RelAggregate*>(parent_node)) {
1929 if (index == 0 && agg_node->getGroupByCount() > 0) {
1935 return first_rex_is_input;
1938 return first_rex_is_input;
1963 return aggregate && next_result;
1971 const auto case_operator =
dynamic_cast<const RexCase*
>(rex);
1972 if (case_operator && case_operator->branchCount() == 1) {
1973 const auto then_window =
1988 const auto case_operator =
1990 const auto second_window =
1992 if (case_operator && second_window &&
// Walks `nodes` with a small state machine (Initial -> Filter -> FirstProject ->
// Aggregate) and collects, in crt_pattern, the indices of consecutive nodes that
// form a coalescable pattern. Filters listed in `left_deep_joins` never start one.
2012 std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2013 const std::vector<const RelAlgNode*>& left_deep_joins,
2014 std::unordered_map<
size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2016 enum class CoalesceState { Initial, Filter, FirstProject, Aggregate };
2017 std::vector<size_t> crt_pattern;
2018 CoalesceState crt_state{CoalesceState::Initial};
// Returns to the initial state and empties the pattern (swap with a temporary).
2020 auto reset_state = [&crt_pattern, &crt_state]() {
2021 crt_state = CoalesceState::Initial;
2022 std::vector<size_t>().
swap(crt_pattern);
// ra_node is null once the iterator has reached the end of `nodes`.
2026 const auto ra_node = nodeIt != nodes.end() ? *nodeIt :
nullptr;
2027 switch (crt_state) {
// Initial: a pattern can start at a filter that is not part of a left-deep join,
// or at a projection without window functions.
2028 case CoalesceState::Initial: {
2029 if (std::dynamic_pointer_cast<const RelFilter>(ra_node) &&
2030 std::find(left_deep_joins.begin(), left_deep_joins.end(), ra_node.get()) ==
2031 left_deep_joins.end()) {
2032 crt_pattern.push_back(
size_t(nodeIt));
2033 crt_state = CoalesceState::Filter;
2034 nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2035 }
else if (
auto project_node =
2036 std::dynamic_pointer_cast<const RelProject>(ra_node)) {
// Projections carrying window functions are stepped over without starting a pattern.
2037 if (project_node->hasWindowFunctionExpr()) {
2038 nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2040 crt_pattern.push_back(
size_t(nodeIt));
2041 crt_state = CoalesceState::FirstProject;
2042 nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2045 nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
// Filter: a following projection extends the pattern; it must not contain window
// functions.
2049 case CoalesceState::Filter: {
2050 if (
auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2053 CHECK(!project_node->hasWindowFunctionExpr());
2054 crt_pattern.push_back(
size_t(nodeIt));
2055 crt_state = CoalesceState::FirstProject;
2056 nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
// FirstProject: a following aggregate extends the pattern.
2062 case CoalesceState::FirstProject: {
2063 if (std::dynamic_pointer_cast<const RelAggregate>(ra_node)) {
2064 crt_pattern.push_back(
size_t(nodeIt));
2065 crt_state = CoalesceState::Aggregate;
2066 nodeIt.advance(RANodeIterator::AdvancingMode::DUChain);
2068 if (crt_pattern.size() >= 2) {
// Aggregate: a trailing projection is added to the pattern only if it has no
// window functions and every projected expression passes the checks below.
2075 case CoalesceState::Aggregate: {
2076 if (
auto project_node = std::dynamic_pointer_cast<const RelProject>(ra_node)) {
2077 if (!project_node->hasWindowFunctionExpr()) {
2079 bool is_simple_project{
true};
2080 for (
size_t i = 0; i < project_node->size(); i++) {
2081 const auto scalar_rex = project_node->getProjectAt(i);
2083 if (
auto input_rex = dynamic_cast<const RexInput*>(scalar_rex)) {
2085 input_rex->getSourceNode(), input_rex->getIndex(),
true)) {
2086 is_simple_project =
false;
2091 CoalesceSecondaryProjectVisitor visitor;
2092 if (!visitor.visit(project_node->getProjectAt(i))) {
2093 is_simple_project =
false;
2097 if (is_simple_project) {
2098 crt_pattern.push_back(
size_t(nodeIt));
2099 nodeIt.advance(RANodeIterator::AdvancingMode::InOrder);
2103 CHECK_GE(crt_pattern.size(), size_t(2));
// After the walk: handle a pattern that was still open in the FirstProject or
// Aggregate state.
2112 if (crt_state == CoalesceState::FirstProject || crt_state == CoalesceState::Aggregate) {
2113 if (crt_pattern.size() >= 2) {
2116 CHECK(!crt_pattern.empty());
// Appends to `scalar_exprs` one RexInput per output column of `node` (indices
// 0..size()-1), and an empty field name per column to `fields`.
2123 std::vector<std::unique_ptr<const RexScalar>>& scalar_exprs,
2124 std::vector<std::string>& fields) {
2125 for (
size_t i = 0; i < node->
size(); i++) {
2126 auto new_rex_input = std::make_unique<RexInput>(node, i);
2127 scalar_exprs.emplace_back(std::move(new_rex_input));
2128 fields.emplace_back(
"");
// For every aggregate whose input is a left-deep inner join, inserts a projection
// between the join and the aggregate. Work is done on a std::list copy of `nodes`
// so that nodes can be inserted mid-sequence; `nodes` is rewritten only if at least
// one projection was added.
2139 std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2140 std::unordered_map<
size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2142 std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2143 bool replace_nodes =
false;
2144 for (
auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2145 auto node = *node_itr;
2146 if (
auto agg_node = std::dynamic_pointer_cast<RelAggregate>(node)) {
2147 std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2148 std::vector<std::string> fields;
2149 std::shared_ptr<RelProject> new_project;
// An aggregate has one input and is never the first node of the list, so
// std::prev(node_itr) below is valid.
2150 CHECK_EQ(agg_node->getInputs().size(), size_t(1));
2151 CHECK_NE(*node_itr, *node_list.begin());
2152 const auto prev_node = *std::prev(node_itr);
2154 auto const input_node_ptr = agg_node->getAndOwnInput(0);
2155 if (
auto join_node =
2156 std::dynamic_pointer_cast<RelLeftDeepInnerJoin const>(input_node_ptr)) {
2157 for (
auto const* join_input_node : join_node->getInputs()) {
// Only when expressions were collected: create the projection over the join, make
// it the aggregate's input, and insert it just before the aggregate in the list.
2160 if (!scalar_exprs.empty()) {
2161 replace_nodes =
true;
2162 new_project = std::make_shared<RelProject>(scalar_exprs, fields, join_node);
2163 agg_node->replaceInput(join_node, new_project);
2164 node_list.insert(node_itr, new_project);
// Copy the edited list back into the caller's vector.
2169 if (replace_nodes) {
2170 nodes.assign(node_list.begin(), node_list.end());
// For each simple projection whose input is another projection, rewrite its
// expressions as copies of the child projection's expressions and re-point it at
// the child's own input.
2175 for (
auto& node : nodes) {
2176 if (
auto* proj_node = dynamic_cast<RelProject*>(node.get())) {
2177 if (proj_node->isSimple()) {
2178 if (
auto child_proj_node =
2179 dynamic_cast<RelProject const*>(proj_node->getInput(0))) {
2180 std::vector<std::unique_ptr<RexScalar const>> scalar_exprs;
// Each expression is replaced by a copy of the child expression it indexes.
2182 for (
size_t i = 0; i < proj_node->size(); i++) {
2183 auto rex_abs_input =
2185 scalar_exprs.push_back(
2186 copier.
visit(child_proj_node->getProjectAt(rex_abs_input->getIndex())));
// The number of expressions must still match the number of field names.
2188 CHECK_EQ(scalar_exprs.size(), proj_node->getFields().size());
2189 proj_node->setExpressions(scalar_exprs);
// Replace the child projection, as this node's input, with the child's input.
2190 proj_node->replaceInput(proj_node->getAndOwnInput(0),
2191 child_proj_node->getAndOwnInput(0));
// `collected_window_func` is an output map (expression hash -> expression) filled
// during the visit. `only_add_window_expr` selects what tryAddWindowExpr records;
// see the branch at the end of this block.
2201 std::unordered_map<size_t, const RexScalar*>& collected_window_func,
2202 bool only_add_window_expr)
2203 : collected_window_func_(collected_window_func)
2204 , only_add_window_expr_(only_add_window_expr) {}
// Operator visit: offer the operator itself, then its operands.
2210 tryAddWindowExpr(rex_operator);
2212 const size_t operand_count = rex_operator->size();
2213 for (
size_t i = 0; i < operand_count; ++i) {
2214 const auto operand = rex_operator->getOperand(i);
2218 tryAddWindowExpr(operand);
2223 return defaultResult();
// CASE visit: offer the whole CASE expression, then each WHEN / THEN branch and
// the ELSE expression.
2232 tryAddWindowExpr(rex_case);
2233 if (!only_add_window_expr_) {
2238 for (
size_t i = 0; i < rex_case->
branchCount(); ++i) {
2239 const auto when = rex_case->
getWhen(i);
2241 tryAddWindowExpr(when);
2245 const auto then = rex_case->
getThen(i);
2247 tryAddWindowExpr(then);
2253 auto else_expr = rex_case->
getElse();
2255 tryAddWindowExpr(else_expr);
2260 return defaultResult();
// When only_add_window_expr_ is false the expression is stored under its hash as
// given; the dynamic_cast path stores it only if it is a RexWindowFunctionOperator.
// emplace() keeps the first entry seen for a given hash.
2264 if (!only_add_window_expr_) {
2265 collected_window_func_.emplace(expr->
toHash(), expr);
2267 if (
auto window_expr = dynamic_cast<RexWindowFunctionOperator const*>(expr)) {
2268 collected_window_func_.emplace(window_expr->toHash(), window_expr);
// Visitor that rewrites expressions so that collected window-function expressions
// (identified by hash) are replaced by copies of prepared replacement inputs, and
// inputs from other nodes are routed through new_project_.
// All containers are held by reference and shared with the caller.
2283 std::unordered_set<size_t>& collected_window_func_hash,
2284 std::vector<std::unique_ptr<const RexScalar>>& new_rex_input_for_window_func,
2285 std::unordered_map<size_t, size_t>& window_func_to_new_rex_input_idx_map,
2287 std::unordered_map<
size_t, std::unique_ptr<const RexInput>>&
2288 new_rex_input_from_child_node)
2289 : collected_window_func_hash_(collected_window_func_hash)
2290 , new_rex_input_for_window_func_(new_rex_input_for_window_func)
2291 , window_func_to_new_rex_input_idx_map_(window_func_to_new_rex_input_idx_map)
2292 , new_project_(new_project)
2293 , new_rex_input_from_child_node_(new_rex_input_from_child_node) {
// Sanity checks: every collected hash has an index mapping, and every mapped
// index is within the replacement-input vector.
2294 CHECK_EQ(collected_window_func_hash_.size(),
2295 window_func_to_new_rex_input_idx_map_.size());
2296 for (
auto hash : collected_window_func_hash_) {
2297 auto rex_it = window_func_to_new_rex_input_idx_map_.find(hash);
2298 CHECK(rex_it != window_func_to_new_rex_input_idx_map_.end());
2299 CHECK_LT(rex_it->second, new_rex_input_for_window_func_.size());
2301 CHECK(new_project_);
// Input visit: an input that does not already come from new_project_ is appended
// to new_project_ (once per distinct input hash) and replaced by an input that
// references the new column; the field name is carried over when the original
// source is a projection.
2306 if (rex_input->getSourceNode() != new_project_) {
2307 const auto cur_index = rex_input->getIndex();
2308 auto cur_source_node = rex_input->getSourceNode();
2309 std::string field_name =
"";
2310 if (
auto cur_project_node = dynamic_cast<const RelProject*>(cur_source_node)) {
2311 field_name = cur_project_node->getFieldName(cur_index);
2313 auto rex_input_hash = rex_input->toHash();
2314 auto rex_input_it = new_rex_input_from_child_node_.find(rex_input_hash);
2315 if (rex_input_it == new_rex_input_from_child_node_.end()) {
// The new input's index is the projection's size before the append below.
2316 auto new_rex_input =
2317 std::make_unique<RexInput>(new_project_, new_project_->size());
2318 new_project_->appendInput(field_name, rex_input->deepCopy());
2319 new_rex_input_from_child_node_.emplace(rex_input_hash, new_rex_input->deepCopy());
2320 return new_rex_input;
// Already forwarded: reuse (a copy of) the cached replacement.
2322 return rex_input_it->second->deepCopy();
2325 return rex_input->deepCopy();
// Operator visit: a collected operator is replaced wholesale by its prepared input.
2330 auto new_rex_idx = is_collected_window_function(rex_operator->toHash());
2332 return get_new_rex_input(*new_rex_idx);
2335 const auto rex_window_function_operator =
2337 if (rex_window_function_operator) {
2339 return visitWindowFunctionOperator(rex_window_function_operator);
// Otherwise rebuild the operator from its operands, replacing collected operands
// and visiting the rest.
2342 const size_t operand_count = rex_operator->size();
2343 std::vector<RetType> new_opnds;
2344 for (
size_t i = 0; i < operand_count; ++i) {
2345 const auto operand = rex_operator->getOperand(i);
2346 auto new_rex_idx_for_operand = is_collected_window_function(operand->toHash());
2347 if (new_rex_idx_for_operand) {
2348 new_opnds.push_back(get_new_rex_input(*new_rex_idx_for_operand));
2350 new_opnds.emplace_back(visit(rex_operator->getOperand(i)));
2353 return rex_operator->getDisambiguated(new_opnds);
// CASE visit: same scheme, applied to the whole CASE, then per WHEN/THEN pair and
// the ELSE expression.
2357 auto new_rex_idx = is_collected_window_function(rex_case->
toHash());
2359 return get_new_rex_input(*new_rex_idx);
2362 std::vector<std::pair<RetType, RetType>> new_pair_list;
2363 for (
size_t i = 0; i < rex_case->
branchCount(); ++i) {
2364 auto when_operand = rex_case->
getWhen(i);
2365 auto new_rex_idx_for_when_operand =
2366 is_collected_window_function(when_operand->toHash());
2368 auto then_operand = rex_case->
getThen(i);
2369 auto new_rex_idx_for_then_operand =
2370 is_collected_window_function(then_operand->toHash());
2372 new_pair_list.emplace_back(
2373 new_rex_idx_for_when_operand ? get_new_rex_input(*new_rex_idx_for_when_operand)
2374 : visit(when_operand),
2375 new_rex_idx_for_then_operand ? get_new_rex_input(*new_rex_idx_for_then_operand)
2376 : visit(then_operand));
2378 auto new_rex_idx_for_else_operand =
2379 is_collected_window_function(rex_case->
getElse()->
toHash());
2380 auto new_else = new_rex_idx_for_else_operand
2381 ? get_new_rex_input(*new_rex_idx_for_else_operand)
2383 return std::make_unique<RexCase>(new_pair_list, new_else);
// Hash lookup: returns the replacement index for a collected expression, or
// std::nullopt when the hash is unknown.
2388 auto rex_it = window_func_to_new_rex_input_idx_map_.find(rex_hash);
2389 if (rex_it != window_func_to_new_rex_input_idx_map_.end()) {
2390 return rex_it->second;
2392 return std::nullopt;
// Returns a fresh copy of the replacement input stored at rex_idx (bounds- and
// null-checked).
2397 CHECK_LT(rex_idx, new_rex_input_for_window_func_.size());
2398 auto& new_rex_input = new_rex_input_for_window_func_.at(rex_idx);
2399 CHECK(new_rex_input);
2400 auto copied_rex_input = copier_.visit(new_rex_input.get());
2401 return copied_rex_input;
2410 std::unordered_map<size_t, std::unique_ptr<const RexInput>>&
// Copies the hints delivered to `prev_node` onto `new_node` and registers
// prev_node's registered query hint under new_node's hash and id as well.
2416 std::shared_ptr<RelProject> prev_node,
2417 std::shared_ptr<RelProject> new_node,
2418 std::unordered_map<
size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2420 auto delivered_hints = prev_node->getDeliveredHints();
// Nothing to do when prev_node carries no hints.
2421 bool needs_propagate_hints = !delivered_hints->empty();
2422 if (needs_propagate_hints) {
2423 for (
auto& kv : *delivered_hints) {
2424 new_node->addHint(kv.second);
// prev_node must already have a registered hint (hash bucket and id entry).
2426 auto prev_it = query_hints.find(prev_node->toHash());
2428 CHECK(prev_it != query_hints.end());
2429 auto prev_hint_it = prev_it->second.find(prev_node->getId());
2430 CHECK(prev_hint_it != prev_it->second.end());
// emplace() does not overwrite: if new_node's hash is already a key in
// query_hints, the existing bucket is left unchanged.
2431 std::unordered_map<unsigned, RegisteredQueryHint> hint_map;
2432 hint_map.emplace(new_node->getId(), prev_hint_it->second);
2433 query_hints.emplace(new_node->toHash(), hint_map);
// For each projection in which window-function expressions were collected, inserts
// a new projection in front of it that evaluates those expressions, and rewrites
// the original projection to reference the new projection's outputs instead.
// Edits are made on a std::list copy of `nodes`, which is copied back at the end.
2460 std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2461 std::unordered_map<
size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2463 std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
2464 for (
auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2465 const auto node = *node_itr;
2466 auto window_func_project_node = std::dynamic_pointer_cast<
RelProject>(node);
2467 if (!window_func_project_node) {
// NOTE(review): std::prev() requires node_itr != begin(); presumably a projection
// handled here always has a predecessor -- confirm.
2471 const auto prev_node_itr = std::prev(node_itr);
2472 const auto prev_node = *prev_node_itr;
// Expression hash -> expression, filled by the collector for every projected
// expression.
2476 std::unordered_map<size_t, const RexScalar*> collected_window_func;
2480 for (
size_t i = 0; i < window_func_project_node->size(); i++) {
2481 const auto scalar_rex = window_func_project_node->getProjectAt(i);
2486 collector.visit(scalar_rex);
2489 if (!collected_window_func.empty()) {
// Bookkeeping shared with the replacement visitor.
2491 std::unordered_set<size_t> collected_window_func_hash;
2494 std::vector<std::unique_ptr<const RexScalar>> new_rex_input_for_window_func;
2496 std::vector<std::unique_ptr<const RexScalar>> new_scalar_expr_for_window_project;
2499 std::unordered_map<size_t, size_t> window_func_to_new_rex_input_idx_map;
2503 std::unordered_map<size_t, std::unique_ptr<const RexInput>>
2504 new_rex_input_from_child_node;
// The new projection is created empty on top of prev_node and inserted before the
// current node; its expressions and field names are set further down.
2507 std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2508 std::vector<std::string> dummy_fields;
2509 std::vector<std::string> new_project_field_names;
2512 std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2515 node_list.insert(node_itr, new_project);
// For each collected expression: record its hash, assign it the next output index
// of the new projection, prepare a RexInput pointing at that index, and copy the
// expression itself into the new projection's expression list.
2519 std::for_each(collected_window_func.begin(),
2520 collected_window_func.end(),
2521 [&new_project_field_names,
2522 &collected_window_func_hash,
2523 &new_rex_input_for_window_func,
2524 &new_scalar_expr_for_window_project,
2527 &window_func_to_new_rex_input_idx_map](
const auto& kv) {
2530 collected_window_func_hash.insert(kv.first);
2534 const auto rex_idx = new_rex_input_for_window_func.size();
2535 window_func_to_new_rex_input_idx_map.emplace(kv.first, rex_idx);
2539 new_rex_input_for_window_func.emplace_back(
2540 std::make_unique<const RexInput>(new_project.get(), rex_idx));
// NOTE(review): if visit() returns by value, the std::move here is redundant.
2541 new_scalar_expr_for_window_project.push_back(
2542 std::move(copier.visit(kv.second)));
2543 new_project_field_names.emplace_back(
"");
2545 new_project->setExpressions(new_scalar_expr_for_window_project);
2546 new_project->setFields(std::move(new_project_field_names));
// Take the original projection's expressions, rewrite each one with the
// replacement visitor, and install the rewritten list.
2548 auto window_func_scalar_exprs =
2549 window_func_project_node->getExpressionsAndRelease();
2551 new_rex_input_for_window_func,
2552 window_func_to_new_rex_input_idx_map,
2554 new_rex_input_from_child_node);
2556 for (
auto& scalar_expr : window_func_scalar_exprs) {
2559 auto new_parent_rex = replacer.
visit(scalar_expr.get());
2560 window_func_scalar_exprs[rex_idx] = std::move(new_parent_rex);
2564 window_func_project_node->setExpressions(window_func_scalar_exprs);
2565 window_func_project_node->replaceInput(prev_node, new_project);
2567 new_project->setPushedDownWindowExpr();
// Copy the edited list back into the caller's vector.
2570 nodes.assign(node_list.begin(), node_list.end());
2585 result.insert(next_result.begin(), next_result.end());
// `has_generic_expr_in_window_func` is an output flag, set to true below when a
// window operand is neither a RexInput nor a RexLiteral.
2592 bool& has_generic_expr_in_window_func) {
// Partition keys are examined as plain column references (RexInput).
2594 auto partition_input =
dynamic_cast<RexInput const*
>(partition_key.get());
2595 if (!partition_input) {
// Order keys are examined the same way.
2599 for (
auto const& order_key : window_expr->
getOrderKeys()) {
2600 auto order_input =
dynamic_cast<RexInput const*
>(order_key.get());
// Operands of the window function.
2605 for (
size_t k = 0; k < window_expr->
size(); k++) {
2606 if (!shared::dynamic_castable_to_any<RexInput, RexLiteral>(
2608 has_generic_expr_in_window_func =
true;
// Inspects every projected expression of `window_func_project_node` and returns a
// pair: (has_generic_expr_in_window_func, res).
2616 RelProject const* window_func_project_node) {
2617 bool has_generic_expr_in_window_func =
false;
2619 for (
size_t i = 0; i < window_func_project_node->
size(); ++i) {
2620 auto const projected_target = window_func_project_node->
getProjectAt(i);
// A window function projected directly.
2621 if (
auto const* window_expr =
2622 dynamic_cast<RexWindowFunctionOperator const*>(projected_target)) {
2625 }
else if (
auto const* case_expr = dynamic_cast<RexCase const*>(projected_target)) {
// A CASE expression: collect the window functions nested inside it and examine
// each of them.
2626 std::unordered_map<size_t, const RexScalar*> collected_window_func;
2628 collector.visit(case_expr);
2629 for (
auto const& kv : collected_window_func) {
2630 auto const* candidate_window_expr =
2632 CHECK(candidate_window_expr);
2634 has_generic_expr_in_window_func);
2638 return std::make_pair(has_generic_expr_in_window_func, res);
// Inserts an extra projection in front of projections that contain window
// functions, when the preceding node is a filter, a join or a multi-fragment /
// sharded scan, when window expressions need to be pushed down, or (optionally)
// when the window projection is the first projection encountered.
// Edits are made on a std::list copy of `nodes`, which is copied back at the end.
2654 std::vector<std::shared_ptr<RelAlgNode>>& nodes,
2655 const bool always_add_project_if_first_project_is_window_expr,
2656 std::unordered_map<
size_t, std::unordered_map<unsigned, RegisteredQueryHint>>&
2658 std::list<std::shared_ptr<RelAlgNode>> node_list(nodes.begin(), nodes.end());
// Counts the projections seen so far (with or without window functions).
2659 size_t project_node_counter{0};
2660 for (
auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
2661 const auto node = *node_itr;
2663 auto window_func_project_node = std::dynamic_pointer_cast<
RelProject>(node);
2664 if (!window_func_project_node) {
2667 project_node_counter++;
2668 if (!window_func_project_node->hasWindowFunctionExpr()) {
// NOTE(review): std::prev() requires node_itr != begin(); presumably a window
// projection always has a predecessor in the list -- confirm.
2674 const auto prev_node_itr = std::prev(node_itr);
2675 const auto prev_node = *prev_node_itr;
// Classify the preceding node.
2678 auto filter_node = std::dynamic_pointer_cast<
RelFilter>(prev_node);
2679 auto join_node = std::dynamic_pointer_cast<
RelJoin>(prev_node);
2681 auto scan_node = std::dynamic_pointer_cast<
RelScan>(prev_node);
// A scan counts as multi-fragment when it is sharded or has more than one fragment.
2682 const bool has_multi_fragment_scan_input =
2684 (scan_node->getNumShards() > 0 || scan_node->getNumFragments() > 1));
2685 auto const [has_generic_expr_in_window_func, needs_expr_pushdown] =
// No projection is added unless at least one of the trigger conditions holds.
2711 if (!((always_add_project_if_first_project_is_window_expr &&
2712 project_node_counter == 1) ||
2713 filter_node || join_node || has_multi_fragment_scan_input ||
2714 needs_expr_pushdown)) {
// Case 1: expressions must be pushed down, or the input is a join. The new
// projection's expressions are produced by the visitor while it rewrites the
// window projection's targets.
2718 if (needs_expr_pushdown || join_node) {
2722 std::unordered_map<size_t, size_t> expr_offset_cache;
2723 std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_new_project;
2724 std::vector<std::unique_ptr<const RexScalar>> scalar_exprs_for_window_project;
2725 std::vector<std::string> fields_for_window_project;
2726 std::vector<std::string> fields_for_new_project;
2730 std::vector<std::unique_ptr<const RexScalar>> dummy_scalar_exprs;
2731 std::vector<std::string> dummy_fields;
2733 std::make_shared<RelProject>(dummy_scalar_exprs, dummy_fields, prev_node);
2737 scalar_exprs_for_new_project,
2738 fields_for_new_project,
2740 for (
size_t i = 0; i < window_func_project_node->size(); ++i) {
2741 auto projected_target = window_func_project_node->getProjectAt(i);
2742 auto new_projection_target = visitor.visit(projected_target);
// The released raw pointer is re-wrapped by the vector's unique_ptr element.
2743 scalar_exprs_for_window_project.emplace_back(
2744 std::move(new_projection_target.release()));
2746 new_project->setExpressions(scalar_exprs_for_new_project);
2747 new_project->setFields(std::move(fields_for_new_project));
2748 bool has_groupby =
false;
2749 auto aggregate = std::dynamic_pointer_cast<
RelAggregate>(prev_node);
// Three situations force row-wise output on the new projection; each one logs its
// reason.
2754 if (has_groupby && visitor.hasPartitionExpression()) {
2760 <<
"Query output overridden to row-wise format due to presence of a window "
2761 "function with partition expression and group-by expression.";
2762 new_project->forceRowwiseOutput();
2763 }
else if (has_generic_expr_in_window_func) {
2764 VLOG(1) <<
"Query output overridden to row-wise format due to presence of a "
2765 "generic expression as an input expression of the window "
2767 new_project->forceRowwiseOutput();
2768 }
else if (visitor.hasCaseExprAsWindowOperand()) {
2770 <<
"Query output overridden to row-wise format due to presence of a window "
2771 "function with a case statement as its operand.";
2772 new_project->forceRowwiseOutput();
// Splice the new projection in front of the window projection and install the
// rewritten targets.
2777 new_project->setPushedDownWindowExpr();
2778 node_list.insert(node_itr, new_project);
2779 window_func_project_node->replaceInput(prev_node, new_project);
2780 window_func_project_node->setExpressions(scalar_exprs_for_window_project);
// Case 2: the new projection only forwards the inputs that the window projection
// actually references.
2786 for (
size_t i = 0; i < window_func_project_node->size(); i++) {
2788 input_collector.visit(window_func_project_node->getProjectAt(i));
2789 inputs.insert(new_inputs.begin(), new_inputs.end());
2791 std::vector<std::unique_ptr<const RexScalar>> scalar_exprs;
2792 std::vector<std::string> fields;
// Maps an input's old column index to its position in the new projection.
2793 std::unordered_map<unsigned, unsigned> old_index_to_new_index;
// No referenced inputs at all: project a single literal so that the new
// projection is not empty.
2795 if (inputs.empty()) {
2801 CHECK(has_multi_fragment_scan_input);
2802 CHECK(!needs_expr_pushdown);
2803 auto const bool_scale = std::numeric_limits<int32_t>::min();
2804 scalar_exprs.push_back(std::make_unique<RexLiteral>(
2806 old_index_to_new_index.insert(std::make_pair(0, 0));
2807 fields.emplace_back(
"");
// Otherwise forward the referenced inputs ordered by their original column index.
2811 std::vector<RexInput> sorted_inputs(inputs.begin(), inputs.end());
2813 sorted_inputs.begin(), sorted_inputs.end(), [](
const auto&
a,
const auto& b) {
2814 return a.getIndex() < b.getIndex();
2817 for (
auto& input : sorted_inputs) {
2818 CHECK_EQ(input.getSourceNode(), prev_node.get());
2819 CHECK(old_index_to_new_index
2820 .insert(std::make_pair(input.getIndex(), scalar_exprs.size()))
2822 scalar_exprs.emplace_back(input.deepCopy());
2823 fields.emplace_back(
"");
2828 CHECK_GT(scalar_exprs.size(), 0UL);
2829 CHECK_EQ(scalar_exprs.size(), fields.size());
2830 auto new_project = std::make_shared<RelProject>(scalar_exprs, fields, prev_node);
2832 new_project->setPushedDownWindowExpr();
2833 node_list.insert(node_itr, new_project);
// replaceInput also receives the index remapping built above.
2834 window_func_project_node->replaceInput(
2835 prev_node, new_project, old_index_to_new_index);
// Copy the edited list back into the caller's vector.
2838 nodes.assign(node_list.begin(), node_list.end());
// Reads the member named `field` of JSON object `obj` as a 64-bit integer literal.
// NOTE(review): presumably `default_val` is returned when the member is absent;
// the return statement of that branch is not shown -- confirm.
2843 const int64_t default_val) noexcept {
2844 const auto it = obj.FindMember(
field);
2845 if (it == obj.MemberEnd()) {
// The literal must have zero scale and zero target scale.
2850 CHECK_EQ(
unsigned(0), lit->getScale());
2851 CHECK_EQ(
unsigned(0), lit->getTargetScale());
2852 return lit->getVal<int64_t>();
2856 const auto& inputs_json =
field(node,
"inputs");
2857 CHECK(inputs_json.IsArray() && !inputs_json.Size());
// Resolves the catalog and table descriptor referenced by a scan node's "table"
// field, which must be a two-element JSON array; element [1] is the table name.
// NOTE(review): element [0] presumably names the catalog/database -- the lookup of
// `cat` is not shown.
2860 const std::pair<const Catalog_Namespace::Catalog*, const TableDescriptor*>
2862 const auto& table_json =
field(scan_ra,
"table");
2863 CHECK(table_json.IsArray());
2864 CHECK_EQ(
unsigned(2), table_json.Size());
2868 const auto td = cat->getMetadataForTable(table_json[1].GetString());
// Returns non-owning pointers.
2870 return {cat.get(), td};
2874 const auto& fields_json =
field(scan_ra,
"fieldNames");
// Converts the JSON array `rels` into relational-algebra nodes. Each element must
// be a JSON object; its operator name selects the node type to build. Built nodes
// are appended to nodes_, which is moved out as the result.
2894 std::vector<std::shared_ptr<RelAlgNode>>
run(
const rapidjson::Value& rels,
2896 for (
auto rels_it = rels.Begin(); rels_it != rels.End(); ++rels_it) {
2897 const auto& crt_node = *rels_it;
2898 const auto id =
node_id(crt_node);
2900 CHECK(crt_node.IsObject());
2901 std::shared_ptr<RelAlgNode> ra_node =
nullptr;
// Dispatch on the operator name; both scan spellings share one branch.
2903 if (rel_op == std::string(
"EnumerableTableScan") ||
2904 rel_op == std::string(
"LogicalTableScan")) {
2906 }
else if (rel_op == std::string(
"LogicalProject")) {
2908 }
else if (rel_op == std::string(
"LogicalFilter")) {
2910 }
else if (rel_op == std::string(
"LogicalAggregate")) {
2912 }
else if (rel_op == std::string(
"LogicalJoin")) {
2914 }
else if (rel_op == std::string(
"LogicalSort")) {
2916 }
else if (rel_op == std::string(
"LogicalValues")) {
2918 }
else if (rel_op == std::string(
"LogicalTableModify")) {
2920 }
else if (rel_op == std::string(
"LogicalTableFunctionScan")) {
2922 }
else if (rel_op == std::string(
"LogicalUnion")) {
2927 nodes_.push_back(ra_node);
// nodes_ is not a local variable of this function, so std::move is what makes
// this return a move instead of a copy.
2930 return std::move(
nodes_);
// Builds a RelScan from a scan JSON object.
2936 CHECK(scan_ra.IsObject());
// With a "hints" member the node is first bound to a local, presumably so hints
// can be attached before it is returned (those lines are not shown); otherwise it
// is returned directly.
2939 if (scan_ra.HasMember(
"hints")) {
2940 auto scan_node = std::make_shared<RelScan>(td, field_names, *
cat);
2944 return std::make_shared<RelScan>(td, field_names, *
cat);
// Builds a RelProject from a projection JSON object; it has exactly one input.
2950 CHECK_EQ(
size_t(1), inputs.size());
// "exprs" must be a JSON array; each element becomes one projected expression.
2951 const auto& exprs_json =
field(proj_ra,
"exprs");
2952 CHECK(exprs_json.IsArray());
2953 std::vector<std::unique_ptr<const RexScalar>> exprs;
2954 for (
auto exprs_json_it = exprs_json.Begin(); exprs_json_it != exprs_json.End();
2958 const auto& fields =
field(proj_ra,
"fields");
// With a "hints" member the node is bound to a local before being returned.
2959 if (proj_ra.HasMember(
"hints")) {
2960 auto project_node = std::make_shared<RelProject>(
2963 return project_node;
2965 return std::make_shared<RelProject>(
2972 CHECK_EQ(
size_t(1), inputs.size());
2973 const auto id =
node_id(filter_ra);
2976 return std::make_shared<RelFilter>(condition, inputs.front());
2981 CHECK_EQ(
size_t(1), inputs.size());
2984 for (
size_t i = 0; i < group.size(); ++i) {
2987 if (agg_ra.HasMember(
"groups") || agg_ra.HasMember(
"indicator")) {
2990 const auto& aggs_json_arr =
field(agg_ra,
"aggs");
2991 CHECK(aggs_json_arr.IsArray());
2992 std::vector<std::unique_ptr<const RexAgg>> aggs;
2993 for (
auto aggs_json_arr_it = aggs_json_arr.Begin();
2994 aggs_json_arr_it != aggs_json_arr.End();
2995 ++aggs_json_arr_it) {
2998 if (agg_ra.HasMember(
"hints")) {
3000 std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
3004 return std::make_shared<RelAggregate>(group.size(), aggs, fields, inputs.front());
3010 CHECK_EQ(
size_t(2), inputs.size());
3013 if (join_ra.HasMember(
"hints")) {
3015 std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
3019 return std::make_shared<RelJoin>(inputs[0], inputs[1], filter_rex, join_type);
3024 CHECK_EQ(
size_t(1), inputs.size());
3025 std::vector<SortField> collation;
3026 const auto& collation_arr =
field(sort_ra,
"collation");
3027 CHECK(collation_arr.IsArray());
3028 for (
auto collation_arr_it = collation_arr.Begin();
3029 collation_arr_it != collation_arr.End();
3030 ++collation_arr_it) {
3031 const size_t field_idx =
json_i64(
field(*collation_arr_it,
"field"));
3034 collation.emplace_back(field_idx, sort_dir, null_pos);
3038 auto ret = std::make_shared<RelSort>(
3040 limit >= 0 ? std::make_optional<size_t>(limit) : std::nullopt,
3046 std::shared_ptr<RelModify>
dispatchModify(
const rapidjson::Value& logical_modify_ra) {
3048 CHECK_EQ(
size_t(1), inputs.size());
3050 const auto [
cat, table_descriptor] =
3052 if (table_descriptor->isView) {
3053 throw std::runtime_error(
"UPDATE of a view is unsupported.");
3056 bool flattened =
json_bool(
field(logical_modify_ra,
"flattened"));
3057 std::string op =
json_str(
field(logical_modify_ra,
"operation"));
3060 if (op ==
"UPDATE") {
3061 const auto& update_columns =
field(logical_modify_ra,
"updateColumnList");
3062 CHECK(update_columns.IsArray());
3064 for (
auto column_arr_it = update_columns.Begin();
3065 column_arr_it != update_columns.End();
3067 target_column_list.push_back(column_arr_it->GetString());
3071 auto modify_node = std::make_shared<RelModify>(
3072 *
cat, table_descriptor, flattened, op, target_column_list, inputs[0]);
3073 switch (modify_node->getOperation()) {
3075 modify_node->applyDeleteModificationsToInputNode();
3079 modify_node->applyUpdateModificationsToInputNode();
3083 throw std::runtime_error(
"Unsupported RelModify operation: " +
3091 const rapidjson::Value& table_func_ra,
3094 const auto& invocation =
field(table_func_ra,
"invocation");
3095 CHECK(invocation.IsObject());
3097 const auto& operands =
field(invocation,
"operands");
3098 CHECK(operands.IsArray());
3099 CHECK_GE(operands.Size(), unsigned(0));
3101 std::vector<const Rex*> col_inputs;
3102 std::vector<std::unique_ptr<const RexScalar>> table_func_inputs;
3103 std::vector<std::string> fields;
3105 for (
auto exprs_json_it = operands.Begin(); exprs_json_it != operands.End();
3107 const auto& expr_json = *exprs_json_it;
3108 CHECK(expr_json.IsObject());
3109 if (expr_json.HasMember(
"op")) {
3111 if (op_str ==
"CAST" && expr_json.HasMember(
"type")) {
3112 const auto& expr_type =
field(expr_json,
"type");
3113 CHECK(expr_type.IsObject());
3114 CHECK(expr_type.HasMember(
"type"));
3115 const auto& expr_type_name =
json_str(
field(expr_type,
"type"));
3116 if (expr_type_name ==
"CURSOR") {
3117 CHECK(expr_json.HasMember(
"operands"));
3118 const auto& expr_operands =
field(expr_json,
"operands");
3119 CHECK(expr_operands.IsArray());
3120 if (expr_operands.Size() != 1) {
3121 throw std::runtime_error(
3122 "Table functions currently only support one ResultSet input");
3124 auto pos =
field(expr_operands[0],
"input").GetInt();
3126 for (
size_t i = inputs[pos]->size(); i > 0; i--) {
3127 table_func_inputs.emplace_back(
3128 std::make_unique<RexAbstractInput>(col_inputs.size()));
3129 col_inputs.emplace_back(table_func_inputs.back().get());
3138 const auto& op_name =
field(invocation,
"op");
3139 CHECK(op_name.IsString());
3141 std::vector<std::unique_ptr<const RexScalar>> table_function_projected_outputs;
3142 const auto& row_types =
field(table_func_ra,
"rowType");
3143 CHECK(row_types.IsArray());
3144 CHECK_GE(row_types.Size(), unsigned(0));
3145 const auto& row_types_array = row_types.GetArray();
3146 for (
size_t i = 0; i < row_types_array.Size(); i++) {
3149 table_function_projected_outputs.emplace_back(std::make_unique<RexRef>(i));
3150 fields.emplace_back(
"");
3152 return std::make_shared<RelTableFunction>(op_name.GetString(),
3157 table_function_projected_outputs);
3161 const rapidjson::Value& logical_values_ra) {
3162 const auto& tuple_type_arr =
field(logical_values_ra,
"type");
3163 CHECK(tuple_type_arr.IsArray());
3164 std::vector<TargetMetaInfo> tuple_type;
3165 for (
auto tuple_type_arr_it = tuple_type_arr.Begin();
3166 tuple_type_arr_it != tuple_type_arr.End();
3167 ++tuple_type_arr_it) {
3168 auto component_type =
parse_type(*tuple_type_arr_it);
3169 const auto component_name =
json_str(
field(*tuple_type_arr_it,
"name"));
3170 if (component_type.is_none_encoded_string()) {
3174 component_type.set_size(4);
3176 tuple_type.emplace_back(component_name, component_type);
3178 const auto& inputs_arr =
field(logical_values_ra,
"inputs");
3179 CHECK(inputs_arr.IsArray());
3180 const auto& tuples_arr =
field(logical_values_ra,
"tuples");
3181 CHECK(tuples_arr.IsArray());
3183 if (inputs_arr.Size()) {
3187 std::vector<RelLogicalValues::RowValues> values;
3188 if (tuples_arr.Size()) {
3189 for (
const auto& row : tuples_arr.GetArray()) {
3190 CHECK(row.IsArray());
3191 const auto values_json = row.GetArray();
3192 if (!values.empty()) {
3193 CHECK_EQ(values[0].size(), values_json.Size());
3196 for (
const auto& value : values_json) {
3197 CHECK(value.IsObject());
3198 CHECK(value.HasMember(
"literal"));
3204 return std::make_shared<RelLogicalValues>(tuple_type, values);
3208 const rapidjson::Value& logical_union_ra) {
3210 auto const& all_type_bool =
field(logical_union_ra,
"all");
3211 CHECK(all_type_bool.IsBool());
3212 return std::make_shared<RelLogicalUnion>(std::move(inputs), all_type_bool.GetBool());
3216 if (node.HasMember(
"inputs")) {
3219 for (
const auto& str_id : str_input_ids) {
3220 ra_inputs.push_back(
nodes_[std::stoi(str_id)]);
3224 return {
prev(node)};
3228 auto option = str.substr(0, pos);
3229 std::string delim =
"=";
3230 size_t delim_pos = option.find(delim);
3231 auto key = option.substr(0, delim_pos);
3232 auto val = option.substr(delim_pos + 1, option.length());
3233 str.erase(0, pos + delim.length() + 1);
3238 std::string white_space_delim =
" ";
3239 int l = hint_string.length();
3240 hint_string = hint_string.erase(0, 1).substr(0, l - 2);
3242 auto global_hint_checker = [&](
const std::string& input_hint_name) ->
HintIdentifier {
3243 bool global_hint =
false;
3244 std::string hint_name = input_hint_name;
3245 auto global_hint_identifier = hint_name.substr(0, 2);
3246 if (global_hint_identifier.compare(
"g_") == 0) {
3248 hint_name = hint_name.substr(2, hint_string.length());
3250 return {global_hint, hint_name};
3253 global_hint_checker(hint_string.substr(0, hint_string.find(white_space_delim)));
3255 if ((pos = hint_string.find(
"options:")) != std::string::npos) {
3257 std::vector<std::string> tokens;
3258 bool kv_list_op =
false;
3259 std::string raw_options = hint_string.substr(pos + 8, hint_string.length() - 2);
3260 if (raw_options.find(
'{') != std::string::npos) {
3263 CHECK(raw_options.find(
'[') != std::string::npos);
3265 auto t1 = raw_options.erase(0, 1);
3266 raw_options = t1.substr(0, t1.length() - 1);
3267 std::string op_delim =
", ";
3270 std::unordered_map<std::string, std::string> kv_options;
3271 while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3273 kv_options.emplace(kv_pair.first, kv_pair.second);
3277 kv_options.emplace(kv_pair.first, kv_pair.second);
3278 return {hint_type, parsed_hint.global_hint,
false,
true, kv_options};
3280 std::vector<std::string> list_options;
3281 while ((pos = raw_options.find(op_delim)) != std::string::npos) {
3282 list_options.emplace_back(raw_options.substr(0, pos));
3283 raw_options.erase(0, pos + white_space_delim.length() + 1);
3286 list_options.emplace_back(raw_options.substr(0, pos));
3287 return {hint_type, parsed_hint.global_hint,
false,
false, list_options};
3291 return {hint_type, parsed_hint.global_hint,
true,
false};
3296 std::shared_ptr<RelAlgNode> node) {
3297 std::string hint_explained =
json_str(
field(json_node,
"hints"));
3299 std::string delim =
"|";
3300 std::vector<std::string> hint_list;
3301 while ((pos = hint_explained.find(delim)) != std::string::npos) {
3302 hint_list.emplace_back(hint_explained.substr(0, pos));
3303 hint_explained.erase(0, pos + delim.length());
3306 hint_list.emplace_back(hint_explained.substr(0, pos));
3308 const auto agg_node = std::dynamic_pointer_cast<
RelAggregate>(node);
3310 for (std::string& hint : hint_list) {
3312 agg_node->addHint(parsed_hint);
3315 const auto project_node = std::dynamic_pointer_cast<
RelProject>(node);
3317 for (std::string& hint : hint_list) {
3319 project_node->addHint(parsed_hint);
3322 const auto scan_node = std::dynamic_pointer_cast<
RelScan>(node);
3324 for (std::string& hint : hint_list) {
3326 scan_node->addHint(parsed_hint);
3329 const auto join_node = std::dynamic_pointer_cast<
RelJoin>(node);
3331 for (std::string& hint : hint_list) {
3333 join_node->addHint(parsed_hint);
3337 const auto compound_node = std::dynamic_pointer_cast<
RelCompound>(node);
3338 if (compound_node) {
3339 for (std::string& hint : hint_list) {
3341 compound_node->addHint(parsed_hint);
3346 std::shared_ptr<const RelAlgNode>
prev(
const rapidjson::Value& crt_node) {
3347 const auto id =
node_id(crt_node);
3353 std::vector<std::shared_ptr<RelAlgNode>>
nodes_;
3359 const bool optimize_dag) {
3360 rapidjson::Document query_ast;
3361 query_ast.Parse(query_ra.c_str());
3362 VLOG(2) <<
"Parsing query RA JSON: " << query_ra;
3363 if (query_ast.HasParseError()) {
3364 query_ast.GetParseError();
3365 LOG(
ERROR) <<
"Failed to parse RA tree from Calcite (offset "
3366 << query_ast.GetErrorOffset() <<
"):\n"
3367 << rapidjson::GetParseError_En(query_ast.GetParseError());
3368 VLOG(1) <<
"Failed to parse query RA: " << query_ra;
3369 throw std::runtime_error(
3370 "Failed to parse relational algebra tree. Possible query syntax error.");
3372 CHECK(query_ast.IsObject());
3375 return build(query_ast,
nullptr, optimize_dag);
3380 const rapidjson::Value& query_ast) {
3381 return build(query_ast, &root_dag,
true);
3386 const bool optimize_dag) {
3387 const auto& rels =
field(query_ast,
"rels");
3388 CHECK(rels.IsArray());
3390 auto rel_alg_dag_ptr = std::make_unique<RelAlgDag>();
3391 auto& rel_alg_dag = *rel_alg_dag_ptr;
3392 auto& nodes =
getNodes(rel_alg_dag);
3399 CHECK(!nodes.empty());
3408 return rel_alg_dag_ptr;
3418 <<
static_cast<int>(build_state);
3420 auto& nodes =
getNodes(rel_alg_dag);
3431 std::vector<const RelAlgNode*> filtered_left_deep_joins;
3432 std::vector<const RelAlgNode*> left_deep_joins;
3433 for (
const auto& node : nodes) {
3437 if (left_deep_join_root) {
3438 left_deep_joins.push_back(left_deep_join_root.get());
3439 if (std::dynamic_pointer_cast<const RelFilter>(left_deep_join_root)) {
3440 filtered_left_deep_joins.push_back(left_deep_join_root.get());
3444 if (filtered_left_deep_joins.empty()) {
3455 CHECK(nodes.back().use_count() == 1);
3464 for (
auto const& node :
nodes_) {
3466 callback(node.get());
3472 for (
auto& node :
nodes_) {
3474 node->resetQueryExecutionState();
3482 for (
size_t i = 0; i < ra->
inputCount(); ++i) {
3490 return cat(::
typeName(
this),
"(", ra_->toString(config),
")");
3497 const auto scan_node =
dynamic_cast<const RelScan*
>(
node_);
3500 auto table_name = scan_node->getTableDescriptor()->tableName;
3518 ret += expr->toString(config) +
" ";
3520 ret +=
", agg_exps=";
3522 ret += expr->toString(config) +
" ";
3524 ret +=
", scalar_sources=";
3526 ret += expr->toString(config) +
" ";
3549 if (rex_ab_input.
hash_) {
3550 return *rex_ab_input.
hash_;
3553 boost::hash_combine(*rex_ab_input.
hash_, rex_ab_input.
in_index_);
3554 return *rex_ab_input.
hash_;
3558 if (rex_literal.hash_) {
3559 return *rex_literal.hash_;
3561 rex_literal.hash_ =
typeid(RexLiteral).hash_code();
3562 boost::apply_visitor(
3563 [&rex_literal](
auto&& current_val) {
3564 using T = std::decay_t<decltype(current_val)>;
3565 if constexpr (!std::is_same_v<boost::blank, T>) {
3566 static_assert(std::is_same_v<int64_t, T> || std::is_same_v<double, T> ||
3567 std::is_same_v<std::string, T> || std::is_same_v<bool, T>);
3568 boost::hash_combine(*rex_literal.hash_, current_val);
3571 rex_literal.literal_);
3572 boost::hash_combine(*rex_literal.hash_, rex_literal.type_);
3573 boost::hash_combine(*rex_literal.hash_, rex_literal.target_type_);
3574 boost::hash_combine(*rex_literal.hash_, rex_literal.scale_);
3575 boost::hash_combine(*rex_literal.hash_, rex_literal.precision_);
3576 boost::hash_combine(*rex_literal.hash_, rex_literal.target_scale_);
3577 boost::hash_combine(*rex_literal.hash_, rex_literal.target_precision_);
3578 return *rex_literal.hash_;
3583 return *rex_op.
hash_;
3586 boost::hash_combine(*rex_op.
hash_, rex_op.
op_);
3589 return *rex_op.
hash_;
3593 if (rex_case.
hash_) {
3594 return *rex_case.
hash_;
3599 return *rex_case.
hash_;
3604 return *rex_op.
hash_;
3610 boost::hash_combine(*rex_op.
hash_, rex_op.
name_);
3611 return *rex_op.
hash_;
3617 boost::hash_combine(hash,
3623 if (rex_window.
hash_) {
3624 return *rex_window.
hash_;
3628 boost::hash_combine(*rex_window.
hash_, rex_window.
getName());
3629 boost::hash_combine(*rex_window.
hash_, rex_window.
is_rows_);
3634 auto get_window_bound_hash =
3637 boost::hash_combine(h, bound.unbounded);
3638 boost::hash_combine(h, bound.preceding);
3639 boost::hash_combine(h, bound.following);
3640 boost::hash_combine(h, bound.is_current_row);
3641 boost::hash_combine(h, bound.order_key);
3644 boost::hash_combine(*rex_window.
hash_,
3646 boost::hash_combine(*rex_window.
hash_,
3648 return *rex_window.
hash_;
3652 if (rex_ref.
hash_) {
3653 return *rex_ref.
hash_;
3656 boost::hash_combine(*rex_ref.
hash_, rex_ref.
index_);
3657 return *rex_ref.
hash_;
3661 if (rex_agg.
hash_) {
3662 return *rex_agg.
hash_;
3666 boost::hash_combine(*rex_agg.
hash_, rex_agg.
agg_);
3669 return *rex_agg.
hash_;
3673 if (rex_subq.hash_) {
3674 return *rex_subq.hash_;
3676 rex_subq.hash_ =
typeid(RexSubQuery).hash_code();
3677 boost::hash_combine(*rex_subq.hash_, rex_subq.ra_);
3678 return *rex_subq.hash_;
3682 if (rex_input.
hash_) {
3683 return *rex_input.
hash_;
3686 boost::hash_combine(*rex_input.
hash_, rex_input.
node_);
3687 boost::hash_combine(*rex_input.
hash_, rex_input.
getIndex());
3688 return *rex_input.
hash_;
3692 if (rel_scan.
hash_) {
3693 return *rel_scan.
hash_;
3699 return *rel_scan.
hash_;
3703 if (rel_project.
hash_) {
3704 return *rel_project.
hash_;
3708 boost::hash_combine(*rel_project.
hash_, rel_project.
fields_);
3709 boost::hash_combine(*rel_project.
hash_, rel_project.
inputs_);
3710 return *rel_project.
hash_;
3714 if (rel_agg.
hash_) {
3715 return *rel_agg.
hash_;
3722 return *rel_agg.
hash_;
3726 if (rel_join.
hash_) {
3727 return *rel_join.
hash_;
3731 boost::hash_combine(*rel_join.
hash_, rel_join.
inputs_);
3733 return *rel_join.
hash_;
3737 if (rel_tr_join.
hash_) {
3738 return *rel_tr_join.
hash_;
3741 boost::hash_combine(*rel_tr_join.
hash_, rel_tr_join.
lhs_);
3742 boost::hash_combine(*rel_tr_join.
hash_, rel_tr_join.
rhs_);
3746 boost::hash_combine(*rel_tr_join.
hash_, rel_tr_join.
op_type_);
3750 return *rel_tr_join.
hash_;
3754 if (rel_filter.
hash_) {
3755 return *rel_filter.
hash_;
3758 boost::hash_combine(*rel_filter.
hash_, rel_filter.
filter_);
3759 boost::hash_combine(*rel_filter.
hash_, rel_filter.
inputs_);
3760 return *rel_filter.
hash_;
3764 if (rel_join.
hash_) {
3765 return *rel_join.
hash_;
3771 boost::hash_combine(*rel_join.
hash_, rel_join.
inputs_);
3772 return *rel_join.
hash_;
3776 if (rel_compound.
hash_) {
3777 return *rel_compound.
hash_;
3781 boost::hash_combine(*rel_compound.
hash_, rel_compound.
is_agg_);
3786 boost::hash_combine(*rel_compound.
hash_, rel_compound.
fields_);
3787 boost::hash_combine(*rel_compound.
hash_, rel_compound.
inputs_);
3788 return *rel_compound.
hash_;
3792 if (rel_sort.
hash_) {
3793 return *rel_sort.
hash_;
3797 boost::hash_combine(*rel_sort.
hash_, rel_sort.
limit_.has_value());
3798 boost::hash_combine(*rel_sort.
hash_, rel_sort.
limit_.value_or(0));
3799 boost::hash_combine(*rel_sort.
hash_, rel_sort.
offset_);
3800 boost::hash_combine(*rel_sort.
hash_, rel_sort.
inputs_);
3801 return *rel_sort.
hash_;
3805 if (rel_modify.
hash_) {
3806 return *rel_modify.
hash_;
3812 boost::hash_combine(*rel_modify.
hash_,
3815 boost::hash_combine(*rel_modify.
hash_, rel_modify.
inputs_);
3816 return *rel_modify.
hash_;
3821 return *rel_tf.
hash_;
3829 return *rel_tf.
hash_;
3834 return *rel_lv.
hash_;
3837 for (
auto& target_meta_info : rel_lv.
tuple_type_) {
3838 boost::hash_combine(*rel_lv.
hash_, target_meta_info.get_resname());
3839 boost::hash_combine(*rel_lv.
hash_, target_meta_info.get_type_info().get_type_name());
3841 return *rel_lv.
hash_;
3846 return *rel_lv.
hash_;
3851 return *rel_lv.
hash_;
std::vector< std::shared_ptr< const RexScalar > > scalar_exprs_
DEVICE auto upper_bound(ARGS &&...args)
const size_t getGroupByCount() const
SQLTypes to_sql_type(const std::string &type_name)
void setGlobalQueryHints(const RegisteredQueryHint &global_hints)
std::optional< size_t > is_collected_window_function(size_t rex_hash) const
NullSortedPosition parse_nulls_position(const rapidjson::Value &collation)
bool is_agg(const Analyzer::Expr *expr)
std::unique_ptr< const RexScalar > condition_
std::unique_ptr< const RexOperator > disambiguate_operator(const RexOperator *rex_operator, const RANodeOutput &ra_output) noexcept
std::vector< ElementType > Container
RexWindowBound frame_start_bound_
RelCompound(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
const RexScalar * getThen(const size_t idx) const
std::shared_ptr< RelAggregate > dispatchAggregate(const rapidjson::Value &agg_ra)
std::shared_ptr< RelFilter > dispatchFilter(const rapidjson::Value &filter_ra, RelAlgDag &root_dag)
std::shared_ptr< RelAlgNode > ElementType
void set_notnulls(std::vector< TargetMetaInfo > *tmis0, std::vector< bool > const ¬nulls)
std::vector< std::unique_ptr< const RexScalar > > outer_conditions_per_level_
void mark_nops(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::unique_ptr< RexSubQuery > deepCopy() const
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
static std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > & getQueryHints(RelAlgDag &rel_alg_dag)
std::vector< std::unique_ptr< const RexScalar > > table_func_inputs_
std::optional< size_t > getOffsetForPushedDownExpr(WindowExprType type, size_t expr_offset) const
RexWindowFuncReplacementVisitor(std::unordered_set< size_t > &collected_window_func_hash, std::vector< std::unique_ptr< const RexScalar >> &new_rex_input_for_window_func, std::unordered_map< size_t, size_t > &window_func_to_new_rex_input_idx_map, RelProject *new_project, std::unordered_map< size_t, std::unique_ptr< const RexInput >> &new_rex_input_from_child_node)
std::vector< std::unique_ptr< const RexScalar > > parse_window_order_exprs(const rapidjson::Value &arr, RelAlgDag &root_dag)
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< bool > get_notnulls(std::vector< TargetMetaInfo > const &tmis0)
std::vector< std::unique_ptr< const RexScalar > > & scalar_exprs_for_new_project_
size_t size() const override
void addHint(const ExplainedQueryHint &hint_explained)
std::shared_ptr< const RelAlgNode > get_left_deep_join_root(const std::shared_ptr< RelAlgNode > &node)
const ElementType & operator*()
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool input_can_be_coalesced(const RelAlgNode *parent_node, const size_t index, const bool first_rex_is_input)
bool is_window_function_avg(const RexScalar *rex)
void eliminate_redundant_projection(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *rex_input) const final
std::vector< RexInput > RANodeOutput
std::unique_ptr< const RexCase > disambiguate_case(const RexCase *rex_case, const RANodeOutput &ra_output)
const RexScalar * getElse() const
static thread_local unsigned crt_id_
NullSortedPosition nulls_pos_
std::unique_ptr< const RexScalar > visitOperator(const RexOperator *rex_operator) const override
std::string function_name_
const RexScalar * outer_join_cond_
SqlWindowFunctionKind parse_window_function_kind(const std::string &name)
RexDeepCopyVisitor deep_copier_
std::shared_ptr< RelScan > dispatchTableScan(const rapidjson::Value &scan_ra)
RelProject(const TableDescriptor *td, const Catalog_Namespace::Catalog *catalog)
std::pair< std::shared_ptr< RelLeftDeepInnerJoin >, std::shared_ptr< const RelAlgNode > > create_left_deep_join(const std::shared_ptr< RelAlgNode > &left_deep_join_root)
RexScalar const * copyAndRedirectSource(RexScalar const *, size_t input_idx) const
const RelAlgNode * node_to_keep_
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
SQLAgg to_agg_kind(const std::string &agg_name)
std::shared_ptr< RelLogicalUnion > dispatchUnion(const rapidjson::Value &logical_union_ra)
std::vector< std::string > TargetColumnList
void advance(AdvancingMode mode)
std::unique_ptr< RexCase > parse_case(const rapidjson::Value &expr, RelAlgDag &root_dag)
const SQLTypeInfo & getType() const
std::unique_ptr< const RexScalar > get_new_rex_input(size_t rex_idx) const
Hints * getDeliveredHints()
std::shared_ptr< RelProject > dispatchProject(const rapidjson::Value &proj_ra, RelAlgDag &root_dag)
const bool json_bool(const rapidjson::Value &obj) noexcept
const RexScalar * getOperand(const size_t idx) const
std::vector< const Rex * > col_inputs_
const JoinType join_type_
std::string json_node_to_string(const rapidjson::Value &node) noexcept
bool hasEquivCollationOf(const RelSort &that) const
JoinType to_join_type(const std::string &join_type_name)
void resetQueryExecutionState()
std::shared_ptr< RelFilter > original_filter_
std::pair< bool, bool > need_pushdown_generic_expr(RelProject const *window_func_project_node)
const std::string json_str(const rapidjson::Value &obj) noexcept
std::unique_ptr< const RexScalar > else_expr_
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::unique_ptr< const RexSubQuery > parse_subquery(const rapidjson::Value &expr, RelAlgDag &root_dag)
void handle_query_hint(const std::vector< std::shared_ptr< RelAlgNode >> &nodes, RelAlgDag &rel_alg_dag) noexcept
DEVICE void sort(ARGS &&...args)
std::unordered_map< size_t, size_t > & expr_offset_cache_
void registerQueryHint(const RelAlgNode *node, const RegisteredQueryHint &query_hint)
std::vector< std::string > fields_
const std::pair< const Catalog_Namespace::Catalog *, const TableDescriptor * > getCatalogAndTableFromScanNode(const rapidjson::Value &scan_ra)
void pushDownExpressionInWindowFunction(const RexWindowFunctionOperator *window_expr) const
void addHint(const ExplainedQueryHint &hint_explained)
std::unique_ptr< const RexScalar > visitCase(const RexCase *rex_case) const override
const RexScalar * getWhen(const size_t idx) const
std::vector< size_t > indices_from_json_array(const rapidjson::Value &json_idx_arr) noexcept
const RegisteredQueryHint & getGlobalHints() const
#define TRANSIENT_DICT_DB_ID
void appendInput(std::string new_field_name, std::unique_ptr< const RexScalar > new_input)
void propagate_hints_to_new_project(std::shared_ptr< RelProject > prev_node, std::shared_ptr< RelProject > new_node, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
bool isRenamedInput(const RelAlgNode *node, const size_t index, const std::string &new_name)
RexDeepCopyVisitor copier_
size_t pushDownExpressionImpl(const RexScalar *expr) const
std::unique_ptr< const RexScalar > defaultResult() const override
void addHint(const ExplainedQueryHint &hint_explained)
std::unique_ptr< const RexAgg > parse_aggregate_expr(const rapidjson::Value &expr)
const TableDescriptor * td_
std::optional< size_t > getIdInPlanTree() const
std::unordered_map< size_t, const RexScalar * > & collected_window_func_
const std::string op_type_
#define TRANSIENT_DICT_ID
std::vector< std::unique_ptr< const RexScalar > > scalar_sources_
std::unique_ptr< RexAbstractInput > parse_abstract_input(const rapidjson::Value &expr) noexcept
static std::vector< std::shared_ptr< RexSubQuery > > & getSubqueries(RelAlgDag &rel_alg_dag)
std::vector< std::string > field_names_
std::unique_ptr< const RexScalar > disambiguate_rex(const RexScalar *, const RANodeOutput &)
std::unique_ptr< const RexScalar > visitLiteral(const RexLiteral *rex_literal) const override
const ElementType * operator->()
void add_window_function_pre_project(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const bool always_add_project_if_first_project_is_window_expr, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
const std::string getFieldName(const size_t i) const
std::unique_ptr< const RexScalar > visitSubQuery(const RexSubQuery *rex_subquery) const override
const std::string qualifier_
std::unordered_map< size_t, size_t > pushed_down_window_operands_offset_
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::vector< SortField > collation_
std::shared_ptr< RelJoin > dispatchJoin(const rapidjson::Value &join_ra, RelAlgDag &root_dag)
RelLogicalValues()=default
std::vector< std::unique_ptr< const RexScalar > > & new_rex_input_for_window_func_
void * defaultResult() const final
std::unordered_set< RexInput > RexInputSet
This file contains the class specification and related data structures for Catalog.
virtual T visit(const RexScalar *rex_scalar) const
bool hasCaseExprAsWindowOperand()
std::string to_string() const
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
void separate_window_function_expressions(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
static SysCatalog & instance()
bool aggregateResult(const bool &aggregate, const bool &next_result) const final
SQLOps getOperator() const
void setExecutionResult(const ExecutionResultShPtr result)
std::shared_ptr< RelTableFunction > dispatchTableFunction(const rapidjson::Value &table_func_ra, RelAlgDag &root_dag)
std::unordered_map< size_t, size_t > pushed_down_order_key_offset_
std::unordered_map< size_t, std::unique_ptr< const RexInput > > & new_rex_input_from_child_node_
virtual size_t toHash() const override
std::set< std::pair< const RelAlgNode *, int > > get_equiv_cols(const RelAlgNode *node, const size_t which_col)
void create_rex_input_for_new_project_node(RelAlgNode const *node, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs, std::vector< std::string > &fields)
std::vector< std::string > & fields_for_new_project_
bool visitRef(const RexRef *) const final
std::unique_ptr< RexOperator > parse_operator(const rapidjson::Value &expr, RelAlgDag &root_dag)
static std::unique_ptr< RelAlgDag > buildDagForSubquery(RelAlgDag &root_dag, const rapidjson::Value &query_ast)
std::unordered_set< size_t > & collected_window_func_hash_
std::unique_ptr< const RexScalar > visitInput(const RexInput *rex_input) const override
static QueryHint translateQueryHint(const std::string &hint_name)
DEVICE auto copy(ARGS &&...args)
std::vector< size_t > operands_
Hints * getDeliveredHints()
void coalesce_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< const RelAlgNode * > &left_deep_joins, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
std::vector< std::string > fields_
SQLOps to_sql_op(const std::string &op_str)
std::unique_ptr< Hints > hints_
const int64_t json_i64(const rapidjson::Value &obj) noexcept
const TableDescriptor * table_descriptor_
std::unique_ptr< Hints > hints_
std::vector< std::unique_ptr< const RexScalar > > copyRexScalars(std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources)
std::vector< std::shared_ptr< const RelAlgNode >> RelAlgInputs
std::vector< std::unique_ptr< const RexScalar > > scalar_exprs_
RetType visitOperator(const RexOperator *rex_operator) const final
bool defaultResult() const final
const double json_double(const rapidjson::Value &obj) noexcept
const std::vector< std::unique_ptr< const RexScalar > > & scalar_sources_
void addHint(const ExplainedQueryHint &hint_explained)
const unsigned FIRST_RA_NODE_ID
size_t branchCount() const
const RelAlgNode * getInput(const size_t idx) const
SQLTypeInfo parse_type(const rapidjson::Value &type_obj)
Checked json field retrieval.
const std::vector< std::shared_ptr< const Analyzer::Expr > > filter_ops_
void * visitCase(const RexCase *rex_case) const final
std::vector< std::shared_ptr< RelAlgNode > > nodes_
std::unique_ptr< const RexScalar > filter_
std::vector< const Rex * > remapTargetPointers(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_new, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_new, std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs_old, std::vector< std::unique_ptr< const RexScalar >> const &scalar_sources_old, std::vector< const Rex * > const &target_exprs_old)
std::vector< std::unique_ptr< const RexScalar > > operands_
std::optional< size_t > hash_
std::vector< std::string > fields_
std::vector< const Rex * > target_exprs_
void bind_inputs(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::optional< size_t > hash_
bool only_add_window_expr_
std::string toString(const Executor::ExtModuleKinds &kind)
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
bool find_generic_expr_in_window_func(RexWindowFunctionOperator const *window_expr, bool &has_generic_expr_in_window_func)
void bind_project_to_input(RelProject *project_node, const RANodeOutput &input) noexcept
static std::string yieldModifyOperationString(ModifyOperation const op)
std::vector< TargetMetaInfo > getCompatibleMetainfoTypes() const
virtual std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const =0
static std::unique_ptr< RelAlgDag > buildDag(const std::string &query_ra, const bool optimize_dag)
std::string tree_string(const RelAlgNode *ra, const size_t depth)
std::unordered_set< size_t > visited_
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::vector< SortField > parse_window_order_collation(const rapidjson::Value &arr, RelAlgDag &root_dag)
void compute_node_hash(const std::vector< std::shared_ptr< RelAlgNode >> &nodes)
Hints * getDeliveredHints()
void setTableFuncInputs(std::vector< std::unique_ptr< const RexScalar >> &&exprs)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
PushDownGenericExpressionInWindowFunction(std::shared_ptr< RelProject > new_project, std::vector< std::unique_ptr< const RexScalar >> &scalar_exprs_for_new_project, std::vector< std::string > &fields_for_new_project, std::unordered_map< size_t, size_t > &expr_offset_cache)
const RexScalar * getProjectAt(const size_t idx) const
RetType visitInput(const RexInput *input) const final
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
static RegisteredQueryHint defaults()
static std::unique_ptr< RelAlgDag > build(const rapidjson::Value &query_ast, RelAlgDag *root_dag, const bool optimize_dag)
int32_t countRexLiteralArgs() const
std::unique_ptr< Hints > hints_
std::vector< const Rex * > reproject_targets(const RelProject *simple_project, const std::vector< const Rex * > &target_exprs) noexcept
const ConstRexScalarPtrVector & getPartitionKeys() const
bool allStringCastsAreToDictionaryEncodedStrings() const
std::vector< std::shared_ptr< RelAlgNode > > run(const rapidjson::Value &rels, RelAlgDag &root_dag)
DEVICE auto lower_bound(ARGS &&...args)
std::unique_ptr< Hints > hints_
int64_t get_int_literal_field(const rapidjson::Value &obj, const char field[], const int64_t default_val) noexcept
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
void registerSubquery(std::shared_ptr< RexSubQuery > subquery)
std::vector< std::unique_ptr< const RexAgg > > agg_exprs_
std::vector< std::unique_ptr< const RexScalar > > parse_expr_array(const rapidjson::Value &arr, RelAlgDag &root_dag)
std::unique_ptr< const RexScalar > filter_expr_
static std::vector< std::shared_ptr< RelAlgNode > > & getNodes(RelAlgDag &rel_alg_dag)
bool hasWindowFunctionExpr() const
std::shared_ptr< RelModify > dispatchModify(const rapidjson::Value &logical_modify_ra)
std::vector< ElementType >::const_iterator Super
std::vector< std::unique_ptr< const RexAgg > > copyAggExprs(std::vector< std::unique_ptr< const RexAgg >> const &agg_exprs)
std::unique_ptr< RexLiteral > parse_literal(const rapidjson::Value &expr)
ConstRexScalarPtrVector order_keys_
std::vector< std::string > strings_from_json_array(const rapidjson::Value &json_str_arr) noexcept
std::unordered_map< QueryHint, ExplainedQueryHint > Hints
virtual size_t size() const =0
std::vector< SortField > collation_
RexWindowBound frame_end_bound_
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
std::string get_type_name() const
std::vector< std::pair< std::unique_ptr< const RexScalar >, std::unique_ptr< const RexScalar > > > expr_pair_list_
std::string typeName(const T *v)
ExplainedQueryHint parseHintString(std::string &hint_string)
void tryAddWindowExpr(RexScalar const *expr) const
void * visitOperator(const RexOperator *rex_operator) const final
bool is_window_function_operator(const RexScalar *rex)
std::unique_ptr< const RexScalar > condition_
void eachNode(std::function< void(RelAlgNode const *)> const &) const
std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const override
std::shared_ptr< RelSort > dispatchSort(const rapidjson::Value &sort_ra)
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
const std::vector< std::string > & getFields() const
std::unique_ptr< const RexScalar > visitRef(const RexRef *rex_ref) const override
std::string getFieldName(const size_t i) const
static void optimizeDag(RelAlgDag &rel_alg_dag)
std::shared_ptr< RelProject > new_project_
void handle_agg_over_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints)
bool g_enable_watchdog false
bool visitSubQuery(const RexSubQuery *) const final
ModifyOperation operation_
const ConstRexScalarPtrVector & getOrderKeys() const
std::unordered_map< size_t, size_t > pushed_down_partition_key_offset_
const std::string op_typeinfo_
std::unique_ptr< const RexScalar > parse_scalar_expr(const rapidjson::Value &expr, RelAlgDag &root_dag)
bool hasPartitionExpression()
RexInputReplacementVisitor(const RelAlgNode *node_to_keep, const std::vector< std::unique_ptr< const RexScalar >> &scalar_sources)
ConstRexScalarPtrVector partition_keys_
void create_compound(std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::vector< size_t > &pattern, std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint >> &query_hints) noexcept
bool visitInput(const RexInput *input) const final
std::vector< RexInput > n_outputs(const RelAlgNode *node, const size_t n)
std::shared_ptr< const RelAlgNode > prev(const rapidjson::Value &crt_node)
void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input) override
void getRelAlgHints(const rapidjson::Value &json_node, std::shared_ptr< RelAlgNode > node)
SortDirection parse_sort_direction(const rapidjson::Value &collation)
RexWindowFunctionOperator::RexWindowBound parse_window_bound(const rapidjson::Value &window_bound_obj, RelAlgDag &root_dag)
Common Enum definitions for SQL processing.
bool is_dict_encoded_string() const
bool found_case_expr_window_operand_
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
std::optional< size_t > limit_
std::vector< std::unique_ptr< const RexScalar >> RowValues
void bind_table_func_to_input(RelTableFunction *table_func_node, const RANodeOutput &input) noexcept
bool is_window_function_sum(const RexScalar *rex)
const std::string & getName() const
RetType visitCase(const RexCase *rex_case) const final
const size_t inputCount() const
void rebind_inputs_from_left_deep_join(const RexScalar *rex, const RelLeftDeepInnerJoin *left_deep_join)
void check_empty_inputs_field(const rapidjson::Value &node) noexcept
WindowFunctionCollector(std::unordered_map< size_t, const RexScalar * > &collected_window_func, bool only_add_window_expr)
HOST DEVICE bool get_notnull() const
unsigned node_id(const rapidjson::Value &ra_node) noexcept
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
size_t size() const override
size_t operator()(const std::pair< const RelAlgNode *, int > &input_col) const
std::unordered_map< size_t, size_t > & window_func_to_new_rex_input_idx_map_
RelAlgInputs getRelAlgInputs(const rapidjson::Value &node)
std::vector< std::string > getFieldNamesFromScanNode(const rapidjson::Value &scan_ra)
RelTableFunction()=default
TargetColumnList target_column_list_
std::shared_ptr< RelLogicalValues > dispatchLogicalValues(const rapidjson::Value &logical_values_ra)
std::vector< std::string > fields_
JoinType getJoinType() const
DEVICE void swap(ARGS &&...args)
std::unique_ptr< const RexScalar > RetType
RANodeOutput get_node_output(const RelAlgNode *ra_node)
virtual size_t toHash() const =0
bool visitLiteral(const RexLiteral *) const final
BuildState getBuildState() const
void reset_table_function_inputs(std::vector< const Rex * > &column_inputs, const std::vector< std::unique_ptr< const RexScalar >> &old_table_func_inputs, const std::vector< std::unique_ptr< const RexScalar >> &new_table_func_inputs)
void set_precision(int d)
std::vector< TargetMetaInfo > tuple_type_
std::pair< std::string, std::string > getKVOptionPair(std::string &str, size_t &pos)
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
bool same_ignoring_notnull(SQLTypeInfo ti0, SQLTypeInfo ti1)
std::vector< std::unique_ptr< const RexScalar > > target_exprs_
static void setBuildState(RelAlgDag &rel_alg_dag, const RelAlgDag::BuildState build_state)
RelProject * new_project_
static void resetRelAlgFirstId() noexcept