24 #include <unordered_map>
31 : crt_projects_(crt_inputs) {}
35 if (!source || !crt_projects_.count(source)) {
38 auto new_source = source->getInput(0);
44 if (
auto join = dynamic_cast<const RelJoin*>(new_source)) {
45 CHECK(new_input->getSourceNode() ==
join->getInput(0) ||
46 new_input->getSourceNode() ==
join->getInput(1));
48 CHECK_EQ(new_input->getSourceNode(), new_source);
50 return new_input->deepCopy();
60 : old_input_(old_input), new_input_(new_input) {}
64 if (old_source == old_input_) {
71 if (dynamic_cast<const RelAggregate*>(node) || dynamic_cast<const RelSort*>(node)) {
74 if (
auto join = dynamic_cast<const RelJoin*>(node)) {
75 if (
auto condition =
join->getCondition()) {
80 if (
auto project = dynamic_cast<const RelProject*>(node)) {
81 for (
size_t i = 0; i < project->size(); ++i) {
82 visit(project->getProjectAt(i));
86 if (
auto filter = dynamic_cast<const RelFilter*>(node)) {
87 visit(filter->getCondition());
100 const std::unordered_set<const RelProject*>& projects_to_remove) {
101 auto source = curr_project->
getInput(0);
102 while (
auto filter = dynamic_cast<const RelFilter*>(source)) {
105 if (
auto src_project = dynamic_cast<const RelProject*>(source)) {
106 if (projects_to_remove.count(src_project)) {
115 const std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
120 auto usrs_it = du_web.find(project);
121 CHECK(usrs_it != du_web.end());
122 for (
auto usr : usrs_it->second) {
123 if (!dynamic_cast<const RelProject*>(usr) && !
dynamic_cast<const RelFilter*
>(usr)) {
132 const std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
134 const std::unordered_set<const RelProject*>& projects_to_remove,
135 std::unordered_set<const RelProject*>& permutating_projects) {
137 if (project->
size() > source_size) {
141 if (project->
size() < source_size) {
142 auto usrs_it = du_web.find(project);
143 CHECK(usrs_it != du_web.end());
144 bool guard_found =
false;
145 while (usrs_it->second.size() == size_t(1)) {
146 auto only_usr = *usrs_it->second.begin();
147 if (dynamic_cast<const RelProject*>(only_usr)) {
151 if (dynamic_cast<const RelAggregate*>(only_usr) ||
152 dynamic_cast<const RelSort*>(only_usr) ||
153 dynamic_cast<const RelJoin*>(only_usr) ||
154 dynamic_cast<const RelTableFunction*>(only_usr) ||
155 dynamic_cast<const RelLogicalUnion*>(only_usr)) {
158 CHECK(dynamic_cast<const RelFilter*>(only_usr))
160 usrs_it = du_web.find(only_usr);
161 CHECK(usrs_it != du_web.end());
169 bool identical =
true;
170 for (
size_t i = 0; i < project->
size(); ++i) {
173 if (i != target->getIndex()) {
184 permutating_projects.insert(project);
193 const std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
195 CHECK(excluded_root);
197 CHECK(src_project && src_project->isSimple());
198 const auto indirect_join_src =
dynamic_cast<const RelJoin*
>(src_project->getInput(0));
199 std::unordered_map<size_t, size_t> old_to_new_idx;
200 for (
size_t i = 0; i < src_project->size(); ++i) {
201 auto rex_in =
dynamic_cast<const RexInput*
>(src_project->getProjectAt(i));
204 if (indirect_join_src !=
nullptr &&
205 indirect_join_src->getInput(1) == rex_in->getSourceNode()) {
206 src_base = indirect_join_src->getInput(0)->size();
208 old_to_new_idx.insert(std::make_pair(i, src_base + rex_in->getIndex()));
209 old_to_new_idx.insert(std::make_pair(i, rex_in->getIndex()));
211 CHECK(old_to_new_idx.size());
213 auto usrs_it = du_web.find(excluded_root);
214 CHECK(usrs_it != du_web.end());
215 std::vector<const RelAlgNode*> work_set(usrs_it->second.begin(), usrs_it->second.end());
216 while (!work_set.empty()) {
217 auto node = work_set.back();
219 auto modified_node =
const_cast<RelAlgNode*
>(node);
220 if (
auto filter = dynamic_cast<RelFilter*>(modified_node)) {
221 auto new_condition = renumber.visit(filter->getCondition());
222 filter->setCondition(new_condition);
223 auto usrs_it = du_web.find(filter);
224 CHECK(usrs_it != du_web.end() && usrs_it->second.size() == 1);
225 work_set.push_back(*usrs_it->second.begin());
228 if (
auto project = dynamic_cast<RelProject*>(modified_node)) {
229 std::vector<std::unique_ptr<const RexScalar>> new_exprs;
230 for (
size_t i = 0; i < project->size(); ++i) {
231 new_exprs.push_back(renumber.visit(project->getProjectAt(i)));
233 project->setExpressions(new_exprs);
242 std::shared_ptr<RelAlgNode> node,
243 const std::unordered_set<const RelProject*>& projects,
244 const std::unordered_set<const RelProject*>& permutating_projects,
245 const std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
247 if (dynamic_cast<RelLogicalUnion*>(node.get())) {
250 std::shared_ptr<const RelProject> src_project =
nullptr;
251 for (
size_t i = 0; i < node->inputCount(); ++i) {
253 std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(i))) {
254 if (projects.count(project.get())) {
255 src_project = project;
263 if (
auto join = std::dynamic_pointer_cast<RelJoin>(node)) {
265 src_project == node->getAndOwnInput(0)
266 ? std::dynamic_pointer_cast<
const RelProject>(node->getAndOwnInput(1))
267 : std::dynamic_pointer_cast<const RelProject>(node->getAndOwnInput(0));
268 join->replaceInput(src_project, src_project->getAndOwnInput(0));
270 auto usrs_it = du_web.find(
join.get());
271 CHECK(usrs_it != du_web.end());
272 for (
auto usr : usrs_it->second) {
273 rebinder.visitNode(usr);
276 if (other_project && projects.count(other_project.get())) {
277 join->replaceInput(other_project, other_project->getAndOwnInput(0));
279 other_project->getInput(0));
280 for (
auto usr : usrs_it->second) {
286 if (
auto project = std::dynamic_pointer_cast<RelProject>(node)) {
287 project->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
289 std::vector<std::unique_ptr<const RexScalar>> new_exprs;
290 for (
size_t i = 0; i < project->size(); ++i) {
291 new_exprs.push_back(redirector.
visit(project->getProjectAt(i)));
293 project->setExpressions(new_exprs);
296 if (
auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
297 const bool is_permutating_proj = permutating_projects.count(src_project.get());
298 if (is_permutating_proj || dynamic_cast<const RelJoin*>(src_project->getInput(0))) {
299 if (is_permutating_proj) {
302 filter->RelAlgNode::replaceInput(src_project, src_project->getAndOwnInput(0));
304 auto new_condition = redirector.
visit(filter->getCondition());
305 filter->setCondition(new_condition);
307 filter->replaceInput(src_project, src_project->getAndOwnInput(0));
311 if (std::dynamic_pointer_cast<RelSort>(node)) {
312 auto const src_project_input = src_project->getInput(0);
313 if (dynamic_cast<const RelScan*>(src_project_input) ||
314 dynamic_cast<const RelLogicalValues*>(src_project_input) ||
315 dynamic_cast<const RelLogicalUnion*>(src_project_input)) {
319 if (std::dynamic_pointer_cast<RelModify>(node)) {
322 if (std::dynamic_pointer_cast<RelTableFunction>(node)) {
325 CHECK(std::dynamic_pointer_cast<RelAggregate>(node) ||
326 std::dynamic_pointer_cast<RelSort>(node));
327 node->replaceInput(src_project, src_project->getAndOwnInput(0));
331 for (
auto nodeIt = nodes.rbegin(); nodeIt != nodes.rend(); ++nodeIt) {
332 if (nodeIt->use_count() == 1) {
333 VLOG(1) <<
"Node (ID: " << (*nodeIt)->getId() <<
") deleted.";
337 VLOG(2) <<
"Deleted Node (ID: " << (*nodeIt)->getId()
338 <<
") contents: " << node_substr << post_fix;
344 std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
345 for (
auto node : nodes) {
349 new_nodes.push_back(node);
351 nodes.swap(new_nodes);
355 if (
auto project = dynamic_cast<const RelProject*>(root)) {
359 if (dynamic_cast<const RelAggregate*>(root) || dynamic_cast<const RelScan*>(root) ||
360 dynamic_cast<const RelLogicalValues*>(root) ||
361 dynamic_cast<const RelModify*>(root)) {
362 return std::unordered_set<const RelProject*>{};
365 if (
auto join = dynamic_cast<const RelJoin*>(root)) {
368 lhs_projs.insert(rhs_projs.begin(), rhs_projs.end());
372 if (
auto logical_union = dynamic_cast<const RelLogicalUnion*>(root)) {
374 for (
size_t i = 1; i < logical_union->inputCount(); ++i) {
376 projections.insert(next.begin(), next.end());
381 CHECK(dynamic_cast<const RelFilter*>(root) || dynamic_cast<const RelSort*>(root))
388 if (dynamic_cast<const RelFilter*>(node) || dynamic_cast<const RelSort*>(node)) {
392 if (
auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
394 if (aggregate->getGroupByCount() == 1 && !input_idx) {
397 if (input_idx < aggregate->getGroupByCount()) {
402 if (
auto project = dynamic_cast<const RelProject*>(node)) {
403 CHECK_LT(input_idx, project->size());
404 if (
auto input = dynamic_cast<const RexInput*>(project->getProjectAt(input_idx))) {
406 return is_distinct(input->getIndex(), project->getInput(0));
410 CHECK(dynamic_cast<const RelJoin*>(node) || dynamic_cast<const RelScan*>(node));
416 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>>
build_du_web(
417 const std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
418 std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
419 std::unordered_set<const RelAlgNode*> visited;
420 std::vector<const RelAlgNode*> work_set;
421 for (
auto node : nodes) {
422 if (std::dynamic_pointer_cast<RelScan>(node) ||
423 std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
426 work_set.push_back(node.get());
427 while (!work_set.empty()) {
428 auto walker = work_set.back();
430 if (visited.count(walker)) {
433 CHECK(!web.count(walker));
435 web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
437 visited.insert(walker);
438 CHECK(dynamic_cast<const RelJoin*>(walker) ||
439 dynamic_cast<const RelProject*>(walker) ||
440 dynamic_cast<const RelAggregate*>(walker) ||
441 dynamic_cast<const RelFilter*>(walker) ||
442 dynamic_cast<const RelSort*>(walker) ||
443 dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
444 dynamic_cast<const RelLogicalValues*>(walker) ||
445 dynamic_cast<const RelTableFunction*>(walker) ||
446 dynamic_cast<const RelLogicalUnion*>(walker));
447 for (
size_t i = 0; i < walker->inputCount(); ++i) {
448 auto src = walker->getInput(i);
449 if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
452 if (web.empty() || !web.count(src)) {
453 web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
455 web[src].insert(walker);
456 work_set.push_back(src);
477 auto sort =
dynamic_cast<const RelSort*
>(next_node);
485 if (dynamic_cast<const RelSort*>(project->
getInput(0))) {
495 std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
496 auto sink = nodes.back();
497 for (
auto node : nodes) {
498 auto aggregate = std::dynamic_pointer_cast<
const RelAggregate>(node);
499 if (!aggregate || aggregate == sink ||
500 !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
504 std::dynamic_pointer_cast<
const RelProject>(aggregate->getAndOwnInput(0));
505 if (project && project->size() == aggregate->size() &&
506 project->getFields() == aggregate->getFields()) {
507 CHECK_EQ(
size_t(0), copies.count(aggregate));
508 copies.insert(aggregate);
511 for (
auto node : nodes) {
512 if (!node->inputCount()) {
515 auto last_source = node->getAndOwnInput(node->inputCount() - 1);
516 if (!copies.count(last_source)) {
519 auto aggregate = std::dynamic_pointer_cast<
const RelAggregate>(last_source);
521 if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
525 std::dynamic_pointer_cast<
const RelProject>(aggregate->getAndOwnInput(0));
527 CHECK_EQ(
size_t(1), project->size());
531 auto new_source = project->getAndOwnInput(0);
532 if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
533 std::dynamic_pointer_cast<const RelScan>(new_source)) {
534 node->replaceInput(last_source, new_source);
537 decltype(copies)().
swap(copies);
541 std::unordered_set<const RelProject*> projects;
542 std::unordered_set<const RelProject*> permutating_projects;
544 for (
auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
545 auto node = *node_it;
546 auto project = std::dynamic_pointer_cast<
RelProject>(node);
547 auto next_node_it = std::next(node_it);
548 if (project && project->isSimple() &&
549 (!visible_projs.count(project.get()) || !project->isRenaming()) &&
552 project.get(), next_node_it == nodes.end() ?
nullptr : next_node_it->get())) {
553 projects.insert(project.get());
557 for (
auto node : nodes) {
573 const RetType& next_result)
const override {
575 result.insert(next_result.begin(), next_result.end());
584 if (node_->inputCount() == 1) {
585 auto src = node_->getInput(0);
586 if (
auto join = dynamic_cast<const RelJoin*>(src)) {
588 const auto src2_in_offset =
join->getInput(0)->size();
590 result.emplace(src, input->
getIndex() + src2_in_offset);
592 result.emplace(src, input->
getIndex());
597 result.insert(*input);
605 if (
auto filter = dynamic_cast<const RelFilter*>(node)) {
606 auto rex_ins = collector.visit(filter->getCondition());
607 if (!rex_ins.empty()) {
608 return static_cast<size_t>(rex_ins.begin()->getIndex());
611 }
else if (
auto join = dynamic_cast<const RelJoin*>(node)) {
612 auto rex_ins = collector.visit(
join->getCondition());
613 if (!rex_ins.empty()) {
614 return static_cast<size_t>(rex_ins.begin()->getIndex());
620 return rhs_idx +
join->getInput(0)->size();
622 }
else if (
auto sort = dynamic_cast<const RelSort*>(node)) {
623 if (
sort->collationCount()) {
624 return sort->getCollation(0).getField();
633 const std::unordered_map<
const RelAlgNode*, std::unordered_set<size_t>>& live_outs) {
634 if (!node || dynamic_cast<const RelScan*>(node)) {
638 auto it = live_outs.find(node);
639 CHECK(it != live_outs.end());
640 auto live_out = it->second;
641 if (
auto project = dynamic_cast<const RelProject*>(node)) {
642 CHECK_EQ(
size_t(1), project->inputCount());
643 std::unordered_set<size_t> live_in;
644 for (
const auto& idx : live_out) {
646 auto partial_in = collector.visit(project->getProjectAt(idx));
647 for (
auto rex_in : partial_in) {
648 live_in.insert(rex_in.getIndex());
651 if (project->size() == 1 &&
652 dynamic_cast<const RexLiteral*
>(project->getProjectAt(0))) {
653 CHECK(live_in.empty());
658 if (
auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
659 CHECK_EQ(
size_t(1), aggregate->inputCount());
660 const auto group_key_count =
static_cast<size_t>(aggregate->getGroupByCount());
661 const auto agg_expr_count =
static_cast<size_t>(aggregate->getAggExprsCount());
662 std::unordered_set<size_t> live_in;
663 for (
size_t i = 0; i < group_key_count; ++i) {
666 bool has_count_star_only{
false};
667 for (
const auto& idx : live_out) {
668 if (idx < group_key_count) {
671 const auto agg_idx = idx - group_key_count;
673 const auto& cur_agg_expr = aggregate->getAggExprs()[agg_idx];
674 const auto n_operands = cur_agg_expr->size();
675 for (
size_t i = 0; i < n_operands; ++i) {
676 live_in.insert(static_cast<size_t>(cur_agg_expr->getOperand(i)));
678 if (n_operands == 0) {
679 has_count_star_only =
true;
682 if (has_count_star_only && !group_key_count) {
683 live_in.insert(
size_t(0));
687 if (
auto join = dynamic_cast<const RelJoin*>(node)) {
688 std::unordered_set<size_t> lhs_live_ins;
689 std::unordered_set<size_t> rhs_live_ins;
691 auto lhs =
join->getInput(0);
692 auto rhs =
join->getInput(1);
693 const auto rhs_idx_base = lhs->size();
694 for (
const auto idx : live_out) {
695 if (idx < rhs_idx_base) {
696 lhs_live_ins.insert(idx);
698 rhs_live_ins.insert(idx - rhs_idx_base);
701 auto rex_ins = collector.visit(
join->getCondition());
702 for (
const auto& rex_in : rex_ins) {
703 const auto in_idx =
static_cast<size_t>(rex_in.getIndex());
704 if (rex_in.getSourceNode() == lhs) {
705 lhs_live_ins.insert(in_idx);
708 if (rex_in.getSourceNode() == rhs) {
709 rhs_live_ins.insert(in_idx);
714 return {lhs_live_ins, rhs_live_ins};
716 if (
auto sort = dynamic_cast<const RelSort*>(node)) {
718 std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
719 for (
size_t i = 0; i <
sort->collationCount(); ++i) {
720 live_in.insert(
sort->getCollation(i).getField());
724 if (
auto filter = dynamic_cast<const RelFilter*>(node)) {
725 CHECK_EQ(
size_t(1), filter->inputCount());
726 std::unordered_set<size_t> live_in(live_out.begin(), live_out.end());
727 auto rex_ins = collector.visit(filter->getCondition());
728 for (
const auto& rex_in : rex_ins) {
729 live_in.insert(static_cast<size_t>(rex_in.getIndex()));
733 if (
auto table_func = dynamic_cast<const RelTableFunction*>(node)) {
734 const auto input_count = table_func->size();
735 std::unordered_set<size_t> live_in;
736 for (
size_t i = 0; i < input_count; i++) {
740 std::vector<std::unordered_set<size_t>>
result;
742 for (
size_t i = table_func->inputCount(); i > 0; i--) {
743 result.push_back(live_in);
748 if (
auto logical_union = dynamic_cast<const RelLogicalUnion*>(node)) {
749 return std::vector<std::unordered_set<size_t>>(logical_union->inputCount(), live_out);
755 const std::unordered_set<size_t>& live_outs) {
756 CHECK(!dynamic_cast<const RelScan*>(node));
757 if (
auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
758 for (
size_t i = aggregate->getGroupByCount(); i < aggregate->size(); ++i) {
759 if (!live_outs.count(i)) {
766 return node->
size() > live_outs.size();
770 return dynamic_cast<const RelAggregate*
>(node) || dynamic_cast<const RelProject*>(node);
776 const std::unordered_map<
const RelAlgNode*, std::unordered_map<size_t, size_t>>&
778 const std::unordered_set<const RelAlgNode*>& intact_nodes)
779 : liveouts_(liveouts), intact_nodes_(intact_nodes) {}
782 for (
size_t i = 0; i < node->
inputCount(); ++i) {
784 if (!dynamic_cast<const RelScan*>(src) && liveouts_.find(src) == liveouts_.end() &&
785 !intact_nodes_.count(src)) {
793 const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
800 std::unordered_map<
const RelAlgNode*, std::unordered_map<size_t, size_t>>&
802 const std::unordered_set<size_t>& old_liveouts,
803 const std::unordered_set<const RelAlgNode*>& intact_nodes,
804 const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
805 auto live_fields = old_liveouts;
806 if (
auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
807 for (
size_t i = 0; i < aggregate->getGroupByCount(); ++i) {
808 live_fields.insert(i);
812 new_liveouts.insert(std::make_pair(node, std::unordered_map<size_t, size_t>{}));
814 auto& new_indices = it_ok.first->second;
815 if (intact_nodes.count(node)) {
816 for (
size_t i = 0, e = node->
size(); i < e; ++i) {
817 new_indices.insert(std::make_pair(i, i));
822 auto node_sz_it = orig_node_sizes.find(node);
823 CHECK(node_sz_it != orig_node_sizes.end());
824 const auto node_size = node_sz_it->second;
825 CHECK_GT(node_size, live_fields.size());
828 LOG(
INFO) << node_substr << post_fix <<
" eliminated "
829 << node_size - live_fields.size() <<
" columns.";
830 std::vector<size_t> ordered_indices(live_fields.begin(), live_fields.end());
831 std::sort(ordered_indices.begin(), ordered_indices.end());
832 for (
size_t i = 0; i < ordered_indices.size(); ++i) {
833 new_indices.insert(std::make_pair(ordered_indices[i], i));
837 std::vector<size_t> ordered_indices;
838 for (
size_t i = 0, old_base = 0, new_base = 0; i < node->
inputCount(); ++i) {
840 auto src_renum_it = new_liveouts.find(src);
841 if (src_renum_it != new_liveouts.end()) {
842 for (
auto m : src_renum_it->second) {
843 new_indices.insert(std::make_pair(old_base + m.first, new_base + m.second));
845 new_base += src_renum_it->second.
size();
846 }
else if (dynamic_cast<const RelScan*>(src) || intact_nodes.count(src)) {
847 for (
size_t i = 0; i < src->size(); ++i) {
848 new_indices.insert(std::make_pair(old_base + i, new_base + i));
850 new_base += src->size();
854 auto src_sz_it = orig_node_sizes.find(src);
855 CHECK(src_sz_it != orig_node_sizes.end());
856 old_base += src_sz_it->second;
863 const std::unordered_map<
const RelAlgNode*, std::unordered_map<size_t, size_t>>&
865 : node_to_input_renum_(new_numbering) {}
868 auto node_it = node_to_input_renum_.find(source);
869 if (node_it != node_to_input_renum_.end()) {
870 auto old_to_new_num = node_it->second;
871 auto renum_it = old_to_new_num.find(input->
getIndex());
872 CHECK(renum_it != old_to_new_num.end());
873 return boost::make_unique<RexInput>(source, renum_it->second);
879 const std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>&
884 std::vector<std::unique_ptr<const RexAgg>>& agg_exprs,
885 const std::unordered_map<size_t, size_t>& new_numbering) {
886 std::vector<std::unique_ptr<const RexAgg>> new_exprs;
887 for (
auto& expr : agg_exprs) {
888 if (expr->size() >= 1) {
889 auto old_idx = expr->getOperand(0);
890 auto idx_it = new_numbering.find(old_idx);
891 if (idx_it != new_numbering.end()) {
892 std::vector<size_t> operands;
893 operands.push_back(idx_it->second);
894 if (expr->size() == 2) {
895 operands.push_back(expr->getOperand(1));
897 new_exprs.push_back(boost::make_unique<RexAgg>(
898 expr->getKind(), expr->isDistinct(), expr->getType(), operands));
902 new_exprs.push_back(std::move(expr));
908 const std::unordered_map<size_t, size_t>& new_numbering) {
909 auto field_idx = old_field.
getField();
910 auto idx_it = new_numbering.find(field_idx);
911 if (idx_it != new_numbering.end()) {
912 field_idx = idx_it->second;
918 std::vector<std::shared_ptr<RelAlgNode>>& nodes) {
919 std::unordered_map<const RelAlgNode*, std::unordered_set<size_t>> live_outs;
920 std::vector<const RelAlgNode*> work_set;
921 for (
auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
922 auto node = node_it->get();
923 if (dynamic_cast<const RelScan*>(node) || live_outs.count(node) ||
925 dynamic_cast<const RelTableFunction*>(node)) {
928 std::vector<size_t> all_live(node->size());
929 std::iota(all_live.begin(), all_live.end(), size_t(0));
930 live_outs.insert(std::make_pair(
931 node, std::unordered_set<size_t>(all_live.begin(), all_live.end())));
933 work_set.push_back(node);
934 while (!work_set.empty()) {
935 auto walker = work_set.back();
937 CHECK(!dynamic_cast<const RelScan*>(walker));
938 CHECK(live_outs.count(walker));
940 CHECK_EQ(live_ins.size(), walker->inputCount());
941 for (
size_t i = 0; i < walker->inputCount(); ++i) {
942 auto src = walker->getInput(i);
943 if (dynamic_cast<const RelScan*>(src) ||
944 dynamic_cast<const RelTableFunction*>(src) || live_ins[i].empty()) {
947 if (!live_outs.count(src)) {
948 live_outs.insert(std::make_pair(src, std::unordered_set<size_t>{}));
950 auto src_it = live_outs.find(src);
951 CHECK(src_it != live_outs.end());
952 auto& live_out = src_it->second;
953 bool changed =
false;
954 if (!live_out.empty()) {
955 live_out.insert(live_ins[i].begin(), live_ins[i].end());
958 for (
int idx : live_ins[i]) {
959 changed |= live_out.insert(idx).second;
963 work_set.push_back(src);
973 if (
auto scan = dynamic_cast<const RelScan*>(node)) {
974 return scan->getFieldName(index);
976 if (
auto aggregate = dynamic_cast<const RelAggregate*>(node)) {
977 CHECK_EQ(aggregate->size(), aggregate->getFields().size());
978 return aggregate->getFieldName(index);
980 if (
auto join = dynamic_cast<const RelJoin*>(node)) {
981 const auto lhs_size =
join->getInput(0)->size();
982 if (index < lhs_size) {
987 if (
auto project = dynamic_cast<const RelProject*>(node)) {
988 return project->getFieldName(index);
990 CHECK(dynamic_cast<const RelSort*>(node) || dynamic_cast<const RelFilter*>(node));
995 std::vector<std::shared_ptr<RelAlgNode>>& nodes,
996 std::unordered_map<
const RelAlgNode*, std::unordered_set<size_t>>& liveouts,
997 std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
999 std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1000 for (
auto node : nodes) {
1001 new_nodes.push_back(node);
1002 if (!std::dynamic_pointer_cast<RelFilter>(node)) {
1005 const auto filter = node.get();
1006 auto liveout_it = liveouts.find(filter);
1007 CHECK(liveout_it != liveouts.end());
1008 auto& outs = liveout_it->second;
1012 auto usrs_it = du_web.find(filter);
1013 CHECK(usrs_it != du_web.end());
1014 auto& usrs = usrs_it->second;
1018 auto only_usr =
const_cast<RelAlgNode*
>(*usrs.begin());
1020 std::vector<std::unique_ptr<const RexScalar>> exprs;
1021 std::vector<std::string> fields;
1022 for (
size_t i = 0; i < filter->size(); ++i) {
1023 exprs.push_back(boost::make_unique<RexInput>(filter, i));
1026 auto project_owner = std::make_shared<RelProject>(exprs, fields, node);
1027 auto project = project_owner.get();
1030 if (dynamic_cast<const RelJoin*>(only_usr)) {
1032 for (
auto usr : du_web[only_usr]) {
1037 liveouts.insert(std::make_pair(project, outs));
1040 usrs.insert(project);
1042 std::make_pair(project, std::unordered_set<const RelAlgNode*>{only_usr}));
1044 new_nodes.push_back(project_owner);
1046 if (new_nodes.size() > nodes.size()) {
1047 nodes.swap(new_nodes);
1051 std::pair<std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>,
1052 std::vector<const RelAlgNode*>>
1054 const std::unordered_map<
const RelAlgNode*, std::unordered_set<size_t>>& live_outs,
1055 const std::vector<std::shared_ptr<RelAlgNode>>& nodes,
1056 const std::unordered_set<const RelAlgNode*>& intact_nodes,
1057 const std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1059 const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1060 std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1061 liveouts_renumbering;
1062 std::vector<const RelAlgNode*> ready_nodes;
1064 for (
auto node : nodes) {
1069 auto live_pair = live_outs.find(node.get());
1070 CHECK(live_pair != live_outs.end());
1071 auto old_live_outs = live_pair->second;
1073 node.get(), liveouts_renumbering, old_live_outs, intact_nodes, orig_node_sizes);
1074 if (
auto aggregate = std::dynamic_pointer_cast<RelAggregate>(node)) {
1075 auto old_exprs = aggregate->getAggExprsAndRelease();
1076 std::vector<std::unique_ptr<const RexAgg>> new_exprs;
1077 auto key_name_it = aggregate->getFields().begin();
1078 std::vector<std::string> new_fields(key_name_it,
1079 key_name_it + aggregate->getGroupByCount());
1080 for (
size_t i = aggregate->getGroupByCount(), j = 0;
1081 i < aggregate->getFields().size() && j < old_exprs.size();
1083 if (old_live_outs.count(i)) {
1084 new_exprs.push_back(std::move(old_exprs[j]));
1085 new_fields.push_back(aggregate->getFieldName(i));
1088 aggregate->setAggExprs(new_exprs);
1089 aggregate->setFields(std::move(new_fields));
1090 }
else if (
auto project = std::dynamic_pointer_cast<RelProject>(node)) {
1091 auto old_exprs = project->getExpressionsAndRelease();
1092 std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1093 std::vector<std::string> new_fields;
1094 for (
size_t i = 0; i < old_exprs.size(); ++i) {
1095 if (old_live_outs.count(i)) {
1096 new_exprs.push_back(std::move(old_exprs[i]));
1097 new_fields.push_back(project->getFieldName(i));
1100 project->setExpressions(new_exprs);
1101 project->setFields(std::move(new_fields));
1105 auto usrs_it = du_web.find(node.get());
1106 CHECK(usrs_it != du_web.end());
1107 for (
auto usr : usrs_it->second) {
1109 ready_nodes.push_back(usr);
1113 return {liveouts_renumbering, ready_nodes};
1117 std::unordered_map<
const RelAlgNode*, std::unordered_map<size_t, size_t>>&
1118 liveout_renumbering,
1119 const std::vector<const RelAlgNode*>& ready_nodes,
1120 const std::unordered_map<
const RelAlgNode*, std::unordered_set<size_t>>& old_liveouts,
1121 const std::unordered_set<const RelAlgNode*>& intact_nodes,
1122 const std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1124 const std::unordered_map<const RelAlgNode*, size_t>& orig_node_sizes) {
1127 std::deque<const RelAlgNode*> work_set(ready_nodes.begin(), ready_nodes.end());
1128 while (!work_set.empty()) {
1129 auto walker = work_set.front();
1130 work_set.pop_front();
1131 CHECK(!dynamic_cast<const RelScan*>(walker));
1133 if (
auto project = dynamic_cast<RelProject*>(node)) {
1134 auto old_exprs = project->getExpressionsAndRelease();
1135 std::vector<std::unique_ptr<const RexScalar>> new_exprs;
1136 for (
auto& expr : old_exprs) {
1137 new_exprs.push_back(renumberer.
visit(expr.get()));
1139 project->setExpressions(new_exprs);
1140 }
else if (
auto aggregate = dynamic_cast<RelAggregate*>(node)) {
1141 auto src_it = liveout_renumbering.find(node->getInput(0));
1142 CHECK(src_it != liveout_renumbering.end());
1143 auto old_exprs = aggregate->getAggExprsAndRelease();
1145 aggregate->setAggExprs(new_exprs);
1146 }
else if (
auto join = dynamic_cast<RelJoin*>(node)) {
1147 auto new_condition = renumberer.
visit(
join->getCondition());
1148 join->setCondition(new_condition);
1149 }
else if (
auto filter = dynamic_cast<RelFilter*>(node)) {
1150 auto new_condition = renumberer.
visit(filter->getCondition());
1151 filter->setCondition(new_condition);
1152 }
else if (
auto sort = dynamic_cast<RelSort*>(node)) {
1153 auto src_it = liveout_renumbering.find(node->getInput(0));
1154 CHECK(src_it != liveout_renumbering.end());
1155 std::vector<SortField> new_collations;
1156 for (
size_t i = 0; i <
sort->collationCount(); ++i) {
1157 new_collations.push_back(
1160 sort->setCollation(std::move(new_collations));
1161 }
else if (!dynamic_cast<RelLogicalUnion*>(node)) {
1162 LOG(
FATAL) <<
"Unhandled node type: "
1170 auto live_pair = old_liveouts.find(node);
1171 CHECK(live_pair != old_liveouts.end());
1172 auto live_out = live_pair->second;
1174 node, liveout_renumbering, live_out, intact_nodes, orig_node_sizes);
1175 auto usrs_it = du_web.find(walker);
1176 CHECK(usrs_it != du_web.end());
1177 for (
auto usr : usrs_it->second) {
1179 work_set.push_back(usr);
1188 if (nodes.empty()) {
1191 auto root = nodes.back().get();
1195 CHECK(!dynamic_cast<const RelScan*>(
root) && !dynamic_cast<const RelJoin*>(
root));
1198 std::unordered_set<const RelAlgNode*> intact_nodes;
1199 bool has_dead_cols =
false;
1200 for (
auto live_pair : old_liveouts) {
1201 auto node = live_pair.first;
1202 const auto& outs = live_pair.second;
1204 LOG(
WARNING) <<
"RA node with no used column: "
1207 intact_nodes.insert(node);
1210 has_dead_cols =
true;
1212 intact_nodes.insert(node);
1215 if (!has_dead_cols) {
1221 for (
auto node : nodes) {
1226 for (
size_t i = 0; i < node->inputCount(); ++i) {
1227 auto source = node->getInput(i);
1228 if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1234 intact_nodes.insert(node.get());
1238 std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1239 for (
auto node : nodes) {
1240 orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1243 std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1244 liveout_renumbering;
1245 std::vector<const RelAlgNode*> ready_nodes;
1246 std::tie(liveout_renumbering, ready_nodes) =
1250 liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1255 if (!subqueries.empty()) {
1257 auto sort_live_ids_first = [&live_ids](
auto&
a,
auto& b) {
1258 return live_ids.count(
a->getId()) && !live_ids.count(b->getId());
1260 std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1261 size_t n_dead_subqueries;
1262 if (live_ids.count(subqueries.front()->getId())) {
1266 sort_live_ids_first);
1267 n_dead_subqueries = subqueries.cend() - first_dead_itr;
1269 n_dead_subqueries = subqueries.size();
1271 if (n_dead_subqueries) {
1272 VLOG(1) <<
"Eliminating " << n_dead_subqueries
1273 << (n_dead_subqueries == 1 ?
" subquery." :
" subqueries.");
1274 subqueries.resize(subqueries.size() - n_dead_subqueries);
1275 subqueries.shrink_to_fit();
1286 : old_to_new_in_idx_(old_to_new_idx), target_(new_src) {}
1289 CHECK_EQ(target_->inputCount(), size_t(1));
1291 auto idx_it = old_to_new_in_idx_.find(input->
getIndex());
1292 CHECK(idx_it != old_to_new_in_idx_.end());
1293 return boost::make_unique<RexInput>(target_, idx_it->second);
1304 idx_to_sub_condition)
1305 : idx_to_subcond_(idx_to_sub_condition) {}
1307 auto subcond_it = idx_to_subcond_.find(input->
getIndex());
1308 if (subcond_it != idx_to_subcond_.end()) {
1321 std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1324 for (
auto node : nodes) {
1325 auto project = std::dynamic_pointer_cast<
RelProject>(node);
1327 if (!project || project->isSimple() ||
1328 !
dynamic_cast<const RelScan*
>(project->getInput(0))) {
1331 auto usrs_it = web.find(project.get());
1332 CHECK(usrs_it != web.end());
1333 auto& usrs = usrs_it->second;
1334 if (usrs.size() != 1) {
1341 auto outs_it = liveouts.find(
join);
1342 CHECK(outs_it != liveouts.end());
1343 std::unordered_map<size_t, size_t> in_to_out_index;
1344 std::unordered_set<size_t> boolean_expr_indicies;
1345 bool discarded =
false;
1346 for (
size_t i = 0; i < project->size(); ++i) {
1347 auto oper =
dynamic_cast<const RexOperator*
>(project->getProjectAt(i));
1348 if (oper && oper->getType().get_type() ==
kBOOLEAN) {
1349 boolean_expr_indicies.insert(i);
1352 if (
auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1353 in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1359 if (discarded || boolean_expr_indicies.empty()) {
1362 const size_t index_base =
1363 join->getInput(0) == project.get() ? 0 :
join->getInput(0)->size();
1364 for (
auto i : boolean_expr_indicies) {
1365 auto join_idx = index_base + i;
1366 if (outs_it->second.count(join_idx)) {
1374 RexInputCollector collector(project.get());
1375 std::vector<size_t> unloaded_input_indices;
1376 std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1378 for (
auto i : boolean_expr_indicies) {
1379 auto rex_ins = collector.visit(project->getProjectAt(i));
1380 for (
auto& in : rex_ins) {
1381 CHECK_EQ(in.getSourceNode(), project->getInput(0));
1382 if (!in_to_out_index.count(in.getIndex())) {
1383 auto curr_out_index = project->size() + unloaded_input_indices.size();
1384 in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1385 unloaded_input_indices.push_back(in.getIndex());
1387 RexInputSinker sinker(in_to_out_index, project.get());
1388 in_idx_to_new_subcond.insert(
1389 std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1392 if (in_idx_to_new_subcond.empty()) {
1395 std::vector<std::unique_ptr<const RexScalar>> new_projections;
1396 for (
size_t i = 0; i < project->size(); ++i) {
1397 if (boolean_expr_indicies.count(i)) {
1398 new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1400 auto rex_input =
dynamic_cast<const RexInput*
>(project->getProjectAt(i));
1401 CHECK(rex_input !=
nullptr);
1402 new_projections.push_back(rex_input->deepCopy());
1405 for (
auto i : unloaded_input_indices) {
1406 new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1408 project->setExpressions(new_projections);
1410 SubConditionReplacer replacer(in_idx_to_new_subcond);
1411 auto new_condition = replacer.visit(
join->getCondition());
1412 join->setCondition(new_condition);
1421 : old_src_(old_src), new_src_(new_src) {}
1426 auto actual_new_src = new_src_;
1427 if (
auto join = dynamic_cast<const RelJoin*>(new_src_)) {
1428 actual_new_src =
join->getInput(0);
1430 auto src2_input_base = actual_new_src->size();
1431 if (input->
getIndex() >= src2_input_base) {
1432 actual_new_src =
join->getInput(1);
1433 return boost::make_unique<RexInput>(actual_new_src,
1434 input->
getIndex() - src2_input_base);
1438 return boost::make_unique<RexInput>(actual_new_src, input->
getIndex());
1447 std::shared_ptr<const RelAlgNode> old_def_node,
1448 std::shared_ptr<const RelAlgNode> new_def_node,
1449 std::unordered_map<
const RelAlgNode*, std::shared_ptr<RelAlgNode>>& deconst_mapping,
1450 std::unordered_map<
const RelAlgNode*, std::unordered_set<const RelAlgNode*>>&
1452 auto usrs_it = du_web.find(old_def_node.get());
1454 CHECK(usrs_it != du_web.end());
1455 for (
auto usr : usrs_it->second) {
1456 auto usr_it = deconst_mapping.find(usr);
1457 CHECK(usr_it != deconst_mapping.end());
1458 usr_it->second->replaceInput(old_def_node, new_def_node);
1460 auto new_usrs_it = du_web.find(new_def_node.get());
1461 CHECK(new_usrs_it != du_web.end());
1462 new_usrs_it->second.insert(usrs_it->second.begin(), usrs_it->second.end());
1463 usrs_it->second.clear();
1468 void fold_filters(std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1469 std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1470 for (
auto node : nodes) {
1471 deconst_mapping.insert(std::make_pair(node.get(), node));
1475 for (
auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1476 auto& node = *node_it;
1477 if (
auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1478 CHECK_EQ(filter->inputCount(), size_t(1));
1479 auto src_filter =
dynamic_cast<const RelFilter*
>(filter->getInput(0));
1483 auto siblings_it = web.find(src_filter);
1484 if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1487 auto src_it = deconst_mapping.find(src_filter);
1488 CHECK(src_it != deconst_mapping.end());
1489 auto folded_filter = std::dynamic_pointer_cast<
RelFilter>(src_it->second);
1490 CHECK(folded_filter);
1492 if (
auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1493 VLOG(1) <<
"Node ID=" << filter->getId() <<
" folded into "
1494 <<
"ID=" << folded_filter->getId();
1498 VLOG(2) <<
"Folded Node (ID: " << folded_filter->getId()
1499 <<
") contents: " << node_substr << post_fix;
1501 std::vector<std::unique_ptr<const RexScalar>> operands;
1502 operands.emplace_back(folded_filter->getAndReleaseCondition());
1503 auto old_condition =
dynamic_cast<const RexOperator*
>(operands.back().get());
1504 CHECK(old_condition && old_condition->getType().get_type() ==
kBOOLEAN);
1505 RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1506 operands.push_back(redirector.visit(rex_operator));
1507 auto other_condition =
dynamic_cast<const RexOperator*
>(operands.back().get());
1508 CHECK(other_condition && other_condition->getType().get_type() ==
kBOOLEAN);
1509 const bool notnull = old_condition->getType().get_notnull() &&
1510 other_condition->getType().get_notnull();
1511 auto new_condition = std::unique_ptr<const RexScalar>(
1513 folded_filter->setCondition(new_condition);
1515 deconst_mapping.erase(filter.get());
1516 web.erase(filter.get());
1517 web[filter->getInput(0)].erase(filter.get());
1523 if (!nodes.empty()) {
1524 auto sink = nodes.back();
1525 for (
auto node_it = std::next(nodes.rbegin()); node_it != nodes.rend(); ++node_it) {
1538 const size_t first_col_idx,
1539 const size_t last_col_idx) {
1540 if (
auto rex_op = dynamic_cast<const RexOperator*>(condition)) {
1541 switch (rex_op->getOperator()) {
1543 std::vector<const RexScalar*> subconditions;
1544 size_t complete_subcond_count = 0;
1545 for (
size_t i = 0; i < rex_op->size(); ++i) {
1547 rex_op->getOperand(i), source, first_col_idx, last_col_idx);
1548 if (conds.size() == size_t(1)) {
1549 ++complete_subcond_count;
1551 subconditions.insert(subconditions.end(), conds.begin(), conds.end());
1553 if (complete_subcond_count == rex_op->size()) {
1556 return {subconditions};
1562 rex_op->getOperand(0), source, first_col_idx, last_col_idx);
1564 rex_op->getOperand(1), source, first_col_idx, last_col_idx);
1565 const auto lhs_in = lhs_conds.size() == 1
1566 ?
dynamic_cast<const RexInput*
>(*lhs_conds.begin())
1568 const auto rhs_in = rhs_conds.size() == 1
1569 ?
dynamic_cast<const RexInput*
>(*rhs_conds.begin())
1571 if (lhs_in && rhs_in) {
1582 if (
auto rex_in = dynamic_cast<const RexInput*>(condition)) {
1583 if (rex_in->getSourceNode() == source) {
1584 const auto col_idx = rex_in->
getIndex();
1585 return {col_idx >= first_col_idx && col_idx <= last_col_idx ? condition :
nullptr};
1607 return boost::make_unique<RexInput>(
join_->
getInput(0), curr_idx);
1624 return boost::make_unique<RexLiteral>(
1625 true,
kBOOLEAN,
kBOOLEAN, unsigned(-2147483648), 1, unsigned(-2147483648), 1);
1635 std::vector<std::shared_ptr<RelAlgNode>>& nodes) noexcept {
1636 std::unordered_set<const RelAlgNode*> visited;
1638 for (
auto node : nodes) {
1639 if (visited.count(node.get())) {
1642 visited.insert(node.get());
1646 if (
auto literal = dynamic_cast<const RexLiteral*>(
join->getCondition())) {
1649 CHECK(literal->getType() ==
kBOOLEAN && literal->getVal<
bool>());
1650 size_t first_col_idx = 0;
1652 std::vector<const RelJoin*> join_seq{
join};
1654 auto usrs_it = web.find(curr_join);
1655 CHECK(usrs_it != web.end());
1656 if (usrs_it->second.size() != size_t(1)) {
1659 auto only_usr = *usrs_it->second.begin();
1660 if (
auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1661 if (
join == usr_join->getInput(1)) {
1662 const auto src1_offset = usr_join->getInput(0)->size();
1663 first_col_idx += src1_offset;
1665 join_seq.push_back(usr_join);
1666 curr_join = usr_join;
1670 filter =
dynamic_cast<const RelFilter*
>(only_usr);
1674 visited.insert(join_seq.begin(), join_seq.end());
1677 const auto src_join =
dynamic_cast<const RelJoin*
>(filter->
getInput(0));
1679 auto modified_filter =
const_cast<RelFilter*
>(filter);
1681 if (src_join ==
join) {
1682 std::unique_ptr<const RexScalar> filter_condition(
1683 modified_filter->getAndReleaseCondition());
1684 std::unique_ptr<const RexScalar> true_condition =
1685 boost::make_unique<RexLiteral>(
true,
1688 unsigned(-2147483648),
1690 unsigned(-2147483648),
1693 join->setCondition(filter_condition);
1696 const auto src1_base = src_join->getInput(0)->size();
1698 first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1700 first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1701 auto join_conditions =
1705 first_col_idx +
join->size() - 1);
1706 if (join_conditions.empty()) {
1711 if (join_conditions.size() == 1) {
1712 auto new_join_condition = rebaser.
visit(*join_conditions.begin());
1713 join->setCondition(new_join_condition);
1715 std::vector<std::unique_ptr<const RexScalar>> operands;
1716 bool notnull =
true;
1717 for (
size_t i = 0; i < join_conditions.size(); ++i) {
1718 operands.emplace_back(rebaser.
visit(join_conditions[i]));
1719 auto old_subcond =
dynamic_cast<const RexOperator*
>(join_conditions[i]);
1720 CHECK(old_subcond && old_subcond->getType().get_type() ==
kBOOLEAN);
1721 notnull = notnull && old_subcond->getType().get_notnull();
1723 auto new_join_condition = std::unique_ptr<const RexScalar>(
1725 join->setCondition(new_join_condition);
1730 modified_filter->setCondition(new_filter_condition);
1738 auto from_fields = from_node->getFields();
1739 if (!from_fields.empty()) {
1740 if (
auto proj_to = dynamic_cast<RelProject*>(to_node);
1741 proj_to && proj_to->getFields().size() == from_fields.size()) {
1742 proj_to->setFields(std::move(from_fields));
1743 }
else if (
auto agg_to = dynamic_cast<RelAggregate*>(to_node);
1744 agg_to && agg_to->getFields().size() == from_fields.size()) {
1745 agg_to->setFields(std::move(from_fields));
1746 }
else if (
auto compound_to = dynamic_cast<RelCompound*>(to_node);
1747 compound_to && compound_to->getFields().size() == from_fields.size()) {
1748 compound_to->setFields(std::move(from_fields));
1749 }
else if (
auto tf_to = dynamic_cast<RelTableFunction*>(to_node);
1750 tf_to && tf_to->getFields().size() == from_fields.size()) {
1751 tf_to->setFields(std::move(from_fields));
1760 if (nodes.size() < 3) {
1763 for (
size_t i = 0; i <= nodes.size() - 3;) {
1764 auto first_sort = std::dynamic_pointer_cast<
RelSort>(nodes[i]);
1765 const auto project = std::dynamic_pointer_cast<
const RelProject>(nodes[i + 1]);
1766 auto second_sort = std::dynamic_pointer_cast<
RelSort>(nodes[i + 2]);
1767 if (first_sort && second_sort && project && project->isIdentity() &&
1768 *first_sort == *second_sort) {
1770 const_cast<RelAlgNode*>(first_sort->getInput(0)));
1771 second_sort->replaceInput(second_sort->getAndOwnInput(0),
1772 first_sort->getAndOwnInput(0));
1774 nodes[i + 1].reset();
1781 std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1782 for (
auto node : nodes) {
1786 new_nodes.push_back(node);
1788 nodes.swap(new_nodes);
DEVICE auto upper_bound(ARGS &&...args)
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
SubConditionReplacer(const std::unordered_map< size_t, std::unique_ptr< const RexScalar >> &idx_to_sub_condition)
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const size_t target_count_
size_t get_actual_source_size(const RelProject *curr_project, const std::unordered_set< const RelProject * > &projects_to_remove)
size_t pick_always_live_col_idx(const RelAlgNode *node)
bool hasAllSrcReady(const RelAlgNode *node) const
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void hoist_filter_cond_to_cross_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
AvailabilityChecker(const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes)
size_t size() const override
void sink_projected_boolean_expr_to_join(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
RetType visitInput(const RexInput *input) const override
void eliminate_identical_copy(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
SortDirection getSortDir() const
bool does_redef_cols(const RelAlgNode *node)
const RexScalar * getCondition() const
RetType visitOperator(const RexOperator *rex_operator) const override
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
DEVICE void sort(ARGS &&...args)
RetType visitOperator(const RexOperator *rex_operator) const override
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool safe_to_redirect(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void simplify_sort(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
virtual T visit(const RexScalar *rex_scalar) const
const std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > > & liveouts_
std::vector< std::unordered_set< size_t > > get_live_ins(const RelAlgNode *node, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool fast_logging_check(Channel)
NullSortedPosition getNullsPosition() const
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
const RelAlgNode * getInput(const size_t idx) const
RetType visitInput(const RexInput *input) const override
virtual void replaceInput(std::shared_ptr< const RelAlgNode > old_input, std::shared_ptr< const RelAlgNode > input)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
virtual std::string toString(RelRexToStringConfig config=RelRexToStringConfig::defaults()) const =0
const std::unordered_set< const RelAlgNode * > & intact_nodes_
const RexScalar * getProjectAt(const size_t idx) const
void sync_field_names_if_necessary(std::shared_ptr< const RelProject > from_node, RelAlgNode *to_node) noexcept
static RelRexToStringConfig defaults()
DEVICE void iota(ARGS &&...args)
virtual size_t size() const =0
RetType visitInput(const RexInput *input) const override
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::vector< std::unique_ptr< const RexAgg > > renumber_rex_aggs(std::vector< std::unique_ptr< const RexAgg >> &agg_exprs, const std::unordered_map< size_t, size_t > &new_numbering)
SortField renumber_sort_field(const SortField &old_field, const std::unordered_map< size_t, size_t > &new_numbering)
SubConditionRemover(const std::vector< const RexScalar * > sub_conds)
std::unordered_set< const RexScalar * > sub_conditions_
void propagate_rex_input_renumber(const RelFilter *excluded_root, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
void add_new_indices_for(const RelAlgNode *node, std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &new_liveouts, const std::unordered_set< size_t > &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
std::string get_field_name(const RelAlgNode *node, size_t index)
RexSubQueryIdCollector is a visitor class that collects all RexSubQuery::getId() values for all RexSu...
void fold_filters(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
const std::unordered_map< size_t, std::unique_ptr< const RexScalar > > & idx_to_subcond_
const size_t inputCount() const
void eliminate_dead_subqueries(std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
JoinTargetRebaser(const RelJoin *join, const unsigned old_base)
DEVICE void swap(ARGS &&...args)
std::unique_ptr< const RexScalar > RetType
void eliminate_dead_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept