OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgOptimizer.h File Reference
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+ Include dependency graph for RelAlgOptimizer.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::unordered_map< const
RelAlgNode
*, std::unordered_set< const
RelAlgNode * > > 
build_du_web (const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_identical_copy (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_columns (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void eliminate_dead_subqueries (std::vector< std::shared_ptr< RexSubQuery >> &subqueries, RelAlgNode const *root)
 
void fold_filters (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void hoist_filter_cond_to_cross_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void simplify_sort (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 
void sink_projected_boolean_expr_to_join (std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
 

Function Documentation

std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*> > build_du_web ( const std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 416 of file RelAlgOptimizer.cpp.

References CHECK.

Referenced by eliminate_dead_columns(), eliminate_identical_copy(), fold_filters(), hoist_filter_cond_to_cross_join(), and sink_projected_boolean_expr_to_join().

417  {
418  std::unordered_map<const RelAlgNode*, std::unordered_set<const RelAlgNode*>> web;
419  std::unordered_set<const RelAlgNode*> visited;
420  std::vector<const RelAlgNode*> work_set;
421  for (auto node : nodes) {
422  if (std::dynamic_pointer_cast<RelScan>(node) ||
423  std::dynamic_pointer_cast<RelModify>(node) || visited.count(node.get())) {
424  continue;
425  }
426  work_set.push_back(node.get());
427  while (!work_set.empty()) {
428  auto walker = work_set.back();
429  work_set.pop_back();
430  if (visited.count(walker)) {
431  continue;
432  }
433  CHECK(!web.count(walker));
434  auto it_ok =
435  web.insert(std::make_pair(walker, std::unordered_set<const RelAlgNode*>{}));
436  CHECK(it_ok.second);
437  visited.insert(walker);
438  CHECK(dynamic_cast<const RelJoin*>(walker) ||
439  dynamic_cast<const RelProject*>(walker) ||
440  dynamic_cast<const RelAggregate*>(walker) ||
441  dynamic_cast<const RelFilter*>(walker) ||
442  dynamic_cast<const RelSort*>(walker) ||
443  dynamic_cast<const RelLeftDeepInnerJoin*>(walker) ||
444  dynamic_cast<const RelLogicalValues*>(walker) ||
445  dynamic_cast<const RelTableFunction*>(walker) ||
446  dynamic_cast<const RelLogicalUnion*>(walker));
447  for (size_t i = 0; i < walker->inputCount(); ++i) {
448  auto src = walker->getInput(i);
449  if (dynamic_cast<const RelScan*>(src) || dynamic_cast<const RelModify*>(src)) {
450  continue;
451  }
452  if (web.empty() || !web.count(src)) {
453  web.insert(std::make_pair(src, std::unordered_set<const RelAlgNode*>{}));
454  }
455  web[src].insert(walker);
456  work_set.push_back(src);
457  }
458  }
459  }
460  return web;
461 }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

void eliminate_dead_columns ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1187 of file RelAlgOptimizer.cpp.

References anonymous_namespace{RelAlgOptimizer.cpp}::any_dead_col_in(), build_du_web(), CHECK, RelRexToStringConfig::defaults(), anonymous_namespace{RelAlgOptimizer.cpp}::does_redef_cols(), LOG, anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::propagate_input_renumbering(), setup::root, anonymous_namespace{RelAlgOptimizer.cpp}::sweep_dead_columns(), anonymous_namespace{RelAlgOptimizer.cpp}::try_insert_coalesceable_proj(), and logger::WARNING.

Referenced by RelAlgDagBuilder::optimizeDag().

1187  {
1188  if (nodes.empty()) {
1189  return;
1190  }
1191  auto root = nodes.back().get();
1192  if (!root) {
1193  return;
1194  }
1195  CHECK(!dynamic_cast<const RelScan*>(root) && !dynamic_cast<const RelJoin*>(root));
1196  // Mark
1197  auto old_liveouts = mark_live_columns(nodes);
1198  std::unordered_set<const RelAlgNode*> intact_nodes;
1199  bool has_dead_cols = false;
1200  for (auto live_pair : old_liveouts) {
1201  auto node = live_pair.first;
1202  const auto& outs = live_pair.second;
1203  if (outs.empty()) {
1204  LOG(WARNING) << "RA node with no used column: "
1205  << node->toString(RelRexToStringConfig::defaults());
1206  // Ignore empty live_out due to some invalid node
1207  intact_nodes.insert(node);
1208  }
1209  if (any_dead_col_in(node, outs)) {
1210  has_dead_cols = true;
1211  } else {
1212  intact_nodes.insert(node);
1213  }
1214  }
1215  if (!has_dead_cols) {
1216  return;
1217  }
1218  auto web = build_du_web(nodes);
1219  try_insert_coalesceable_proj(nodes, old_liveouts, web);
1220 
1221  for (auto node : nodes) {
1222  if (intact_nodes.count(node.get()) || does_redef_cols(node.get())) {
1223  continue;
1224  }
1225  bool intact = true;
1226  for (size_t i = 0; i < node->inputCount(); ++i) {
1227  auto source = node->getInput(i);
1228  if (!dynamic_cast<const RelScan*>(source) && !intact_nodes.count(source)) {
1229  intact = false;
1230  break;
1231  }
1232  }
1233  if (intact) {
1234  intact_nodes.insert(node.get());
1235  }
1236  }
1237 
1238  std::unordered_map<const RelAlgNode*, size_t> orig_node_sizes;
1239  for (auto node : nodes) {
1240  orig_node_sizes.insert(std::make_pair(node.get(), node->size()));
1241  }
1242  // Sweep
1243  std::unordered_map<const RelAlgNode*, std::unordered_map<size_t, size_t>>
1244  liveout_renumbering;
1245  std::vector<const RelAlgNode*> ready_nodes;
1246  std::tie(liveout_renumbering, ready_nodes) =
1247  sweep_dead_columns(old_liveouts, nodes, intact_nodes, web, orig_node_sizes);
1248  // Propagate
1250  liveout_renumbering, ready_nodes, old_liveouts, intact_nodes, web, orig_node_sizes);
1251 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define LOG(tag)
Definition: Logger.h:285
bool does_redef_cols(const RelAlgNode *node)
void propagate_input_renumbering(std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t >> &liveout_renumbering, const std::vector< const RelAlgNode * > &ready_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &old_liveouts, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)
tuple root
Definition: setup.in.py:14
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool any_dead_col_in(const RelAlgNode *node, const std::unordered_set< size_t > &live_outs)
static RelRexToStringConfig defaults()
Definition: RelAlgDag.h:78
void try_insert_coalesceable_proj(std::vector< std::shared_ptr< RelAlgNode >> &nodes, std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &liveouts, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define CHECK(condition)
Definition: Logger.h:291
std::pair< std::unordered_map< const RelAlgNode *, std::unordered_map< size_t, size_t > >, std::vector< const RelAlgNode * > > sweep_dead_columns(const std::unordered_map< const RelAlgNode *, std::unordered_set< size_t >> &live_outs, const std::vector< std::shared_ptr< RelAlgNode >> &nodes, const std::unordered_set< const RelAlgNode * > &intact_nodes, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_map< const RelAlgNode *, size_t > &orig_node_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_dead_subqueries ( std::vector< std::shared_ptr< RexSubQuery >> &  subqueries,
RelAlgNode const *  root 
)

Definition at line 1253 of file RelAlgOptimizer.cpp.

References anonymous_namespace{Utm.h}::a, RexSubQueryIdCollector::getLiveRexSubQueryIds(), gpu_enabled::upper_bound(), and VLOG.

Referenced by RelAlgDagBuilder::optimizeDag().

1254  {
1255  if (!subqueries.empty()) {
1257  auto sort_live_ids_first = [&live_ids](auto& a, auto& b) {
1258  return live_ids.count(a->getId()) && !live_ids.count(b->getId());
1259  };
1260  std::stable_sort(subqueries.begin(), subqueries.end(), sort_live_ids_first);
1261  size_t n_dead_subqueries;
1262  if (live_ids.count(subqueries.front()->getId())) {
1263  auto first_dead_itr = std::upper_bound(subqueries.cbegin(),
1264  subqueries.cend(),
1265  subqueries.front(),
1266  sort_live_ids_first);
1267  n_dead_subqueries = subqueries.cend() - first_dead_itr;
1268  } else {
1269  n_dead_subqueries = subqueries.size();
1270  }
1271  if (n_dead_subqueries) {
1272  VLOG(1) << "Eliminating " << n_dead_subqueries
1273  << (n_dead_subqueries == 1 ? " subquery." : " subqueries.");
1274  subqueries.resize(subqueries.size() - n_dead_subqueries);
1275  subqueries.shrink_to_fit();
1276  }
1277  }
1278 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
tuple root
Definition: setup.in.py:14
constexpr double a
Definition: Utm.h:32
static Ids getLiveRexSubQueryIds(RelAlgNode const *)
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void eliminate_identical_copy ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 494 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), anonymous_namespace{RelAlgOptimizer.cpp}::get_visible_projects(), anonymous_namespace{RelAlgOptimizer.cpp}::is_distinct(), anonymous_namespace{RelAlgOptimizer.cpp}::is_identical_copy(), project_separates_sort(), anonymous_namespace{RelAlgOptimizer.cpp}::redirect_inputs_of(), and gpu_enabled::swap().

Referenced by RelAlgDagBuilder::optimizeDag().

494  {
495  std::unordered_set<std::shared_ptr<const RelAlgNode>> copies;
496  auto sink = nodes.back();
497  for (auto node : nodes) {
498  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(node);
499  if (!aggregate || aggregate == sink ||
500  !(aggregate->getGroupByCount() == 1 && aggregate->getAggExprsCount() == 0)) {
501  continue;
502  }
503  auto project =
504  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
505  if (project && project->size() == aggregate->size() &&
506  project->getFields() == aggregate->getFields()) {
507  CHECK_EQ(size_t(0), copies.count(aggregate));
508  copies.insert(aggregate);
509  }
510  }
511  for (auto node : nodes) {
512  if (!node->inputCount()) {
513  continue;
514  }
515  auto last_source = node->getAndOwnInput(node->inputCount() - 1);
516  if (!copies.count(last_source)) {
517  continue;
518  }
519  auto aggregate = std::dynamic_pointer_cast<const RelAggregate>(last_source);
520  CHECK(aggregate);
521  if (!std::dynamic_pointer_cast<const RelJoin>(node) || aggregate->size() != 1) {
522  continue;
523  }
524  auto project =
525  std::dynamic_pointer_cast<const RelProject>(aggregate->getAndOwnInput(0));
526  CHECK(project);
527  CHECK_EQ(size_t(1), project->size());
528  if (!is_distinct(size_t(0), project.get())) {
529  continue;
530  }
531  auto new_source = project->getAndOwnInput(0);
532  if (std::dynamic_pointer_cast<const RelSort>(new_source) ||
533  std::dynamic_pointer_cast<const RelScan>(new_source)) {
534  node->replaceInput(last_source, new_source);
535  }
536  }
537  decltype(copies)().swap(copies);
538 
539  auto web = build_du_web(nodes);
540 
541  std::unordered_set<const RelProject*> projects;
542  std::unordered_set<const RelProject*> permutating_projects;
543  auto const visible_projs = get_visible_projects(nodes.back().get());
544  for (auto node_it = nodes.begin(); node_it != nodes.end(); node_it++) {
545  auto node = *node_it;
546  auto project = std::dynamic_pointer_cast<RelProject>(node);
547  auto next_node_it = std::next(node_it);
548  if (project && project->isSimple() &&
549  (!visible_projs.count(project.get()) || !project->isRenaming()) &&
550  is_identical_copy(project.get(), web, projects, permutating_projects) &&
552  project.get(), next_node_it == nodes.end() ? nullptr : next_node_it->get())) {
553  projects.insert(project.get());
554  }
555  }
556 
557  for (auto node : nodes) {
558  redirect_inputs_of(node, projects, permutating_projects, web);
559  }
560 
561  cleanup_dead_nodes(nodes);
562 }
bool is_identical_copy(const RelProject *project, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web, const std::unordered_set< const RelProject * > &projects_to_remove, std::unordered_set< const RelProject * > &permutating_projects)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void redirect_inputs_of(std::shared_ptr< RelAlgNode > node, const std::unordered_set< const RelProject * > &projects, const std::unordered_set< const RelProject * > &permutating_projects, const std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
std::unordered_set< const RelProject * > get_visible_projects(const RelAlgNode *root)
bool project_separates_sort(const RelProject *project, const RelAlgNode *next_node)
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:291
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fold_filters ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1468 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, anonymous_namespace{RelAlgOptimizer.cpp}::cleanup_dead_nodes(), logger::DEBUG2, RelRexToStringConfig::defaults(), logger::fast_logging_check(), g_max_log_length, kAND, kBOOLEAN, anonymous_namespace{RelAlgOptimizer.cpp}::replace_all_usages(), substring(), and VLOG.

Referenced by RelAlgDagBuilder::optimizeDag().

1468  {
1469  std::unordered_map<const RelAlgNode*, std::shared_ptr<RelAlgNode>> deconst_mapping;
1470  for (auto node : nodes) {
1471  deconst_mapping.insert(std::make_pair(node.get(), node));
1472  }
1473 
1474  auto web = build_du_web(nodes);
1475  for (auto node_it = nodes.rbegin(); node_it != nodes.rend(); ++node_it) {
1476  auto& node = *node_it;
1477  if (auto filter = std::dynamic_pointer_cast<RelFilter>(node)) {
1478  CHECK_EQ(filter->inputCount(), size_t(1));
1479  auto src_filter = dynamic_cast<const RelFilter*>(filter->getInput(0));
1480  if (!src_filter) {
1481  continue;
1482  }
1483  auto siblings_it = web.find(src_filter);
1484  if (siblings_it == web.end() || siblings_it->second.size() != size_t(1)) {
1485  continue;
1486  }
1487  auto src_it = deconst_mapping.find(src_filter);
1488  CHECK(src_it != deconst_mapping.end());
1489  auto folded_filter = std::dynamic_pointer_cast<RelFilter>(src_it->second);
1490  CHECK(folded_filter);
1491  // TODO(miyu) : drop filter w/ only expression valued constant TRUE?
1492  if (auto rex_operator = dynamic_cast<const RexOperator*>(filter->getCondition())) {
1493  VLOG(1) << "Node ID=" << filter->getId() << " folded into "
1494  << "ID=" << folded_filter->getId();
1496  auto node_str = folded_filter->toString(RelRexToStringConfig::defaults());
1497  auto [node_substr, post_fix] = ::substring(node_str, g_max_log_length);
1498  VLOG(2) << "Folded Node (ID: " << folded_filter->getId()
1499  << ") contents: " << node_substr << post_fix;
1500  }
1501  std::vector<std::unique_ptr<const RexScalar>> operands;
1502  operands.emplace_back(folded_filter->getAndReleaseCondition());
1503  auto old_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1504  CHECK(old_condition && old_condition->getType().get_type() == kBOOLEAN);
1505  RexInputRedirector redirector(folded_filter.get(), folded_filter->getInput(0));
1506  operands.push_back(redirector.visit(rex_operator));
1507  auto other_condition = dynamic_cast<const RexOperator*>(operands.back().get());
1508  CHECK(other_condition && other_condition->getType().get_type() == kBOOLEAN);
1509  const bool notnull = old_condition->getType().get_notnull() &&
1510  other_condition->getType().get_notnull();
1511  auto new_condition = std::unique_ptr<const RexScalar>(
1512  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1513  folded_filter->setCondition(new_condition);
1514  replace_all_usages(filter, folded_filter, deconst_mapping, web);
1515  deconst_mapping.erase(filter.get());
1516  web.erase(filter.get());
1517  web[filter->getInput(0)].erase(filter.get());
1518  node.reset();
1519  }
1520  }
1521  }
1522 
1523  if (!nodes.empty()) {
1524  auto sink = nodes.back();
1525  for (auto node_it = std::next(nodes.rbegin()); node_it != nodes.rend(); ++node_it) {
1526  if (sink) {
1527  break;
1528  }
1529  sink = *node_it;
1530  }
1531  CHECK(sink);
1532  cleanup_dead_nodes(nodes);
1533  }
1534 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void cleanup_dead_nodes(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
bool fast_logging_check(Channel)
Definition: Logger.h:265
Definition: sqldefs.h:39
size_t g_max_log_length
Definition: Execute.cpp:176
static RelRexToStringConfig defaults()
Definition: RelAlgDag.h:78
#define CHECK(condition)
Definition: Logger.h:291
void replace_all_usages(std::shared_ptr< const RelAlgNode > old_def_node, std::shared_ptr< const RelAlgNode > new_def_node, std::unordered_map< const RelAlgNode *, std::shared_ptr< RelAlgNode >> &deconst_mapping, std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * >> &du_web)
#define VLOG(n)
Definition: Logger.h:388
std::pair< std::string_view, const char * > substring(const std::string &str, size_t substr_length)
return substring of str with postfix if str.size() &gt; substr_length

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void hoist_filter_cond_to_cross_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1634 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, find_hoistable_conditions(), RelFilter::getCondition(), RelAlgNode::getInput(), INNER, join(), kAND, kBOOLEAN, RelFilter::setCondition(), and RexVisitorBase< T >::visit().

Referenced by RelAlgDagBuilder::optimizeDag().

1635  {
1636  std::unordered_set<const RelAlgNode*> visited;
1637  auto web = build_du_web(nodes);
1638  for (auto node : nodes) {
1639  if (visited.count(node.get())) {
1640  continue;
1641  }
1642  visited.insert(node.get());
1643  auto join = dynamic_cast<RelJoin*>(node.get());
1644  if (join && join->getJoinType() == JoinType::INNER) {
1645  // Only allow cross join for now.
1646  if (auto literal = dynamic_cast<const RexLiteral*>(join->getCondition())) {
1647  // Assume Calcite always generates an inner join on constant boolean true for
1648  // cross join.
1649  CHECK(literal->getType() == kBOOLEAN && literal->getVal<bool>());
1650  size_t first_col_idx = 0;
1651  const RelFilter* filter = nullptr;
1652  std::vector<const RelJoin*> join_seq{join};
1653  for (const RelJoin* curr_join = join; !filter;) {
1654  auto usrs_it = web.find(curr_join);
1655  CHECK(usrs_it != web.end());
1656  if (usrs_it->second.size() != size_t(1)) {
1657  break;
1658  }
1659  auto only_usr = *usrs_it->second.begin();
1660  if (auto usr_join = dynamic_cast<const RelJoin*>(only_usr)) {
1661  if (join == usr_join->getInput(1)) {
1662  const auto src1_offset = usr_join->getInput(0)->size();
1663  first_col_idx += src1_offset;
1664  }
1665  join_seq.push_back(usr_join);
1666  curr_join = usr_join;
1667  continue;
1668  }
1669 
1670  filter = dynamic_cast<const RelFilter*>(only_usr);
1671  break;
1672  }
1673  if (!filter) {
1674  visited.insert(join_seq.begin(), join_seq.end());
1675  continue;
1676  }
1677  const auto src_join = dynamic_cast<const RelJoin*>(filter->getInput(0));
1678  CHECK(src_join);
1679  auto modified_filter = const_cast<RelFilter*>(filter);
1680 
1681  if (src_join == join) {
1682  std::unique_ptr<const RexScalar> filter_condition(
1683  modified_filter->getAndReleaseCondition());
1684  std::unique_ptr<const RexScalar> true_condition =
1685  boost::make_unique<RexLiteral>(true,
1686  kBOOLEAN,
1687  kBOOLEAN,
1688  unsigned(-2147483648),
1689  1,
1690  unsigned(-2147483648),
1691  1);
1692  modified_filter->setCondition(true_condition);
1693  join->setCondition(filter_condition);
1694  continue;
1695  }
1696  const auto src1_base = src_join->getInput(0)->size();
1697  auto source =
1698  first_col_idx < src1_base ? src_join->getInput(0) : src_join->getInput(1);
1699  first_col_idx =
1700  first_col_idx < src1_base ? first_col_idx : first_col_idx - src1_base;
1701  auto join_conditions =
1703  source,
1704  first_col_idx,
1705  first_col_idx + join->size() - 1);
1706  if (join_conditions.empty()) {
1707  continue;
1708  }
1709 
1710  JoinTargetRebaser rebaser(join, first_col_idx);
1711  if (join_conditions.size() == 1) {
1712  auto new_join_condition = rebaser.visit(*join_conditions.begin());
1713  join->setCondition(new_join_condition);
1714  } else {
1715  std::vector<std::unique_ptr<const RexScalar>> operands;
1716  bool notnull = true;
1717  for (size_t i = 0; i < join_conditions.size(); ++i) {
1718  operands.emplace_back(rebaser.visit(join_conditions[i]));
1719  auto old_subcond = dynamic_cast<const RexOperator*>(join_conditions[i]);
1720  CHECK(old_subcond && old_subcond->getType().get_type() == kBOOLEAN);
1721  notnull = notnull && old_subcond->getType().get_notnull();
1722  }
1723  auto new_join_condition = std::unique_ptr<const RexScalar>(
1724  new RexOperator(kAND, operands, SQLTypeInfo(kBOOLEAN, notnull)));
1725  join->setCondition(new_join_condition);
1726  }
1727 
1728  SubConditionRemover remover(join_conditions);
1729  auto new_filter_condition = remover.visit(filter->getCondition());
1730  modified_filter->setCondition(new_filter_condition);
1731  }
1732  }
1733  }
1734 }
std::vector< const RexScalar * > find_hoistable_conditions(const RexScalar *condition, const RelAlgNode *source, const size_t first_col_idx, const size_t last_col_idx)
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
void setCondition(std::unique_ptr< const RexScalar > &condition)
Definition: RelAlgDag.h:1902
const RexScalar * getCondition() const
Definition: RelAlgDag.h:1898
std::string join(T const &container, std::string const &delim)
const RelAlgNode * getInput(const size_t idx) const
Definition: RelAlgDag.h:877
Definition: sqldefs.h:39
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void simplify_sort ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1759 of file RelAlgOptimizer.cpp.

References sync_field_names_if_necessary().

Referenced by RelAlgDagBuilder::optimizeDag().

1759  {
1760  if (nodes.size() < 3) {
1761  return;
1762  }
1763  for (size_t i = 0; i <= nodes.size() - 3;) {
1764  auto first_sort = std::dynamic_pointer_cast<RelSort>(nodes[i]);
1765  const auto project = std::dynamic_pointer_cast<const RelProject>(nodes[i + 1]);
1766  auto second_sort = std::dynamic_pointer_cast<RelSort>(nodes[i + 2]);
1767  if (first_sort && second_sort && project && project->isIdentity() &&
1768  *first_sort == *second_sort) {
1769  sync_field_names_if_necessary(project, /* an input of the second sort */
1770  const_cast<RelAlgNode*>(first_sort->getInput(0)));
1771  second_sort->replaceInput(second_sort->getAndOwnInput(0),
1772  first_sort->getAndOwnInput(0));
1773  nodes[i].reset();
1774  nodes[i + 1].reset();
1775  i += 3;
1776  } else {
1777  ++i;
1778  }
1779  }
1780 
1781  std::vector<std::shared_ptr<RelAlgNode>> new_nodes;
1782  for (auto node : nodes) {
1783  if (!node) {
1784  continue;
1785  }
1786  new_nodes.push_back(node);
1787  }
1788  nodes.swap(new_nodes);
1789 }
void sync_field_names_if_necessary(std::shared_ptr< const RelProject > from_node, RelAlgNode *to_node) noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sink_projected_boolean_expr_to_join ( std::vector< std::shared_ptr< RelAlgNode >> &  nodes)
noexcept

Definition at line 1320 of file RelAlgOptimizer.cpp.

References build_du_web(), CHECK, CHECK_EQ, join(), kBOOLEAN, and anonymous_namespace{RelAlgOptimizer.cpp}::mark_live_columns().

Referenced by RelAlgDagBuilder::optimizeDag().

1321  {
1322  auto web = build_du_web(nodes);
1323  auto liveouts = mark_live_columns(nodes);
1324  for (auto node : nodes) {
1325  auto project = std::dynamic_pointer_cast<RelProject>(node);
1326  // TODO(miyu): relax RelScan limitation
1327  if (!project || project->isSimple() ||
1328  !dynamic_cast<const RelScan*>(project->getInput(0))) {
1329  continue;
1330  }
1331  auto usrs_it = web.find(project.get());
1332  CHECK(usrs_it != web.end());
1333  auto& usrs = usrs_it->second;
1334  if (usrs.size() != 1) {
1335  continue;
1336  }
1337  auto join = dynamic_cast<RelJoin*>(const_cast<RelAlgNode*>(*usrs.begin()));
1338  if (!join) {
1339  continue;
1340  }
1341  auto outs_it = liveouts.find(join);
1342  CHECK(outs_it != liveouts.end());
1343  std::unordered_map<size_t, size_t> in_to_out_index;
1344  std::unordered_set<size_t> boolean_expr_indicies;
1345  bool discarded = false;
1346  for (size_t i = 0; i < project->size(); ++i) {
1347  auto oper = dynamic_cast<const RexOperator*>(project->getProjectAt(i));
1348  if (oper && oper->getType().get_type() == kBOOLEAN) {
1349  boolean_expr_indicies.insert(i);
1350  } else {
1351  // TODO(miyu): relax?
1352  if (auto input = dynamic_cast<const RexInput*>(project->getProjectAt(i))) {
1353  in_to_out_index.insert(std::make_pair(input->getIndex(), i));
1354  } else {
1355  discarded = true;
1356  }
1357  }
1358  }
1359  if (discarded || boolean_expr_indicies.empty()) {
1360  continue;
1361  }
1362  const size_t index_base =
1363  join->getInput(0) == project.get() ? 0 : join->getInput(0)->size();
1364  for (auto i : boolean_expr_indicies) {
1365  auto join_idx = index_base + i;
1366  if (outs_it->second.count(join_idx)) {
1367  discarded = true;
1368  break;
1369  }
1370  }
1371  if (discarded) {
1372  continue;
1373  }
1374  RexInputCollector collector(project.get());
1375  std::vector<size_t> unloaded_input_indices;
1376  std::unordered_map<size_t, std::unique_ptr<const RexScalar>> in_idx_to_new_subcond;
1377  // Given all are dead right after join, safe to sink
1378  for (auto i : boolean_expr_indicies) {
1379  auto rex_ins = collector.visit(project->getProjectAt(i));
1380  for (auto& in : rex_ins) {
1381  CHECK_EQ(in.getSourceNode(), project->getInput(0));
1382  if (!in_to_out_index.count(in.getIndex())) {
1383  auto curr_out_index = project->size() + unloaded_input_indices.size();
1384  in_to_out_index.insert(std::make_pair(in.getIndex(), curr_out_index));
1385  unloaded_input_indices.push_back(in.getIndex());
1386  }
1387  RexInputSinker sinker(in_to_out_index, project.get());
1388  in_idx_to_new_subcond.insert(
1389  std::make_pair(i, sinker.visit(project->getProjectAt(i))));
1390  }
1391  }
1392  if (in_idx_to_new_subcond.empty()) {
1393  continue;
1394  }
1395  std::vector<std::unique_ptr<const RexScalar>> new_projections;
1396  for (size_t i = 0; i < project->size(); ++i) {
1397  if (boolean_expr_indicies.count(i)) {
1398  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), 0));
1399  } else {
1400  auto rex_input = dynamic_cast<const RexInput*>(project->getProjectAt(i));
1401  CHECK(rex_input != nullptr);
1402  new_projections.push_back(rex_input->deepCopy());
1403  }
1404  }
1405  for (auto i : unloaded_input_indices) {
1406  new_projections.push_back(boost::make_unique<RexInput>(project->getInput(0), i));
1407  }
1408  project->setExpressions(new_projections);
1409 
1410  SubConditionReplacer replacer(in_idx_to_new_subcond);
1411  auto new_condition = replacer.visit(join->getCondition());
1412  join->setCondition(new_condition);
1413  }
1414 }
std::unordered_map< const RelAlgNode *, std::unordered_set< const RelAlgNode * > > build_du_web(const std::vector< std::shared_ptr< RelAlgNode >> &nodes) noexcept
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string join(T const &container, std::string const &delim)
std::unordered_map< const RelAlgNode *, std::unordered_set< size_t > > mark_live_columns(std::vector< std::shared_ptr< RelAlgNode >> &nodes)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function: