OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExpressionRewrite.cpp File Reference
#include "QueryEngine/ExpressionRewrite.h"
#include <algorithm>
#include <boost/locale/conversion.hpp>
#include <unordered_set>
#include "Logger/Logger.h"
#include "Parser/ParserNode.h"
#include "QueryEngine/DeepCopyVisitor.h"
#include "QueryEngine/Execute.h"
#include "QueryEngine/ScalarExprVisitor.h"
#include "QueryEngine/WindowExpressionRewrite.h"
#include "Shared/sqldefs.h"
#include "StringOps/StringOps.h"
+ Include dependency graph for ExpressionRewrite.cpp:

Go to the source code of this file.

Classes

class  anonymous_namespace{ExpressionRewrite.cpp}::OrToInVisitor
 
class  anonymous_namespace{ExpressionRewrite.cpp}::RecursiveOrToInVisitor
 
class  anonymous_namespace{ExpressionRewrite.cpp}::ArrayElementStringLiteralEncodingVisitor
 
class  anonymous_namespace{ExpressionRewrite.cpp}::ConstantFoldingVisitor
 
class  JoinCoveredQualVisitor
 

Namespaces

 anonymous_namespace{ExpressionRewrite.cpp}
 

Functions

const Analyzer::Expranonymous_namespace{ExpressionRewrite.cpp}::strip_likelihood (const Analyzer::Expr *expr)
 
Analyzer::ExpressionPtr rewrite_array_elements (Analyzer::Expr const *expr)
 
Analyzer::ExpressionPtr rewrite_expr (const Analyzer::Expr *expr)
 
void anonymous_namespace{ExpressionRewrite.cpp}::update_input_to_nest_lv (std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, shared::ColumnKey const &column_key, int target_nest_lv)
 
int anonymous_namespace{ExpressionRewrite.cpp}::update_input_desc (std::vector< InputDescriptor > &input_descs, shared::ColumnKey const &column_key, int target_nest_lv)
 
auto anonymous_namespace{ExpressionRewrite.cpp}::update_input_col_desc (std::list< std::shared_ptr< const InputColDescriptor >> &input_col_desc, shared::ColumnKey const &column_key, int target_nest_lv)
 
BoundingBoxIntersectJoinTranslationResult translate_bounding_box_intersect_with_reordering (const std::shared_ptr< Analyzer::Expr > expr, std::vector< InputDescriptor > &input_descs, std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, std::vector< size_t > &input_permutation, std::list< std::shared_ptr< const InputColDescriptor >> &input_col_desc, const BoundingBoxIntersectJoinRewriteType rewrite_type, Executor const *executor)
 
BoundingBoxIntersectJoinTranslationInfo convert_bbox_intersect_join (JoinQualsPerNestingLevel const &join_quals, std::vector< InputDescriptor > &input_descs, std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, std::vector< size_t > &input_permutation, std::list< std::shared_ptr< const InputColDescriptor >> &input_col_desc, Executor const *executor)
 
std::list< std::shared_ptr
< Analyzer::Expr > > 
strip_join_covered_filter_quals (const std::list< std::shared_ptr< Analyzer::Expr >> &quals, const JoinQualsPerNestingLevel &join_quals)
 
std::shared_ptr< Analyzer::Exprfold_expr (const Analyzer::Expr *expr)
 
bool self_join_not_covered_by_left_deep_tree (const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
 
const int get_max_rte_scan_table (std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
 
size_t get_table_cardinality (shared::TableKey const &table_key, Executor const *executor)
 

Function Documentation

BoundingBoxIntersectJoinTranslationInfo convert_bbox_intersect_join ( JoinQualsPerNestingLevel const &  join_quals,
std::vector< InputDescriptor > &  input_descs,
std::unordered_map< const RelAlgNode *, int > &  input_to_nest_level,
std::vector< size_t > &  input_permutation,
std::list< std::shared_ptr< const InputColDescriptor >> &  input_col_desc,
Executor const *  executor 
)

Definition at line 1217 of file ExpressionRewrite.cpp.

References BBOX_INTERSECT_JOIN, BoundingBoxIntersectJoinTranslationResult::converted_bbox_intersect_join_info, g_enable_bbox_intersect_hashjoin, g_enable_distance_rangejoin, Analyzer::FunctionOper::getName(), BoundingBoxIntersectJoinSupportedFunction::is_bbox_intersect_supported_func(), kLE, kLT, RANGE_JOIN, BoundingBoxIntersectJoinTranslationResult::swap_arguments, translate_bounding_box_intersect_with_reordering(), JoinCondition::type, and UNKNOWN.

Referenced by RelAlgExecutor::createCompoundWorkUnit(), and RelAlgExecutor::createProjectWorkUnit().

1223  {
1224  if (!g_enable_bbox_intersect_hashjoin || join_quals.empty()) {
1225  return {join_quals, false, false};
1226  }
1227 
1228  JoinQualsPerNestingLevel join_condition_per_nesting_level;
1229  bool is_reordered{false};
1230  bool has_bbox_intersect{false};
1231  for (const auto& join_condition_in : join_quals) {
1232  JoinCondition join_condition{{}, join_condition_in.type};
1233 
1234  for (const auto& join_qual_expr_in : join_condition_in.quals) {
1235  bool try_to_rewrite_expr_to_bbox_intersect = false;
1238  auto func_oper = dynamic_cast<Analyzer::FunctionOper*>(join_qual_expr_in.get());
1239  if (func_oper) {
1240  const auto func_name = func_oper->getName();
1242  func_name)) {
1243  try_to_rewrite_expr_to_bbox_intersect = true;
1245  }
1246  }
1247  auto bin_oper = dynamic_cast<Analyzer::BinOper*>(join_qual_expr_in.get());
1248  if (bin_oper && (bin_oper->get_optype() == kLE || bin_oper->get_optype() == kLT)) {
1249  auto lhs =
1250  dynamic_cast<const Analyzer::GeoOperator*>(bin_oper->get_left_operand());
1251  auto rhs = dynamic_cast<const Analyzer::Constant*>(bin_oper->get_right_operand());
1252  if (g_enable_distance_rangejoin && lhs && rhs) {
1253  try_to_rewrite_expr_to_bbox_intersect = true;
1255  }
1256  }
1258  if (try_to_rewrite_expr_to_bbox_intersect) {
1259  translation_res =
1261  input_descs,
1262  input_to_nest_level,
1263  input_permutation,
1264  input_col_desc,
1265  rewrite_type,
1266  executor);
1267  }
1268  if (translation_res.converted_bbox_intersect_join_info) {
1269  const auto& bbox_intersect_quals =
1270  *translation_res.converted_bbox_intersect_join_info;
1271  has_bbox_intersect = true;
1272  // Add qual for bounding box intersection
1273  join_condition.quals.insert(join_condition.quals.end(),
1274  bbox_intersect_quals.join_quals.begin(),
1275  bbox_intersect_quals.join_quals.end());
1276  // Add original quals
1277  join_condition.quals.insert(join_condition.quals.end(),
1278  bbox_intersect_quals.quals.begin(),
1279  bbox_intersect_quals.quals.end());
1280  } else {
1281  join_condition.quals.push_back(join_qual_expr_in);
1282  }
1283  is_reordered |= translation_res.swap_arguments;
1284  }
1285  join_condition_per_nesting_level.push_back(join_condition);
1286  }
1287  return {join_condition_per_nesting_level, has_bbox_intersect, is_reordered};
1288 }
std::optional< BoundingBoxIntersectJoinConjunction > converted_bbox_intersect_join_info
Definition: sqldefs.h:37
std::vector< JoinCondition > JoinQualsPerNestingLevel
BoundingBoxIntersectJoinTranslationResult translate_bounding_box_intersect_with_reordering(const std::shared_ptr< Analyzer::Expr > expr, std::vector< InputDescriptor > &input_descs, std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, std::vector< size_t > &input_permutation, std::list< std::shared_ptr< const InputColDescriptor >> &input_col_desc, const BoundingBoxIntersectJoinRewriteType rewrite_type, Executor const *executor)
bool g_enable_distance_rangejoin
Definition: Execute.cpp:112
static bool is_bbox_intersect_supported_func(std::string_view target_func_name)
bool g_enable_bbox_intersect_hashjoin
Definition: Execute.cpp:109
Definition: sqldefs.h:35
BoundingBoxIntersectJoinRewriteType
std::string getName() const
Definition: Analyzer.h:2744

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr<Analyzer::Expr> fold_expr ( const Analyzer::Expr expr)

Definition at line 1356 of file ExpressionRewrite.cpp.

References kBIGINT, and anonymous_namespace{ExpressionRewrite.cpp}::strip_likelihood().

Referenced by RelAlgExecutor::createFilterWorkUnit(), RelAlgExecutor::makeJoinQuals(), anonymous_namespace{RelAlgExecutor.cpp}::set_transient_dict_maybe(), anonymous_namespace{RelAlgExecutor.cpp}::translate_quals(), anonymous_namespace{RelAlgExecutor.cpp}::translate_scalar_sources(), anonymous_namespace{RelAlgExecutor.cpp}::translate_targets(), RelAlgTranslator::translateBinaryGeoFunction(), RelAlgTranslator::translateDatePlusMinus(), RelAlgTranslator::translateGeoComparison(), and RelAlgTranslator::translateGeoFunctionArg().

1356  {
1357  if (!expr) {
1358  return nullptr;
1359  }
1360  const auto expr_no_likelihood = strip_likelihood(expr);
1361  ConstantFoldingVisitor visitor;
1362  auto rewritten_expr = visitor.visit(expr_no_likelihood);
1363  if (visitor.get_num_overflows() > 0 && rewritten_expr->get_type_info().is_integer() &&
1364  rewritten_expr->get_type_info().get_type() != kBIGINT) {
1365  auto rewritten_expr_const =
1366  std::dynamic_pointer_cast<const Analyzer::Constant>(rewritten_expr);
1367  if (!rewritten_expr_const) {
1368  // Integer expression didn't fold completely the first time due to
1369  // overflows in smaller type subexpressions, trying again with a cast
1370  const auto& ti = SQLTypeInfo(kBIGINT, false);
1371  auto bigint_expr_no_likelihood = expr_no_likelihood->deep_copy()->add_cast(ti);
1372  auto rewritten_expr_take2 = visitor.visit(bigint_expr_no_likelihood.get());
1373  auto rewritten_expr_take2_const =
1374  std::dynamic_pointer_cast<Analyzer::Constant>(rewritten_expr_take2);
1375  if (rewritten_expr_take2_const) {
1376  // Managed to fold, switch to the new constant
1377  rewritten_expr = rewritten_expr_take2_const;
1378  }
1379  }
1380  }
1381  const auto expr_with_likelihood = dynamic_cast<const Analyzer::LikelihoodExpr*>(expr);
1382  if (expr_with_likelihood) {
1383  // Add back likelihood
1384  return std::make_shared<Analyzer::LikelihoodExpr>(
1385  rewritten_expr, expr_with_likelihood->get_likelihood());
1386  }
1387  return rewritten_expr;
1388 }
const Analyzer::Expr * strip_likelihood(const Analyzer::Expr *expr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int get_max_rte_scan_table ( std::unordered_map< int, llvm::Value * > &  scan_idx_to_hash_pos)

Definition at line 1401 of file ExpressionRewrite.cpp.

Referenced by BaselineJoinHashTable::codegenKey(), PerfectJoinHashTable::codegenMatchingSet(), and PerfectJoinHashTable::codegenSlot().

1402  {
1403  int ret = INT32_MIN;
1404  for (auto& kv : scan_idx_to_hash_pos) {
1405  if (kv.first > ret) {
1406  ret = kv.first;
1407  }
1408  }
1409  return ret;
1410 }

+ Here is the caller graph for this function:

size_t get_table_cardinality ( shared::TableKey const &  table_key,
Executor const *  executor 
)

Definition at line 1412 of file ExpressionRewrite.cpp.

References CHECK, Catalog_Namespace::get_metadata_for_table(), get_temporary_table(), and shared::TableKey::table_id.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), and translate_bounding_box_intersect_with_reordering().

1413  {
1414  if (table_key.table_id > 0) {
1415  auto const td = Catalog_Namespace::get_metadata_for_table(table_key);
1416  CHECK(td);
1417  CHECK(td->fragmenter);
1418  return td->fragmenter->getNumRows();
1419  }
1420  auto temp_tbl = get_temporary_table(executor->getTemporaryTables(), table_key.table_id);
1421  return temp_tbl->rowCount();
1422 }
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
Definition: Execute.h:246
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Analyzer::ExpressionPtr rewrite_array_elements ( Analyzer::Expr const *  expr)

Definition at line 775 of file ExpressionRewrite.cpp.

Referenced by anonymous_namespace{RelAlgExecutor.cpp}::translate_scalar_sources(), anonymous_namespace{RelAlgExecutor.cpp}::translate_scalar_sources_for_update(), and anonymous_namespace{RelAlgExecutor.cpp}::translate_targets().

775  {
776  return ArrayElementStringLiteralEncodingVisitor().visit(expr);
777 }

+ Here is the caller graph for this function:

Analyzer::ExpressionPtr rewrite_expr ( const Analyzer::Expr expr)

Definition at line 779 of file ExpressionRewrite.cpp.

References rewrite_avg_window(), rewrite_sum_window(), and anonymous_namespace{ExpressionRewrite.cpp}::strip_likelihood().

Referenced by RelAlgExecutor::createFilterWorkUnit(), qual_to_conjunctive_form(), qual_to_disjunctive_form(), anonymous_namespace{RelAlgExecutor.cpp}::rewrite_quals(), QueryRewriter::rewriteConstrainedByIn(), anonymous_namespace{RelAlgExecutor.cpp}::translate_scalar_sources(), anonymous_namespace{RelAlgExecutor.cpp}::translate_scalar_sources_for_update(), and anonymous_namespace{RelAlgExecutor.cpp}::translate_targets().

779  {
780  const auto sum_window = rewrite_sum_window(expr);
781  if (sum_window) {
782  return sum_window;
783  }
784  const auto avg_window = rewrite_avg_window(expr);
785  if (avg_window) {
786  return avg_window;
787  }
788  const auto expr_no_likelihood = strip_likelihood(expr);
789  // The following check is not strictly needed, but seems silly to transform a
790  // simple string comparison to an IN just to codegen the same thing anyway.
791 
792  RecursiveOrToInVisitor visitor;
793  auto rewritten_expr = visitor.visit(expr_no_likelihood);
794  const auto expr_with_likelihood =
795  std::dynamic_pointer_cast<const Analyzer::LikelihoodExpr>(rewritten_expr);
796  if (expr_with_likelihood) {
797  // Add back likelihood
798  return std::make_shared<Analyzer::LikelihoodExpr>(
799  rewritten_expr, expr_with_likelihood->get_likelihood());
800  }
801  return rewritten_expr;
802 }
std::shared_ptr< Analyzer::WindowFunction > rewrite_avg_window(const Analyzer::Expr *expr)
std::shared_ptr< Analyzer::WindowFunction > rewrite_sum_window(const Analyzer::Expr *expr)
const Analyzer::Expr * strip_likelihood(const Analyzer::Expr *expr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool self_join_not_covered_by_left_deep_tree ( const Analyzer::ColumnVar key_side,
const Analyzer::ColumnVar val_side,
const int  max_rte_covered 
)

Definition at line 1390 of file ExpressionRewrite.cpp.

References Analyzer::ColumnVar::get_rte_idx(), and Analyzer::ColumnVar::getTableKey().

Referenced by BaselineJoinHashTable::codegenKey(), PerfectJoinHashTable::codegenMatchingSet(), and PerfectJoinHashTable::codegenSlot().

1392  {
1393  if (key_side->getTableKey() == val_side->getTableKey() &&
1394  key_side->get_rte_idx() == val_side->get_rte_idx() &&
1395  key_side->get_rte_idx() > max_rte_covered) {
1396  return true;
1397  }
1398  return false;
1399 }
int32_t get_rte_idx() const
Definition: Analyzer.h:202
shared::TableKey getTableKey() const
Definition: Analyzer.h:199

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::list<std::shared_ptr<Analyzer::Expr> > strip_join_covered_filter_quals ( const std::list< std::shared_ptr< Analyzer::Expr >> &  quals,
const JoinQualsPerNestingLevel join_quals 
)

Definition at line 1332 of file ExpressionRewrite.cpp.

References g_strip_join_covered_quals, and ScalarExprVisitor< T >::visit().

Referenced by RelAlgExecutionUnit::createCountAllExecutionUnit().

1334  {
1336  return quals;
1337  }
1338 
1339  if (join_quals.empty()) {
1340  return quals;
1341  }
1342 
1343  std::list<std::shared_ptr<Analyzer::Expr>> quals_to_return;
1344 
1345  JoinCoveredQualVisitor visitor(join_quals);
1346  for (const auto& qual : quals) {
1347  if (!visitor.visit(qual.get())) {
1348  // Not a covered qual, don't elide it from the filtered count
1349  quals_to_return.push_back(qual);
1350  }
1351  }
1352 
1353  return quals_to_return;
1354 }
bool g_strip_join_covered_quals
Definition: Execute.cpp:116

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

BoundingBoxIntersectJoinTranslationResult translate_bounding_box_intersect_with_reordering ( const std::shared_ptr< Analyzer::Expr expr,
std::vector< InputDescriptor > &  input_descs,
std::unordered_map< const RelAlgNode *, int > &  input_to_nest_level,
std::vector< size_t > &  input_permutation,
std::list< std::shared_ptr< const InputColDescriptor >> &  input_col_desc,
const BoundingBoxIntersectJoinRewriteType  rewrite_type,
Executor const *  executor 
)

Definition at line 857 of file ExpressionRewrite.cpp.

References run_benchmark_import::args, BBOX_INTERSECT_JOIN, CHECK, CHECK_EQ, CHECK_GE, Analyzer::Expr::collect_rte_idx(), BoundingBoxIntersectJoinTranslationResult::converted_bbox_intersect_join_info, BoundingBoxIntersectJoinTranslationResult::createEmptyResult(), g_enable_hashjoin_many_to_many, get_table_cardinality(), Analyzer::ColumnVar::getColumnKey(), logger::INFO, is_constructed_point(), BoundingBoxIntersectJoinSupportedFunction::is_many_to_many_func(), BoundingBoxIntersectJoinSupportedFunction::is_range_join_rewrite_target_func(), BoundingBoxIntersectJoinConjunction::join_quals, kARRAY, kBBOX_INTERSECT, kBOOLEAN, kDOUBLE, kGEOGRAPHY, kLE, kONE, kPOINT, LOG, BoundingBoxIntersectJoinConjunction::quals, RANGE_JOIN, run_benchmark_import::res, BoundingBoxIntersectJoinSupportedFunction::ST_DISTANCE_sv, BoundingBoxIntersectJoinSupportedFunction::ST_DWITHIN_POINT_POINT_sv, BoundingBoxIntersectJoinSupportedFunction::ST_INTERSECTSBOX_sv, gpu_enabled::swap(), BoundingBoxIntersectJoinTranslationResult::swap_arguments, anonymous_namespace{ExpressionRewrite.cpp}::update_input_col_desc(), anonymous_namespace{ExpressionRewrite.cpp}::update_input_desc(), anonymous_namespace{ExpressionRewrite.cpp}::update_input_to_nest_lv(), ScalarExprVisitor< T >::visit(), VLOG, and logger::WARNING.

Referenced by convert_bbox_intersect_join().

864  {
865  auto collect_table_cardinality = [&executor](const Analyzer::Expr* lhs,
866  const Analyzer::Expr* rhs) {
867  const auto lhs_cv = dynamic_cast<const Analyzer::ColumnVar*>(lhs);
868  const auto rhs_cv = dynamic_cast<const Analyzer::ColumnVar*>(rhs);
869  if (lhs_cv && rhs_cv) {
870  return std::make_pair<int64_t, int64_t>(
871  get_table_cardinality(lhs_cv->getTableKey(), executor),
872  get_table_cardinality(rhs_cv->getTableKey(), executor));
873  }
874  // otherwise, return an invalid table cardinality
875  return std::make_pair<int64_t, int64_t>(-1, -1);
876  };
877 
878  auto has_invalid_join_col_order = [](const Analyzer::Expr* lhs,
879  const Analyzer::Expr* rhs) {
880  // Check for compatible join ordering. If the join ordering does not match expected
881  // ordering for bounding box intersection, the join builder will fail.
882  std::set<int> lhs_rte_idx;
883  lhs->collect_rte_idx(lhs_rte_idx);
884  std::set<int> rhs_rte_idx;
885  rhs->collect_rte_idx(rhs_rte_idx);
886  auto has_invalid_num_join_cols = lhs_rte_idx.size() != 1 || rhs_rte_idx.size() != 1;
887  auto has_invalid_rte_idx = lhs_rte_idx > rhs_rte_idx;
888  return std::make_pair(has_invalid_num_join_cols || has_invalid_rte_idx,
889  has_invalid_rte_idx);
890  };
891  bool swap_args = false;
892  auto convert_to_range_join_oper =
893  [&](std::string_view func_name,
894  const std::shared_ptr<Analyzer::Expr> expr,
895  const Analyzer::BinOper* range_join_expr,
896  const Analyzer::GeoOperator* lhs,
897  const Analyzer::Constant* rhs) -> std::shared_ptr<Analyzer::BinOper> {
899  func_name)) {
900  CHECK_EQ(lhs->size(), size_t(2));
901  auto l_arg = lhs->getOperand(0);
902  // we try to build a join hash table for bounding box intersection based on the rhs
903  auto r_arg = lhs->getOperand(1);
904  const bool is_geography = l_arg->get_type_info().get_subtype() == kGEOGRAPHY ||
905  r_arg->get_type_info().get_subtype() == kGEOGRAPHY;
906  if (is_geography) {
907  VLOG(1) << "Range join not yet supported for geodesic distance "
908  << expr->toString();
909  return nullptr;
910  }
911  // Check for compatible join ordering. If the join ordering does not match expected
912  // ordering for bounding box intersection, the join builder will fail.
913  Analyzer::Expr* range_join_arg = r_arg;
914  Analyzer::Expr* bin_oper_arg = l_arg;
915  auto invalid_range_join_qual =
916  has_invalid_join_col_order(bin_oper_arg, range_join_arg);
917  if (invalid_range_join_qual.first) {
918  LOG(INFO) << "Unable to rewrite " << func_name
919  << " to exploit bounding box intersection. Cannot build hash table "
920  "over LHS type. "
921  "Check join order.\n"
922  << range_join_expr->toString();
923  return nullptr;
924  }
925  // swapping rule for range join argument
926  // lhs | rhs
927  // 1. pt | pt : swap if |lhs| < |rhs| or has invalid rte values
928  // 2. pt | non-pt : return nullptr
929  // 3. non-pt | pt : return nullptr
930  // 4. non-pt | non-pt : return nullptr
931  // todo (yoonmin) : improve logic for cases 2 and 3
932  bool lhs_is_point{l_arg->get_type_info().get_type() == kPOINT};
933  bool rhs_is_point{r_arg->get_type_info().get_type() == kPOINT};
934  if (!lhs_is_point || !rhs_is_point) {
935  // case 2 ~ 4
936  VLOG(1) << "Currently, we only support range hash join for Point-to-Point "
937  "distance query: fallback to a loop join";
938  return nullptr;
939  }
940  auto const card_info = collect_table_cardinality(range_join_arg, bin_oper_arg);
941  if (invalid_range_join_qual.second && card_info.first > 0 && lhs_is_point) {
942  swap_args = true;
943  } else if (card_info.first >= 0 && card_info.first < card_info.second) {
944  swap_args = true;
945  }
946  if (swap_args) {
947  // todo (yoonmin) : find the best reordering scheme when a query has multiple
948  // range join candidates; it needs a (cost-based) plan enumeration logic
949  // in our optimizer
950  auto r_cv = dynamic_cast<Analyzer::ColumnVar*>(lhs->getOperand(1));
951  auto l_cv = dynamic_cast<Analyzer::ColumnVar*>(lhs->getOperand(0));
952  if (r_cv && l_cv && input_descs.size() == 2) {
953  // to exploit range hash join, we need to assign point geometry to rhs
954  // specifically, we need to propagate the changes made here via various
955  // query metadata such as `input_desc`, `input_col_desc` and so on
956  // otherwise, we do not try swapping join arguments for safety
957  // and we do not try argument swapping if the input query has more two
958  // input tables; but it is enough to cover most of immerse use-cases
959  auto const r_col_key = r_cv->getColumnKey();
960  auto const l_col_key = l_cv->getColumnKey();
961  int r_rte_idx = r_cv->get_rte_idx();
962  int l_rte_idx = l_cv->get_rte_idx();
963  r_cv->set_rte_idx(l_rte_idx);
964  l_cv->set_rte_idx(r_rte_idx);
965  update_input_to_nest_lv(input_to_nest_level, r_col_key, l_rte_idx);
966  update_input_to_nest_lv(input_to_nest_level, l_col_key, r_rte_idx);
967  auto const r_input_desc_idx =
968  update_input_desc(input_descs, r_col_key, l_rte_idx);
969  CHECK_GE(r_input_desc_idx, 0);
970  auto const l_input_desc_idx =
971  update_input_desc(input_descs, l_col_key, r_rte_idx);
972  CHECK_GE(l_input_desc_idx, 0);
973  auto r_input_col_desc_it =
974  update_input_col_desc(input_col_desc, r_col_key, l_rte_idx);
975  CHECK(r_input_col_desc_it != input_col_desc.end());
976  auto l_input_col_desc_it =
977  update_input_col_desc(input_col_desc, l_col_key, r_rte_idx);
978  CHECK(l_input_col_desc_it != input_col_desc.end());
979  if (!input_permutation.empty()) {
980  auto r_itr =
981  std::find(input_permutation.begin(), input_permutation.end(), r_rte_idx);
982  CHECK(r_itr != input_permutation.end());
983  auto l_itr =
984  std::find(input_permutation.begin(), input_permutation.end(), l_rte_idx);
985  CHECK(l_itr != input_permutation.end());
986  std::swap(*r_itr, *l_itr);
987  }
988  std::swap(input_descs[r_input_desc_idx], input_descs[l_input_desc_idx]);
989  std::swap(r_input_col_desc_it, l_input_col_desc_it);
990  r_arg = lhs->getOperand(0);
991  l_arg = lhs->getOperand(1);
992  VLOG(1) << "Swap range join qual's input arguments to exploit hash join "
993  "framework for bounding box intersection";
994  invalid_range_join_qual.first = false;
995  }
996  }
997  const bool inclusive = range_join_expr->get_optype() == kLE;
998  auto range_expr = makeExpr<Analyzer::RangeOper>(
999  inclusive, inclusive, r_arg->deep_copy(), rhs->deep_copy());
1000  VLOG(1) << "Successfully converted to range hash join";
1001  return makeExpr<Analyzer::BinOper>(
1002  kBOOLEAN, kBBOX_INTERSECT, kONE, l_arg->deep_copy(), range_expr);
1003  }
1004  return nullptr;
1005  };
1006 
1007  /*
1008  * Currently, our hash join framework for bounding box intersection (bbox-intersect)
1009  * supports limited set of join quals when 1) the FunctionOperator is listed in the
1010  * function list, i.e., is_bbox_intersect_supported_func, 2) the argument order of the
1011  * join qual must match the input argument order of the corresponding native function,
1012  * and 3) input tables match rte index requirement (the column used to build a hash
1013  * table has larger rte compared with that of probing column). Depending on the type
1014  * of the function, we try to convert it to corresponding hash join qual if possible.
1015  * After rewriting, we create a join operator w/ bbox-intersect which is converted from
1016  * the original expression and return BoundingBoxIntersectJoinConjunction object which
1017  * is a pair of 1) the original expr and 2) converted join expr w/ bbox-intersect. Here,
1018  * returning the original expr means we additionally call its corresponding native
1019  * function to compute the result accurately (i.e., bbox-intersect hash join operates a
1020  * kind of filter expression which may include false-positive of the true resultset).
1021  * Note that ST_IntersectsBox is the only function that does not return the original
1022  * expr.
1023  * */
1024  std::shared_ptr<Analyzer::BinOper> bbox_intersect_oper{nullptr};
1025  bool needs_to_return_original_expr = false;
1026  std::string func_name{""};
1028  auto func_oper = dynamic_cast<Analyzer::FunctionOper*>(expr.get());
1029  CHECK(func_oper);
1030  func_name = func_oper->getName();
1033  LOG(WARNING) << "Many-to-many hashjoin support is disabled, unable to rewrite "
1034  << func_oper->toString() << " to use accelerated geo join.";
1036  }
1037  DeepCopyVisitor deep_copy_visitor;
1039  CHECK_GE(func_oper->getArity(), size_t(2));
1040  // this case returns {empty quals, bbox_intersect join quals} b/c our join key
1041  // matching logic for this case is the same as the implementation of
1042  // ST_IntersectsBox function Note that we can build a join hash table for
1043  // bbox_intersect regardless of table ordering and the argument order in this case
1044  // b/c selecting lhs and rhs by arguments 0 and 1 always match the rte index
1045  // requirement (rte_lhs < rte_rhs) so what table ordering we take, the rte index
1046  // requirement satisfies
1047  // TODO(adb): we will likely want to actually check for true bbox_intersect, but
1048  // this works for now
1049  auto lhs = func_oper->getOwnArg(0);
1050  auto rewritten_lhs = deep_copy_visitor.visit(lhs.get());
1051  CHECK(rewritten_lhs);
1052 
1053  auto rhs = func_oper->getOwnArg(1);
1054  auto rewritten_rhs = deep_copy_visitor.visit(rhs.get());
1055  CHECK(rewritten_rhs);
1056  bbox_intersect_oper = makeExpr<Analyzer::BinOper>(
1057  kBOOLEAN, kBBOX_INTERSECT, kONE, rewritten_lhs, rewritten_rhs);
1058  } else if (func_name ==
1060  CHECK_EQ(func_oper->getArity(), size_t(8));
1061  const auto lhs = func_oper->getOwnArg(0);
1062  const auto rhs = func_oper->getOwnArg(1);
1063  // the correctness of geo args used in the ST_DWithin function is checked by
1064  // geo translation logic, i.e., RelAlgTranslator::translateTernaryGeoFunction
1065  const auto distance_const_val =
1066  dynamic_cast<const Analyzer::Constant*>(func_oper->getArg(7));
1067  if (lhs && rhs && distance_const_val) {
1068  std::vector<std::shared_ptr<Analyzer::Expr>> args{lhs, rhs};
1069  auto range_oper = makeExpr<Analyzer::GeoOperator>(
1070  SQLTypeInfo(kDOUBLE, 0, 8, true),
1072  args,
1073  std::nullopt);
1074  auto distance_oper = makeExpr<Analyzer::BinOper>(
1075  kBOOLEAN, kLE, kONE, range_oper, distance_const_val->deep_copy());
1076  VLOG(1) << "Rewrite " << func_oper->getName() << " to ST_Distance_Point_Point";
1077  bbox_intersect_oper = convert_to_range_join_oper(
1079  distance_oper,
1080  distance_oper.get(),
1081  range_oper.get(),
1082  distance_const_val);
1083  needs_to_return_original_expr = true;
1084  }
1086  is_poly_mpoly_rewrite_target_func(func_name)) {
1087  // in the five functions fall into this case,
1088  // ST_Contains is for a pair of polygons, and for ST_Intersect cases they are
1089  // combo of polygon and multipolygon so what table orders we choose, rte index
1090  // requirement for bbox_intersect can be satisfied if we choose lhs and rhs
1091  // from left-to-right order (i.e., get lhs from the arg-1 instead of arg-3)
1092  // Note that we choose them from right-to-left argument order in the past
1093  CHECK_GE(func_oper->getArity(), size_t(4));
1094  auto lhs = func_oper->getOwnArg(1);
1095  auto rewritten_lhs = deep_copy_visitor.visit(lhs.get());
1096  CHECK(rewritten_lhs);
1097  auto rhs = func_oper->getOwnArg(3);
1098  auto rewritten_rhs = deep_copy_visitor.visit(rhs.get());
1099  CHECK(rewritten_rhs);
1100 
1101  bbox_intersect_oper = makeExpr<Analyzer::BinOper>(
1102  kBOOLEAN, kBBOX_INTERSECT, kONE, rewritten_lhs, rewritten_rhs);
1103  needs_to_return_original_expr = true;
1105  is_point_poly_rewrite_target_func(func_name)) {
1106  // now, we try to look at one more chance to exploit bbox_intersect by
1107  // rewriting the qual as: ST_INTERSECT(POLY, POINT) -> ST_INTERSECT(POINT, POLY)
1108  // to support efficient evaluation of 1) ST_Intersects_Point_Polygon and
1109  // 2) ST_Intersects_Point_MultiPolygon based on our hash join framework w/
1110  // bbox_intersect here, we have implementation of native functions for both 1)
1111  // Point-Polygon pair and 2) Polygon-Point pair, but we currently do not support
1112  // hash table generation on top of point column thus, the goal of this rewriting is
1113  // to place a non-point geometry to the right-side of the bbox_intersect_oper (to
1114  // build hash table based on it) iff the inner table is larger than that of
1115  // non-point geometry (to reduce expensive hash join performance)
1116  size_t point_arg_idx = 0;
1117  size_t poly_arg_idx = 2;
1118  if (func_oper->getOwnArg(point_arg_idx)->get_type_info().get_type() != kPOINT) {
1119  point_arg_idx = 2;
1120  poly_arg_idx = 1;
1121  }
1122  auto point_cv = func_oper->getOwnArg(point_arg_idx);
1123  auto poly_cv = func_oper->getOwnArg(poly_arg_idx);
1124  CHECK_EQ(point_cv->get_type_info().get_type(), kPOINT);
1125  CHECK_EQ(poly_cv->get_type_info().get_type(), kARRAY);
1126  auto rewritten_lhs = deep_copy_visitor.visit(point_cv.get());
1127  CHECK(rewritten_lhs);
1128  auto rewritten_rhs = deep_copy_visitor.visit(poly_cv.get());
1129  CHECK(rewritten_rhs);
1130  VLOG(1) << "Rewriting the " << func_name
1131  << " to use bounding box intersection with lhs as "
1132  << rewritten_lhs->toString() << " and rhs as " << rewritten_rhs->toString();
1133  bbox_intersect_oper = makeExpr<Analyzer::BinOper>(
1134  kBOOLEAN, kBBOX_INTERSECT, kONE, rewritten_lhs, rewritten_rhs);
1135  needs_to_return_original_expr = true;
1137  is_poly_point_rewrite_target_func(func_name)) {
1138  // rest of functions reaching here is poly and point geo join query
1139  // to use bbox_intersect in this case, poly column must have its rte == 1
1140  // lhs is the point col_var
1141  auto lhs = func_oper->getOwnArg(2);
1142  auto rewritten_lhs = deep_copy_visitor.visit(lhs.get());
1143  CHECK(rewritten_lhs);
1144  const auto& lhs_ti = rewritten_lhs->get_type_info();
1145 
1146  if (!lhs_ti.is_geometry() && !is_constructed_point(rewritten_lhs.get())) {
1147  // TODO(adb): If ST_Contains is passed geospatial literals instead of columns,
1148  // the function will be expanded during translation rather than during code
1149  // generation. While this scenario does not make sense for the bbox_intersect, we
1150  // need to detect and abort the bbox_intersect rewrite. Adding a
1151  // GeospatialConstant dervied class to the Analyzer may prove to be a better way
1152  // to handle geo literals, but for now we ensure the LHS type is a geospatial
1153  // type, which would mean the function has not been expanded to the physical
1154  // types, yet.
1155  LOG(INFO) << "Unable to rewrite " << func_name
1156  << " to bounding box intersection conjunction. LHS input type is "
1157  "neither a geospatial "
1158  "column nor a constructed point"
1159  << func_oper->toString();
1161  }
1162 
1163  // rhs is coordinates of the poly col
1164  auto rhs = func_oper->getOwnArg(1);
1165  auto rewritten_rhs = deep_copy_visitor.visit(rhs.get());
1166  CHECK(rewritten_rhs);
1167 
1168  if (has_invalid_join_col_order(lhs.get(), rhs.get()).first) {
1169  LOG(INFO) << "Unable to rewrite " << func_name
1170  << " to bounding box intersection conjunction. Cannot build hash table "
1171  "over LHS type. "
1172  "Check join order."
1173  << func_oper->toString();
1175  }
1176 
1177  VLOG(1) << "Rewriting " << func_name
1178  << " to use bounding box intersection with lhs as "
1179  << rewritten_lhs->toString() << " and rhs as " << rewritten_rhs->toString();
1180 
1181  bbox_intersect_oper = makeExpr<Analyzer::BinOper>(
1182  kBOOLEAN, kBBOX_INTERSECT, kONE, rewritten_lhs, rewritten_rhs);
1184  ST_APPROX_OVERLAPS_MULTIPOLYGON_POINT_sv) {
1185  needs_to_return_original_expr = true;
1186  }
1187  }
1188  } else if (rewrite_type == BoundingBoxIntersectJoinRewriteType::RANGE_JOIN) {
1189  auto bin_oper = dynamic_cast<Analyzer::BinOper*>(expr.get());
1190  CHECK(bin_oper);
1191  auto lhs = dynamic_cast<const Analyzer::GeoOperator*>(bin_oper->get_left_operand());
1192  CHECK(lhs);
1193  auto rhs = dynamic_cast<const Analyzer::Constant*>(bin_oper->get_right_operand());
1194  CHECK(rhs);
1195  func_name = lhs->getName();
1196  bbox_intersect_oper = convert_to_range_join_oper(func_name, expr, bin_oper, lhs, rhs);
1197  needs_to_return_original_expr = true;
1198  }
1199  const auto expr_str = !func_name.empty() ? func_name : expr->toString();
1200  if (bbox_intersect_oper) {
1202  res.swap_arguments = swap_args;
1203  BoundingBoxIntersectJoinConjunction bbox_intersect_join_qual;
1204  bbox_intersect_join_qual.join_quals.push_back(bbox_intersect_oper);
1205  if (needs_to_return_original_expr) {
1206  bbox_intersect_join_qual.quals.push_back(expr);
1207  }
1208  res.converted_bbox_intersect_join_info = bbox_intersect_join_qual;
1209  VLOG(1) << "Successfully converted " << expr_str
1210  << " to use bounding box intersection";
1211  return res;
1212  }
1213  VLOG(1) << "Bounding box intersection not enabled for " << expr_str;
1215 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::optional< BoundingBoxIntersectJoinConjunction > converted_bbox_intersect_join_info
std::list< std::shared_ptr< Analyzer::Expr > > join_quals
static constexpr std::string_view ST_DISTANCE_sv
#define LOG(tag)
Definition: Logger.h:285
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1682
Definition: sqldefs.h:37
size_t get_table_cardinality(shared::TableKey const &table_key, Executor const *executor)
static bool is_range_join_rewrite_target_func(std::string_view target_func_name)
#define CHECK_GE(x, y)
Definition: Logger.h:306
T visit(const Analyzer::Expr *expr) const
static BoundingBoxIntersectJoinTranslationResult createEmptyResult()
std::list< std::shared_ptr< Analyzer::Expr > > quals
static constexpr std::string_view ST_INTERSECTSBOX_sv
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:113
void update_input_to_nest_lv(std::unordered_map< const RelAlgNode *, int > &input_to_nest_level, shared::ColumnKey const &column_key, int target_nest_lv)
Definition: sqldefs.h:74
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
auto update_input_col_desc(std::list< std::shared_ptr< const InputColDescriptor >> &input_col_desc, shared::ColumnKey const &column_key, int target_nest_lv)
#define CHECK(condition)
Definition: Logger.h:291
int update_input_desc(std::vector< InputDescriptor > &input_descs, shared::ColumnKey const &column_key, int target_nest_lv)
static constexpr std::string_view ST_DWITHIN_POINT_POINT_sv
virtual void collect_rte_idx(std::set< int > &rte_idx_set) const
Definition: Analyzer.h:110
static bool is_many_to_many_func(std::string_view target_func_name)
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function: