OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashJoin.cpp File Reference
+ Include dependency graph for HashJoin.cpp:

Go to the source code of this file.

Classes

class  AllColumnVarsVisitor
 

Namespaces

 anonymous_namespace{HashJoin.cpp}
 

Functions

template<typename T >
std::string anonymous_namespace{HashJoin.cpp}::toStringFlat (const HashJoin *hash_table, const ExecutorDeviceType device_type, const int device_id)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferEntry &e)
 
std::ostream & operator<< (std::ostream &os, const DecodedJoinHashBufferSet &s)
 
std::ostream & operator<< (std::ostream &os, const InnerOuterStringOpInfos &inner_outer_string_op_infos)
 
std::string toString (const InnerOuterStringOpInfos &inner_outer_string_op_infos)
 
std::ostream & operator<< (std::ostream &os, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs)
 
std::string toString (const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs)
 
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar (std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
 
void setupSyntheticCaching (std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
 
std::vector< InputTableInfo > getSyntheticInputTableInfo (std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
 
InnerOuter anonymous_namespace{HashJoin.cpp}::get_cols (const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
 
size_t get_shard_count (const Analyzer::BinOper *join_condition, const Executor *executor)
 

Variables

bool g_enable_bbox_intersect_hashjoin
 
size_t g_num_tuple_threshold_switch_to_baseline
 
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
 

Function Documentation

size_t get_shard_count ( const Analyzer::BinOper *  join_condition,
const Executor *  executor 
)

Definition at line 1084 of file HashJoin.cpp.

References anonymous_namespace{HashJoin.cpp}::get_cols(), and get_shard_count().

Referenced by get_shard_count(), BaselineJoinHashTable::getShardCountForCondition(), PerfectJoinHashTable::reify(), PerfectJoinHashTable::shardCount(), and Executor::skipFragmentPair().

1085  {
1086  const Analyzer::ColumnVar* inner_col{nullptr};
1087  const Analyzer::Expr* outer_col{nullptr};
1088  std::shared_ptr<Analyzer::BinOper> redirected_bin_oper;
1089  try {
1090  std::tie(inner_col, outer_col) =
1091  get_cols(join_condition, executor->getTemporaryTables());
1092  } catch (...) {
1093  return 0;
1094  }
1095  if (!inner_col || !outer_col) {
1096  return 0;
1097  }
1098  return get_shard_count({inner_col, outer_col}, executor);
1099 }
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1075
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:1084

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar ( std::string_view  table,
std::string_view  column,
int  rte_idx,
const Catalog_Namespace::Catalog &  catalog 
)

Definition at line 580 of file HashJoin.cpp.

References CHECK, Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getMetadataForColumn(), Catalog_Namespace::Catalog::getMetadataForTable(), kLINESTRING, kMULTIPOLYGON, kPOINT, and kPOLYGON.

Referenced by HashJoin::getSyntheticInstance().

584  {
585  auto tmeta = catalog.getMetadataForTable(std::string(table));
586  CHECK(tmeta);
587 
588  auto cmeta = catalog.getMetadataForColumn(tmeta->tableId, std::string(column));
589  CHECK(cmeta);
590 
591  auto ti = cmeta->columnType;
592 
593  if (ti.is_geometry() && ti.get_type() != kPOINT) {
594  int geoColumnId{0};
595  switch (ti.get_type()) {
596  case kLINESTRING: {
597  geoColumnId = cmeta->columnId + 2;
598  break;
599  }
600  case kPOLYGON: {
601  geoColumnId = cmeta->columnId + 3;
602  break;
603  }
604  case kMULTIPOLYGON: {
605  geoColumnId = cmeta->columnId + 4;
606  break;
607  }
608  default:
609  CHECK(false);
610  }
611  cmeta = catalog.getMetadataForColumn(tmeta->tableId, geoColumnId);
612  CHECK(cmeta);
613  ti = cmeta->columnType;
614  }
615 
616  auto cv = std::make_shared<Analyzer::ColumnVar>(
617  ti,
618  shared::ColumnKey{catalog.getDatabaseId(), tmeta->tableId, cmeta->columnId},
619  rte_idx);
620  return cv;
621 }
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
int getDatabaseId() const
Definition: Catalog.h:326
#define CHECK(condition)
Definition: Logger.h:291
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<InputTableInfo> getSyntheticInputTableInfo ( std::set< const Analyzer::ColumnVar * >  cvs,
Executor *  executor 
)

Definition at line 667 of file HashJoin.cpp.

References CHECK, and Catalog_Namespace::get_metadata_for_table().

Referenced by HashJoin::getSyntheticInstance().

669  {
670  std::unordered_set<shared::TableKey> phys_table_ids;
671  for (auto cv : cvs) {
672  phys_table_ids.insert(cv->getTableKey());
673  }
674 
675  // NOTE(sy): This vector ordering seems to work for now, but maybe we need to
676  // review how rte_idx is assigned for ColumnVars. See for example Analyzer.h
677  // and RelAlgExecutor.cpp and rte_idx there.
678  std::vector<InputTableInfo> query_infos;
679  query_infos.reserve(phys_table_ids.size());
680  for (const auto& table_key : phys_table_ids) {
681  auto td = Catalog_Namespace::get_metadata_for_table(table_key);
682  CHECK(td);
683  query_infos.push_back({table_key, td->fragmenter->getFragmentsForQuery()});
684  }
685 
686  return query_infos;
687 }
const TableDescriptor * get_metadata_for_table(const ::shared::TableKey &table_key, bool populate_fragmenter)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferEntry &  e 
)

Definition at line 128 of file HashJoin.cpp.

References DecodedJoinHashBufferEntry::key, and DecodedJoinHashBufferEntry::payload.

128  {
129  os << " {{";
130  bool first = true;
131  for (auto k : e.key) {
132  if (!first) {
133  os << ",";
134  } else {
135  first = false;
136  }
137  os << k;
138  }
139  os << "}, ";
140  os << "{";
141  first = true;
142  for (auto p : e.payload) {
143  if (!first) {
144  os << ", ";
145  } else {
146  first = false;
147  }
148  os << p;
149  }
150  os << "}}";
151  return os;
152 }
std::set< int32_t > payload
Definition: HashTable.h:23
std::vector< int64_t > key
Definition: HashTable.h:22
std::ostream& operator<< ( std::ostream &  os,
const DecodedJoinHashBufferSet &  s 
)

Definition at line 154 of file HashJoin.cpp.

154  {
155  os << "{\n";
156  bool first = true;
157  for (auto e : s) {
158  if (!first) {
159  os << ",\n";
160  } else {
161  first = false;
162  }
163  os << e;
164  }
165  if (!s.empty()) {
166  os << "\n";
167  }
168  os << "}\n";
169  return os;
170 }
std::ostream& operator<< ( std::ostream &  os,
const InnerOuterStringOpInfos &  inner_outer_string_op_infos 
)

Definition at line 172 of file HashJoin.cpp.

173  {
174  os << "(" << inner_outer_string_op_infos.first << ", "
175  << inner_outer_string_op_infos.second << ")";
176  return os;
177 }
std::ostream& operator<< ( std::ostream &  os,
const std::vector< InnerOuterStringOpInfos > &  inner_outer_string_op_infos_pairs 
)

Definition at line 185 of file HashJoin.cpp.

187  {
188  os << "[";
189  bool first_elem = true;
190  for (const auto& inner_outer_string_op_infos : inner_outer_string_op_infos_pairs) {
191  if (!first_elem) {
192  os << ", ";
193  }
194  first_elem = false;
195  os << inner_outer_string_op_infos;
196  }
197  os << "]";
198  return os;
199 }
void setupSyntheticCaching ( std::set< const Analyzer::ColumnVar * >  cvs,
Executor *  executor 
)

Definition at line 651 of file HashJoin.cpp.

References PhysicalInput::table_id.

Referenced by HashJoin::getSyntheticInstance().

651  {
652  std::unordered_set<shared::TableKey> phys_table_ids;
653  for (auto cv : cvs) {
654  phys_table_ids.insert(cv->getTableKey());
655  }
656 
657  std::unordered_set<PhysicalInput> phys_inputs;
658  for (auto cv : cvs) {
659  const auto& column_key = cv->getColumnKey();
660  phys_inputs.emplace(
661  PhysicalInput{column_key.column_id, column_key.table_id, column_key.db_id});
662  }
663 
664  executor->setupCaching(phys_inputs, phys_table_ids);
665 }

+ Here is the caller graph for this function:

std::string toString ( const InnerOuterStringOpInfos &  inner_outer_string_op_infos)

Definition at line 179 of file HashJoin.cpp.

179  {
180  std::ostringstream os;
181  os << inner_outer_string_op_infos;
182  return os.str();
183 }
std::string toString ( const std::vector< InnerOuterStringOpInfos > &  inner_outer_string_op_infos_pairs)

Definition at line 201 of file HashJoin.cpp.

202  {
203  std::ostringstream os;
204  os << inner_outer_string_op_infos_pairs;
205  return os.str();
206 }

Variable Documentation

bool g_enable_bbox_intersect_hashjoin

Definition at line 109 of file Execute.cpp.

size_t g_num_tuple_threshold_switch_to_baseline

Definition at line 110 of file Execute.cpp.

size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline

Definition at line 111 of file Execute.cpp.