19 #include <llvm/IR/Value.h>
41 : std::runtime_error(
"The size of hash table is larger than a threshold (" +
42 ::
toString(cur_hash_table_size) +
" > " +
49 : std::runtime_error(
"Hash tables with more than 4B entries not supported yet") {}
57 : std::runtime_error(
"Hash join failed: Table '" + table_name +
58 "' must be replicated.") {}
65 constexpr
char const* strings[]{
"IGNORE",
"UNKNOWN",
"LHS",
"RHS"};
66 return os << strings[static_cast<int>(decision)];
88 :
HashJoinFail(
"Not enough memory for columns involved in join") {}
100 "Could not create hash table for bounding box intersection with less than "
101 "max allowed size of " +
102 std::
to_string(bbox_intersect_hash_table_max_bytes) +
" bytes") {}
105 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
107 std::vector<StringOps_Namespace::StringOpInfo>>;
116 void setBucketInfo(
const std::vector<double>& bucket_sizes_for_dimension,
117 const std::vector<InnerOuter> inner_outer_pairs);
139 const int device_id = 0,
140 bool raw =
false)
const = 0;
143 const int device_id)
const;
146 const int device_id)
const;
149 const int device_id)
const = 0;
163 const Executor* executor,
164 size_t rowid_size) noexcept;
169 std::ostringstream oss;
170 oss <<
"Hash tables with more than " << threshold
171 <<
" entries (# hash entries: " << num_entries <<
") on "
172 <<
::toString(memory_level) <<
" not supported yet";
181 const char* HashTypeStrings[3] = {
"OneToOne",
"OneToMany",
"ManyToMany"};
182 return HashTypeStrings[
static_cast<int>(ht)];
186 const std::vector<llvm::Value*>& hash_join_idx_args_in,
187 const bool is_sharded,
188 const bool col_is_nullable,
190 const int64_t sub_buff_size,
192 const bool is_bucketized =
false);
211 const Analyzer::ColumnVar* hash_col,
212 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
213 const Data_Namespace::
MemoryLevel effective_memory_level,
215 std::vector<std::shared_ptr<Chunk_NS::
Chunk>>& chunks_owner,
217 std::vector<std::shared_ptr<
void>>& malloc_owner,
223 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
228 const
int device_count,
237 std::string_view table1,
238 std::string_view column1,
239 const Catalog_Namespace::Catalog& catalog1,
240 std::string_view table2,
241 std::string_view column2,
242 const Catalog_Namespace::Catalog& catalog2,
245 const
int device_count,
251 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
254 const
int device_count,
259 std::vector<std::shared_ptr<Analyzer::BinOper>>,
262 const
int device_count,
267 const std::vector<
InnerOuter>& inner_outer_pairs) {
268 CHECK(!inner_outer_pairs.empty());
269 const auto first_inner_col = inner_outer_pairs.front().first;
270 return first_inner_col->getTableKey();
274 bool invalid_cache_key,
278 const size_t shard_count,
279 const Executor* executor);
286 const bool is_bbox_intersect =
false);
288 template <
typename T>
292 static std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
307 const int device_id)
const {
312 return hash_table->getHashTableBufferSize(device_type);
316 const int device_id)
const {
326 return hash_table->getCpuBuffer();
329 const auto gpu_buff = hash_table->getGpuBuffer();
334 return hash_table->getCpuBuffer();
339 auto empty_hash_tables =
345 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments);
348 const std::vector<InnerOuter>& inner_outer_pairs,
349 const Executor* executor,
350 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs = {});
352 static std::vector<const StringDictionaryProxy::IdMap*>
355 const std::vector<InnerOuterStringOpInfos>& string_op_infos_for_keys,
356 const Executor* executor);
358 static std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
360 const Executor* executor,
361 const bool has_string_ops);
367 const Executor* executor);
372 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
385 std::ostream& operator<<(std::ostream& os,
387 std::ostream& operator<<(
397 std::string_view table,
398 std::string_view column,
400 const Catalog_Namespace::Catalog& catalog);
402 size_t get_shard_count(const Analyzer::BinOper* join_condition, const Executor* executor);
405 std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*> equi_pair,
406 const Executor* executor);
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
static std::shared_ptr< HashJoin > getSyntheticInstance(std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
Defines data structures for the semantic analysis phase of query processing.
virtual int getInnerTableRteIdx() const noexcept=0
virtual size_t payloadBufferOff() const noexcept=0
virtual std::string getHashJoinType() const =0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
FailedToJoinOnVirtualColumn()
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
virtual HashType getHashType() const noexcept=0
std::vector< ChunkKey > cache_key_chunks
std::vector< const void * > sd_inner_proxy_per_key
virtual int getDeviceCount() const noexcept=0
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
static void checkHashJoinReplicationConstraint(const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
void setBucketInfo(const std::vector< double > &bucket_sizes_for_dimension, const std::vector< InnerOuter > inner_outer_pairs)
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept=0
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
virtual llvm::Value * codegenSlot(const CompilationOptions &, const size_t)=0
TableMustBeReplicated(const std::string &table_name)
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
void freeHashBufferMemory()
virtual size_t offsetBufferOff() const noexcept=0
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t countBufferOff() const noexcept=0
const std::vector< JoinColumnTypeInfo > join_column_types
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::vector< void * > sd_outer_proxy_per_key
HashJoinFail(const std::string &err_msg, InnerQualDecision qual_decision)
static size_t getMaximumNumHashEntriesCanHold(MemoryLevel memory_level, const Executor *executor, size_t rowid_size) noexcept
static std::string generateTooManyHashEntriesErrMsg(size_t num_entries, size_t threshold, MemoryLevel memory_level)
static constexpr size_t MAX_NUM_HASH_ENTRIES
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
JoinHashTableTooBig(size_t cur_hash_table_size, size_t threshold_size)
std::string toString(const Executor::ExtModuleKinds &kind)
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t getComponentBufferSize() const noexcept=0
const std::vector< std::shared_ptr< Chunk_NS::Chunk > > chunks_owner
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
HashTable * getHashTableForDevice(const size_t device_id) const
virtual shared::TableKey getInnerTableId() const noexcept=0
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
TooManyHashEntries(const std::string &reason)
static std::string getHashTypeString(HashType ht) noexcept
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
HashJoinFail(const std::string &err_msg)
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
TooBigHashTableForBoundingBoxIntersect(const size_t bbox_intersect_hash_table_max_bytes)
std::pair< std::vector< StringOps_Namespace::StringOpInfo >, std::vector< StringOps_Namespace::StringOpInfo >> InnerOuterStringOpInfos
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_bbox_intersect=false)
static const T * getHashJoinColumn(const Analyzer::Expr *expr)
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
const std::vector< std::shared_ptr< void > > malloc_owner
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
std::vector< JoinBucketInfo > join_buckets
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
InnerQualDecision inner_qual_decision
const std::vector< JoinColumn > join_columns
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
virtual bool isBitwiseEq() const =0
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})