OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashTable.h
Go to the documentation of this file.
/*
 * Copyright 2022 HEAVY.AI, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

//! Hash table layout kind. This is the type returned by
//! HashTableEntryInfo::getHashTableLayout() and HashTable::getLayout().
//! The underlying type is fixed to int; enumerators take the implicit
//! values 0, 1 and 2 in declaration order.
enum class HashType : int { OneToOne, OneToMany, ManyToMany };

//! One decoded entry of a join hash table buffer: a key made of 64-bit
//! components plus the set of 32-bit payload values associated with it.
//! NOTE(review): the payloads appear to be row ids, judging by the `ptr4`
//! parameter comment on HashTable::toString -- confirm against the decoder.
struct DecodedJoinHashBufferEntry {
  std::vector<int64_t> key;
  std::set<int32_t> payload;

  //! Strict weak ordering: compares `key` first and falls back to `payload`
  //! when the keys are equal. This is what allows entries to be stored in an
  //! ordered container such as std::set.
  bool operator<(const DecodedJoinHashBufferEntry& other) const {
    return std::tie(key, payload) < std::tie(other.key, other.payload);
  }

  //! Entries are equal only when both `key` and `payload` are equal.
  bool operator==(const DecodedJoinHashBufferEntry& other) const {
    return key == other.key && payload == other.payload;
  }
};

35  public:
36  HashTableEntryInfo(size_t num_hash_entries,
37  size_t num_keys,
38  size_t rowid_size_in_bytes,
39  HashType layout,
40  bool for_window_framing = false)
41  : num_hash_entries_(num_hash_entries)
42  , num_keys_(num_keys)
43  , rowid_size_in_bytes_(rowid_size_in_bytes)
44  , layout_(layout)
45  , for_window_framing_(for_window_framing) {}
46 
47  virtual size_t computeTotalNumSlots() const = 0;
48  virtual size_t computeHashTableSize() const = 0;
49 
50  size_t getNumHashEntries() const { return num_hash_entries_; }
51  size_t getNumKeys() const { return num_keys_; }
52  size_t getRowIdSizeInBytes() const { return rowid_size_in_bytes_; }
53  HashType getHashTableLayout() const { return layout_; }
54  void setNumHashEntries(size_t num_hash_entries) {
55  num_hash_entries_ = num_hash_entries;
56  }
57  void setNumKeys(size_t num_keys) { num_keys_ = num_keys; }
58  void setRowIdSizeInBytes(size_t rowid_size_in_bytes) {
59  rowid_size_in_bytes_ = rowid_size_in_bytes;
60  }
61  void setHashTableLayout(HashType layout) { layout_ = layout; }
62  bool forWindowFraming() const { return for_window_framing_; }
63 
64  protected:
66  size_t num_keys_;
70 };
71 
72 using DecodedJoinHashBufferSet = std::set<DecodedJoinHashBufferEntry>;
73 
74 class HashTable {
75  public:
76  virtual ~HashTable() {}
77 
78  virtual size_t getHashTableBufferSize(const ExecutorDeviceType device_type) const = 0;
79 
80  virtual int8_t* getCpuBuffer() = 0;
81  virtual int8_t* getGpuBuffer() const = 0;
82  virtual HashType getLayout() const = 0;
83 
84  virtual size_t getEntryCount() const = 0;
85  virtual size_t getEmittedKeysCount() const = 0;
86  virtual size_t getRowIdSize() const = 0;
87 
90  size_t key_component_count, // number of key parts
91  size_t key_component_width, // width of a key part
92  size_t entry_count, // number of hashable entries
93  const int8_t* ptr1, // hash entries
94  const int8_t* ptr2, // offsets
95  const int8_t* ptr3, // counts
96  const int8_t* ptr4, // payloads (rowids)
97  size_t buffer_size);
98 
100  static std::string toString(
101  const std::string& type, // perfect, keyed, or geo
102  const std::string& layout_type, // one-to-one, one-to-many, many-to-many
103  size_t key_component_count, // number of key parts
104  size_t key_component_width, // width of a key part
105  size_t entry_count, // number of hashable entries
106  const int8_t* ptr1, // hash entries
107  const int8_t* ptr2, // offsets
108  const int8_t* ptr3, // counts
109  const int8_t* ptr4, // payloads (rowids)
110  size_t buffer_size,
111  bool raw = false);
112 };
virtual size_t computeHashTableSize() const =0
virtual int8_t * getCpuBuffer()=0
size_t num_hash_entries_
Definition: HashTable.h:65
void setRowIdSizeInBytes(size_t rowid_size_in_bytes)
Definition: HashTable.h:58
virtual ~HashTable()
Definition: HashTable.h:76
virtual size_t getRowIdSize() const =0
size_t num_keys_
Definition: HashTable.h:66
DecodedJoinHashBufferEntry
Definition: HashTable.h:21
virtual size_t getEntryCount() const =0
virtual int8_t * getGpuBuffer() const =0
ExecutorDeviceType
bool forWindowFraming() const
Definition: HashTable.h:62
bool for_window_framing_
Definition: HashTable.h:69
virtual size_t getHashTableBufferSize(const ExecutorDeviceType device_type) const =0
HashTableEntryInfo(size_t num_hash_entries, size_t num_keys, size_t rowid_size_in_bytes, HashType layout, bool for_window_framing=false)
Definition: HashTable.h:36
HashType getHashTableLayout() const
Definition: HashTable.h:53
virtual HashType getLayout() const =0
size_t getNumHashEntries() const
Definition: HashTable.h:50
virtual size_t getEmittedKeysCount() const =0
HashType layout_
Definition: HashTable.h:68
size_t getNumKeys() const
Definition: HashTable.h:51
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
Definition: HashTable.h:72
static std::string toString(const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
Decode hash table into a human-readable string.
Definition: HashTable.cpp:226
void setHashTableLayout(HashType layout)
Definition: HashTable.h:61
virtual size_t computeTotalNumSlots() const =0
HashTableEntryInfo
Definition: HashTable.h:34
bool operator==(const DecodedJoinHashBufferEntry &other) const
Definition: HashTable.h:29
std::set< int32_t > payload
Definition: HashTable.h:23
void setNumHashEntries(size_t num_hash_entries)
Definition: HashTable.h:54
static DecodedJoinHashBufferSet toSet(size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
Decode hash table into a std::set for easy inspection and validation.
Definition: HashTable.cpp:139
void setNumKeys(size_t num_keys)
Definition: HashTable.h:57
std::vector< int64_t > key
Definition: HashTable.h:22
size_t getRowIdSizeInBytes() const
Definition: HashTable.h:52
size_t rowid_size_in_bytes_
Definition: HashTable.h:67
HashType
Definition: HashTable.h:19
bool operator<(const DecodedJoinHashBufferEntry &other) const
Definition: HashTable.h:25