OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PerfectHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <memory>
20 #include <vector>
21 
25 
27  public:
// Constructs the entry-info descriptor used by the perfect hash table.
// Every argument is handed through unchanged, in the same order, to the
// HashTableEntryInfo constructor; the constructor body itself is empty, so
// no additional state is set up here. The meaning of each argument is
// defined by HashTableEntryInfo (declared in HashTable.h, not shown here).
// for_window_framing is optional and defaults to false.
28  PerfectHashTableEntryInfo(size_t num_hash_entries,
29  size_t num_keys,
30  size_t rowid_size_in_bytes,
31  HashType layout,
32  bool for_window_framing = false)
33  : HashTableEntryInfo(num_hash_entries,
34  num_keys,
35  rowid_size_in_bytes,
36  layout,
37  for_window_framing) {}
38 
39  size_t computeTotalNumSlots() const override {
40  return layout_ == HashType::OneToOne
43  }
44 
45  size_t computeHashTableSize() const override {
47  }
48 };
49 
50 class PerfectHashTable : public HashTable {
51  public:
52  // CPU + GPU constructor
54  PerfectHashTableEntryInfo hash_table_entry_info,
55  Data_Namespace::DataMgr* data_mgr = nullptr,
56  const int device_id = -1)
57  : hash_table_entry_info_(hash_table_entry_info)
58  , data_mgr_(data_mgr)
59  , device_id_(device_id) {
60  if (device_type == ExecutorDeviceType::CPU) {
62  new int32_t[hash_table_entry_info.computeTotalNumSlots()]);
63  printInitLog(device_type);
64  }
65  }
66 
67  ~PerfectHashTable() override {
68 #ifdef HAVE_CUDA
72  }
73 #endif
74  }
75 
76  void allocateGpuMemory(const size_t num_entries) {
77  CHECK_GE(device_id_, 0);
80  auto const buf_size = num_entries * hash_table_entry_info_.getRowIdSizeInBytes();
84  VLOG(1) << "Allocate " << buf_size
85  << " bytes buffer on GPU to keep the hash table copied from CPU";
86  } else {
88  }
89  }
90 
91  size_t getHashTableBufferSize(const ExecutorDeviceType device_type) const override {
92  if (device_type == ExecutorDeviceType::CPU) {
94  } else {
96  }
97  }
98 
99  HashType getLayout() const override {
101  }
102 
// Returns the CPU-side hash table storage as a raw byte pointer.
// cpu_hash_table_buff_ is a std::unique_ptr<int32_t[]>; the pointer obtained
// from get() is only reinterpreted as int8_t*, so ownership stays with the
// member and callers must not free the result. The returned pointer is null
// while cpu_hash_table_buff_ holds no allocation (it is initialized to nullptr).
103  int8_t* getCpuBuffer() override {
104  return reinterpret_cast<int8_t*>(cpu_hash_table_buff_.get());
105  }
106 
107  int8_t* getGpuBuffer() const override {
109  }
110 
111  size_t getEntryCount() const override {
113  }
114 
115  size_t getEmittedKeysCount() const override {
117  }
118 
119  size_t getRowIdSize() const override {
121  }
122 
// Overwrites the stored hash_entry_info_ member by assigning hash_entry_info
// to it.
// NOTE(review): the argument is only read in this body, so a const reference
// would likely suffice — confirm against callers before changing the signature.
123  void setHashEntryInfo(BucketizedHashEntryInfo& hash_entry_info) {
124  hash_entry_info_ = hash_entry_info;
125  }
126 
// Stores elem in the column_num_elems_ member. The value is recorded as
// given; no validation or other side effect happens here.
127  void setColumnNumElems(size_t elem) {
128  column_num_elems_ = elem;
129  }
130 
132  return hash_entry_info_;
133  }
134 
// Returns the current value of the column_num_elems_ member (the value
// written by setColumnNumElems()). The member's initial value is declared
// outside the lines visible here.
135  size_t getColumnNumElems() const {
136  return column_num_elems_;
137  }
138 
140  return hash_table_entry_info_;
141  }
142 
143  void printInitLog(ExecutorDeviceType device_type) {
144  std::string device_str = device_type == ExecutorDeviceType::CPU ? "CPU" : "GPU";
145  std::string layout_str =
147  : "OneToMany";
148  std::ostringstream oss;
149  oss << "Initialize a " << device_type << " perfect join hash table";
150  if (device_type == ExecutorDeviceType::GPU) {
151  oss << " for device " << device_id_;
152  }
153  oss << ", join type " << layout_str
154  << ", # hash entries: " << hash_table_entry_info_.getNumHashEntries()
155  << ", # entries stored in the payload buffer: "
157  << ", hash table size : " << hash_table_entry_info_.computeHashTableSize()
158  << " Bytes";
159  VLOG(1) << oss.str();
160  }
161 
162  private:
164  std::unique_ptr<int32_t[]> cpu_hash_table_buff_{nullptr};
170 };
BucketizedHashEntryInfo getHashEntryInfo() const
size_t num_hash_entries_
Definition: HashTable.h:65
PerfectHashTable(const ExecutorDeviceType device_type, PerfectHashTableEntryInfo hash_table_entry_info, Data_Namespace::DataMgr *data_mgr=nullptr, const int device_id=-1)
int8_t * getGpuBuffer() const override
int8_t * getCpuBuffer() override
HashType getLayout() const override
virtual int8_t * getMemoryPtr()=0
std::unique_ptr< int32_t[]> cpu_hash_table_buff_
size_t num_keys_
Definition: HashTable.h:66
size_t getHashTableBufferSize(const ExecutorDeviceType device_type) const override
#define CHECK_GE(x, y)
Definition: Logger.h:306
PerfectHashTableEntryInfo getHashTableEntryInfo() const
virtual size_t reservedSize() const =0
ExecutorDeviceType
Data_Namespace::AbstractBuffer * gpu_hash_table_buff_
bool for_window_framing_
Definition: HashTable.h:69
size_t getEntryCount() const override
size_t getEmittedKeysCount() const override
PerfectHashTableEntryInfo(size_t num_hash_entries, size_t num_keys, size_t rowid_size_in_bytes, HashType layout, bool for_window_framing=false)
HashType getHashTableLayout() const
Definition: HashTable.h:53
size_t getNumHashEntries() const
Definition: HashTable.h:50
PerfectHashTableEntryInfo hash_table_entry_info_
void setColumnNumElems(size_t elem)
An AbstractBuffer is a unit of data management for a data manager.
void setHashEntryInfo(BucketizedHashEntryInfo &hash_entry_info)
size_t computeTotalNumSlots() const override
HashType layout_
Definition: HashTable.h:68
size_t getNumKeys() const
Definition: HashTable.h:51
BucketizedHashEntryInfo hash_entry_info_
size_t computeHashTableSize() const override
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
Definition: HashTable.h:34
void allocateGpuMemory(const size_t num_entries)
#define CHECK(condition)
Definition: Logger.h:291
void printInitLog(ExecutorDeviceType device_type)
~PerfectHashTable() override
size_t getColumnNumElems() const
size_t getRowIdSize() const override
Allocate GPU memory using GpuBuffers via DataMgr.
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:614
size_t getRowIdSizeInBytes() const
Definition: HashTable.h:52
size_t rowid_size_in_bytes_
Definition: HashTable.h:67
Data_Namespace::DataMgr * data_mgr_
HashType
Definition: HashTable.h:19
#define VLOG(n)
Definition: Logger.h:388