OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashJoinKeyHandlers.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_HASHJOINKEYHANDLERS_H
18 #define QUERYENGINE_HASHJOINKEYHANDLERS_H
19 
22 #include "Shared/SqlTypesLayout.h"
23 
24 #ifdef __CUDACC__
26 #else
27 #include "Logger/Logger.h"
31 #endif
32 
34 
35 #include <cmath>
36 
37 #include "Shared/funcannotations.h"
38 
// Constructor: stores the key component count, the skip-entries flag, and the
// per-key join column / type info pointers in the corresponding members.
//
// When __CUDACC__ is not defined the constructor takes two extra arguments,
// sd_inner_to_outer_translation_maps and sd_min_inner_elems. If the maps
// pointer is non-null, sd_min_inner_elems is CHECKed to be non-null as well and
// both pointers are stored. Otherwise (and always when __CUDACC__ is defined)
// the trailing braced block runs instead.
//
// NOTE(review): listing line 62 (first statement of the trailing block) is
// absent from this extract; presumably it resets
// sd_inner_to_outer_translation_maps_ to nullptr -- confirm against the header.
40  GenericKeyHandler(const size_t key_component_count,
41  const bool should_skip_entries,
42  const JoinColumn* join_column_per_key,
43  const JoinColumnTypeInfo* type_info_per_key
44 #ifndef __CUDACC__
45  ,
46  const int32_t* const* sd_inner_to_outer_translation_maps,
47  const int32_t* sd_min_inner_elems
48 #endif
49  )
50  : key_component_count_(key_component_count)
51  , should_skip_entries_(should_skip_entries)
52  , join_column_per_key_(join_column_per_key)
53  , type_info_per_key_(type_info_per_key) {
54 #ifndef __CUDACC__
55  if (sd_inner_to_outer_translation_maps) {
56  CHECK(sd_min_inner_elems);
57  sd_inner_to_outer_translation_maps_ = sd_inner_to_outer_translation_maps;
58  sd_min_inner_elems_ = sd_min_inner_elems;
59  } else
60 #endif
// With __CUDACC__ defined the "if ... else" above is compiled out, so this
// block executes unconditionally.
61  {
63  sd_min_inner_elems_ = nullptr;
64  }
65  }
66 
// Assembles one composite key into key_scratch_buff and hands it to f.
//
// For each of the key_component_count_ components, the current element of the
// matching iterator in join_column_iterators is read and written to
// key_scratch_buff[key_component_index]. The entry is skipped (f is not called)
// when
//   * should_skip_entries_ is set, the element equals the column's null_val and
//     the column does not use bw_eq, or
//   * (non-CUDA builds only) the element maps to
//     StringDictionary::INVALID_STR_ID in the translation map.
//
// Returns the value of f(join_column_iterators[0].index, key_scratch_buff,
// key_component_count_) when the entry is not skipped, otherwise 0.
67  template <typename T, typename KEY_BUFF_HANDLER>
68  DEVICE int operator()(JoinColumnIterator* join_column_iterators,
69  T* key_scratch_buff,
70  KEY_BUFF_HANDLER f) const {
71  bool skip_entry = false;
72  for (size_t key_component_index = 0; key_component_index < key_component_count_;
73  ++key_component_index) {
74  const auto& join_column_iterator = join_column_iterators[key_component_index];
75  int64_t elem = (*join_column_iterator).element;
76  if (should_skip_entries_ && elem == join_column_iterator.type_info->null_val &&
77  !join_column_iterator.type_info->uses_bw_eq) {
78  skip_entry = true;
79  break;
80  }
81 #ifndef __CUDACC__
82  // Translation map pointers will already be set to nullptr if invalid
// NOTE(review): listing line 83 is absent from this extract; the closing brace
// on listing line 97 pairs with it. Presumably it is a guard of the form
// "if (sd_inner_to_outer_translation_maps_) {" -- confirm against the header.
84  const auto sd_inner_to_outer_translation_map =
85  sd_inner_to_outer_translation_maps_[key_component_index];
86  const auto sd_min_inner_elem = sd_min_inner_elems_[key_component_index];
87  if (sd_inner_to_outer_translation_map &&
88  elem != join_column_iterator.type_info->null_val) {
// The map is indexed relative to the minimum inner element for this component.
89  const auto outer_id =
90  sd_inner_to_outer_translation_map[elem - sd_min_inner_elem];
91  if (outer_id == StringDictionary::INVALID_STR_ID) {
92  skip_entry = true;
93  break;
94  }
95  elem = outer_id;
96  }
97  }
98 #endif
99  key_scratch_buff[key_component_index] = elem;
100  }
101 
102  if (!skip_entry) {
103  return f(join_column_iterators[0].index, key_scratch_buff, key_component_count_);
104  }
105 
106  return 0;
107  }
108 
// Returns key_component_count_ as the number of columns handled.
109  DEVICE size_t get_number_of_columns() const {
110  return key_component_count_;
111  }
112 
114  return key_component_count_;
115  }
116 
118  return join_column_per_key_;
119  }
120 
122  return type_info_per_key_;
123  }
124 
125  const size_t key_component_count_;
129  const int32_t* const* sd_inner_to_outer_translation_maps_;
130  const int32_t* sd_min_inner_elems_;
131 };
132 
// Constructor: stores the key dimension count, the join column pointer and the
// per-dimension bucket size array in the corresponding members. No validation
// is performed on any argument.
134  BoundingBoxIntersectKeyHandler(const size_t key_dims_count,
135  const JoinColumn* join_column, // always 1 column
136  const double* bucket_sizes_for_dimension)
137  : key_dims_count_(key_dims_count)
138  , join_column_(join_column)
139  , bucket_sizes_for_dimension_(bucket_sizes_for_dimension) {}
140 
// Emits one (x, y) key per bucket covered by the current row's bounds.
//
// Decodes 2 * key_dims_count_ doubles from join_column_iterators->ptr() into
// bounds, scales them by the first two entries of bucket_sizes_for_dimension_,
// and iterates every integer x in [floor(bounds[0] * x_bucket_sz),
// floor(bounds[2] * x_bucket_sz)] and y in [floor(bounds[1] * y_bucket_sz),
// floor(bounds[3] * y_bucket_sz)]. For each pair, key_scratch_buff[0..1] is
// filled and f is called with join_column_iterators[0].index.
//
// Returns the first non-zero value returned by f, or 0 if every call returned 0.
//
// NOTE(review): bounds has room for 4 doubles but the decode loop writes
// 2 * key_dims_count_ entries, so key_dims_count_ > 2 would write past the end
// of the array. Only two key slots are ever written regardless of
// key_dims_count_. See the TODO below.
141  template <typename T, typename KEY_BUFF_HANDLER>
142  DEVICE int operator()(JoinColumnIterator* join_column_iterators,
143  T* key_scratch_buff,
144  KEY_BUFF_HANDLER f) const {
145  // TODO(adb): hard-coding the 2D case w/ bounds for now. Should support n-dims with a
146  // check to ensure we are not exceeding maximum number of dims for coalesced keys
147  double bounds[4];
148  for (size_t j = 0; j < 2 * key_dims_count_; j++) {
149  bounds[j] =
150  SUFFIX(fixed_width_double_decode_noinline)(join_column_iterators->ptr(), j);
151  }
152 
// The bucket values are used as multipliers on the coordinates.
153  const auto x_bucket_sz = bucket_sizes_for_dimension_[0];
154  const auto y_bucket_sz = bucket_sizes_for_dimension_[1];
155 
// Both upper bounds are inclusive and are re-evaluated on every iteration.
156  for (int64_t x = floor(bounds[0] * x_bucket_sz); x <= floor(bounds[2] * x_bucket_sz);
157  x++) {
158  for (int64_t y = floor(bounds[1] * y_bucket_sz);
159  y <= floor(bounds[3] * y_bucket_sz);
160  y++) {
161  key_scratch_buff[0] = x;
162  key_scratch_buff[1] = y;
163 
164  const auto err =
165  f(join_column_iterators[0].index, key_scratch_buff, key_dims_count_);
// Stop at the first non-zero result and propagate it to the caller.
166  if (err) {
167  return err;
168  }
169  }
170  }
171  return 0;
172  }
173 
// Always reports a single column.
174  DEVICE size_t get_number_of_columns() const { return 1; }
175 
// Returns key_dims_count_ as the number of key components.
176  DEVICE size_t get_key_component_count() const { return key_dims_count_; }
177 
// Returns the join column pointer supplied at construction.
178  DEVICE const JoinColumn* get_join_columns() const { return join_column_; }
179 
// Always returns nullptr; this handler holds no per-column type info.
180  DEVICE const JoinColumnTypeInfo* get_join_column_type_infos() const { return nullptr; }
181 
182  const size_t key_dims_count_;
185 };
186 
// Constructor: stores the compression flag, the key dimension count, the join
// column pointer and the per-dimension bucket size array in the corresponding
// members. No validation is performed on any argument.
188  explicit RangeKeyHandler(const bool is_compressed,
189  const size_t key_dims_count,
190  const JoinColumn* join_column, // always 1 column
191  const double* bucket_sizes_for_dimension)
192  : is_compressed_(is_compressed)
193  , key_dims_count_(key_dims_count)
194  , join_column_(join_column)
195  , bucket_sizes_for_dimension_(bucket_sizes_for_dimension) {}
196 
// Emits a single (x, y) key for the current row's coordinate pair.
//
// Two coordinates are read from join_column_iterators->ptr() at positions 0
// and 1, scaled by the first two entries of bucket_sizes_for_dimension_,
// floored, and written to key_scratch_buff[0..1]. f is then called once with
// join_column_iterators[0].index.
//
// Returns the value returned by f (0 when f returned 0).
197  template <typename T, typename KEY_BUFF_HANDLER>
198  DEVICE int operator()(JoinColumnIterator* join_column_iterators,
199  T* key_scratch_buff,
200  KEY_BUFF_HANDLER f) const {
201  double coords[2];
202 
203  if (is_compressed_) {
// NOTE(review): listing lines 204-205 and 207-208 are absent from this
// extract, so only the tails of the two assignments are visible. They show a
// decode with byte_width 4 at positions 0 and 1; presumably the results are
// decompressed into coords[0] and coords[1] -- confirm against the header.
206  join_column_iterators->ptr(), /*byte_width=*/4, 0));
209  join_column_iterators->ptr(), /*byte_width=*/4, 1));
210  } else {
// Uncompressed input: the coordinates are decoded directly as doubles.
211  coords[0] =
212  SUFFIX(fixed_width_double_decode_noinline)(join_column_iterators->ptr(), 0);
213  coords[1] =
214  SUFFIX(fixed_width_double_decode_noinline)(join_column_iterators->ptr(), 1);
215  }
216 
// The bucket values are used as multipliers on the coordinates.
217  const auto x_bucket_sz = bucket_sizes_for_dimension_[0];
218  const auto y_bucket_sz = bucket_sizes_for_dimension_[1];
219 
220  key_scratch_buff[0] = floor(coords[0] * x_bucket_sz);
221  key_scratch_buff[1] = floor(coords[1] * y_bucket_sz);
222  const auto err = f(join_column_iterators[0].index, key_scratch_buff, key_dims_count_);
223  if (err) {
224  return err;
225  }
226 
227  return 0;
228  }
229 
// Always reports a single column.
230  DEVICE size_t get_number_of_columns() const { return 1; }
231 
// Returns key_dims_count_ as the number of key components.
232  DEVICE size_t get_key_component_count() const { return key_dims_count_; }
233 
// Returns the join column pointer supplied at construction.
234  DEVICE const JoinColumn* get_join_columns() const { return join_column_; }
235 
// Always returns nullptr; this handler holds no per-column type info.
236  DEVICE const JoinColumnTypeInfo* get_join_column_type_infos() const { return nullptr; }
237 
238  const bool is_compressed_;
239  const size_t key_dims_count_;
242 };
243 
244 #endif // QUERYENGINE_HASHJOINKEYHANDLERS_H
DEVICE size_t get_key_component_count() const
const JoinColumn * join_column_per_key_
const size_t key_dims_count_
DEVICE const JoinColumnTypeInfo * get_join_column_type_infos() const
DEVICE double decompress_latitude_coord_geoint32(const int32_t compressed)
const size_t key_component_count_
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_int_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:91
DEVICE int operator()(JoinColumnIterator *join_column_iterators, T *key_scratch_buff, KEY_BUFF_HANDLER f) const
DEVICE const JoinColumn * get_join_columns() const
#define SUFFIX(name)
DEVICE const JoinColumnTypeInfo * get_join_column_type_infos() const
DEVICE const JoinColumnTypeInfo * get_join_column_type_infos() const
const double * bucket_sizes_for_dimension_
RangeKeyHandler(const bool is_compressed, const size_t key_dims_count, const JoinColumn *join_column, const double *bucket_sizes_for_dimension)
GenericKeyHandler(const size_t key_component_count, const bool should_skip_entries, const JoinColumn *join_column_per_key, const JoinColumnTypeInfo *type_info_per_key, const int32_t *const *sd_inner_to_outer_translation_maps, const int32_t *sd_min_inner_elems)
#define DEVICE
DEVICE int operator()(JoinColumnIterator *join_column_iterators, T *key_scratch_buff, KEY_BUFF_HANDLER f) const
DEVICE const JoinColumn * get_join_columns() const
static constexpr int32_t INVALID_STR_ID
Iterates over the rows of a JoinColumn across multiple fragments/chunks.
BoundingBoxIntersectKeyHandler(const size_t key_dims_count, const JoinColumn *join_column, const double *bucket_sizes_for_dimension)
DEVICE size_t get_key_component_count() const
DEVICE const JoinColumn * get_join_columns() const
DEVICE size_t get_number_of_columns() const
DEVICE double decompress_longitude_coord_geoint32(const int32_t compressed)
DEVICE int operator()(JoinColumnIterator *join_column_iterators, T *key_scratch_buff, KEY_BUFF_HANDLER f) const
const JoinColumnTypeInfo * type_info_per_key_
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
const int32_t *const * sd_inner_to_outer_translation_maps_
DEVICE size_t get_number_of_columns() const
#define CHECK(condition)
Definition: Logger.h:291
DEVICE NEVER_INLINE double SUFFIX() fixed_width_double_decode_noinline(const int8_t *byte_stream, const int64_t pos)
Definition: DecodersImpl.h:134
const JoinColumn * join_column_
const int32_t * sd_min_inner_elems_
DEVICE FORCE_INLINE const int8_t * ptr() const
DEVICE size_t get_key_component_count() const
DEVICE size_t get_number_of_columns() const
const bool should_skip_entries_