OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryTranslationMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 
25 #include "CodeGenerator.h"
26 #include "Execute.h"
27 #ifdef HAVE_CUDA
29 #include "GpuMemUtils.h"
30 #endif // HAVE_CUDA
31 #include "Parser/ParserNode.h"
32 #include "RuntimeFunctions.h"
33 #include "Shared/StringTransform.h"
34 #include "Shared/checked_alloc.h"
36 
37 #ifdef HAVE_TBB
38 #include <tbb/parallel_for.h>
39 #endif // HAVE_TBB
40 
42  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos) {
43  for (const auto& string_op_info : string_op_infos) {
44  if (string_op_info.hasNullLiteralArg()) {
45  return true;
46  }
47  }
48  return false;
49 }
50 
52  const shared::StringDictKey& source_string_dict_key,
53  const shared::StringDictKey& dest_string_dict_key,
54  const bool translate_intersection_only,
55  const SQLTypeInfo& output_ti,
56  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
57  const Data_Namespace::MemoryLevel memory_level,
58  const int device_count,
59  Executor* executor,
60  Data_Namespace::DataMgr* data_mgr,
61  const bool delay_translation)
62  : source_string_dict_key_(source_string_dict_key)
63  , dest_string_dict_key_(dest_string_dict_key)
64  , translate_intersection_only_(translate_intersection_only)
65  , output_ti_(output_ti)
66  , string_op_infos_(string_op_infos)
68  , memory_level_(memory_level)
69  , device_count_(device_count)
70  , executor_(executor)
71  , data_mgr_(data_mgr)
73 #ifdef HAVE_CUDA
76 #else
78 #endif // HAVE_CUDA
79  if (!delay_translation && !has_null_string_op_) {
82  }
83 }
84 
86  const shared::StringDictKey& source_string_dict_key,
87  const SQLTypeInfo& output_ti,
88  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
89  const Data_Namespace::MemoryLevel memory_level,
90  const int device_count,
91  Executor* executor,
92  Data_Namespace::DataMgr* data_mgr,
93  const bool delay_translation)
94  : source_string_dict_key_(source_string_dict_key)
95  , dest_string_dict_key_({-1, -1})
97  , output_ti_(output_ti)
98  , string_op_infos_(string_op_infos)
100  , memory_level_(memory_level)
101  , device_count_(device_count)
102  , executor_(executor)
103  , data_mgr_(data_mgr)
105 #ifdef HAVE_CUDA
107  memory_level == Data_Namespace::GPU_LEVEL);
108 #else
110 #endif // HAVE_CUDA
111  const auto& last_string_op_info = string_op_infos.back();
112  CHECK(!last_string_op_info.getReturnType().is_string());
113  if (!delay_translation && !has_null_string_op_) {
114  buildTranslationMap();
115  createKernelBuffers();
116  }
117 }
118 
120  CHECK(data_mgr_);
121  for (auto& device_buffer : device_buffers_) {
122  data_mgr_->free(device_buffer);
123  }
124 }
125 
127  if (dest_type_is_string_) {
128  host_translation_map_ = executor_->getStringProxyTranslationMap(
135  executor_->getRowSetMemoryOwner(),
136  true);
137  } else {
139  executor_->getStringProxyNumericTranslationMap(source_string_dict_key_,
141  executor_->getRowSetMemoryOwner(),
142  true);
143  }
144 }
145 
147 #ifdef HAVE_CUDA
149  const size_t translation_map_size_bytes = mapSize();
150  for (int device_id = 0; device_id < device_count_; ++device_id) {
152  data_mgr_, translation_map_size_bytes, device_id));
153  auto device_buffer =
154  reinterpret_cast<int8_t*>(device_buffers_.back()->getMemoryPtr());
156  reinterpret_cast<CUdeviceptr>(device_buffer),
157  data(),
158  translation_map_size_bytes,
159  device_id);
160  kernel_translation_maps_.push_back(device_buffer);
161  }
162  }
163 #else
165 #endif // HAVE_CUDA
167  kernel_translation_maps_.push_back(data());
168  }
169 }
170 
171 llvm::Value* StringDictionaryTranslationMgr::codegen(llvm::Value* input_str_id_lv,
172  const SQLTypeInfo& input_ti,
173  const bool add_nullcheck,
174  const CompilationOptions& co) const {
175  CHECK(kernel_translation_maps_.size() == static_cast<size_t>(device_count_) ||
177  if (!co.hoist_literals && kernel_translation_maps_.size() > 1UL) {
178  // Currently the only way to have multiple kernel translation maps is
179  // to be running on GPU, where we would need to have a different pointer
180  // per GPU to the translation map, as the address space is not shared
181  // between GPUs
182 
185 
186  // Since we currently cannot support different code per device, the only
187  // way to allow for a different kernel translation map/kernel per
188  // device (i.e. GPU) is via hoisting the map handle literal so that
189  // it can be parameterized as a kernel argument. Hence if literal
190  // hoisting is disabled (generally b/c we have an update query),
191  // the surest way of ensuring one and only one translation map
192  // that can have a hard-coded handle in the generated code is by running
193  // on CPU (which per the comment above currently always has a device
194  // count of 1).
195 
196  // This is not currently a major limitation as we currently run
197  // all update queries on CPU, but it would be if we want to run
198  // on multiple GPUs.
199 
200  // Todo(todd): Examine ways around the above limitation, likely either
201  // a dedicated kernel parameter for translation maps (like we have for
202  // join hash tables), or perhaps better for a number of reasons, reworking
203  // the translation map plumbing to use the join infra (which would also
204  // mean we could use pieces like the baseline hash join for multiple
205  // input string dictionaries, i.e. CONCAT on two string columns).
206 
207  throw QueryMustRunOnCpu();
208  }
209  CHECK(co.hoist_literals || kernel_translation_maps_.size() == 1UL);
210 
211  auto cgen_state_ptr = executor_->getCgenStatePtr();
212  AUTOMATIC_IR_METADATA(cgen_state_ptr);
213 
214  if (has_null_string_op_) {
215  // If any of the string ops can statically be determined to output all nulls
216  // (currently determined by whether any of the constant literal inputs to the
217  // string operation are null), then simply codegen a null
218  // dictionary-encoded value
219  const auto null_ti = SQLTypeInfo(kTEXT, true /* is_nullable */, kENCODING_DICT);
220  return static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(null_ti));
221  }
222 
223  std::vector<std::shared_ptr<const Analyzer::Constant>> constants_owned;
224  std::vector<const Analyzer::Constant*> constants;
225  for (const auto kernel_translation_map : kernel_translation_maps_) {
226  const int64_t translation_map_handle =
227  reinterpret_cast<int64_t>(kernel_translation_map);
228  const auto translation_map_handle_literal =
229  std::dynamic_pointer_cast<Analyzer::Constant>(
230  Parser::IntLiteral::analyzeValue(translation_map_handle));
231  CHECK(translation_map_handle_literal);
233  translation_map_handle_literal->get_type_info().get_compression());
234  constants_owned.push_back(translation_map_handle_literal);
235  constants.push_back(translation_map_handle_literal.get());
236  }
237  CHECK_GE(constants.size(), 1UL);
238  CHECK(co.hoist_literals || constants.size() == 1UL);
239 
240  CodeGenerator code_generator(executor_);
241 
242  const auto translation_map_handle_lvs =
243  co.hoist_literals
244  ? code_generator.codegenHoistedConstants(constants, kENCODING_NONE, {})
245  : code_generator.codegen(constants[0], false, co);
246  CHECK_EQ(size_t(1), translation_map_handle_lvs.size());
247 
248  std::unique_ptr<CodeGenerator::NullCheckCodegen> nullcheck_codegen;
249  const bool is_nullable = !input_ti.get_notnull();
250  const auto decoded_input_ti = SQLTypeInfo(kTEXT, is_nullable, kENCODING_DICT);
251  if (add_nullcheck && is_nullable) {
252  nullcheck_codegen = std::make_unique<CodeGenerator::NullCheckCodegen>(
253  cgen_state_ptr,
254  executor_,
255  input_str_id_lv,
256  decoded_input_ti,
257  "dict_encoded_str_cast_nullcheck");
258  }
259  llvm::Value* ret;
260  if (dest_type_is_string_) {
261  ret = cgen_state_ptr->emitCall(
262  "map_string_dict_id",
263  {input_str_id_lv,
264  cgen_state_ptr->castToTypeIn(translation_map_handle_lvs.front(), 64),
265  cgen_state_ptr->llInt(minSourceStringId())});
266  } else {
267  std::string fn_call = "map_string_to_datum_";
268  const auto sql_type = output_ti_.get_type();
269  switch (sql_type) {
270  case kBOOLEAN: {
271  fn_call += "bool";
272  break;
273  }
274  case kTINYINT:
275  case kSMALLINT:
276  case kINT:
277  case kBIGINT:
278  case kFLOAT:
279  case kDOUBLE: {
280  fn_call += to_lower(toString(sql_type));
281  break;
282  }
283  case kNUMERIC:
284  case kDECIMAL:
285  case kTIME:
286  case kTIMESTAMP:
287  case kDATE: {
288  fn_call += "bigint";
289  break;
290  }
291  default: {
292  throw std::runtime_error("Unimplemented type for string-to-numeric translation");
293  }
294  }
295  ret = cgen_state_ptr->emitCall(
296  fn_call,
297  {input_str_id_lv,
298  cgen_state_ptr->castToTypeIn(translation_map_handle_lvs.front(), 64),
299  cgen_state_ptr->llInt(minSourceStringId())});
300  }
301 
302  if (nullcheck_codegen) {
303  ret = nullcheck_codegen->finalize(cgen_state_ptr->inlineNull(output_ti_), ret);
304  }
305  return ret;
306 }
307 
309  if (dest_type_is_string_) {
310  return host_translation_map_ && !host_translation_map_->empty();
311  } else {
313  }
314 }
315 
317  if (isMapValid()) {
318  if (dest_type_is_string_) {
319  return reinterpret_cast<const int8_t*>(host_translation_map_->data());
320  } else {
321  return reinterpret_cast<const int8_t*>(host_numeric_translation_map_->data());
322  }
323  }
324  return nullptr;
325 }
326 
328  if (isMapValid()) {
329  return dest_type_is_string_ ? host_translation_map_->domainStart()
331  }
332  return 0;
333 }
334 
336  if (isMapValid()) {
337  const size_t num_elems = dest_type_is_string_
338  ? host_translation_map_->getVectorMap().size()
340  const size_t elem_size =
342  return num_elems * elem_size;
343  }
344  return 0UL;
345 }
StringDictionaryTranslationMgr(const shared::StringDictKey &source_string_dict_key, const shared::StringDictKey &dest_string_dict_key, const bool translate_intersection_only, const SQLTypeInfo &output_ti, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const Data_Namespace::MemoryLevel memory_level, const int device_count, Executor *executor, Data_Namespace::DataMgr *data_mgr, const bool delay_translation)
std::string to_lower(const std::string &str)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const Data_Namespace::MemoryLevel memory_level_
Definition: sqltypes.h:76
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:166
bool one_or_more_string_ops_is_null(const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos)
#define CHECK_GE(x, y)
Definition: Logger.h:306
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
output_ti_(output_ti)
const StringDictionaryProxy::TranslationMap< Datum > * host_numeric_translation_map_
Classes representing a parse tree.
int get_logical_size() const
Definition: sqltypes.h:421
string_op_infos_(string_op_infos)
executor_(executor)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
bool g_enable_smem_group_by true
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
const shared::StringDictKey dest_string_dict_key_
ExecutorDeviceType device_type
Definition: sqltypes.h:79
Definition: sqltypes.h:80
dest_type_is_string_(false)
std::vector< const int8_t * > kernel_translation_maps_
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
std::vector< T > const & getVectorMap() const
translate_intersection_only_(true)
data_mgr_(data_mgr)
void copy_to_nvidia_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:35
#define CHECK(condition)
Definition: Logger.h:291
std::vector< Data_Namespace::AbstractBuffer * > device_buffers_
device_count_(device_count)
Definition: sqltypes.h:72
Allocate GPU memory using GpuBuffers via DataMgr.
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:614
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
llvm::Value * codegen(llvm::Value *str_id_input, const SQLTypeInfo &input_ti, const bool add_nullcheck, const CompilationOptions &co) const
Definition: Datum.h:71
has_null_string_op_(one_or_more_string_ops_is_null(string_op_infos))
const shared::StringDictKey source_string_dict_key_
const std::vector< StringOps_Namespace::StringOpInfo > string_op_infos_
memory_level_(memory_level)