OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ColumnIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Codec.h"
19 #include "CodegenHelper.h"
20 #include "Execute.h"
21 #include "WindowContext.h"
22 
23 // Code generation routines and helpers for working with column expressions.
24 
25 namespace {
26 
27 // Return the right decoder for a given column expression. Doesn't handle
28 // variable length data. The decoder encapsulates the code generation logic.
//
// The decoder is selected from the column's compression scheme
// (get_compression()) together with its type info:
//  - kENCODING_NONE : decoder chosen from the column's own SQL type,
//  - kENCODING_DICT : integer decoder of ti.get_size() bytes, unsigned when the
//                     stored size is smaller than the logical size,
//  - kENCODING_FIXED: integer decoder of get_comp_param() / 8 bytes.
// The outer `default` label calls abort().
29 std::shared_ptr<Decoder> get_col_decoder(const Analyzer::ColumnVar* col_var) {
30  const auto enc_type = col_var->get_compression();
31  const auto& ti = col_var->get_type_info();
32  switch (enc_type) {
33  case kENCODING_NONE: {
// Decimals are mapped to an integer type via decimal_to_int_type() before dispatch.
34  const auto int_type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
35  switch (int_type) {
36  case kBOOLEAN:
37  return std::make_shared<FixedWidthInt>(1);
38  case kTINYINT:
39  return std::make_shared<FixedWidthInt>(1);
40  case kSMALLINT:
41  return std::make_shared<FixedWidthInt>(2);
42  case kINT:
43  return std::make_shared<FixedWidthInt>(4);
44  case kBIGINT:
45  return std::make_shared<FixedWidthInt>(8);
// FixedWidthReal is constructed with `false` for kFLOAT and `true` for kDOUBLE.
46  case kFLOAT:
47  return std::make_shared<FixedWidthReal>(false);
48  case kDOUBLE:
49  return std::make_shared<FixedWidthReal>(true);
50  case kTIME:
51  case kTIMESTAMP:
52  case kDATE:
53  return std::make_shared<FixedWidthInt>(8);
54  default:
55  CHECK(false) << "ti=" << ti;
56  }
// NOTE(review): there is no return/break after the inner switch, so if
// CHECK(false) ever returned, control would fall through into the
// kENCODING_DICT case below - presumably CHECK(false) is fatal; confirm.
57  }
58  case kENCODING_DICT:
59  CHECK(ti.is_string());
60  // For dictionary-encoded columns encoded on less than 4 bytes, we can use
61  // unsigned representation for double the maximum cardinality. The inline
62  // null value is going to be the maximum value of the underlying type.
63  if (ti.get_size() < ti.get_logical_size()) {
64  return std::make_shared<FixedWidthUnsigned>(ti.get_size());
65  }
66  return std::make_shared<FixedWidthInt>(ti.get_size());
67  case kENCODING_FIXED: {
68  const auto bit_width = col_var->get_type_info().get_comp_param();
// Only whole-byte widths are supported.
69  CHECK_EQ(0, bit_width % 8);
70  return std::make_shared<FixedWidthInt>(bit_width / 8);
71  }
// NOTE(review): listing line 72 is missing from this extract. It presumably
// holds the `case ...: {` label that opens the date-in-days block below
// (the block asserts ti.is_date_in_days()) - restore it from the real source.
73  CHECK(ti.is_date_in_days());
// A comp_param of 16 selects the 2-byte small-date decoder, anything else the 4-byte one.
74  return col_var->get_type_info().get_comp_param() == 16
75  ? std::make_shared<FixedWidthSmallDate>(2)
76  : std::make_shared<FixedWidthSmallDate>(4);
77  }
78  default:
79  abort();
80  }
81 }
82 
83 size_t get_col_bit_width(const Analyzer::ColumnVar* col_var) {
84  const auto& type_info = col_var->get_type_info();
85  return get_bit_width(type_info);
86 }
87 
// NOTE(review): the signature line (listing line 88) is missing from this
// extract. Callers in this file invoke this helper as
// adjusted_range_table_index(col_var) and store the result in an int.
// Maps a range-table index of -1 to 0; any other index is returned unchanged.
89  return col_var->get_rte_idx() == -1 ? 0 : col_var->get_rte_idx();
90 }
91 
92 } // namespace
93 
// Entry point for generating the value(s) of a column expression.
//
// Columns with a range-table index <= 0, or whose nesting level has no
// outer-join match value (foundOuterJoinMatch() returns null), are generated
// directly by codegenColVar() with update_query_plan = true. Every other column
// is routed through codegenOuterJoinNullPlaceholder().
//
// NOTE(review): listing lines 97 and 99 are missing from this extract; line 99
// sits in the middle of the condition below, so the visible condition may be
// incomplete - check against the real source.
94 std::vector<llvm::Value*> CodeGenerator::codegenColumn(const Analyzer::ColumnVar* col_var,
95  const bool fetch_column,
96  const CompilationOptions& co) {
98  if (col_var->get_rte_idx() <= 0 ||
100  !foundOuterJoinMatch(col_var->get_rte_idx())) {
101  return codegenColVar(col_var, fetch_column, true, co);
102  }
103  return codegenOuterJoinNullPlaceholder(col_var, fetch_column, co);
104 }
105 
// Generates the IR value(s) for a column expression.
//
// Depending on the column this returns:
//  - the row id, for the virtual "rowid" column,
//  - the concatenated values of the physical coordinate columns, when the
//    column type reports get_physical_coord_cols() > 0,
//  - a cached group-by expression or a previously generated value from
//    cgen_state_->fetch_cache_,
//  - the already generated left-hand side of an equi-join with identical type,
//  - just the row position, for lazily fetched columns,
//  - the values from codegenVariableLengthStringColVar() for none-encoded strings,
//  - the raw column byte stream for flatbuffer, array and kPOINT columns,
//  - otherwise a single decoded fixed-length value.
//
// `update_query_plan` is only consulted inside the lazy-fetch branch.
//
// NOTE(review): listing lines 110, 163, 195 and 225 are missing from this
// extract; each gap is marked below.
106 std::vector<llvm::Value*> CodeGenerator::codegenColVar(const Analyzer::ColumnVar* col_var,
107  const bool fetch_column,
108  const bool update_query_plan,
109  const CompilationOptions& co) {
111  const bool hoist_literals = co.hoist_literals;
112  const int rte_idx = adjusted_range_table_index(col_var);
113  CHECK_LT(static_cast<size_t>(rte_idx), cgen_state_->frag_offsets_.size());
114  const auto& column_key = col_var->getColumnKey();
// Descriptor-based special cases apply only to keys with a positive table id.
115  if (column_key.table_id > 0) {
116  const auto cd = get_column_descriptor(column_key);
117  if (cd->isVirtualCol) {
118  CHECK(cd->columnName == "rowid");
119  return {codegenRowId(col_var, co)};
120  }
121  const auto col_ti = cd->columnType;
122  if (col_ti.usesFlatBuffer()) {
123  throw std::runtime_error(
124  "Flatbuffer storage in a real table column not supported yet");
125  }
126  if (col_ti.get_physical_coord_cols() > 0) {
// The physical columns use the column ids directly following this column's id
// (col_id + 1 ... col_id + get_physical_coord_cols()).
127  std::vector<llvm::Value*> cols;
128  const auto col_id = column_key.column_id;
129  auto temp_column_key = column_key;
130  bool fetch_physical_columns = fetch_column;
131  for (auto i = 0; i < col_ti.get_physical_coord_cols(); i++) {
132  temp_column_key.column_id = col_id + i + 1;
133  const auto cd0 = get_column_descriptor(temp_column_key);
134  CHECK(cd0);
135  const auto col0_ti = cd0->columnType;
136  CHECK(!cd0->isVirtualCol);
137  const auto col0_var =
138  makeExpr<Analyzer::ColumnVar>(col0_ti, temp_column_key, rte_idx);
139  if (plan_state_->isColumnToFetch(temp_column_key)) {
140  // sync the fetch status for all physical columns
141  fetch_physical_columns = true;
142  }
// Recursive call with update_query_plan = false.
143  const auto col = codegenColVar(col0_var.get(), fetch_physical_columns, false, co);
144  cols.insert(cols.end(), col.begin(), col.end());
145  if (!fetch_physical_columns && plan_state_->isLazyFetchColumn(col_var)) {
146  plan_state_->addColumnToNotFetch(temp_column_key);
147  }
148  }
// Record the fetch decision for the logical column itself.
149  if (!fetch_physical_columns && plan_state_->isLazyFetchColumn(col_var)) {
150  plan_state_->addColumnToNotFetch(column_key);
151  } else {
152  plan_state_->addColumnToFetch(column_key);
153  }
154  return cols;
155  }
156  }
157  const auto grouped_col_lv = resolveGroupedColumnReference(col_var);
158  if (grouped_col_lv) {
159  return {grouped_col_lv};
160  }
// The cache key is the hash of the expression's string form.
161  const auto col_var_hash = boost::hash_value(col_var->toString());
// NOTE(review): listing line 163 (the initializer of window_func_context) is
// missing from this extract, so the declaration below is cut off.
162  const auto window_func_context =
164  // only generate the decoding code once; if a column has been previously
165  // fetched in the generated IR, we'll reuse it
166  // here, we do not just use (local) column id since our analyzer may cast the same
167  // col_var with different types depending on the (aggregate) function that the col_var
168  // is used i.e., SELECT COUNT(DISTINCT x), MIN(x) FROM ...
// The cache is bypassed while a window function context is active.
169  if (!window_func_context) {
170  auto it = cgen_state_->fetch_cache_.find(col_var_hash);
171  if (it != cgen_state_->fetch_cache_.end()) {
172  return {it->second};
173  }
174  }
175  const auto hash_join_lhs = hashJoinLhs(col_var);
176  // Note(jclay): This has been prone to cause failures in some bounding box intersection.
177  // I believe most of the issues are worked out now, but a good place to check if
178  // failures are happening.
179 
180  // Use the already fetched left-hand side of an equi-join if the types are identical.
181  // Currently, types can only be different because of different underlying dictionaries.
182  if (hash_join_lhs && hash_join_lhs->get_type_info() == col_var->get_type_info()) {
183  if (plan_state_->isLazyFetchColumn(col_var)) {
184  plan_state_->addColumnToFetch(col_var->getColumnKey(), true);
185  }
186  return codegen(hash_join_lhs.get(), fetch_column, co);
187  }
188  auto pos_arg = posArg(col_var);
189  if (window_func_context) {
190  pos_arg = codegenWindowPosition(window_func_context, pos_arg);
191  }
192  auto col_byte_stream = colByteStream(col_var, fetch_column, hoist_literals);
// Lazily fetched columns are not decoded here: only the row position is
// returned, offset by the fragment offset for rte_idx > 0 when one is present.
193  if (plan_state_->isLazyFetchColumn(col_var)) {
// NOTE(review): listing line 195 (the body of this `if`) is missing from this extract.
194  if (update_query_plan) {
196  }
197  if (rte_idx > 0) {
198  const auto offset = cgen_state_->frag_offsets_[rte_idx];
199  if (offset) {
200  return {cgen_state_->ir_builder_.CreateAdd(pos_arg, offset)};
201  } else {
202  return {pos_arg};
203  }
204  }
205  return {pos_arg};
206  }
207  const auto& col_ti = col_var->get_type_info();
208  if (col_ti.is_string() && col_ti.get_compression() == kENCODING_NONE) {
209  const auto varlen_str_column_lvs =
210  codegenVariableLengthStringColVar(col_byte_stream, pos_arg);
211  if (!window_func_context) {
// A cache hit would have returned earlier, so the insertion is expected to succeed.
212  auto it_ok = cgen_state_->fetch_cache_.insert(
213  std::make_pair(col_var_hash, varlen_str_column_lvs));
214  CHECK(it_ok.second);
215  }
216  return varlen_str_column_lvs;
217  }
218  if (col_ti.usesFlatBuffer()) {
219  return {col_byte_stream};
220  }
221  if (col_ti.is_array() || col_ti.get_type() == kPOINT) {
222  return {col_byte_stream};
223  }
// NOTE(review): listing line 225 (the start of the return statement, including
// the callee name) is missing from this extract; only its argument list remains.
224  if (window_func_context) {
226  col_var, col_byte_stream, pos_arg, co, window_func_context)};
227  }
228  const auto fixed_length_column_lv =
229  codegenFixedLengthColVar(col_var, col_byte_stream, pos_arg);
// Unlike the string path above, the insertion result is not checked here; the
// entry stored under the key (new or pre-existing) is what gets returned.
230  auto it_ok = cgen_state_->fetch_cache_.insert(
231  std::make_pair(col_var_hash, std::vector<llvm::Value*>{fixed_length_column_lv}));
232  return {it_ok.first->second};
233 }
234 
// NOTE(review): the signature line (listing line 235) and listing line 238 are
// missing from this extract. codegenColVar() calls this function as
// codegenWindowPosition(window_func_context, pos_arg).
//
// Emits a call to "row_number_window_func", passing the address returned by
// window_func_context->output() as an integer constant together with pos_arg,
// and returns the call's result.
236  const WindowFunctionContext* window_func_context,
237  llvm::Value* pos_arg) {
239  const auto window_position = cgen_state_->emitCall(
240  "row_number_window_func",
241  {cgen_state_->llInt(reinterpret_cast<const int64_t>(window_func_context->output())),
242  pos_arg});
243  return window_position;
244 }
245 
246 // Generate code for fixed length column types (number, timestamp or date,
247 // dictionary-encoded string)
//
// NOTE(review): the signature line (listing line 248) and listing line 253 are
// missing from this extract. Callers in this file invoke the function as
// codegenFixedLengthColVar(col_var, col_byte_stream, pos_arg[, window_function_context]).
//
// Decodes the value at pos_arg from col_byte_stream with the decoder from
// get_col_decoder(). Integer results are sign-extended or truncated to the
// column's bit width and, for nullable fixed-encoded or narrow dictionary
// columns, have their null value adjusted; floating point results are returned
// as decoded.
249  const Analyzer::ColumnVar* col_var,
250  llvm::Value* col_byte_stream,
251  llvm::Value* pos_arg,
252  const WindowFunctionContext* window_function_context) {
254  const auto decoder = get_col_decoder(col_var);
255  auto dec_val = decoder->codegenDecode(col_byte_stream, pos_arg, cgen_state_->module_);
// The decode instruction is inserted at the builder's current insert point.
256  cgen_state_->ir_builder_.Insert(dec_val);
257  auto dec_type = dec_val->getType();
258  llvm::Value* dec_val_cast{nullptr};
259  const auto& col_ti = col_var->get_type_info();
260  if (dec_type->isIntegerTy()) {
261  auto dec_width = static_cast<llvm::IntegerType*>(dec_type)->getBitWidth();
262  auto col_width = get_col_bit_width(col_var);
// Sign-extend when the column is wider than the decoded value, truncate otherwise.
263  dec_val_cast = cgen_state_->ir_builder_.CreateCast(
264  static_cast<size_t>(col_width) > dec_width ? llvm::Instruction::CastOps::SExt
265  : llvm::Instruction::CastOps::Trunc,
266  dec_val,
267  get_int_type(col_width, cgen_state_->context_));
268  bool adjust_fixed_enc_null = true;
269  if (window_function_context &&
270  window_function_context->getWindowFunction()->hasRangeModeFraming()) {
271  // we only need to cast it to 8 byte iff it is encoded type
272  // (i.e., the size of non-encoded timestamp type is 8 byte)
273  const auto order_key_ti =
274  window_function_context->getOrderKeyColumnBufferTypes().front();
// The null adjustment is skipped when the first order key is a 4-byte timestamp.
275  if (order_key_ti.is_timestamp() && order_key_ti.get_size() == 4) {
276  adjust_fixed_enc_null = false;
277  }
278  }
// Applies only to nullable columns that are fixed-encoded or dictionary-encoded
// on fewer than 4 bytes.
279  if (adjust_fixed_enc_null &&
280  (col_ti.get_compression() == kENCODING_FIXED ||
281  (col_ti.get_compression() == kENCODING_DICT && col_ti.get_size() < 4)) &&
282  !col_ti.get_notnull()) {
283  dec_val_cast = codgenAdjustFixedEncNull(dec_val_cast, col_ti);
284  }
285  } else {
// Non-integer results must be float/double from an unencoded column whose SQL
// type matches the decoded LLVM type.
286  CHECK_EQ(kENCODING_NONE, col_ti.get_compression());
287  CHECK(dec_type->isFloatTy() || dec_type->isDoubleTy());
288  if (dec_type->isDoubleTy()) {
289  CHECK(col_ti.get_type() == kDOUBLE);
290  } else if (dec_type->isFloatTy()) {
291  CHECK(col_ti.get_type() == kFLOAT);
292  }
293  dec_val_cast = dec_val;
294  }
295  CHECK(dec_val_cast);
296  return dec_val_cast;
297 }
298 
// NOTE(review): the signature line (listing line 299) and listing lines 305,
// 314, 338 and 343 are missing from this extract; the function name is not
// visible here. This is presumably the window-function variant that
// codegenColVar() calls with (col_var, col_byte_stream, pos_arg, co,
// window_func_context) - confirm against the real source.
//
// Decodes a fixed-length column value only when the row position is valid
// (pos_arg >= 0) and otherwise yields the type's inline null. The two outcomes
// are merged with a PHI node in the "window.pos_notvalid" block.
300  const Analyzer::ColumnVar* col_var,
301  llvm::Value* col_byte_stream,
302  llvm::Value* pos_arg,
303  const CompilationOptions& co,
304  const WindowFunctionContext* window_function_context) {
// Remembered as the predecessor block for the null incoming value of the PHI.
306  const auto orig_bb = cgen_state_->ir_builder_.GetInsertBlock();
307  const auto pos_valid_bb = llvm::BasicBlock::Create(
308  cgen_state_->context_, "window.pos_valid", cgen_state_->current_func_);
309  const auto pos_notvalid_bb = llvm::BasicBlock::Create(
310  cgen_state_->context_, "window.pos_notvalid", cgen_state_->current_func_);
311  const auto pos_is_valid =
312  cgen_state_->ir_builder_.CreateICmpSGE(pos_arg, cgen_state_->llInt(int64_t(0)));
// NOTE(review): listing line 314 (the right-hand side of this comparison and
// the opening brace) is missing from this extract.
313  if (window_function_context->getWindowFunction()->getKind() ==
315  // NTH_VALUE needs to return null if N > partition size
316  // To do this, we store null value to the output buffer of the current row
317  // if following requirements for processing NTH_VALUE are not satisfied
318  // 1. current row is valid
319  // 2. N < partition size that the current row is included
320  const auto window_func_args = window_function_context->getWindowFunction()->getArgs();
// NOTE(review): the dynamic_cast result is dereferenced on the next line
// without a null check - this assumes the second argument is always a Constant.
321  auto n_value_ptr = dynamic_cast<Analyzer::Constant*>(window_func_args[1].get());
322  auto n_value_lv = cgen_state_->llInt((int64_t)n_value_ptr->get_constval().intval);
323  CHECK(n_value_lv);
324 
325  auto partition_index_lv = executor_->codegenCurrentPartitionIndex(
326  window_function_context, this, co, pos_arg);
327  // # elems per partition
328  const auto pi32_type =
329  llvm::PointerType::get(get_int_type(32, cgen_state_->context_), 0);
330  const auto partition_count_buf =
331  cgen_state_->llInt(reinterpret_cast<int64_t>(window_function_context->counts()));
// NOTE(review): listing line 338 (the remaining argument(s) of this call) is
// missing from this extract.
332  auto partition_count_buf_ptr_lv = CodegenUtil::createPtrWithHoistedMemoryAddr(
333  cgen_state_,
334  this,
335  co,
336  partition_count_buf,
337  pi32_type,
339  .front();
340 
341  // # elems of the given partition
// NOTE(review): listing line 343 (the start of this initializer expression) is
// missing from this extract.
342  const auto num_elem_current_partition_ptr =
344  partition_count_buf_ptr_lv,
345  partition_index_lv);
// The loaded count is cast to 64 bits before being compared with N.
346  const auto num_elem_current_partition_lv = cgen_state_->castToTypeIn(
347  cgen_state_->ir_builder_.CreateLoad(
348  num_elem_current_partition_ptr->getType()->getPointerElementType(),
349  num_elem_current_partition_ptr),
350  64);
351  auto is_valid_n_value_lv = cgen_state_->ir_builder_.CreateICmpSLT(
352  n_value_lv, num_elem_current_partition_lv, "is_valid_nth_value");
353  auto cond_lv = cgen_state_->ir_builder_.CreateAnd(
354  is_valid_n_value_lv, pos_is_valid, "is_valid_row_for_nth_value");
355  // return the current row value iff 1) it is a valid row and 2) N < partition_size
356  cgen_state_->ir_builder_.CreateCondBr(cond_lv, pos_valid_bb, pos_notvalid_bb);
357  } else {
358  // return the current row value if it is valid
359  cgen_state_->ir_builder_.CreateCondBr(pos_is_valid, pos_valid_bb, pos_notvalid_bb);
360  }
// Valid path: decode the value, then join the common successor block.
361  cgen_state_->ir_builder_.SetInsertPoint(pos_valid_bb);
362  const auto fixed_length_column_lv = codegenFixedLengthColVar(
363  col_var, col_byte_stream, pos_arg, window_function_context);
364  cgen_state_->ir_builder_.CreateBr(pos_notvalid_bb);
365  cgen_state_->ir_builder_.SetInsertPoint(pos_notvalid_bb);
// Merge: decoded value when coming from pos_valid_bb, inline null when coming
// from orig_bb.
366  const auto window_func_call_phi =
367  cgen_state_->ir_builder_.CreatePHI(fixed_length_column_lv->getType(), 2);
368  window_func_call_phi->addIncoming(fixed_length_column_lv, pos_valid_bb);
369  const auto& col_ti = col_var->get_type_info();
370  const auto null_lv =
371  col_ti.is_fp() ? static_cast<llvm::Value*>(cgen_state_->inlineFpNull(col_ti))
372  : static_cast<llvm::Value*>(cgen_state_->inlineIntNull(col_ti));
373  window_func_call_phi->addIncoming(null_lv, orig_bb);
374  return window_func_call_phi;
375 }
376 
// NOTE(review): the signature line (listing line 377) and listing line 380 are
// missing from this extract. codegenColVar() calls this function as
// codegenVariableLengthStringColVar(col_byte_stream, pos_arg).
//
// Emits an external call to "string_decode" for the string at pos_arg and
// returns three values: the returned string-view struct, its element 0, and
// its element 1 truncated to a 32-bit integer.
378  llvm::Value* col_byte_stream,
379  llvm::Value* pos_arg) {
381  // real (not dictionary-encoded) strings; store the pointer to the payload
382  auto* const string_view = cgen_state_->emitExternalCall(
383  "string_decode", createStringViewStructType(), {col_byte_stream, pos_arg});
384  auto* str_lv = cgen_state_->ir_builder_.CreateExtractValue(string_view, 0);
385  auto* len_lv = cgen_state_->ir_builder_.CreateExtractValue(string_view, 1);
386  len_lv = cgen_state_->ir_builder_.CreateTrunc(
387  len_lv, llvm::Type::getInt32Ty(cgen_state_->context_));
388  return {string_view, str_lv, len_lv};
389 }
390 
// NOTE(review): the signature line (listing line 391) and listing line 393 are
// missing from this extract. The cross-reference index at the end of the page
// lists it as
//   llvm::Value* codegenRowId(const Analyzer::ColumnVar* col_var,
//                             const CompilationOptions& co).
//
// Builds the row id for the current row: the row position, plus the fragment
// offset (taken from cgen_state_->frag_offsets_ or, for rte_idx > 0, loaded
// from the "frag_row_off" argument), plus the table generation's start_rowid
// when that is positive.
392  const CompilationOptions& co) {
394  const auto offset_lv = cgen_state_->frag_offsets_[adjusted_range_table_index(col_var)];
395  llvm::Value* start_rowid_lv{nullptr};
396  const auto& table_generation = executor()->getTableGeneration(col_var->getTableKey());
397  if (table_generation.start_rowid > 0) {
398  // Handle the multi-node case: each leaf receives a start rowid used
399  // to offset the local rowid and generate a cluster-wide unique rowid.
400  Datum d;
401  d.bigintval = table_generation.start_rowid;
// The start rowid is generated through codegen() as a kBIGINT constant.
402  const auto start_rowid = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
403  const auto start_rowid_lvs = codegen(start_rowid.get(), kENCODING_NONE, {}, co);
404  CHECK_EQ(size_t(1), start_rowid_lvs.size());
405  start_rowid_lv = start_rowid_lvs.front();
406  }
407  auto rowid_lv = posArg(col_var);
408  if (offset_lv) {
409  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, offset_lv);
410  } else if (col_var->get_rte_idx() > 0) {
// No precomputed offset: load entry rte_idx of the "frag_row_off" argument.
411  auto frag_off_ptr = get_arg_by_name(cgen_state_->row_func_, "frag_row_off");
412  auto input_off_ptr = cgen_state_->ir_builder_.CreateGEP(
413  frag_off_ptr->getType()->getScalarType()->getPointerElementType(),
414  frag_off_ptr,
415  cgen_state_->llInt(int32_t(col_var->get_rte_idx())));
416  auto rowid_offset_lv = cgen_state_->ir_builder_.CreateLoad(
417  input_off_ptr->getType()->getPointerElementType(), input_off_ptr);
418  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, rowid_offset_lv);
419  }
420  if (table_generation.start_rowid > 0) {
421  CHECK(start_rowid_lv);
422  rowid_lv = cgen_state_->ir_builder_.CreateAdd(rowid_lv, start_rowid_lv);
423  }
424  return rowid_lv;
425 }
426 
427 namespace {
428 
429 SQLTypes get_phys_int_type(const size_t byte_sz) {
430  switch (byte_sz) {
431  case 1:
432  return kBOOLEAN;
433  // TODO: kTINYINT
434  case 2:
435  return kSMALLINT;
436  case 4:
437  return kINT;
438  case 8:
439  return kBIGINT;
440  default:
441  CHECK(false);
442  }
443  return kNULLT;
444 }
445 
446 } // namespace
447 
// Converts a value that was widened from a narrower physical encoding so that
// the narrow encoding's null sentinel becomes the inline null of the column's
// logical type.
//
// `val` is truncated back to the physical width (col_ti.get_size() * 8 bits)
// and passed to a runtime "cast_<from>_to_<to>_nullable" function together
// with the source and target null values. Dictionary-encoded columns use the
// unsigned source type with the maximum uint8_t/uint16_t as null.
//
// NOTE(review): listing lines 450 and 479 are missing from this extract; the
// second gap falls inside the SQLTypeInfo constructor call below.
448 llvm::Value* CodeGenerator::codgenAdjustFixedEncNull(llvm::Value* val,
449  const SQLTypeInfo& col_ti) {
// Only meaningful when the physical size is smaller than the logical size.
451  CHECK_LT(col_ti.get_size(), col_ti.get_logical_size());
452  const auto col_phys_width = col_ti.get_size() * 8;
453  auto from_typename = "int" + std::to_string(col_phys_width) + "_t";
454  auto adjusted = cgen_state_->ir_builder_.CreateCast(
455  llvm::Instruction::CastOps::Trunc,
456  val,
457  get_int_type(col_phys_width, cgen_state_->context_));
458  if (col_ti.get_compression() == kENCODING_DICT) {
// Dictionary ids are treated as unsigned: "intN_t" becomes "uintN_t".
459  from_typename = "u" + from_typename;
460  llvm::Value* from_null{nullptr};
461  switch (col_ti.get_size()) {
462  case 1:
463  from_null = cgen_state_->llInt(std::numeric_limits<uint8_t>::max());
464  break;
465  case 2:
466  from_null = cgen_state_->llInt(std::numeric_limits<uint16_t>::max());
467  break;
468  default:
469  CHECK(false);
470  }
471  return cgen_state_->emitCall(
472  "cast_" + from_typename + "_to_" + numeric_type_name(col_ti) + "_nullable",
473  {adjusted, from_null, cgen_state_->inlineIntNull(col_ti)});
474  }
// Non-dictionary case: the source null is the inline null of a type with the
// physical integer width of the column.
475  SQLTypeInfo col_phys_ti(get_phys_int_type(col_ti.get_size()),
476  col_ti.get_dimension(),
477  col_ti.get_scale(),
478  false,
480  0,
481  col_ti.get_subtype());
482  return cgen_state_->emitCall(
483  "cast_" + from_typename + "_to_" + numeric_type_name(col_ti) + "_nullable",
484  {adjusted,
485  cgen_state_->inlineIntNull(col_phys_ti),
486  cgen_state_->inlineIntNull(col_ti)});
487 }
488 
489 llvm::Value* CodeGenerator::foundOuterJoinMatch(const size_t nesting_level) const {
490  CHECK_GE(nesting_level, size_t(1));
491  CHECK_LE(nesting_level,
492  static_cast<size_t>(cgen_state_->outer_join_match_found_per_level_.size()));
493  return cgen_state_->outer_join_match_found_per_level_[nesting_level - 1];
494 }
495 
// NOTE(review): the signature line (listing line 496) and listing lines 500
// and 518 are missing from this extract. The cross-reference index at the end
// of the page lists it as
//   std::vector<llvm::Value*> codegenOuterJoinNullPlaceholder(
//       const Analyzer::ColumnVar* col_var, const bool fetch_column,
//       const CompilationOptions& co).
//
// Generates the column value guarded by the outer-join match value of its
// nesting level: when the match value is true the real column value is
// generated, otherwise a null constant of the column's type. The two results
// are merged per component with PHI nodes, and code generation continues in
// the "back_from_outer_join" block.
497  const Analyzer::ColumnVar* col_var,
498  const bool fetch_column,
499  const CompilationOptions& co) {
// Group-by references are resolved from the cache and need no branching.
501  const auto grouped_col_lv = resolveGroupedColumnReference(col_var);
502  if (grouped_col_lv) {
503  return {grouped_col_lv};
504  }
505  const auto outer_join_args_bb = llvm::BasicBlock::Create(
506  cgen_state_->context_, "outer_join_args", cgen_state_->current_func_);
507  const auto outer_join_nulls_bb = llvm::BasicBlock::Create(
508  cgen_state_->context_, "outer_join_nulls", cgen_state_->current_func_);
509  const auto phi_bb = llvm::BasicBlock::Create(
510  cgen_state_->context_, "outer_join_phi", cgen_state_->current_func_);
511  const auto outer_join_match_lv = foundOuterJoinMatch(col_var->get_rte_idx());
512  CHECK(outer_join_match_lv);
513  cgen_state_->ir_builder_.CreateCondBr(
514  outer_join_match_lv, outer_join_args_bb, outer_join_nulls_bb);
515  const auto back_from_outer_join_bb = llvm::BasicBlock::Create(
516  cgen_state_->context_, "back_from_outer_join", cgen_state_->current_func_);
// Matched path: generate the actual column value.
517  cgen_state_->ir_builder_.SetInsertPoint(outer_join_args_bb);
519  const auto orig_lvs = codegenColVar(col_var, fetch_column, true, co);
520  // sometimes col_var used in the join qual needs to cast its column to sync with
521  // the target join column's type which generates a code with a new bb like cast_bb
522  // if so, we need to keep that bb to correctly construct phi_bb
523  // i.e., use cast_bb instead of outer_join_args_bb for the "casted" column
524  // which is the right end point
525  const auto needs_casting_col_var = needCastForHashJoinLhs(col_var);
// cast_bb is whatever block the builder ended up in after codegenColVar().
526  auto* cast_bb = cgen_state_->ir_builder_.GetInsertBlock();
527  cgen_state_->ir_builder_.CreateBr(phi_bb);
// Unmatched path: generate a null constant of the same type.
528  cgen_state_->ir_builder_.SetInsertPoint(outer_join_nulls_bb);
529  const auto& null_ti = col_var->get_type_info();
530  // since this represents a null constant, what value the datum object contains is
531  // meaningless we need to know what type we need to create a null constant and `null_ti`
532  // contains it
533  const auto null_constant = makeExpr<Analyzer::Constant>(null_ti, true, Datum{0});
534  auto const null_target_lvs = codegen(null_constant.get(), fetch_column, co);
535  cgen_state_->ir_builder_.CreateBr(phi_bb);
536  CHECK_EQ(orig_lvs.size(), null_target_lvs.size());
// Merge both paths, one PHI per generated component.
537  cgen_state_->ir_builder_.SetInsertPoint(phi_bb);
538  std::vector<llvm::Value*> target_lvs;
539  for (size_t i = 0; i < orig_lvs.size(); ++i) {
540  const auto target_type = orig_lvs[i]->getType();
541  const auto null_type = null_target_lvs[i]->getType();
542  CHECK_EQ(target_type, null_type);
543  auto target_phi = cgen_state_->ir_builder_.CreatePHI(target_type, 2);
544  const auto orig_lvs_bb = needs_casting_col_var ? cast_bb : outer_join_args_bb;
545  target_phi->addIncoming(orig_lvs[i], orig_lvs_bb);
546  target_phi->addIncoming(null_target_lvs[i], outer_join_nulls_bb);
547  target_lvs.push_back(target_phi);
548  }
549  cgen_state_->ir_builder_.CreateBr(back_from_outer_join_bb);
550  cgen_state_->ir_builder_.SetInsertPoint(back_from_outer_join_bb);
551  return target_lvs;
552 }
553 
// NOTE(review): the signature line (listing line 554) is missing from this
// extract. Callers in this file invoke the function as
// resolveGroupedColumnReference(col_var) and treat a null result as
// "not a grouped column reference".
//
// Returns the cached group-by expression value for a column expression that is
// an Analyzer::Var referring to a GROUP BY row, or nullptr otherwise. Only
// expressions with a negative range-table index are considered.
555  const Analyzer::ColumnVar* col_var) {
556  if (col_var->get_rte_idx() >= 0) {
557  return nullptr;
558  }
559  const auto& column_key = col_var->getColumnKey();
// NOTE(review): after the early return above get_rte_idx() is negative, so the
// second alternative of this CHECK can never hold; the check is effectively
// column_id == 0.
560  CHECK((column_key.column_id == 0) ||
561  (col_var->get_rte_idx() >= 0 && column_key.table_id > 0));
562  const auto var = dynamic_cast<const Analyzer::Var*>(col_var);
563  CHECK(var);
564  const auto var_no = var->get_varno();
565  CHECK_GE(var_no, 1);
566  if (var->get_which_row() == Analyzer::Var::kGROUPBY) {
// var_no is 1-based; the cache is indexed from 0.
567  CHECK_LE(static_cast<size_t>(var_no), cgen_state_->group_by_expr_cache_.size());
568  return cgen_state_->group_by_expr_cache_[var_no - 1];
569  }
570  return nullptr;
571 }
572 
573 // returns the byte stream argument for the given column
//
// NOTE(review): the signature line (listing line 574) is missing from this
// extract. codegenColVar() calls this function as
// colByteStream(col_var, fetch_column, hoist_literals).
//
// Looks up the row function argument named "col_buf<N>", where N is the local
// column id reported by plan_state_->getLocalColumnId(col_var, fetch_column),
// and returns it. The argument must be an i8 pointer; a missing argument trips
// CHECK(false). `hoist_literals` is not used in the visible body.
575  const bool fetch_column,
576  const bool hoist_literals) {
577  CHECK_GE(cgen_state_->row_func_->arg_size(), size_t(3));
578  const auto stream_arg_name =
579  "col_buf" + std::to_string(plan_state_->getLocalColumnId(col_var, fetch_column));
580  for (auto& arg : cgen_state_->row_func_->args()) {
581  if (arg.getName() == stream_arg_name) {
582  CHECK(arg.getType() == llvm::Type::getInt8PtrTy(cgen_state_->context_));
583  return &arg;
584  }
585  }
586  CHECK(false);
587  return nullptr;
588 }
589 
// Returns the IR value holding the row position to read for `expr`.
//
// For a column with range-table index > 0 the position comes from
// cgen_state_->scan_idx_to_hash_pos_ (an entry must exist); a pointer entry is
// loaded as a 32-bit integer and sign-extended to 64 bits, any other entry is
// returned as-is. For everything else the row function's 64-bit "pos" argument
// is returned. Aborts if no "pos" argument exists.
//
// NOTE(review): listing line 591 is missing from this extract.
590 llvm::Value* CodeGenerator::posArg(const Analyzer::Expr* expr) const {
592  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(expr);
593  if (col_var && col_var->get_rte_idx() > 0) {
594  const auto hash_pos_it =
595  cgen_state_->scan_idx_to_hash_pos_.find(col_var->get_rte_idx());
596  CHECK(hash_pos_it != cgen_state_->scan_idx_to_hash_pos_.end());
597  if (hash_pos_it->second->getType()->isPointerTy()) {
// Pointer entries must point at an i32; load it and widen to i64.
598  CHECK(hash_pos_it->second->getType()->getPointerElementType()->isIntegerTy(32));
599  llvm::Value* result = cgen_state_->ir_builder_.CreateLoad(
600  hash_pos_it->second->getType()->getPointerElementType(), hash_pos_it->second);
601  result = cgen_state_->ir_builder_.CreateSExt(
602  result, get_int_type(64, cgen_state_->context_));
603  return result;
604  }
605  return hash_pos_it->second;
606  }
607  for (auto& arg : cgen_state_->row_func_->args()) {
608  if (arg.getName() == "pos") {
609  CHECK(arg.getType()->isIntegerTy(64));
610  return &arg;
611  }
612  }
613  abort();
614 }
615 
616 // todo (yoonmin) : we have to revisit this logic and its usage
617 // when supporting join between more types beyond integer-like types, i.e., float
//
// NOTE(review): the signature line (listing line 618) is missing from this
// extract. The cross-reference index at the end of the page lists it as
//   const Analyzer::Expr* remove_cast_to_int(const Analyzer::Expr* expr).
//
// Returns the operand of `expr` when `expr` is a kCAST unary operator whose
// result type is an integer; returns nullptr in every other case, so callers
// must fall back to the original expression themselves.
619  const auto uoper = dynamic_cast<const Analyzer::UOper*>(expr);
620  if (!uoper || uoper->get_optype() != kCAST) {
621  return nullptr;
622  }
623  const auto& target_ti = uoper->get_type_info();
624  if (!target_ti.is_integer()) {
625  return nullptr;
626  }
627  return uoper->get_operand();
628 }
629 
630 std::shared_ptr<const Analyzer::Expr> CodeGenerator::hashJoinLhs(
631  const Analyzer::ColumnVar* rhs) const {
632  for (const auto& tautological_eq : plan_state_->join_info_.equi_join_tautologies_) {
633  CHECK(IS_EQUIVALENCE(tautological_eq->get_optype()));
634  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
635  tautological_eq->get_left_operand())) {
636  auto lhs_col = hashJoinLhsTuple(rhs, tautological_eq.get());
637  if (lhs_col) {
638  return lhs_col;
639  }
640  } else {
641  auto eq_right_op = tautological_eq->get_right_operand();
642  if (!rhs->get_type_info().is_string()) {
643  eq_right_op = remove_cast_to_int(eq_right_op);
644  }
645  if (!eq_right_op) {
646  eq_right_op = tautological_eq->get_right_operand();
647  }
648  if (*eq_right_op == *rhs) {
649  auto eq_left_op = tautological_eq->get_left_operand();
650  if (!eq_left_op->get_type_info().is_string()) {
651  eq_left_op = remove_cast_to_int(eq_left_op);
652  }
653  if (!eq_left_op) {
654  eq_left_op = tautological_eq->get_left_operand();
655  }
656  if (eq_left_op->get_type_info().is_geometry()) {
657  // skip cast for a geospatial lhs, since the rhs is likely to be a geospatial
658  // physical col without geospatial type info
659  return nullptr;
660  }
661  if (is_constructed_point(eq_left_op)) {
662  // skip cast for a constructed point lhs
663  return nullptr;
664  }
665  auto eq_left_op_col = dynamic_cast<const Analyzer::ColumnVar*>(eq_left_op);
666  if (!eq_left_op_col) {
667  if (dynamic_cast<const Analyzer::StringOper*>(eq_left_op)) {
668  return nullptr;
669  }
670  if (dynamic_cast<const Analyzer::FunctionOper*>(eq_left_op)) {
671  return nullptr;
672  }
673  auto const cast_expr = dynamic_cast<const Analyzer::UOper*>(eq_left_op);
674  if (cast_expr && cast_expr->get_type_info().is_date()) {
675  // sometimes we add cast operator explicitly when dealing w/ a join between
676  // (encoded) date types. And we have necessary casting logic for hash join
677  // depending on encoding types for date column.
678  // Therefore, we can just pass the column variable it is originated from
679  eq_left_op_col =
680  dynamic_cast<const Analyzer::ColumnVar*>(cast_expr->get_operand());
681  }
682  }
683  CHECK(eq_left_op_col) << "Expect Analyzer::ColumnVar* type expression: "
684  << eq_left_op->toString();
685  if (eq_left_op_col->get_rte_idx() != 0) {
686  return nullptr;
687  }
688  if (rhs->get_type_info().is_string()) {
689  return eq_left_op->deep_copy();
690  }
691  if (rhs->get_type_info().is_array()) {
692  // Note(jclay): Can this be restored from copy as above?
693  // If we fall through to the below return statement,
694  // a superfulous cast from DOUBLE[] to DOUBLE[] is made and
695  // this fails at a later stage in codegen.
696  return nullptr;
697  }
698  return makeExpr<Analyzer::UOper>(
699  rhs->get_type_info(), false, kCAST, eq_left_op->deep_copy());
700  }
701  }
702  }
703  return nullptr;
704 }
705 
// NOTE(review): the signature line (listing line 706) is missing from this
// extract. codegenOuterJoinNullPlaceholder() calls this function as
// needCastForHashJoinLhs(col_var), and the body refers to its parameter as `rhs`.
//
// Reports whether the left-hand side of the equi-join condition matching `rhs`
// is a non-tuple column at range-table index 0 with a `rhs` that is neither a
// string nor an array. The traversal mirrors hashJoinLhs(); every other
// outcome yields false.
//
// NOTE(review): hashJoinLhs() additionally unwraps a date-typed UOper before
// its CHECK on eq_left_op_col; this function does not, so such a lhs reaches
// CHECK(eq_left_op_col) with a null pointer - confirm whether that is intended.
707  for (const auto& tautological_eq : plan_state_->join_info_.equi_join_tautologies_) {
708  CHECK(IS_EQUIVALENCE(tautological_eq->get_optype()));
709  if (dynamic_cast<const Analyzer::ExpressionTuple*>(
710  tautological_eq->get_left_operand())) {
711  auto lhs_col = hashJoinLhsTuple(rhs, tautological_eq.get());
712  if (lhs_col) {
713  // our join column normalizer falls back to the loop join
714  // when columns of two join tables do not have the same types
715  // todo (yoonmin): relax this
716  return false;
717  }
718  } else {
// Compare against the rhs with any integer cast removed (non-string rhs only).
719  auto eq_right_op = tautological_eq->get_right_operand();
720  if (!rhs->get_type_info().is_string()) {
721  eq_right_op = remove_cast_to_int(eq_right_op);
722  }
723  if (!eq_right_op) {
724  eq_right_op = tautological_eq->get_right_operand();
725  }
726  if (*eq_right_op == *rhs) {
727  auto eq_left_op = tautological_eq->get_left_operand();
728  if (!eq_left_op->get_type_info().is_string()) {
729  eq_left_op = remove_cast_to_int(eq_left_op);
730  }
731  if (!eq_left_op) {
732  eq_left_op = tautological_eq->get_left_operand();
733  }
734  if (eq_left_op->get_type_info().is_geometry()) {
735  // skip cast for a geospatial lhs, since the rhs is likely to be a geospatial
736  // physical col without geospatial type info
737  return false;
738  }
739  if (is_constructed_point(eq_left_op)) {
740  // skip cast for a constructed point lhs
741  return false;
742  }
743  auto eq_left_op_col = dynamic_cast<const Analyzer::ColumnVar*>(eq_left_op);
744  if (!eq_left_op_col) {
745  if (dynamic_cast<const Analyzer::StringOper*>(eq_left_op)) {
746  return false;
747  }
748  if (dynamic_cast<const Analyzer::FunctionOper*>(eq_left_op)) {
749  return false;
750  }
751  }
752  CHECK(eq_left_op_col);
753  if (eq_left_op_col->get_rte_idx() != 0) {
754  return false;
755  }
756  if (rhs->get_type_info().is_string()) {
757  return false;
758  }
759  if (rhs->get_type_info().is_array()) {
760  return false;
761  }
762  return true;
763  }
764  }
765  }
766  return false;
767 }
768 
769 std::shared_ptr<const Analyzer::ColumnVar> CodeGenerator::hashJoinLhsTuple(
770  const Analyzer::ColumnVar* rhs,
771  const Analyzer::BinOper* tautological_eq) const {
772  const auto lhs_tuple_expr =
773  dynamic_cast<const Analyzer::ExpressionTuple*>(tautological_eq->get_left_operand());
774  const auto rhs_tuple_expr = dynamic_cast<const Analyzer::ExpressionTuple*>(
775  tautological_eq->get_right_operand());
776  CHECK(lhs_tuple_expr && rhs_tuple_expr);
777  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
778  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
779  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
780  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
781  if (*rhs_tuple[i] == *rhs) {
782  const auto lhs_col =
783  std::static_pointer_cast<const Analyzer::ColumnVar>(lhs_tuple[i]);
784  return lhs_col->get_rte_idx() == 0 ? lhs_col : nullptr;
785  }
786  }
787  return nullptr;
788 }
bool hasRangeModeFraming() const
Definition: Analyzer.h:2959
JoinInfo join_info_
Definition: PlanState.h:63
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * castToTypeIn(llvm::Value *val, const size_t bit_width)
Definition: CgenState.cpp:150
SqlWindowFunctionKind getKind() const
Definition: Analyzer.h:2925
std::vector< llvm::Value * > outer_join_match_found_per_level_
Definition: CgenState.h:395
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:618
std::unordered_map< size_t, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:385
Definition: sqltypes.h:76
SQLTypes
Definition: sqltypes.h:65
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:72
CgenState * cgen_state_
llvm::Value * codegenRowId(const Analyzer::ColumnVar *col_var, const CompilationOptions &co)
Definition: ColumnIR.cpp:391
void addColumnToFetch(const shared::ColumnKey &column_key, bool unmark_lazy_fetch=false)
Definition: PlanState.cpp:124
bool is_fp() const
Definition: sqltypes.h:573
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
const Expr * get_right_operand() const
Definition: Analyzer.h:456
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1682
llvm::IRBuilder ir_builder_
Definition: CgenState.h:384
std::vector< llvm::Value * > codegenOuterJoinNullPlaceholder(const Analyzer::ColumnVar *col_var, const bool fetch_column, const CompilationOptions &co)
Definition: ColumnIR.cpp:496
const std::vector< SQLTypeInfo > & getOrderKeyColumnBufferTypes() const
llvm::Value * posArg(const Analyzer::Expr *) const
Definition: ColumnIR.cpp:590
#define CHECK_GE(x, y)
Definition: Logger.h:306
llvm::Value * codgenAdjustFixedEncNull(llvm::Value *, const SQLTypeInfo &)
Definition: ColumnIR.cpp:448
Definition: sqldefs.h:51
const int8_t * output() const
llvm::Value * foundOuterJoinMatch(const size_t nesting_level) const
Definition: ColumnIR.cpp:489
const int32_t * counts() const
virtual std::vector< llvm::Value * > codegenColumn(const Analyzer::ColumnVar *, const bool fetch_column, const CompilationOptions &)
Definition: ColumnIR.cpp:94
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::shared_ptr< Decoder > get_col_decoder(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:29
EncodingType get_compression() const
Definition: Analyzer.h:204
static WindowFunctionContext * getActiveWindowFunctionContext(Executor *executor)
std::string to_string(char const *&&v)
llvm::Function * row_func_
Definition: CgenState.h:374
void addColumnToNotFetch(const shared::ColumnKey &column_key)
Definition: PlanState.cpp:147
std::vector< llvm::Value * > group_by_expr_cache_
Definition: CgenState.h:391
std::shared_ptr< const Analyzer::Expr > hashJoinLhs(const Analyzer::ColumnVar *rhs) const
Definition: ColumnIR.cpp:630
llvm::Value * codegenWindowPosition(const WindowFunctionContext *window_func_context, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:235
llvm::Module * module_
Definition: CgenState.h:373
int getLocalColumnId(const Analyzer::ColumnVar *col_var, const bool fetch_column)
Definition: PlanState.cpp:52
bool isColumnToFetch(const shared::ColumnKey &column_key) const
Definition: PlanState.cpp:114
size_t get_bit_width(const SQLTypeInfo &ti)
llvm::LLVMContext & context_
Definition: CgenState.h:382
llvm::Function * current_func_
Definition: CgenState.h:376
llvm::Value * emitExternalCall(const std::string &fname, llvm::Type *ret_type, const std::vector< llvm::Value * > args, const std::vector< llvm::Attribute::AttrKind > &fnattrs={}, const bool has_struct_return=false)
Definition: CgenState.cpp:395
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
const std::vector< std::shared_ptr< Analyzer::Expr > > & getArgs() const
Definition: Analyzer.h:2927
bool isLazyFetchColumn(const Analyzer::Expr *target_expr) const
Definition: PlanState.cpp:22
int get_logical_size() const
Definition: sqltypes.h:421
size_t get_col_bit_width(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:83
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:65
std::string toString() const override
Definition: Analyzer.cpp:2717
int64_t bigintval
Definition: Datum.h:76
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
Definition: Execute.h:213
Executor * executor_
std::unordered_map< int, llvm::Value * > scan_idx_to_hash_pos_
Definition: CgenState.h:396
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::vector< llvm::Value * > codegenColVar(const Analyzer::ColumnVar *, const bool fetch_column, const bool update_query_plan, const CompilationOptions &)
Definition: ColumnIR.cpp:106
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:217
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:561
static const int NUM_EXECUTION_DEVICES
PlanState * plan_state_
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:80
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
#define CHECK_LE(x, y)
Definition: Logger.h:304
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::vector< llvm::Value * > frag_offsets_
Definition: CgenState.h:393
llvm::Value * codegenFixedLengthColVarInWindow(const Analyzer::ColumnVar *col_var, llvm::Value *col_byte_stream, llvm::Value *pos_arg, const CompilationOptions &co, const WindowFunctionContext *window_function_context=nullptr)
Definition: ColumnIR.cpp:299
llvm::StructType * createStringViewStructType()
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:393
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:402
bool needCastForHashJoinLhs(const Analyzer::ColumnVar *rhs) const
Definition: ColumnIR.cpp:706
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:249
#define CHECK(condition)
Definition: Logger.h:291
llvm::Value * colByteStream(const Analyzer::ColumnVar *col_var, const bool fetch_column, const bool hoist_literals)
Definition: ColumnIR.cpp:574
const Expr * get_left_operand() const
Definition: Analyzer.h:455
const Analyzer::WindowFunction * getWindowFunction() const
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:230
Definition: sqltypes.h:72
bool is_string() const
Definition: sqltypes.h:561
std::shared_ptr< const Analyzer::ColumnVar > hashJoinLhsTuple(const Analyzer::ColumnVar *rhs, const Analyzer::BinOper *tautological_eq) const
Definition: ColumnIR.cpp:769
llvm::Value * codegenFixedLengthColVar(const Analyzer::ColumnVar *col_var, llvm::Value *col_byte_stream, llvm::Value *pos_arg, const WindowFunctionContext *window_function_context=nullptr)
Definition: ColumnIR.cpp:248
std::vector< llvm::Value * > createPtrWithHoistedMemoryAddr(CgenState *cgen_state, CodeGenerator *code_generator, CompilationOptions const &co, llvm::ConstantInt *ptr_int_val, llvm::Type *type, size_t num_devices_to_hoist_literal)
std::vector< llvm::Value * > codegenVariableLengthStringColVar(llvm::Value *col_byte_stream, llvm::Value *pos_arg)
Definition: ColumnIR.cpp:377
int32_t get_rte_idx() const
Definition: Analyzer.h:202
Definition: Datum.h:71
SQLTypes get_phys_int_type(const size_t byte_sz)
Definition: ColumnIR.cpp:429
int adjusted_range_table_index(const Analyzer::ColumnVar *col_var)
Definition: ColumnIR.cpp:88
bool is_array() const
Definition: sqltypes.h:585
shared::TableKey getTableKey() const
Definition: Analyzer.h:199
llvm::Value * resolveGroupedColumnReference(const Analyzer::ColumnVar *)
Definition: ColumnIR.cpp:554
llvm::ConstantFP * inlineFpNull(const SQLTypeInfo &)
Definition: CgenState.cpp:104
Executor * executor() const