OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InValuesIR.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CodeGenerator.h"
18 #include "Execute.h"
19 
20 #include <future>
21 #include <memory>
22 
// Generates IR for an IN (value list) predicate and returns the resulting value.
//
// Throws std::runtime_error when the left-hand argument is an unnest expression.
// With literal hoisting enabled, a bitmap built by createInValuesBitmap() is tried
// first; when no bitmap is produced (or hoisting is off), the result is an OR-chain
// of equality comparisons between the argument and every value in the list.
//
// NOTE(review): the embedded listing numbers skip 25 and 46, so this extract appears
// to be missing two source lines (one of them the '?' branch of the conditional
// returned for an empty bitmap) -- restore them from the original file before editing.
23 llvm::Value* CodeGenerator::codegen(const Analyzer::InValues* expr,
24  const CompilationOptions& co) {
26  const auto in_arg = expr->get_arg();
27  if (is_unnest(in_arg)) {
28  throw std::runtime_error("IN not supported for unnested expressions");
29  }
30  const auto& expr_ti = expr->get_type_info();
31  CHECK(expr_ti.is_boolean());
    // Generate the left-hand side once; every comparison below reuses lhs_lvs.
32  const auto lhs_lvs = codegen(in_arg, true, co);
    // Seed for the OR-chain: an i1 false constant when the IN expression's type is
    // not-null, otherwise a zero constant built from an int8_t (the nullable path
    // combines values through the "logical_or" runtime call below).
33  llvm::Value* result{nullptr};
34  if (expr_ti.get_notnull()) {
35  result = llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_),
36  false);
37  } else {
38  result = cgen_state_->llInt(int8_t(0));
39  }
40  CHECK(result);
41  if (co.hoist_literals) { // TODO(alex): remove this constraint
42  auto in_vals_bitmap = createInValuesBitmap(expr, co);
43  if (in_vals_bitmap) {
    // An empty bitmap needs no lookup: the answer depends only on hasNull().
    // When hasNull() is false the seed value (false / zero) is returned as-is.
44  if (in_vals_bitmap->isEmpty()) {
45  return in_vals_bitmap->hasNull()
47  : result;
48  }
    // The bitmap lookup takes a single needle value, hence exactly one lhs value.
49  CHECK_EQ(size_t(1), lhs_lvs.size());
    // addInValuesBitmap() receives the unique_ptr by reference and returns a raw
    // pointer; presumably cgen_state_ takes ownership -- confirm in CgenState.h.
50  return cgen_state_->addInValuesBitmap(in_vals_bitmap)
51  ->codegen(lhs_lvs.front(), executor());
52  }
53  }
    // Fallback: one equality comparison (kEQ, kONE) per list entry, OR-ed together.
    // NOTE(review): `auto in_val` copies each element of get_value_list() per
    // iteration (a std::shared_ptr per the declaration shown for Analyzer.h:646);
    // `const auto&` would avoid the copies.
54  if (expr_ti.get_notnull()) {
55  for (auto in_val : expr->get_value_list()) {
56  result = cgen_state_->ir_builder_.CreateOr(
57  result,
58  toBool(
59  codegenCmp(kEQ, kONE, lhs_lvs, in_arg->get_type_info(), in_val.get(), co)));
60  }
61  } else {
    // Nullable result: combine through the "logical_or" runtime function, passing
    // the inline null sentinel for the boolean result type as the third argument.
62  for (auto in_val : expr->get_value_list()) {
63  const auto crt =
64  codegenCmp(kEQ, kONE, lhs_lvs, in_arg->get_type_info(), in_val.get(), co);
65  result = cgen_state_->emitCall("logical_or",
66  {result, crt, cgen_state_->inlineIntNull(expr_ti)});
67  }
68  }
69  return result;
70 }
71 
// Generates IR for an IN predicate whose right-hand side is a set of integers
// (get_value_list() here yields a std::vector<int64_t>), always via an
// InValuesBitmap lookup.
//
// Throws std::runtime_error when the argument is an unnest expression or when
// literal hoisting is disabled.
//
// NOTE(review): the embedded listing numbers skip 74 and 91-92, so this extract
// appears to be missing source lines (including two InValuesBitmap constructor
// arguments) -- restore them from the original file before editing.
72 llvm::Value* CodeGenerator::codegen(const Analyzer::InIntegerSet* in_integer_set,
73  const CompilationOptions& co) {
75  const auto in_arg = in_integer_set->get_arg();
76  if (is_unnest(in_arg)) {
77  throw std::runtime_error("IN not supported for unnested expressions");
78  }
79  const auto& ti = in_integer_set->get_arg()->get_type_info();
    // Null sentinel for the argument's type; passed to the bitmap constructor below.
80  const auto needle_null_val = inline_int_null_val(ti);
81  if (!co.hoist_literals) {
82  // We never run without literal hoisting in real world scenarios, this avoids a crash
83  // when testing.
84  throw std::runtime_error(
85  "IN subquery with many right-hand side values not supported when literal "
86  "hoisting is disabled");
87  }
    // NOTE(review): unlike createInValuesBitmap(), construction here is not wrapped
    // in try/catch, so anything the constructor throws propagates to the caller.
88  auto in_vals_bitmap = std::make_unique<InValuesBitmap>(
89  in_integer_set->get_value_list(),
90  needle_null_val,
93  executor()->deviceCount(co.device_type),
94  executor()->data_mgr_,
95  co);
96  const auto& in_integer_set_ti = in_integer_set->get_type_info();
97  CHECK(in_integer_set_ti.is_boolean());
98  const auto lhs_lvs = codegen(in_arg, true, co);
    // NOTE(review): `result` is assigned and CHECKed but never read afterwards --
    // the function returns the bitmap lookup directly. It looks like leftover code
    // mirroring the InValues overload; confirm and consider removing.
99  llvm::Value* result{nullptr};
100  if (in_integer_set_ti.get_notnull()) {
101  result = llvm::ConstantInt::get(llvm::IntegerType::getInt1Ty(cgen_state_->context_),
102  false);
103  } else {
104  result = cgen_state_->llInt(int8_t(0));
105  }
106  CHECK(result);
    // The bitmap lookup takes a single needle value, hence exactly one lhs value.
107  CHECK_EQ(size_t(1), lhs_lvs.size());
108  return cgen_state_->addInValuesBitmap(in_vals_bitmap)
109  ->codegen(lhs_lvs.front(), executor());
110 }
111 
// Tries to build an InValuesBitmap for an IN (value list) expression.
//
// Returns nullptr (the caller then falls back to a comparison chain) when:
//   - the argument type is neither an integer nor a dictionary-encoded string,
//   - the list has 3 or fewer values,
//   - a multi-threaded worker meets a value that is not a constant, or
//   - constructing the bitmap throws.
//
// Lists with more than 10000 values are converted by cpu_threads() workers, each
// handling one contiguous chunk of the list; smaller lists are converted inline.
//
// NOTE(review): the embedded listing numbers skip 115 and 197-199, so this extract
// appears to be missing source lines (including InValuesBitmap constructor
// arguments) -- restore them from the original file before editing.
112 std::unique_ptr<InValuesBitmap> CodeGenerator::createInValuesBitmap(
113  const Analyzer::InValues* in_values,
114  const CompilationOptions& co) {
116  const auto& value_list = in_values->get_value_list();
117  const auto val_count = value_list.size();
118  const auto& ti = in_values->get_arg()->get_type_info();
119  if (!(ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT))) {
120  return nullptr;
121  }
    // String arguments need the dictionary proxy to translate literals to ids.
122  const auto sdp =
123  ti.is_string()
124  ? executor()->getStringDictionaryProxy(
125  ti.getStringDictKey(), executor()->getRowSetMemoryOwner(), true)
126  : nullptr;
127  if (val_count > 3) {
128  using ListIterator = decltype(value_list.begin());
129  std::vector<int64_t> values;
130  const auto needle_null_val = inline_int_null_val(ti);
131  const int worker_count = val_count > 10000 ? cpu_threads() : int(1);
    // One output vector per worker so threads never write to a shared container.
132  std::vector<std::vector<int64_t>> values_set(worker_count, std::vector<int64_t>());
133  std::vector<std::future<bool>> worker_threads;
134  auto start_it = value_list.begin();
    // stride is ceil(val_count / worker_count); chunk i covers
    // [start_val, min(start_val + stride, val_count)).
    // NOTE(review): the increment clause runs std::advance(start_it, stride) after
    // the final chunk too, before the condition is re-checked. When the last chunk
    // is shorter than stride this moves a list iterator past end(), which is
    // undefined behavior -- consider advancing by the clamped chunk length instead.
135  for (size_t i = 0,
136  start_val = 0,
137  stride = (val_count + worker_count - 1) / worker_count;
138  i < val_count && start_val < val_count;
139  ++i, start_val += stride, std::advance(start_it, stride)) {
140  auto end_it = start_it;
141  std::advance(end_it, std::min(stride, val_count - start_val));
    // Converts the values in [start, end) to int64 and appends them to out_vals.
    // Returns false as soon as a value is not a constant (after stripping casts).
    // Captures by reference: reads ti, sdp, needle_null_val and cgen_state_.
142  const auto do_work = [&](std::vector<int64_t>& out_vals,
143  const ListIterator start,
144  const ListIterator end) -> bool {
145  for (auto val_it = start; val_it != end; ++val_it) {
146  const auto& in_val = *val_it;
147  const auto in_val_const =
148  dynamic_cast<const Analyzer::Constant*>(extract_cast_arg(in_val.get()));
149  if (!in_val_const) {
150  return false;
151  }
152  const auto& in_val_ti = in_val->get_type_info();
153  CHECK(in_val_ti == ti || get_nullable_type_info(in_val_ti) == ti);
154  if (ti.is_string()) {
155  CHECK(sdp);
    // NULL literals map to the null sentinel; other strings are looked up in the
    // dictionary and skipped when the lookup yields INVALID_STR_ID.
156  const auto string_id =
157  in_val_const->get_is_null()
158  ? needle_null_val
159  : sdp->getIdOfString(*in_val_const->get_constval().stringval);
160  if (string_id != StringDictionary::INVALID_STR_ID) {
161  out_vals.push_back(string_id);
162  }
163  } else {
164  out_vals.push_back(CodeGenerator::codegenIntConst(in_val_const, cgen_state_)
165  ->getSExtValue());
166  }
167  }
168  return true;
169  };
170  if (worker_count > 1) {
171  worker_threads.push_back(std::async(
172  std::launch::async, do_work, std::ref(values_set[i]), start_it, end_it));
173  } else {
    // NOTE(review): the return value is discarded on this single-threaded path, so
    // a non-constant entry stops the conversion early but does not make the
    // function return nullptr; the bitmap is then built from the values converted
    // so far. Confirm whether this should `return nullptr` like the threaded path.
174  do_work(std::ref(values), start_it, end_it);
175  }
176  }
    // Wait for every worker; any failure abandons the bitmap.
177  bool success = true;
178  for (auto& worker : worker_threads) {
179  success &= worker.get();
180  }
181  if (!success) {
182  return nullptr;
183  }
    // Concatenate the per-worker outputs in chunk order, reserving once up front.
184  if (worker_count > 1) {
185  size_t total_val_count = 0;
186  for (auto& vals : values_set) {
187  total_val_count += vals.size();
188  }
189  values.reserve(total_val_count);
190  for (auto& vals : values_set) {
191  values.insert(values.end(), vals.begin(), vals.end());
192  }
193  }
    // NOTE(review): catch (...) turns every constructor failure into a nullptr
    // return, which the InValues codegen treats as "no bitmap, use comparisons".
    // Presumably a deliberate best-effort fallback -- confirm, since it also hides
    // unexpected errors.
194  try {
195  return std::make_unique<InValuesBitmap>(values,
196  needle_null_val,
200  executor()->deviceCount(co.device_type),
201  executor()->data_mgr_,
202  co);
203  } catch (...) {
204  return nullptr;
205  }
206  }
207  return nullptr;
208 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const std::vector< int64_t > & get_value_list() const
Definition: Analyzer.h:695
CgenState * cgen_state_
llvm::IRBuilder ir_builder_
Definition: CgenState.h:384
Definition: sqldefs.h:32
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:222
future< Result > async(Fn &&fn, Args &&...args)
llvm::LLVMContext & context_
Definition: CgenState.h:382
static constexpr int32_t INVALID_STR_ID
llvm::ConstantInt * inlineIntNull(const SQLTypeInfo &)
Definition: CgenState.cpp:65
const InValuesBitmap * addInValuesBitmap(std::unique_ptr< InValuesBitmap > &in_values_bitmap)
Definition: CgenState.h:211
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
Definition: CgenState.cpp:217
ExecutorDeviceType device_type
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
static llvm::ConstantInt * codegenIntConst(const Analyzer::Constant *constant, CgenState *cgen_state)
Definition: ConstantIR.cpp:89
Definition: sqldefs.h:74
llvm::Value * codegen(llvm::Value *needle, Executor *executor) const
const std::list< std::shared_ptr< Analyzer::Expr > > & get_value_list() const
Definition: Analyzer.h:646
llvm::Value * toBool(llvm::Value *)
Definition: LogicalIR.cpp:344
llvm::Value * codegenCmp(const Analyzer::BinOper *, const CompilationOptions &)
Definition: CompareIR.cpp:230
llvm::ConstantInt * llInt(const T v) const
Definition: CgenState.h:249
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::unique_ptr< InValuesBitmap > createInValuesBitmap(const Analyzer::InValues *, const CompilationOptions &)
Definition: InValuesIR.cpp:112
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1677
bool is_string() const
Definition: sqltypes.h:561
const Expr * get_arg() const
Definition: Analyzer.h:693
int cpu_threads()
Definition: thread_count.h:25
const Expr * get_arg() const
Definition: Analyzer.h:644
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1484
Executor * executor() const