OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Analyzer/Analyzer.h"
20 #include "DateTimePlusRewrite.h"
21 #include "DateTimeTranslator.h"
23 #include "ExpressionRewrite.h"
26 #include "Parser/ParserNode.h"
27 #include "RelAlgDag.h"
28 #include "ScalarExprVisitor.h"
29 #include "Shared/SqlTypesLayout.h"
30 #include "Shared/likely.h"
31 #include "Shared/scope.h"
32 #include "Shared/thread_count.h"
33 #include "WindowContext.h"
34 
35 #include <future>
36 #include <sstream>
37 
38 extern bool g_enable_watchdog;
44 
45 namespace {
46 
48  const int scale,
49  const int precision) {
  // Start from a type with dimension 0 and scale 0; the last constructor argument is
  // `true`.
50  SQLTypeInfo ti(sql_type, 0, 0, true);
  // Scale and precision are only applied to decimal types; for every other type the
  // `scale` and `precision` arguments are ignored.
51  if (ti.is_decimal()) {
52  ti.set_scale(scale);
53  ti.set_precision(precision);
54  }
55  return ti;
56 }
57 
58 } // namespace
59 
// Looks for a PG_ANY / PG_ALL function call in `rex_scalar` and returns its translated
// single operand together with the matching qualifier (kANY / kALL).
// When `rex_scalar` is not an operator, or no such call is found, the returned
// expression is null and the qualifier is kONE.
60 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier>
62  std::shared_ptr<Analyzer::Expr> rhs;
63  SQLQualifier sql_qual{kONE};
64  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
65  if (!rex_operator) {
66  return std::make_pair(rhs, sql_qual);
67  }
68  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
  // An empty name is used when the operator is not a function call, so neither
  // comparison below can match.
69  const auto qual_str = rex_function ? rex_function->getName() : "";
70  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
71  CHECK_EQ(size_t(1), rex_function->size());
72  rhs = translateScalarRex(rex_function->getOperand(0));
73  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
74  }
  // Look through a cast: retry on the cast's operand.
75  if (!rhs && rex_operator->getOperator() == kCAST) {
76  CHECK_EQ(size_t(1), rex_operator->size());
77  std::tie(rhs, sql_qual) = getQuantifiedRhs(rex_operator->getOperand(0));
78  }
79  return std::make_pair(rhs, sql_qual);
80 }
81 
82 namespace {
83 
84 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
85  const SQLTypeInfo& ti) noexcept {
86  Datum d{0};
87  bool is_null_const{false};
88  switch (ti.get_type()) {
89  case kBOOLEAN: {
90  const auto ival = boost::get<int64_t>(scalar_tv);
91  CHECK(ival);
92  if (*ival == inline_int_null_val(ti)) {
93  is_null_const = true;
94  } else {
95  d.boolval = *ival;
96  }
97  break;
98  }
99  case kTINYINT: {
100  const auto ival = boost::get<int64_t>(scalar_tv);
101  CHECK(ival);
102  if (*ival == inline_int_null_val(ti)) {
103  is_null_const = true;
104  } else {
105  d.tinyintval = *ival;
106  }
107  break;
108  }
109  case kSMALLINT: {
110  const auto ival = boost::get<int64_t>(scalar_tv);
111  CHECK(ival);
112  if (*ival == inline_int_null_val(ti)) {
113  is_null_const = true;
114  } else {
115  d.smallintval = *ival;
116  }
117  break;
118  }
119  case kINT: {
120  const auto ival = boost::get<int64_t>(scalar_tv);
121  CHECK(ival);
122  if (*ival == inline_int_null_val(ti)) {
123  is_null_const = true;
124  } else {
125  d.intval = *ival;
126  }
127  break;
128  }
129  case kDECIMAL:
130  case kNUMERIC:
131  case kBIGINT:
132  case kDATE:
133  case kTIME:
134  case kTIMESTAMP: {
135  const auto ival = boost::get<int64_t>(scalar_tv);
136  CHECK(ival);
137  if (*ival == inline_int_null_val(ti)) {
138  is_null_const = true;
139  } else {
140  d.bigintval = *ival;
141  }
142  break;
143  }
144  case kDOUBLE: {
145  const auto dval = boost::get<double>(scalar_tv);
146  CHECK(dval);
147  if (*dval == inline_fp_null_val(ti)) {
148  is_null_const = true;
149  } else {
150  d.doubleval = *dval;
151  }
152  break;
153  }
154  case kFLOAT: {
155  const auto fval = boost::get<float>(scalar_tv);
156  CHECK(fval);
157  if (*fval == inline_fp_null_val(ti)) {
158  is_null_const = true;
159  } else {
160  d.floatval = *fval;
161  }
162  break;
163  }
164  case kTEXT:
165  case kVARCHAR:
166  case kCHAR: {
167  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
168  CHECK(nullable_sptr);
169  if (boost::get<void*>(nullable_sptr)) {
170  is_null_const = true;
171  } else {
172  auto sptr = boost::get<std::string>(nullable_sptr);
173  d.stringval = new std::string(*sptr);
174  }
175  break;
176  }
177  default:
178  CHECK(false) << "Unhandled type: " << ti.get_type_name();
179  }
180  return {d, is_null_const};
181 }
182 
183 using Handler =
184  std::shared_ptr<Analyzer::Expr> (RelAlgTranslator::*)(RexScalar const*) const;
185 using IndexedHandler = std::pair<std::type_index, Handler>;
186 
187 template <typename... Ts>
188 std::array<IndexedHandler, sizeof...(Ts)> makeHandlers() {
189  return {IndexedHandler{std::type_index(typeid(Ts)),
190  &RelAlgTranslator::translateRexScalar<Ts>}...};
191 }
192 
193 struct ByTypeIndex {
194  std::type_index const type_index_;
195  ByTypeIndex(std::type_info const& type_info)
196  : type_index_(std::type_index(type_info)) {}
197  bool operator()(IndexedHandler const& pair) const { return pair.first == type_index_; }
198 };
199 
200 } // namespace
201 
202 template <>
203 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexInput>(
204  RexScalar const* rex) const {
205  return translateInput(static_cast<RexInput const*>(rex));
206 }
207 template <>
208 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexLiteral>(
209  RexScalar const* rex) const {
210  return translateLiteral(static_cast<RexLiteral const*>(rex));
211 }
212 template <>
213 std::shared_ptr<Analyzer::Expr>
214 RelAlgTranslator::translateRexScalar<RexWindowFunctionOperator>(
215  RexScalar const* rex) const {
216  return translateWindowFunction(static_cast<RexWindowFunctionOperator const*>(rex));
217 }
218 template <>
219 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexFunctionOperator>(
220  RexScalar const* rex) const {
221  return translateFunction(static_cast<RexFunctionOperator const*>(rex));
222 }
223 template <>
224 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexOperator>(
225  RexScalar const* rex) const {
226  return translateOper(static_cast<RexOperator const*>(rex));
227 }
228 template <>
229 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexCase>(
230  RexScalar const* rex) const {
231  return translateCase(static_cast<RexCase const*>(rex));
232 }
233 template <>
234 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexSubQuery>(
235  RexScalar const* rex) const {
236  return translateScalarSubquery(static_cast<RexSubQuery const*>(rex));
237 }
238 
// Translates `rex` into an Analyzer expression, memoizing the result in cache_ keyed by
// the RexScalar pointer. On a cache miss the handler is chosen by the dynamic type of
// `*rex`; an unknown type or a failed cache insertion trips a CHECK.
239 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
240  RexScalar const* rex) const {
241  auto cache_itr = cache_.find(rex);
242  if (cache_itr == cache_.end()) {
243  // Order types from most likely to least as they are compared seriatim.
244  static auto const handlers = makeHandlers<RexInput,
245  RexLiteral,
246  RexOperator,
247  RexCase,
250  RexSubQuery>();
251  static_assert(std::is_trivially_destructible_v<decltype(handlers)>);
  // Linear search for the entry whose type_index equals typeid(*rex).
252  auto it = std::find_if(handlers.cbegin(), handlers.cend(), ByTypeIndex{typeid(*rex)});
253  CHECK(it != handlers.cend()) << "Unhandled type: " << typeid(*rex).name();
254  // Call handler based on typeid(*rex) and cache the std::shared_ptr<Analyzer::Expr>.
255  auto cached = cache_.emplace(rex, (this->*it->second)(rex));
256  CHECK(cached.second) << "Failed to emplace rex of type " << typeid(*rex).name();
257  cache_itr = cached.first;
258  }
259  return cache_itr->second;
260 }
261 
262 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translate(RexScalar const* rex) const {
263  ScopeGuard clear_cache{[this] { cache_.clear(); }};
264  return translateScalarRex(rex);
265 }
266 
267 namespace {
268 
269 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
270  return arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time() ||
271  (agg_kind == kMODE && arg_ti.is_string()) ||
272  !shared::is_any<kAVG, kMIN, kMAX, kSUM, kAPPROX_QUANTILE, kMODE>(agg_kind);
273 }
274 
275 bool is_distinct_supported(SQLAgg const agg_kind) {
276  return shared::is_any<kMIN, kMAX, kCOUNT, kAPPROX_COUNT_DISTINCT>(agg_kind);
277 }
278 
279 } // namespace
280 
// Builds an Analyzer::AggExpr for the aggregate described by `rex`.
//
// rex            - aggregate node; its operands are indices into `scalar_sources`.
// scalar_sources - already translated scalar expressions the aggregate can refer to.
//
// At most two operands are accepted. Throws std::runtime_error when an aggregate's
// extra parameter is invalid, when the aggregate is unavailable in distributed mode
// (g_cluster), when the argument type is unsupported, or when DISTINCT is requested
// for an aggregate that does not allow it.
281 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
282  const RexAgg* rex,
283  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
284  SQLAgg agg_kind = rex->getKind();
285  const bool is_distinct = rex->isDistinct();
286  const bool takes_arg{rex->size() > 0};
287  std::shared_ptr<Analyzer::Expr> arg_expr;
288  std::shared_ptr<Analyzer::Expr> arg1; // 2nd aggregate parameter
289  if (takes_arg) {
290  const auto operand = rex->getOperand(0);
291  CHECK_LT(operand, scalar_sources.size());
292  CHECK_LE(rex->size(), 2u);
293  arg_expr = scalar_sources[operand];
294  switch (agg_kind) {
  // The case label for the branch below is not visible in this view; the error text
  // indicates it handles APPROX_COUNT_DISTINCT.
  // NOTE(review): the message asks for a SMALLINT literal while the check requires
  // kINT — confirm which is intended.
296  if (rex->size() == 2) {
297  auto const const_arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
298  scalar_sources[rex->getOperand(1)]);
299  if (!const_arg1 || const_arg1->get_type_info().get_type() != kINT ||
300  const_arg1->get_constval().intval < 1 ||
301  const_arg1->get_constval().intval > 100) {
302  throw std::runtime_error(
303  "APPROX_COUNT_DISTINCT's second parameter must be a SMALLINT literal "
304  "between 1 and 100");
305  }
306  arg1 = scalar_sources[rex->getOperand(1)];
307  }
308  break;
309  case kAPPROX_QUANTILE:
310  if (g_cluster) {
311  throw std::runtime_error(
312  "APPROX_PERCENTILE/MEDIAN is not supported in distributed mode at this "
313  "time.");
314  }
315  // If second parameter is not given then APPROX_MEDIAN is assumed.
316  if (rex->size() == 2) {
  // NOTE(review): the inner dynamic_pointer_cast result is dereferenced without a
  // null check; a non-constant second operand would dereference null here.
317  arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
318  std::dynamic_pointer_cast<Analyzer::Constant>(
319  scalar_sources[rex->getOperand(1)])
320  ->add_cast(SQLTypeInfo(kDOUBLE)));
321  } else {
322 #ifdef _WIN32
323  Datum median;
324  median.doubleval = 0.5;
325 #else
326  constexpr Datum median{.doubleval = 0.5};
327 #endif
328  arg1 = std::make_shared<Analyzer::Constant>(kDOUBLE, false, median);
329  }
330  break;
331  case kMODE:
332  if (g_cluster) {
333  throw std::runtime_error(
334  "MODE is not supported in distributed mode at this time.");
335  }
336  break;
337  case kCOUNT_IF:
338  if (arg_expr->get_type_info().is_geometry()) {
339  throw std::runtime_error(
340  "COUNT_IF does not currently support geospatial types.");
341  }
342  break;
343  case kSUM_IF:
  // NOTE(review): operand 1 is read without checking rex->size() == 2, unlike the
  // branches above — presumably the planner guarantees two operands; confirm.
344  arg1 = scalar_sources[rex->getOperand(1)];
345  if (arg1->get_type_info().get_type() != kBOOLEAN) {
346  throw std::runtime_error("Conditional argument must be a boolean expression.");
347  }
348  break;
349  default:
350  break;
351  }
352  const auto& arg_ti = arg_expr->get_type_info();
353  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
354  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
355  " is not supported yet.");
356  }
357  if (is_distinct && !is_distinct_supported(agg_kind)) {
358  throw std::runtime_error(toString(agg_kind) +
359  " does not currently support the DISTINCT qualifier.");
360  }
361  }
  // arg_expr is still null here when the aggregate takes no argument.
362  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
363  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, arg1);
364 }
365 
// Translates a literal node into an Analyzer constant (or a null array expression).
// `lit_ti` describes the literal's own type and `target_ti` the type it is expected to
// have; the decimal and double branches add a cast when the two differ.
// An unexpected literal type is logged as FATAL; the trailing `return nullptr` only
// exists for the compiler.
366 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
367  const RexLiteral* rex_literal) {
368  auto lit_ti = build_type_info(
369  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
370  auto target_ti = build_type_info(rex_literal->getTargetType(),
371  rex_literal->getTargetScale(),
372  rex_literal->getTargetPrecision());
373  switch (rex_literal->getType()) {
374  case kINT:
375  case kBIGINT: {
  // Both integer widths are stored through the 64-bit member of the Datum.
376  Datum d;
377  d.bigintval = rex_literal->getVal<int64_t>();
378  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
379  }
380  case kDECIMAL: {
381  const auto val = rex_literal->getVal<int64_t>();
382  const int precision = rex_literal->getPrecision();
383  const int scale = rex_literal->getScale();
  // A scale-0 decimal headed for a floating point target becomes an fp constant
  // directly.
384  if (target_ti.is_fp() && !scale) {
385  return make_fp_constant(val, target_ti);
386  }
  // The else-branch of this conditional expression is not visible in this view.
387  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
389  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
390  }
391  case kTEXT: {
392  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>(),
393  false);
394  }
395  case kBOOLEAN: {
396  Datum d;
397  d.boolval = rex_literal->getVal<bool>();
398  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
399  }
400  case kDOUBLE: {
401  Datum d;
402  d.doubleval = rex_literal->getVal<double>();
403  auto lit_expr =
404  makeExpr<Analyzer::Constant>(SQLTypeInfo(rex_literal->getType(),
405  rex_literal->getPrecision(),
406  rex_literal->getScale(),
407  false),
408  false,
409  d);
410  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
411  }
412  case kINTERVAL_DAY_TIME:
413  case kINTERVAL_YEAR_MONTH: {
414  Datum d;
415  d.bigintval = rex_literal->getVal<int64_t>();
416  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
417  }
418  case kTIME:
419  case kTIMESTAMP: {
  // A TIMESTAMP with precision > 0 keeps the literal value as is; everything else
  // is divided by 1000 (presumably milliseconds to seconds — confirm).
420  Datum d;
421  d.bigintval =
422  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
423  ? rex_literal->getVal<int64_t>()
424  : rex_literal->getVal<int64_t>() / 1000;
425  return makeExpr<Analyzer::Constant>(
426  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
427  false,
428  d);
429  }
430  case kDATE: {
  // The literal value is scaled by 24 * 3600 (presumably days to seconds — confirm).
431  Datum d;
432  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
433  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
434  }
435  case kNULLT: {
436  if (target_ti.is_array()) {
  // The declaration of `args` is not visible in this view.
438  // defaulting to valid sub-type for convenience
439  target_ti.set_subtype(kBOOLEAN);
440  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
441  }
442  if (target_ti.get_type() == kGEOMETRY) {
443  // Specific geo type will be set in a normalization step if needed.
444  return makeExpr<Analyzer::Constant>(kNULLT, true, Datum{0});
445  }
446  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
447  }
448  default: {
449  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
450  }
451  }
452  return nullptr;
453 }
454 
455 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
456  const RexSubQuery* rex_subquery) const {
457  if (just_explain_) {
458  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
459  }
460  CHECK(rex_subquery);
461  auto result = rex_subquery->getExecutionResult();
462  auto row_set = result->getRows();
463  const size_t row_count = row_set->rowCount();
464  if (row_count > size_t(1)) {
465  throw std::runtime_error("Scalar sub-query returned multiple rows");
466  }
467  auto ti = rex_subquery->getType();
468  if (g_cluster && ti.is_string()) {
469  throw std::runtime_error(
470  "Scalar sub-queries which return strings not supported in distributed mode");
471  }
472  if (row_count == size_t(0)) {
473  if (row_set->isValidationOnlyRes()) {
474  Datum d{0};
475  if (ti.is_string()) {
476  // keep the valid ptr to avoid crash during the query validation
477  // this ptr will be removed when destructing corresponding constant variable
478  d.stringval = new std::string();
479  }
480  if (ti.is_dict_encoded_string()) {
481  // we set a valid ptr for string literal in above which is not dictionary-encoded
482  ti.set_compression(EncodingType::kENCODING_NONE);
483  }
484  return makeExpr<Analyzer::Constant>(ti, false, d);
485  }
486  throw std::runtime_error("Scalar sub-query returned no results");
487  }
488  CHECK_EQ(row_count, size_t(1));
489  row_set->moveToBegin();
490  auto const first_row = row_set->getNextRow(ti.is_dict_encoded_string(), false);
491  CHECK_EQ(first_row.size(), size_t(1));
492  Datum d{0};
493  bool is_null_const{false};
494  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
495  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
496  if (ti.is_dict_encoded_string()) {
497  // we already translate the string, so let's make its type as a string literal
498  ti.set_compression(EncodingType::kENCODING_NONE);
499  }
500  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
501 }
502 
503 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
504  const RexInput* rex_input) const {
505  const auto source = rex_input->getSourceNode();
506  const auto it_rte_idx = input_to_nest_level_.find(source);
507  CHECK(it_rte_idx != input_to_nest_level_.end())
508  << "Not found in input_to_nest_level_, source="
509  << source->toString(RelRexToStringConfig::defaults());
510  const int rte_idx = it_rte_idx->second;
511  const auto scan_source = dynamic_cast<const RelScan*>(source);
512  const auto& in_metainfo = source->getOutputMetainfo();
513  if (scan_source) {
514  // We're at leaf (scan) level and not supposed to have input metadata,
515  // the name and type information come directly from the catalog.
516  CHECK(in_metainfo.empty());
517  const auto table_desc = scan_source->getTableDescriptor();
518  const auto& catalog = scan_source->getCatalog();
519  const auto cd =
520  catalog.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
521  CHECK(cd);
522  auto col_ti = cd->columnType;
523  if (col_ti.is_string()) {
524  col_ti.set_type(kTEXT);
525  }
526  if (cd->isVirtualCol) {
527  // TODO(alex): remove at some point, we only need this fixup for backwards
528  // compatibility with old imported data
529  CHECK_EQ("rowid", cd->columnName);
530  col_ti.set_size(8);
531  }
532  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
533  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
534  col_ti.set_notnull(false);
535  }
536  return std::make_shared<Analyzer::ColumnVar>(
537  col_ti,
538  shared::ColumnKey{catalog.getDatabaseId(), table_desc->tableId, cd->columnId},
539  rte_idx);
540  }
541  CHECK(!in_metainfo.empty()) << "for "
542  << source->toString(RelRexToStringConfig::defaults());
543  CHECK_GE(rte_idx, 0);
544  const int32_t col_id = rex_input->getIndex();
545  CHECK_LT(col_id, in_metainfo.size());
546  auto col_ti = in_metainfo[col_id].get_type_info();
547 
548  if (join_types_.size() > 0) {
549  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
550  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
551  col_ti.set_notnull(false);
552  }
553  }
554 
555  return std::make_shared<Analyzer::ColumnVar>(
556  col_ti, shared::ColumnKey{0, int32_t(-source->getId()), col_id}, rte_idx);
557 }
558 
// Translates a unary operator node. The single operand is translated first and then
// wrapped according to the operator: CAST, ENCODE_TEXT, NOT, IS NULL, IS NOT NULL,
// unary minus or UNNEST. Any other operator trips CHECK(false).
559 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
560  const RexOperator* rex_operator) const {
561  CHECK_EQ(size_t(1), rex_operator->size());
562  auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
563  const auto sql_op = rex_operator->getOperator();
564  switch (sql_op) {
565  case kCAST: {
566  const auto& target_ti = rex_operator->getType();
567  CHECK_NE(kNULLT, target_ti.get_type());
568  const auto& operand_ti = operand_expr->get_type_info();
  // String-to-string casts are dropped: the operand is returned unchanged.
569  if (operand_ti.is_string() && target_ti.is_string()) {
570  return operand_expr;
571  }
572  if (target_ti.is_time() ||
573  operand_ti
574  .is_string()) { // TODO(alex): check and unify with the rest of the cases
575  // Do not propagate encoding on small dates
576  return target_ti.is_date_in_days()
577  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
578  : operand_expr->add_cast(target_ti);
579  }
580  if (!operand_ti.is_string() && target_ti.is_string()) {
581  return operand_expr->add_cast(target_ti);
582  }
583  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
584  }
585  case kENCODE_TEXT: {
586  SQLTypeInfo target_ti = rex_operator->getType();
587  if (target_ti.get_type() == kNULLT) {
588  if (auto const_expr =
589  dynamic_cast<const Analyzer::Constant*>(operand_expr.get())) {
590  if (const_expr->get_type_info() == kNULLT && const_expr->get_is_null()) {
591  // make a typed NULL constant and sync it to target_ti
592  operand_expr = makeExpr<Analyzer::Constant>(kTEXT, true);
593  target_ti.set_type(kTEXT);
594  }
595  }
596  }
597  CHECK_NE(kNULLT, target_ti.get_type());
598  const auto& operand_ti = operand_expr->get_type_info();
599  CHECK(operand_ti.is_string());
600  if (operand_ti.is_dict_encoded_string()) {
601  // No cast needed
602  return operand_expr;
603  }
  // An operand that references no column variables is returned unchanged.
604  if (operand_expr->get_num_column_vars(true) == 0UL) {
605  return operand_expr;
606  }
607  if (g_cluster) {
608  throw std::runtime_error(
609  "ENCODE_TEXT is not currently supported in distributed mode at this time.");
610  }
  // Cast to a dictionary-encoded kTEXT type that uses TRANSIENT_DICT_ID as its
  // compression parameter. One statement of this sequence is not visible in this view.
611  SQLTypeInfo casted_target_ti = operand_ti;
612  casted_target_ti.set_type(kTEXT);
613  casted_target_ti.set_compression(kENCODING_DICT);
614  casted_target_ti.set_comp_param(TRANSIENT_DICT_ID);
616  casted_target_ti.set_fixed_size();
617  return makeExpr<Analyzer::UOper>(
618  casted_target_ti, operand_expr->get_contains_agg(), kCAST, operand_expr);
619  }
620  case kNOT:
621  case kISNULL: {
622  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
623  }
624  case kISNOTNULL: {
  // IS NOT NULL is expressed as NOT(IS NULL(operand)).
625  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
626  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
627  }
628  case kMINUS: {
  // A unary kMINUS is emitted as kUMINUS with the operand's own type.
629  const auto& ti = operand_expr->get_type_info();
630  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
631  }
632  case kUNNEST: {
  // The result type is the element type of the array operand.
633  const auto& ti = operand_expr->get_type_info();
634  CHECK(ti.is_array());
635  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
636  }
637  default:
638  CHECK(false);
639  }
640  return nullptr;
641 }
642 
643 namespace {
644 
// Builds an Analyzer::InValues expression for `arg IN (values of val_set)`.
// The entries of `val_set` are split into at most cpu_threads() contiguous slices;
// each slice is converted by its own std::async task into a private list of constant
// expressions, and the lists are concatenated in slice order afterwards.
// Throws std::runtime_error when the watchdog is enabled and the row count exceeds
// g_watchdog_in_clause_max_num_input_rows.
645 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
646  const ResultSet& val_set) {
  // The condition guarding this early return is not visible in this view.
648  return nullptr;
649  }
650  if (val_set.rowCount() > g_watchdog_in_clause_max_num_input_rows && g_enable_watchdog) {
651  std::ostringstream oss;
652  oss << "Unable to handle 'expr IN (subquery)': # input rows (" << val_set.rowCount()
653  << ") is larger than threshold 'g_watchdog_in_clause_max_num_input_rows':"
655  throw std::runtime_error(oss.str());
656  }
657  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
658  const size_t fetcher_count = cpu_threads();
  // One output list per task, so the tasks never write to the same list.
659  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
660  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
661  std::vector<std::future<void>> fetcher_threads;
662  const auto& ti = arg->get_type_info();
663  const auto entry_count = val_set.entryCount();
  // stride = ceil(entry_count / fetcher_count); the loop stops early once the slices
  // cover every entry.
664  for (size_t i = 0,
665  start_entry = 0,
666  stride = (entry_count + fetcher_count - 1) / fetcher_count;
667  i < fetcher_count && start_entry < entry_count;
668  ++i, start_entry += stride) {
669  const auto end_entry = std::min(start_entry + stride, entry_count);
  // The first argument of this std::async call is not visible in this view.
670  fetcher_threads.push_back(std::async(
672  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
673  const size_t start,
674  const size_t end) {
675  for (auto index = start; index < end; ++index) {
676  auto row = val_set.getRowAt(index);
677  if (row.empty()) {
678  continue;
679  }
680  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
681  Datum d{0};
682  bool is_null_const{false};
683  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
684  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
  // NOTE(review): ti_none_encoded is prepared but never used; the constant below is
  // built with `ti`. Its name suggests ti_none_encoded was intended — confirm.
685  auto ti_none_encoded = ti;
686  ti_none_encoded.set_compression(kENCODING_NONE);
687  auto none_encoded_string =
688  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
689  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
690  ti, false, kCAST, none_encoded_string);
691  in_vals.push_back(dict_encoded_string);
692  } else {
693  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
694  }
695  }
696  },
697  std::ref(expr_set[i]),
698  start_entry,
699  end_entry));
700  }
  // Wait for every task; get() also rethrows an exception raised inside a task.
701  for (auto& child : fetcher_threads) {
702  child.get();
703  }
704 
705  val_set.moveToBegin();
  // splice moves the nodes, so no expression is copied.
706  for (auto& exprs : expr_set) {
707  value_exprs.splice(value_exprs.end(), exprs);
708  }
709  return makeExpr<Analyzer::InValues>(arg, value_exprs);
710 }
711 
712 } // namespace
713 
714 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
715 // regular Executor::codegen() mechanism. The creation of the expression out of
716 // subquery's result set is parallelized whenever possible. In addition, take advantage
717 // of additional information that elements in the right hand side are constants; see
718 // getInIntegerSetExpr().
// Translates `lhs IN (subquery)`; operand 1 must be a RexSubQuery with an execution
// result of exactly one column. Throws std::runtime_error in EXPLAIN mode, when the
// two sides have different types, or when a watchdog limit is exceeded.
719 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
720  const RexOperator* rex_operator) const {
721  if (just_explain_) {
722  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
723  }
724  CHECK(rex_operator->size() == 2);
725  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
726  const auto rhs = rex_operator->getOperand(1);
727  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
728  CHECK(rex_subquery);
729  auto ti = lhs->get_type_info();
730  auto result = rex_subquery->getExecutionResult();
731  CHECK(result);
732  auto& row_set = result->getRows();
733  CHECK_EQ(size_t(1), row_set->colCount());
734  const auto& rhs_ti = row_set->getColType(0);
735  if (rhs_ti.get_type() != ti.get_type()) {
736  throw std::runtime_error(
737  "The two sides of the IN operator must have the same type; found " +
738  ti.get_type_name() + " and " + rhs_ti.get_type_name());
739  }
  // Logs the elapsed time when this function leaves scope.
740  ScopeGuard elapsed_time_log = [clock_begin = timer_start()] {
741  VLOG(1) << "RelAlgTranslator::translateInOper: took " << timer_stop(clock_begin)
742  << " ms";
743  };
744  row_set->moveToBegin();
745  std::shared_ptr<Analyzer::Expr> expr;
  // Integer and dictionary-encoded string needles on a non-columnar row set first try
  // the integer-set path.
746  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
747  !row_set->didOutputColumnar()) {
748  expr = getInIntegerSetExpr(lhs, *row_set);
749  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
750  // Just let it fall through the usual InValues path at the end of this method,
751  // its codegen knows to use inline comparisons for few values.
752  if (expr) {
753  auto const num_values =
754  std::static_pointer_cast<Analyzer::InIntegerSet>(expr)->get_value_list().size();
755  if (num_values <= g_in_clause_num_elem_skip_bitmap) {
756  VLOG(1) << "Skip to build a bitmap for tiny integer-set case: # values ("
757  << ::toString(num_values) << ") <= threshold ("
759  expr = nullptr;
760  }
761  } else {
762  expr = get_in_values_expr(lhs, *row_set);
763  }
764  if (expr) {
765  return expr;
766  }
767  }
  // Fallback: read the row set one row at a time and build an InValues expression.
768  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
769  while (true) {
770  auto row = row_set->getNextRow(true, false);
771  if (row.empty()) {
772  break;
773  }
774  if (g_enable_watchdog &&
775  value_exprs.size() >= g_watchdog_in_clause_max_num_elem_non_bitmap) {
776  std::ostringstream oss;
777  oss << "Unable to handle 'expr IN (subquery)' via non-bitmap, # unique values ("
778  << value_exprs.size()
779  << ") is larger than the threshold "
780  "'g_watchdog_in_clause_max_num_elem_non_bitmap': "
782  throw std::runtime_error(oss.str());
783  }
784  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
785  Datum d{0};
786  bool is_null_const{false};
787  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
788  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
  // NOTE(review): ti_none_encoded is prepared but never used; the constant below is
  // built with `ti`. Its name suggests ti_none_encoded was intended — confirm.
789  auto ti_none_encoded = ti;
790  ti_none_encoded.set_compression(kENCODING_NONE);
791  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
792  auto dict_encoded_string =
793  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
794  value_exprs.push_back(dict_encoded_string);
795  } else {
796  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
797  }
798  }
799  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
800 }
801 
802 namespace {
803 
805  std::vector<int64_t>& in_vals,
806  std::atomic<size_t>& total_in_vals_count,
807  const ResultSet* values_rowset,
808  const std::pair<int64_t, int64_t> values_rowset_slice,
809  const StringDictionaryProxy* source_dict,
810  const StringDictionaryProxy* dest_dict,
811  const int64_t needle_null_val) {
  // Appends to `in_vals` (which must start out empty) one id per valid row in
  // [values_rowset_slice.first, values_rowset_slice.second).
812  CHECK(in_vals.empty());
  // Identical proxies mean the ids can be copied without translation.
813  bool dicts_are_equal = source_dict == dest_dict;
814  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
815  ++index) {
816  const auto row = values_rowset->getOneColRow(index);
817  if (UNLIKELY(!row.valid)) {
818  continue;
819  }
820  if (dicts_are_equal) {
821  in_vals.push_back(row.value);
822  } else {
  // Translate through the string itself; the null sentinel is passed through
  // untranslated, and strings unknown to dest_dict are dropped.
  // NOTE(review): the int64_t needle_null_val is narrowed to int here — confirm the
  // sentinel fits in 32 bits.
823  const int string_id =
824  row.value == needle_null_val
825  ? needle_null_val
826  : dest_dict->getIdOfString(source_dict->getString(row.value));
827  if (string_id != StringDictionary::INVALID_STR_ID) {
828  in_vals.push_back(string_id);
829  }
830  }
  // Watchdog: whenever this slice's size is a multiple of 1024, 1024 is added to the
  // shared counter and its previous value is compared with a threshold (the
  // threshold operand is not visible in this view).
  // NOTE(review): this test also runs when nothing was appended in this iteration,
  // so an empty in_vals (size 0) bumps the shared counter on every row — confirm
  // this is intended.
831  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
832  total_in_vals_count.fetch_add(1024) >=
834  std::ostringstream oss;
835  oss << "Unable to handle 'expr IN (subquery)' via bitmap, # unique encoded-string ("
836  << total_in_vals_count.load()
837  << ") is larger than the threshold 'g_watchdog_in_clause_max_num_elem_bitmap': "
839  throw std::runtime_error(oss.str());
840  }
841  }
842 }
843 
// Appends to `in_vals` (which must start out empty) the value of every valid row of
// `values_rowset` in [values_rowset_slice.first, values_rowset_slice.second).
// `total_in_vals_count` is a counter shared between slices and used by the watchdog;
// std::runtime_error is thrown when the watchdog limit is hit.
844 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
845  std::atomic<size_t>& total_in_vals_count,
846  const ResultSet* values_rowset,
847  const std::pair<int64_t, int64_t> values_rowset_slice) {
848  CHECK(in_vals.empty());
849  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
850  ++index) {
851  const auto row = values_rowset->getOneColRow(index);
852  if (row.valid) {
853  in_vals.push_back(row.value);
  // Every 1024 values appended by this slice, 1024 is added to the shared counter
  // and its previous value is compared with a threshold (the threshold operand is
  // not visible in this view).
854  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
855  total_in_vals_count.fetch_add(1024) >=
857  std::ostringstream oss;
858  oss << "Unable to handle 'expr IN (subquery)' via bitmap, # unique integer "
859  "values ("
860  << total_in_vals_count.load()
861  << ") is larger than the threshold "
862  "'g_watchdog_in_clause_max_num_elem_bitmap': "
864  throw std::runtime_error(oss.str());
865  }
866  }
867  }
868 }
869 
870 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
871 // for a big right-hand side result. It only handles physical string dictionary ids,
872 // therefore it won't be able to handle a right-hand side sub-query with a CASE
873 // returning literals on some branches. That case isn't hard to handle either, but
874 // it's not clear it's actually important in practice.
875 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that
876 // this function isn't called in such cases.
// NOTE(review): the line carrying the return type and function name (listing line 877)
// is absent from this listing; judging by the comment in
// RelAlgTranslator::getInIntegerSetExpr this is presumably
// fill_dictionary_encoded_in_vals -- confirm. Listing lines 904, 911, 954 and 961 (the
// watchdog limit operand and the streamed limit value) are absent as well.
//
// Fills `in_vals` (must be empty on entry) from the one-column rows in
// [values_rowset_slice.first, values_rowset_slice.second) of `values_rowset`. Values
// equal to `needle_null_val` are collapsed into at most one entry. When the source and
// destination dictionary refs are equal the ids are copied as-is; otherwise they are
// translated with translate_string_ids against the first leaf host, and ids that come
// back as StringDictionary::INVALID_STR_ID are dropped.
878  std::vector<int64_t>& in_vals,
879  std::atomic<size_t>& total_in_vals_count,
880  const ResultSet* values_rowset,
881  const std::pair<int64_t, int64_t> values_rowset_slice,
882  const std::vector<LeafHostInfo>& leaf_hosts,
883  const DictRef source_dict_ref,
884  const DictRef dest_dict_ref,
885  const int32_t dest_generation,
886  const int64_t needle_null_val) {
887  CHECK(in_vals.empty());
888  std::vector<int32_t> source_ids;
// NOTE(review): this reserves room for the whole rowset rather than for this slice,
// and does so even on the shared-dictionary path below where source_ids stays unused.
889  source_ids.reserve(values_rowset->entryCount());
890  bool has_nulls = false;
891  if (source_dict_ref == dest_dict_ref) {
892  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
893  1); // Add 1 to cover interval
894  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
895  ++index) {
896  const auto row = values_rowset->getOneColRow(index);
897  if (!row.valid) {
898  continue;
899  }
900  if (row.value != needle_null_val) {
901  in_vals.push_back(row.value);
// Same throttled watchdog check as in fill_integer_in_vals: the shared counter is only
// advanced once per 1024 local insertions.
902  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
903  total_in_vals_count.fetch_add(1024) >=
905  std::ostringstream oss;
906  oss << "Unable to handle 'expr IN (subquery)' via bitmap, # unique "
907  "encoded-string values ("
908  << total_in_vals_count.load()
909  << ") is larger than the threshold "
910  "'g_watchdog_in_clause_max_num_elem_bitmap': "
912  throw std::runtime_error(oss.str());
913  }
914  } else {
915  has_nulls = true;
916  }
917  }
918  if (has_nulls) {
919  in_vals.push_back(
920  needle_null_val); // we've deduped null values as an optimization, although
921  // this is not required by consumer
922  }
923  return;
924  }
925  // Code path below is for when dictionaries are not shared
926  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
927  ++index) {
928  const auto row = values_rowset->getOneColRow(index);
929  if (row.valid) {
930  if (row.value != needle_null_val) {
931  source_ids.push_back(row.value);
932  } else {
933  has_nulls = true;
934  }
935  }
936  }
// Translate all collected source ids in a single call; the result is expected to be
// positionally aligned with source_ids (enforced by the CHECK_EQ below).
937  std::vector<int32_t> dest_ids;
938  translate_string_ids(dest_ids,
939  leaf_hosts.front(),
940  dest_dict_ref,
941  source_ids,
942  source_dict_ref,
943  dest_generation);
944  CHECK_EQ(dest_ids.size(), source_ids.size());
945  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
946  if (has_nulls) {
947  in_vals.push_back(needle_null_val);
948  }
949  for (const int32_t dest_id : dest_ids) {
// Ids reported as invalid by the translation are skipped.
950  if (dest_id != StringDictionary::INVALID_STR_ID) {
951  in_vals.push_back(dest_id);
952  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
953  total_in_vals_count.fetch_add(1024) >=
955  std::ostringstream oss;
956  oss << "Unable to handle 'expr IN (subquery)' via bitmap, # unique "
957  "encoded-string values ("
958  << total_in_vals_count.load()
959  << ") is larger than the threshold "
960  "'g_watchdog_in_clause_max_num_elem_bitmap': "
962  throw std::runtime_error(oss.str());
963  }
964  }
965  }
966 }
967 
968 } // namespace
969 
970 // The typical IN subquery involves either dictionary-encoded strings or integers.
971 // Analyzer::InValues is a very heavy representation of the right hand side of such
972 // a query since we already know the right hand would be a list of Analyzer::Constant
973 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
974 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
975 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet for `arg IN (<val_set>)`, where `val_set` must have
// exactly one column. The entries of `val_set` are split into contiguous slices that
// are processed by up to cpu_threads() std::async tasks; the per-task vectors are then
// concatenated. Returns nullptr when the fast path is not applicable.
// NOTE(review): several lines are absent from this listing: 979 (the condition guarding
// the first `return nullptr`), 1019 and 1056 (first argument of both std::async calls),
// 1030 and 1041 (the callees of the two string branches inside the lambda). Confirm them
// against the original file before editing this function.
976 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
977  std::shared_ptr<Analyzer::Expr> arg,
978  const ResultSet& val_set) const {
980  return nullptr;
981  }
982  std::vector<int64_t> value_exprs;
983  const size_t fetcher_count = cpu_threads();
// One output vector per potential task, so tasks never write to a shared container.
984  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
985  std::vector<std::future<void>> fetcher_threads;
986  const auto& arg_type = arg->get_type_info();
987  const auto entry_count = val_set.entryCount();
988  CHECK_EQ(size_t(1), val_set.colCount());
989  const auto& col_type = val_set.getColType(0);
990  if (g_cluster && arg_type.is_string() &&
991  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
992  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
993  return nullptr;
994  }
// Shared across all tasks; the fill_* helpers use it for the watchdog size check.
995  std::atomic<size_t> total_in_vals_count{0};
// stride is entry_count / fetcher_count rounded up; the loop stops early once the
// slices have covered every entry.
996  for (size_t i = 0,
997  start_entry = 0,
998  stride = (entry_count + fetcher_count - 1) / fetcher_count;
999  i < fetcher_count && start_entry < entry_count;
1000  ++i, start_entry += stride) {
1001  expr_set[i].reserve(entry_count / fetcher_count);
1002  const auto end_entry = std::min(start_entry + stride, entry_count);
1003  if (arg_type.is_string()) {
1004  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
1005  auto col_expr = dynamic_cast<const Analyzer::ColumnVar*>(arg.get());
1006  CHECK(col_expr);
1007  const auto& dest_dict_key = arg_type.getStringDictKey();
1008  const auto& source_dict_key = col_type.getStringDictKey();
// dd: proxy for the dictionary of `arg` (destination); sd: proxy for the dictionary of
// the subquery column (source).
1009  const auto dd = executor_->getStringDictionaryProxy(
1010  arg_type.getStringDictKey(), val_set.getRowSetMemOwner(), true);
1011  const auto sd = executor_->getStringDictionaryProxy(
1012  col_type.getStringDictKey(), val_set.getRowSetMemOwner(), true);
1013  CHECK(sd);
1014  const auto needle_null_val = inline_int_null_val(arg_type);
1015  const auto catalog =
1016  Catalog_Namespace::SysCatalog::instance().getCatalog(source_dict_key.db_id);
1017  CHECK(catalog);
// NOTE(review): source_dict_key / dest_dict_key are loop-body locals captured by
// reference. This is only safe if getStringDictKey() returns a reference to storage
// that outlives the task (rather than a temporary) -- confirm.
1018  fetcher_threads.push_back(std::async(
1020  [&val_set,
1021  &total_in_vals_count,
1022  sd,
1023  dd,
1024  &source_dict_key,
1025  &dest_dict_key,
1026  needle_null_val,
1027  catalog](std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
1028  if (g_cluster) {
1029  CHECK_GE(dd->getGeneration(), 0);
1031  in_vals,
1032  total_in_vals_count,
1033  &val_set,
1034  {start, end},
1035  catalog->getStringDictionaryHosts(),
1036  {source_dict_key.db_id, source_dict_key.dict_id},
1037  {dest_dict_key.db_id, dest_dict_key.dict_id},
1038  dd->getGeneration(),
1039  needle_null_val);
1040  } else {
1042  total_in_vals_count,
1043  &val_set,
1044  {start, end},
1045  sd,
1046  dd,
1047  needle_null_val);
1048  }
1049  },
1050  std::ref(expr_set[i]),
1051  start_entry,
1052  end_entry));
1053  } else {
1054  CHECK(arg_type.is_integer());
1055  fetcher_threads.push_back(std::async(
1057  [&val_set, &total_in_vals_count](
1058  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
1059  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
1060  },
1061  std::ref(expr_set[i]),
1062  start_entry,
1063  end_entry));
1064  }
1065  }
// get() blocks until each task finishes and rethrows any exception it raised.
1066  for (auto& child : fetcher_threads) {
1067  child.get();
1068  }
1069 
1070  val_set.moveToBegin();
1071  value_exprs.reserve(entry_count);
1072  for (auto& exprs : expr_set) {
1073  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
1074  }
// The third argument is true only when both the argument and the subquery column are
// declared NOT NULL.
1075  return makeExpr<Analyzer::InIntegerSet>(
1076  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
1077 }
1078 
1079 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
1080  const RexOperator* rex_operator) const {
1081  CHECK_GT(rex_operator->size(), size_t(0));
1082  if (rex_operator->size() == 1) {
1083  return translateUoper(rex_operator);
1084  }
1085  const auto sql_op = rex_operator->getOperator();
1086  if (sql_op == kIN) {
1087  return translateInOper(rex_operator);
1088  }
1089  if (sql_op == kMINUS || sql_op == kPLUS) {
1090  auto date_plus_minus = translateDatePlusMinus(rex_operator);
1091  if (date_plus_minus) {
1092  return date_plus_minus;
1093  }
1094  }
1095  if (sql_op == kBBOX_INTERSECT) {
1096  return translateBoundingBoxIntersectOper(rex_operator);
1097  } else if (IS_COMPARISON(sql_op)) {
1098  auto geo_comp = translateGeoComparison(rex_operator);
1099  if (geo_comp) {
1100  return geo_comp;
1101  }
1102  }
1103  auto lhs = translateScalarRex(rex_operator->getOperand(0));
1104  for (size_t i = 1; i < rex_operator->size(); ++i) {
1105  std::shared_ptr<Analyzer::Expr> rhs;
1106  SQLQualifier sql_qual{kONE};
1107  const auto rhs_op = rex_operator->getOperand(i);
1108  std::tie(rhs, sql_qual) = getQuantifiedRhs(rhs_op);
1109  if (!rhs) {
1110  rhs = translateScalarRex(rhs_op);
1111  }
1112  CHECK(rhs);
1113 
1114  // Pass in executor to get string proxy info if cast needed between
1115  // string columns
1116  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs, executor_);
1117  }
1118  return lhs;
1119 }
1120 
// NOTE(review): the line with the return type and function name (listing line 1121) is
// absent from this listing. translateOper calls translateBoundingBoxIntersectOper for
// kBBOX_INTERSECT, so this is presumably that member -- confirm.
//
// Translates a bounding-box-intersect operator. Only geometry left-hand operands are
// supported; they are delegated to translateGeoBoundingBoxIntersectOper, anything else
// raises std::runtime_error.
1122  const RexOperator* rex_operator) const {
1123  const auto sql_op = rex_operator->getOperator();
1124  CHECK(sql_op == kBBOX_INTERSECT);
1125 
// Only the left operand is translated here, to inspect its type.
1126  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
1127  const auto lhs_ti = lhs->get_type_info();
1128  if (lhs_ti.is_geometry()) {
1129  return translateGeoBoundingBoxIntersectOper(rex_operator);
1130  } else {
1131  throw std::runtime_error(
1132  "Bounding Box Intersection equivalence is currently only supported for "
1133  "geospatial types");
1134  }
1135 }
1136 
1137 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
1138  const RexCase* rex_case) const {
1139  std::shared_ptr<Analyzer::Expr> else_expr;
1140  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1141  expr_list;
1142  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1143  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
1144  const auto then_expr = translateScalarRex(rex_case->getThen(i));
1145  expr_list.emplace_back(when_expr, then_expr);
1146  }
1147  if (rex_case->getElse()) {
1148  else_expr = translateScalarRex(rex_case->getElse());
1149  }
1150  return Parser::CaseExpr::normalize(expr_list, else_expr, executor_);
1151 }
1152 
1153 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateMLPredict(
1154  const RexFunctionOperator* rex_function) const {
1155  const auto num_operands = rex_function->size();
1156  CHECK_GE(num_operands, 2UL);
1157  auto model_value = translateScalarRex(rex_function->getOperand(0));
1158  std::vector<std::shared_ptr<Analyzer::Expr>> regressor_values;
1159  for (size_t regressor_idx = 1; regressor_idx < num_operands; ++regressor_idx) {
1160  regressor_values.emplace_back(
1161  translateScalarRex(rex_function->getOperand(regressor_idx)));
1162  }
1163  return makeExpr<Analyzer::MLPredictExpr>(model_value, regressor_values);
1164 }
1165 
1166 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translatePCAProject(
1167  const RexFunctionOperator* rex_function) const {
1168  const auto num_operands = rex_function->size();
1169  CHECK_GE(num_operands, 3UL);
1170  auto model_value = translateScalarRex(rex_function->getOperand(0));
1171  std::vector<std::shared_ptr<Analyzer::Expr>> feature_values;
1172  for (size_t feature_idx = 1; feature_idx < num_operands - 1; ++feature_idx) {
1173  feature_values.emplace_back(
1174  translateScalarRex(rex_function->getOperand(feature_idx)));
1175  }
1176  auto pc_dimension_value =
1177  translateScalarRex(rex_function->getOperand(num_operands - 1));
1178  return makeExpr<Analyzer::PCAProjectExpr>(
1179  model_value, feature_values, pc_dimension_value);
1180 }
1181 
1182 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWidthBucket(
1183  const RexFunctionOperator* rex_function) const {
1184  CHECK(rex_function->size() == 4);
1185  auto target_value = translateScalarRex(rex_function->getOperand(0));
1186  auto lower_bound = translateScalarRex(rex_function->getOperand(1));
1187  auto upper_bound = translateScalarRex(rex_function->getOperand(2));
1188  auto partition_count = translateScalarRex(rex_function->getOperand(3));
1189  if (!partition_count->get_type_info().is_integer()) {
1190  throw std::runtime_error(
1191  "PARTITION_COUNT expression of width_bucket function expects an integer type.");
1192  }
1193  auto check_numeric_type =
1194  [](const std::string& col_name, const Analyzer::Expr* expr, bool allow_null_type) {
1195  if (expr->get_type_info().get_type() == kNULLT) {
1196  if (!allow_null_type) {
1197  throw std::runtime_error(
1198  col_name + " expression of width_bucket function expects non-null type.");
1199  }
1200  return;
1201  }
1202  if (!expr->get_type_info().is_number()) {
1203  throw std::runtime_error(
1204  col_name + " expression of width_bucket function expects a numeric type.");
1205  }
1206  };
1207  // target value may have null value
1208  check_numeric_type("TARGET_VALUE", target_value.get(), true);
1209  check_numeric_type("LOWER_BOUND", lower_bound.get(), false);
1210  check_numeric_type("UPPER_BOUND", upper_bound.get(), false);
1211 
1212  auto cast_to_double_if_necessary = [](std::shared_ptr<Analyzer::Expr> arg) {
1213  const auto& arg_ti = arg->get_type_info();
1214  if (arg_ti.get_type() != kDOUBLE) {
1215  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1216  return arg->add_cast(double_ti);
1217  }
1218  return arg;
1219  };
1220  target_value = cast_to_double_if_necessary(target_value);
1221  lower_bound = cast_to_double_if_necessary(lower_bound);
1222  upper_bound = cast_to_double_if_necessary(upper_bound);
1223  return makeExpr<Analyzer::WidthBucketExpr>(
1224  target_value, lower_bound, upper_bound, partition_count);
1225 }
1226 
1227 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
1228  const RexFunctionOperator* rex_function) const {
1229  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1230  const auto arg = translateScalarRex(rex_function->getOperand(0));
1231  const auto like = translateScalarRex(rex_function->getOperand(1));
1232  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
1233  throw std::runtime_error("The matching pattern must be a literal.");
1234  }
1235  const auto escape = (rex_function->size() == 3)
1236  ? translateScalarRex(rex_function->getOperand(2))
1237  : nullptr;
1238  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
1239  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
1240 }
1241 
1242 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
1243  const RexFunctionOperator* rex_function) const {
1244  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1245  const auto arg = translateScalarRex(rex_function->getOperand(0));
1246  const auto pattern = translateScalarRex(rex_function->getOperand(1));
1247  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
1248  throw std::runtime_error("The matching pattern must be a literal.");
1249  }
1250  const auto escape = (rex_function->size() == 3)
1251  ? translateScalarRex(rex_function->getOperand(2))
1252  : nullptr;
1253  return Parser::RegexpExpr::get(arg, pattern, escape, false);
1254 }
1255 
1256 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
1257  const RexFunctionOperator* rex_function) const {
1258  CHECK(rex_function->size() == 1);
1259  const auto arg = translateScalarRex(rex_function->getOperand(0));
1260  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
1261 }
1262 
1263 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
1264  const RexFunctionOperator* rex_function) const {
1265  CHECK(rex_function->size() == 1);
1266  const auto arg = translateScalarRex(rex_function->getOperand(0));
1267  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
1268 }
1269 
1270 namespace {
1271 
// NOTE(review): the line with the return type and name of this helper (listing line
// 1272) is absent from this listing -- confirm against the original file.
//
// Throws std::runtime_error unless `literal_expr` is a non-null pointer to a constant
// that is not itself NULL.
1273  const std::shared_ptr<Analyzer::Constant> literal_expr) {
1274  if (!literal_expr || literal_expr->get_is_null()) {
1275  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
1276  }
1277 }
1278 
1279 } // namespace
1280 
// Translates EXTRACT / DATE_TRUNC style calls: operand 0 is the time unit, operand 1
// the datetime expression. The function name "PG_DATE_TRUNC" selects
// DateTruncExpr::generate; every other name goes to ExtractExpr::generate.
1281 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
1282  const RexFunctionOperator* rex_function) const {
1283  CHECK_EQ(size_t(2), rex_function->size());
1284  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1285  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1286 is absent here. timeunit_lit is dereferenced below
// without a visible null check, so the missing line presumably validates it -- confirm.
1287  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1288  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
1289  if (is_date_trunc) {
1290  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1291  } else {
1292  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1293  }
1294 }
1295 
1296 namespace {
1297 
1298 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
1299  const long val) {
1300  CHECK(ti.is_number());
1301  Datum datum{0};
1302  switch (ti.get_type()) {
1303  case kTINYINT: {
1304  datum.tinyintval = val;
1305  break;
1306  }
1307  case kSMALLINT: {
1308  datum.smallintval = val;
1309  break;
1310  }
1311  case kINT: {
1312  datum.intval = val;
1313  break;
1314  }
1315  case kBIGINT: {
1316  datum.bigintval = val;
1317  break;
1318  }
1319  case kDECIMAL:
1320  case kNUMERIC: {
1321  datum.bigintval = val * exp_to_scale(ti.get_scale());
1322  break;
1323  }
1324  case kFLOAT: {
1325  datum.floatval = val;
1326  break;
1327  }
1328  case kDOUBLE: {
1329  datum.doubleval = val;
1330  break;
1331  }
1332  default:
1333  CHECK(false);
1334  }
1335  return makeExpr<Analyzer::Constant>(ti, false, datum);
1336 }
1337 
1338 } // namespace
1339 
// Translates DATEADD(unit, number_of_units, datetime) into an Analyzer::DateaddExpr.
// The number of units is cast to BIGINT; a literal NULL interval and TIME operands are
// rejected with std::runtime_error. The result type is a TIMESTAMP that keeps the
// dimension of the datetime operand.
1340 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1341  const RexFunctionOperator* rex_function) const {
1342  CHECK_EQ(size_t(3), rex_function->size());
1343  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1344  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1345 is absent here. timeunit_lit is dereferenced further
// down without a visible null check, so the missing line presumably validates it --
// confirm.
1346  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1347  const auto number_units_const =
1348  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
// Only a *literal* NULL interval is rejected here; non-constant expressions pass.
1349  if (number_units_const && number_units_const->get_is_null()) {
1350  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1351  }
1352  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1353  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1354  const auto& datetime_ti = datetime->get_type_info();
1355  if (datetime_ti.get_type() == kTIME) {
1356  throw std::runtime_error("DateAdd operation not supported for TIME.");
1357  }
1358  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1359  const int dim = datetime_ti.get_dimension();
1360  return makeExpr<Analyzer::DateaddExpr>(
1361  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1362 }
1363 
1364 namespace {
1365 
// NOTE(review): the signature line of this helper (listing line 1366) is absent from
// this listing. translateDatePlusMinus calls get_datetimeplus_rewrite_funcname(op) and
// uses the result as a function name, so this is presumably that helper -- confirm.
//
// Returns the function name "DATETIME_PLUS"; `op` must be kPLUS.
1367  CHECK(op == kPLUS);
1368  return "DATETIME_PLUS"s;
1369 }
1370 
1371 } // namespace
1372 
1373 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1374  const RexOperator* rex_operator) const {
1375  if (rex_operator->size() != 2) {
1376  return nullptr;
1377  }
1378  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1379  const auto datetime_ti = datetime->get_type_info();
1380  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1381  if (datetime_ti.get_type() == kTIME) {
1382  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1383  }
1384  return nullptr;
1385  }
1386  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1387  const auto rhs_ti = rhs->get_type_info();
1388  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1389  if (datetime_ti.is_high_precision_timestamp() ||
1390  rhs_ti.is_high_precision_timestamp()) {
1391  throw std::runtime_error(
1392  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1393  "Use "
1394  "DATEDIFF.");
1395  }
1396  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1397  const auto& rex_operator_ti = rex_operator->getType();
1398  const auto datediff_field =
1399  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1400  auto result =
1401  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1402  // multiply 1000 to result since expected result should be in millisecond precision.
1403  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1404  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1405  kMULTIPLY,
1406  kONE,
1407  result,
1408  makeNumericConstant(bigint_ti, 1000));
1409  } else {
1410  return result;
1411  }
1412  }
1413  const auto op = rex_operator->getOperator();
1414  if (op == kPLUS) {
1415  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1416  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1417  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1418  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1419  if (date_trunc) {
1420  return date_trunc;
1421  }
1422  }
1423  const auto interval = fold_expr(rhs.get());
1424  auto interval_ti = interval->get_type_info();
1425  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1426  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1427  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1428  std::shared_ptr<Analyzer::Expr> interval_sec;
1429  if (interval_lit) {
1430  interval_sec =
1431  makeNumericConstant(bigint_ti,
1432  (op == kMINUS ? -interval_lit->get_constval().bigintval
1433  : interval_lit->get_constval().bigintval) /
1434  1000);
1435  } else {
1436  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1437  kDIVIDE,
1438  kONE,
1439  interval,
1440  makeNumericConstant(bigint_ti, 1000));
1441  if (op == kMINUS) {
1442  interval_sec =
1443  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1444  }
1445  }
1446  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1447  }
1448  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1449  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1450  bigint_ti, false, kUMINUS, interval)
1451  : interval;
1452  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1453 }
1454 
// Translates DATEDIFF(unit, start, end) into a BIGINT-typed Analyzer::DatediffExpr.
// The unit string of operand 0 is mapped through to_datediff_field.
1455 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1456  const RexFunctionOperator* rex_function) const {
1457  CHECK_EQ(size_t(3), rex_function->size());
1458  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1459  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1460 is absent here. timeunit_lit is dereferenced below
// without a visible null check, so the missing line presumably validates it -- confirm.
1461  const auto start = translateScalarRex(rex_function->getOperand(1));
1462  const auto end = translateScalarRex(rex_function->getOperand(2));
1463  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1464  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1465 }
1466 
// Translates DATEPART(unit, datetime) by mapping the unit string of operand 0 through
// to_datepart_field and delegating to ExtractExpr::generate.
1467 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1468  const RexFunctionOperator* rex_function) const {
1469  CHECK_EQ(size_t(2), rex_function->size());
1470  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1471  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1472 is absent here. timeunit_lit is dereferenced below
// without a visible null check, so the missing line presumably validates it -- confirm.
1473  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1474  return ExtractExpr::generate(
1475  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1476 }
1477 
1478 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1479  const RexFunctionOperator* rex_function) const {
1480  CHECK_EQ(size_t(1), rex_function->size());
1481  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1482  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1483  rex_function->getName() == "CHAR_LENGTH"sv);
1484 }
1485 
1486 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1487  const RexFunctionOperator* rex_function) const {
1488  const auto& args = translateFunctionArgs(rex_function);
1489  CHECK_EQ(size_t(1), args.size());
1490  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1491  if (nullptr == expr || !expr->get_type_info().is_string() ||
1492  expr->get_type_info().is_varlen()) {
1493  throw std::runtime_error(rex_function->getName() +
1494  " expects a dictionary encoded text column.");
1495  }
1496  auto unnest_arg = dynamic_cast<Analyzer::UOper*>(expr);
1497  if (unnest_arg && unnest_arg->get_optype() == SQLOps::kUNNEST) {
1498  throw std::runtime_error(
1499  rex_function->getName() +
1500  " does not support unnest operator as its input expression.");
1501  }
1502  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1503 }
1504 
1505 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1506  const RexFunctionOperator* rex_function) const {
1507  CHECK_EQ(size_t(1), rex_function->size());
1508  auto arg = translateScalarRex(rex_function->getOperand(0));
1509  const auto& arg_ti = arg->get_type_info();
1510  if (arg_ti.get_type() != kDOUBLE) {
1511  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1512  arg = arg->add_cast(double_ti);
1513  }
1514  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1515 }
1516 
1517 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1518  const RexFunctionOperator* rex_function) const {
1519  std::string user{"SESSIONLESS_USER"};
1520  if (query_state_) {
1521  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1522  }
1523  return Parser::UserLiteral::get(user);
1524 }
1525 
// Translates a string function call: the function name is mapped to a SqlStringOpKind
// via ::name_to_string_op_kind, the arguments are translated, and the matching
// Analyzer::*StringOper expression is created. Unknown kinds raise std::runtime_error.
// NOTE(review): this listing drops many lines of the function: 1529 (the condition
// guarding the "not supported" error) and most of the `case SqlStringOpKind::...:`
// labels (1538, 1540, 1542, ... 1596). Each `return` below therefore appears without
// its label; do not infer the label names from this listing -- confirm against the
// original file.
1526 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateStringOper(
1527  const RexFunctionOperator* rex_function) const {
1528  const auto func_name = rex_function->getName();
1530  std::ostringstream oss;
1531  oss << "Function " << func_name << " not supported.";
1532  throw std::runtime_error(oss.str());
1533  }
1534  const auto string_op_kind = ::name_to_string_op_kind(func_name);
1535  auto args = translateFunctionArgs(rex_function);
1536 
1537  switch (string_op_kind) {
1539  return makeExpr<Analyzer::LowerStringOper>(args);
1541  return makeExpr<Analyzer::UpperStringOper>(args);
1543  return makeExpr<Analyzer::InitCapStringOper>(args);
1545  return makeExpr<Analyzer::ReverseStringOper>(args);
1547  return makeExpr<Analyzer::RepeatStringOper>(args);
1549  return makeExpr<Analyzer::ConcatStringOper>(args);
// Left and right padding share one expression class, told apart by the op kind.
1550  case SqlStringOpKind::LPAD:
1551  case SqlStringOpKind::RPAD: {
1552  return makeExpr<Analyzer::PadStringOper>(string_op_kind, args);
1553  }
// The trim variants likewise share one expression class.
1554  case SqlStringOpKind::TRIM:
1556  case SqlStringOpKind::RTRIM: {
1557  return makeExpr<Analyzer::TrimStringOper>(string_op_kind, args);
1558  }
1560  return makeExpr<Analyzer::SubstringStringOper>(args);
1562  return makeExpr<Analyzer::OverlayStringOper>(args);
1564  return makeExpr<Analyzer::ReplaceStringOper>(args);
1566  return makeExpr<Analyzer::SplitPartStringOper>(args);
1568  return makeExpr<Analyzer::RegexpReplaceStringOper>(args);
1570  return makeExpr<Analyzer::RegexpSubstrStringOper>(args);
1572  return makeExpr<Analyzer::RegexpCountStringOper>(args);
1574  return makeExpr<Analyzer::JsonValueStringOper>(args);
1576  return makeExpr<Analyzer::Base64EncodeStringOper>(args);
1578  return makeExpr<Analyzer::Base64DecodeStringOper>(args);
// String-to-string try-cast is a no-op: the first argument is returned unchanged.
1580  if (rex_function->getType().is_string() &&
1581  args.front()->get_type_info().is_string()) {
1582  // ignore try_cast and return string as is
1583  return args.front();
1584  }
1585  return makeExpr<Analyzer::TryStringCastOper>(rex_function->getType(), args);
1587  return makeExpr<Analyzer::PositionStringOper>(args);
1589  return makeExpr<Analyzer::JarowinklerSimilarityStringOper>(args);
1591  return makeExpr<Analyzer::LevenshteinDistanceStringOper>(args);
1592  case SqlStringOpKind::HASH:
1593  return makeExpr<Analyzer::HashStringOper>(args);
1595  return makeExpr<Analyzer::UrlEncodeStringOper>(args);
1597  return makeExpr<Analyzer::UrlDecodeStringOper>(args);
1598  default: {
1599  throw std::runtime_error("Unsupported string function.");
1600  }
1601  }
1602 }
1603 
// NOTE(review): the line with the return type and function name (listing line 1604) is
// absent from this listing; the CardinalityExpr result suggests this is the
// array-cardinality translator -- confirm.
//
// Translates the cardinality of a one-dimensional array operand. For arrays whose type
// reports a positive size the element count is folded into a numeric constant of the
// function's return type; otherwise a CardinalityExpr is emitted. Non-array operands,
// nested arrays and a non-positive element size raise std::runtime_error.
1605  const RexFunctionOperator* rex_function) const {
1606  const auto ret_ti = rex_function->getType();
1607  const auto arg = translateScalarRex(rex_function->getOperand(0));
1608  const auto arg_ti = arg->get_type_info();
1609  if (!arg_ti.is_array()) {
1610  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1611  }
1612  if (arg_ti.get_subtype() == kARRAY) {
1613  throw std::runtime_error(rex_function->getName() +
1614  " expects one-dimension array expression.");
1615  }
1616  const auto array_size = arg_ti.get_size();
1617  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1618 
1619  if (array_size > 0) {
// Guards the division below against a zero or negative element size.
1620  if (array_elem_size <= 0) {
1621  throw std::runtime_error(rex_function->getName() +
1622  ": unexpected array element type.");
1623  }
1624  // Return cardinality of a fixed length array
1625  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1626  }
1627  // Variable length array cardinality will be calculated at runtime
1628  return makeExpr<Analyzer::CardinalityExpr>(arg);
1629 }
1630 
1631 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1632  const RexFunctionOperator* rex_function) const {
1633  CHECK_EQ(size_t(2), rex_function->size());
1634  const auto base = translateScalarRex(rex_function->getOperand(0));
1635  const auto index = translateScalarRex(rex_function->getOperand(1));
1636  return makeExpr<Analyzer::BinOper>(
1637  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1638 }
1639 
1640 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentDate() const {
1641  constexpr bool is_null = false;
1642  Datum datum;
1643  datum.bigintval = now_ - now_ % (24 * 60 * 60); // Assumes 0 < now_.
1644  return makeExpr<Analyzer::Constant>(kDATE, is_null, datum);
1645 }
1646 
1647 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTime() const {
1648  constexpr bool is_null = false;
1649  Datum datum;
1650  datum.bigintval = now_ % (24 * 60 * 60); // Assumes 0 < now_.
1651  return makeExpr<Analyzer::Constant>(kTIME, is_null, datum);
1652 }
1653 
// Produces the expression used for the current timestamp; translateDatetime returns
// the result of this function for DATETIME('NOW').
// NOTE(review): the only statement of the body (listing line 1655) is absent from this
// listing -- confirm against the original file.
1654 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTimestamp() const {
1656 }
1657 
1658 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1659  const RexFunctionOperator* rex_function) const {
1660  CHECK_EQ(size_t(1), rex_function->size());
1661  const auto arg = translateScalarRex(rex_function->getOperand(0));
1662  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1663  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1664  if (!arg_lit || arg_lit->get_is_null()) {
1665  throw std::runtime_error(datetime_err);
1666  }
1667  CHECK(arg_lit->get_type_info().is_string());
1668  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1669  throw std::runtime_error(datetime_err);
1670  }
1671  return translateCurrentTimestamp();
1672 }
1673 
1674 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1675  const RexFunctionOperator* rex_function) const {
1676  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1677  expr_list;
1678  CHECK_EQ(size_t(1), rex_function->size());
1679  const auto operand = translateScalarRex(rex_function->getOperand(0));
1680  const auto& operand_ti = operand->get_type_info();
1681  CHECK(operand_ti.is_number());
1682  const auto zero = makeNumericConstant(operand_ti, 0);
1683  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1684  const auto uminus_operand =
1685  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1686  expr_list.emplace_back(lt_zero, uminus_operand);
1687  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1688 }
1689 
1690 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1691  const RexFunctionOperator* rex_function) const {
1692  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1693  expr_list;
1694  CHECK_EQ(size_t(1), rex_function->size());
1695  const auto operand = translateScalarRex(rex_function->getOperand(0));
1696  const auto& operand_ti = operand->get_type_info();
1697  CHECK(operand_ti.is_number());
1698  const auto zero = makeNumericConstant(operand_ti, 0);
1699  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1700  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1701  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1702  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1703  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1704  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1705  return makeExpr<Analyzer::CaseExpr>(
1706  operand_ti,
1707  false,
1708  expr_list,
1709  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1710 }
1711 
1712 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1713  return makeExpr<Analyzer::OffsetInFragment>();
1714 }
1715 
1717  const RexFunctionOperator* rex_function) const {
 // Builds an Analyzer::ArrayExpr from the translated operands of `rex_function`.
 //
 // When the declared array type carries a kNULLT subtype, the element subtype is
 // derived from the operands: all operands must share the nullable logical type of
 // the first one (none-encoded strings are treated as compatible with each other),
 // otherwise std::runtime_error is thrown naming the first offending element.
 // An operand-less array with a kNULLT subtype falls back to a kBOOLEAN subtype.
 // When the declared subtype is not kNULLT, the declared type is used unchanged.
1718  if (rex_function->getType().get_subtype() == kNULLT) {
1719  auto sql_type = rex_function->getType();
1720  CHECK(sql_type.get_type() == kARRAY);
1721 
1722  // FIX-ME: Deal with NULL arrays
1723  auto translated_function_args(translateFunctionArgs(rex_function));
1724  if (translated_function_args.size() > 0) {
 // The first element defines the type every other element is compared against.
1725  const auto first_element_logical_type =
1726  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1727 
 // Find the first element whose logical type differs from the first element's type;
 // two none-encoded strings are not considered different.
1728  auto diff_elem_itr =
1729  std::find_if(translated_function_args.begin(),
1730  translated_function_args.end(),
1731  [first_element_logical_type](const auto expr) {
1732  const auto element_logical_type =
1733  get_nullable_logical_type_info(expr->get_type_info());
1734  if (first_element_logical_type != element_logical_type) {
1735  if (first_element_logical_type.is_none_encoded_string() &&
1736  element_logical_type.is_none_encoded_string()) {
1737  return false;
1738  }
1739  return true;
1740  }
1741  return false;
1742  });
1743  if (diff_elem_itr != translated_function_args.end()) {
1744  throw std::runtime_error(
1745  "Element " +
1746  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1747  " is not of the same type as other elements of the array. Consider casting "
1748  "to force this condition.\nElement Type: " +
1749  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1750  .to_string() +
1751  "\nArray type: " + first_element_logical_type.to_string());
1752  }
1753 
 // String elements always produce a dictionary-encoded TEXT subtype: none-encoded
 // strings use the transient dictionary, dictionary-encoded strings keep their own.
1754  if (first_element_logical_type.is_string()) {
1755  sql_type.set_subtype(kTEXT);
1756  sql_type.set_compression(kENCODING_DICT);
1757  if (first_element_logical_type.is_none_encoded_string()) {
1758  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1759  sql_type.setStringDictKey(shared::StringDictKey::kTransientDictKey);
1760  } else {
1761  CHECK(first_element_logical_type.is_dict_encoded_string());
1762  sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1763  sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1764  }
 // NOTE(review): this branch looks unreachable if is_dict_encoded_string() implies
 // is_string() (the CHECK above suggests it does) -- confirm against SQLTypeInfo.
1765  } else if (first_element_logical_type.is_dict_encoded_string()) {
1766  sql_type.set_subtype(kTEXT);
1767  sql_type.set_compression(kENCODING_DICT);
1768  sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1769  sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1770  } else {
 // Non-string elements: copy type, scale and precision of the first element.
1771  sql_type.set_subtype(first_element_logical_type.get_type());
1772  sql_type.set_scale(first_element_logical_type.get_scale());
1773  sql_type.set_precision(first_element_logical_type.get_precision());
1774  }
1775 
1776  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1777  } else {
1778  // defaulting to valid sub-type for convenience
1779  sql_type.set_subtype(kBOOLEAN);
1780  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1781  }
1782  } else {
 // The declared array type already has a concrete subtype; use it unchanged.
1783  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1784  translateFunctionArgs(rex_function));
1785  }
1786 }
1787 
1788 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1789  const RexFunctionOperator* rex_function) const {
1790  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1791  return translateLike(rex_function);
1792  }
1793  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1794  return translateRegexp(rex_function);
1795  }
1796  if (rex_function->getName() == "LIKELY"sv) {
1797  return translateLikely(rex_function);
1798  }
1799  if (rex_function->getName() == "UNLIKELY"sv) {
1800  return translateUnlikely(rex_function);
1801  }
1802  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1803  return translateExtract(rex_function);
1804  }
1805  if (rex_function->getName() == "DATEADD"sv) {
1806  return translateDateadd(rex_function);
1807  }
1808  if (rex_function->getName() == "DATEDIFF"sv) {
1809  return translateDatediff(rex_function);
1810  }
1811  if (rex_function->getName() == "DATEPART"sv) {
1812  return translateDatepart(rex_function);
1813  }
1814  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1815  return translateLength(rex_function);
1816  }
1817  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1818  return translateKeyForString(rex_function);
1819  }
1820  if (rex_function->getName() == "WIDTH_BUCKET"sv) {
1821  return translateWidthBucket(rex_function);
1822  }
1823  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1824  return translateSampleRatio(rex_function);
1825  }
1826  if (rex_function->getName() == "CURRENT_USER"sv) {
1827  return translateCurrentUser(rex_function);
1828  }
1829  if (rex_function->getName() == "ML_PREDICT"sv) {
1830  return translateMLPredict(rex_function);
1831  }
1832  if (rex_function->getName() == "PCA_PROJECT"sv) {
1833  return translatePCAProject(rex_function);
1834  }
1835  if (func_resolve(rex_function->getName(),
1836  "LOWER"sv,
1837  "UPPER"sv,
1838  "INITCAP"sv,
1839  "REVERSE"sv,
1840  "REPEAT"sv,
1841  "||"sv,
1842  "LPAD"sv,
1843  "RPAD"sv,
1844  "TRIM"sv,
1845  "LTRIM"sv,
1846  "RTRIM"sv,
1847  "SUBSTRING"sv,
1848  "OVERLAY"sv,
1849  "REPLACE"sv,
1850  "SPLIT_PART"sv,
1851  "REGEXP_REPLACE"sv,
1852  "REGEXP_SUBSTR"sv,
1853  "REGEXP_MATCH"sv,
1854  "REGEXP_COUNT"sv,
1855  "JSON_VALUE"sv,
1856  "BASE64_ENCODE"sv,
1857  "BASE64_DECODE"sv,
1858  "URL_ENCODE"sv,
1859  "URL_DECODE"sv,
1860  "TRY_CAST"sv,
1861  "POSITION"sv,
1862  "JAROWINKLER_SIMILARITY"sv,
1863  "LEVENSHTEIN_DISTANCE"sv,
1864  "HASH"sv)) {
1865  return translateStringOper(rex_function);
1866  }
1867  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1868  return translateCardinality(rex_function);
1869  }
1870  if (rex_function->getName() == "ITEM"sv) {
1871  return translateItem(rex_function);
1872  }
1873  if (rex_function->getName() == "CURRENT_DATE"sv) {
1874  return translateCurrentDate();
1875  }
1876  if (rex_function->getName() == "CURRENT_TIME"sv) {
1877  return translateCurrentTime();
1878  }
1879  if (rex_function->getName() == "CURRENT_TIMESTAMP"sv) {
1880  return translateCurrentTimestamp();
1881  }
1882  if (rex_function->getName() == "NOW"sv) {
1883  return translateCurrentTimestamp();
1884  }
1885  if (rex_function->getName() == "DATETIME"sv) {
1886  return translateDatetime(rex_function);
1887  }
1888  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1889  return translateHPTLiteral(rex_function);
1890  }
1891  if (rex_function->getName() == "ABS"sv) {
1892  return translateAbs(rex_function);
1893  }
1894  if (rex_function->getName() == "SIGN"sv) {
1895  return translateSign(rex_function);
1896  }
1897  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1898  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1899  rex_function->getType(),
1900  rex_function->getName(),
1901  translateFunctionArgs(rex_function));
1902  } else if (rex_function->getName() == "ROUND"sv) {
1903  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1904  translateFunctionArgs(rex_function);
1905 
1906  if (rex_function->size() == 1) {
1907  // push a 0 constant if 2nd operand is missing.
1908  // this needs to be done as calcite returns
1909  // only the 1st operand without defaulting the 2nd one
1910  // when the user did not specify the 2nd operand.
1911  SQLTypes t = kSMALLINT;
1912  Datum d;
1913  d.smallintval = 0;
1914  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1915  }
1916 
1917  // make sure we have only 2 operands
1918  CHECK(args.size() == 2);
1919 
1920  if (!args[0]->get_type_info().is_number()) {
1921  throw std::runtime_error("Only numeric 1st operands are supported");
1922  }
1923 
1924  // the 2nd operand does not need to be a constant
1925  // it can happily reference another integer column
1926  if (!args[1]->get_type_info().is_integer()) {
1927  throw std::runtime_error("Only integer 2nd operands are supported");
1928  }
1929 
1930  // Calcite may upcast decimals in a way that is
1931  // incompatible with the extension function input. Play it safe and stick with the
1932  // argument type instead.
1933  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1934  ? args[0]->get_type_info()
1935  : rex_function->getType();
1936 
1937  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1938  ret_ti, rex_function->getName(), args);
1939  }
1940  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1941  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1942  rex_function->getName(),
1943  translateFunctionArgs(rex_function));
1944  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1945  if (date_trunc) {
1946  return date_trunc;
1947  }
1948  return translateDateadd(rex_function);
1949  }
1950  if (rex_function->getName() == "/INT"sv) {
1951  CHECK_EQ(size_t(2), rex_function->size());
1952  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1953  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1954  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1955  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1956  }
1957  if (rex_function->getName() == "Reinterpret"sv) {
1958  CHECK_EQ(size_t(1), rex_function->size());
1959  return translateScalarRex(rex_function->getOperand(0));
1960  }
1961  if (func_resolve(rex_function->getName(),
1962  "ST_X"sv,
1963  "ST_Y"sv,
1964  "ST_XMin"sv,
1965  "ST_YMin"sv,
1966  "ST_XMax"sv,
1967  "ST_YMax"sv,
1968  "ST_NRings"sv,
1969  "ST_NumGeometries"sv,
1970  "ST_NPoints"sv,
1971  "ST_Length"sv,
1972  "ST_Perimeter"sv,
1973  "ST_Area"sv,
1974  "ST_SRID"sv,
1975  "HeavyDB_Geo_PolyBoundsPtr"sv)) {
1976  CHECK_EQ(rex_function->size(), size_t(1));
1977  return translateUnaryGeoFunction(rex_function);
1978  }
1979  if (func_resolve(rex_function->getName(), "ST_ConvexHull"sv)) {
1980  CHECK_EQ(rex_function->size(), size_t(1));
1981  SQLTypeInfo ti;
1982  return translateUnaryGeoConstructor(rex_function, ti, false);
1983  }
1984  if (func_resolve(rex_function->getName(),
1985  "convert_meters_to_pixel_width"sv,
1986  "convert_meters_to_pixel_height"sv,
1987  "is_point_in_view"sv,
1988  "is_point_size_in_view"sv)) {
1989  return translateFunctionWithGeoArg(rex_function);
1990  }
1991  if (func_resolve(rex_function->getName(),
1992  "ST_Distance"sv,
1993  "ST_MaxDistance"sv,
1994  "ST_Intersects"sv,
1995  "ST_Disjoint"sv,
1996  "ST_Contains"sv,
1997  "ST_IntersectsBox"sv,
1998  "ST_Approx_Overlaps"sv,
1999  "ST_Within"sv)) {
2000  CHECK_EQ(rex_function->size(), size_t(2));
2001  return translateBinaryGeoFunction(rex_function);
2002  }
2003  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
2004  CHECK_EQ(rex_function->size(), size_t(3));
2005  return translateTernaryGeoFunction(rex_function);
2006  }
2007  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
2008  CHECK_EQ(size_t(0), rex_function->size());
2009  return translateOffsetInFragment();
2010  }
2011  if (rex_function->getName() == "ARRAY"sv) {
2012  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
2013  return translateArrayFunction(rex_function);
2014  }
2015  if (func_resolve(rex_function->getName(),
2016  "ST_GeomFromText"sv,
2017  "ST_GeogFromText"sv,
2018  "ST_Centroid"sv,
2019  "ST_SetSRID"sv,
2020  "ST_Point"sv, // TODO: where should this and below live?
2021  "ST_PointN"sv,
2022  "ST_StartPoint"sv,
2023  "ST_EndPoint"sv,
2024  "ST_Transform"sv)) {
2025  SQLTypeInfo ti;
2026  return translateGeoProjection(rex_function, ti, false);
2027  }
2028  if (func_resolve(rex_function->getName(),
2029  "ST_Intersection"sv,
2030  "ST_Difference"sv,
2031  "ST_Union"sv,
2032  "ST_Buffer"sv,
2033  "ST_ConcaveHull"sv)) {
2034  CHECK_EQ(rex_function->size(), size_t(2));
2035  SQLTypeInfo ti;
2036  return translateBinaryGeoConstructor(rex_function, ti, false);
2037  }
2038  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
2039  CHECK_EQ(rex_function->size(), size_t(1));
2040  SQLTypeInfo ti;
2041  return translateUnaryGeoPredicate(rex_function, ti, false);
2042  }
2043  if (func_resolve(rex_function->getName(), "ST_Equals"sv)) {
2044  CHECK_EQ(rex_function->size(), size_t(2));
2045  // Attempt to generate a distance based check for points
2046  if (auto distance_check = translateBinaryGeoFunction(rex_function)) {
2047  return distance_check;
2048  }
2049  SQLTypeInfo ti;
2050  return translateBinaryGeoPredicate(rex_function, ti, false);
2051  }
2052 
2053  auto arg_expr_list = translateFunctionArgs(rex_function);
2054  if (rex_function->getName() == std::string("||") ||
2055  rex_function->getName() == std::string("SUBSTRING")) {
2056  SQLTypeInfo ret_ti(kTEXT, false);
2057  return makeExpr<Analyzer::FunctionOper>(
2058  ret_ti, rex_function->getName(), arg_expr_list);
2059  }
2060 
2061  // Reset possibly wrong return type of rex_function to the return
2062  // type of the optimal valid implementation. The return type can be
2063  // wrong in the case of multiple implementations of UDF functions
2064  // that have different return types but Calcite specifies the return
2065  // type according to the first implementation.
2066  SQLTypeInfo ret_ti;
2067  try {
2068  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
2069  auto ext_func_args = ext_func_sig.getInputArgs();
2070  CHECK_LE(arg_expr_list.size(), ext_func_args.size());
2071  for (size_t i = 0, di = 0; i < arg_expr_list.size(); i++) {
2072  CHECK_LT(i + di, ext_func_args.size());
2073  auto ext_func_arg = ext_func_args[i + di];
2074  if (ext_func_arg == ExtArgumentType::PInt8 ||
2075  ext_func_arg == ExtArgumentType::PInt16 ||
2076  ext_func_arg == ExtArgumentType::PInt32 ||
2077  ext_func_arg == ExtArgumentType::PInt64 ||
2078  ext_func_arg == ExtArgumentType::PFloat ||
2079  ext_func_arg == ExtArgumentType::PDouble ||
2080  ext_func_arg == ExtArgumentType::PBool) {
2081  di++;
2082  // pointer argument follows length argument:
2083  CHECK(ext_func_args[i + di] == ExtArgumentType::Int64);
2084  }
2085  // fold casts on constants
2086  if (auto constant =
2087  std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
2088  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_arg);
2089  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
2090  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
2091  }
2092  }
2093  }
2094 
2095  ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
2096  } catch (ExtensionFunctionBindingError& e) {
2097  LOG(WARNING) << "RelAlgTranslator::translateFunction: " << e.what();
2098  throw;
2099  }
2100 
2101  // By default, the extension function type will not allow nulls. If one of the
2102  // arguments is nullable, the extension function must also explicitly allow nulls.
2103  bool arguments_not_null = true;
2104  for (const auto& arg_expr : arg_expr_list) {
2105  if (!arg_expr->get_type_info().get_notnull()) {
2106  arguments_not_null = false;
2107  break;
2108  }
2109  }
2110  ret_ti.set_notnull(arguments_not_null);
2111 
2112  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
2113 }
2114 
2115 namespace {
2116 
2117 std::vector<Analyzer::OrderEntry> translate_collation(
2118  const std::vector<SortField>& sort_fields) {
2119  std::vector<Analyzer::OrderEntry> collation;
2120  for (size_t i = 0; i < sort_fields.size(); ++i) {
2121  const auto& sort_field = sort_fields[i];
2122  collation.emplace_back(i,
2123  sort_field.getSortDir() == SortDirection::Descending,
2124  sort_field.getNullsPosition() == NullSortedPosition::First);
2125  }
2126  return collation;
2127 }
2128 
2129 size_t determineTimeValMultiplierForTimeType(const SQLTypes& window_frame_bound_type,
2130  const Analyzer::Constant* const_expr) {
2131  const auto time_unit_val = const_expr->get_constval().bigintval;
2132  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
2133  if (time_unit_val == kMilliSecsPerSec) {
2134  return 1;
2135  } else if (time_unit_val == kMilliSecsPerMin) {
2136  return kSecsPerMin;
2137  } else if (time_unit_val == kMilliSecsPerHour) {
2138  return kSecsPerHour;
2139  }
2140  }
2141  CHECK(false);
2142  return kUNKNOWN_FIELD;
2143 }
2144 
2145 ExtractField determineTimeUnit(const SQLTypes& window_frame_bound_type,
2146  const Analyzer::Constant* const_expr) {
2147  const auto time_unit_val = const_expr->get_constval().bigintval;
2148  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
2149  if (time_unit_val == kMilliSecsPerSec) {
2150  return kSECOND;
2151  } else if (time_unit_val == kMilliSecsPerMin) {
2152  return kMINUTE;
2153  } else if (time_unit_val == kMilliSecsPerHour) {
2154  return kHOUR;
2155  } else if (time_unit_val == kMilliSecsPerDay) {
2156  return kDAY;
2157  }
2158  } else {
2159  CHECK(window_frame_bound_type == kINTERVAL_YEAR_MONTH);
2160  if (time_unit_val == 1) {
2161  return kMONTH;
2162  } else if (time_unit_val == 12) {
2163  return kYEAR;
2164  }
2165  }
2166  CHECK(false);
2167  return kUNKNOWN_FIELD;
2168 }
2169 
// Classifies a window frame bound by inspecting its flags (`unbounded`, `following`,
// `preceding`, `is_current_row`) and whether it carries a `bound_expr`.
// NOTE(review): the function header and the statement inside each branch are not
// visible in this listing; presumably each branch returns the matching
// SqlWindowFrameBoundType value -- confirm against the original source.
2172  if (bound.unbounded) {
 // An unbounded bound must neither carry an expression nor be the current row.
2173  CHECK(!bound.bound_expr && !bound.is_current_row);
2174  if (bound.following) {
2176  } else if (bound.preceding) {
2178  }
2179  } else {
2180  if (bound.is_current_row) {
 // A current-row bound carries no expression.
2181  CHECK(!bound.unbounded && !bound.bound_expr);
2183  } else {
 // Remaining case: an expression-based bound, either following or preceding.
2184  CHECK(!bound.unbounded && bound.bound_expr);
2185  if (bound.following) {
2187  } else if (bound.preceding) {
2189  }
2190  }
2191  }
2193 }
2194 
2196  const Datum& d,
2197  bool is_time_unit = false) {
 // Returns whether the datum `d`, read through the member selected by the type `t`,
 // is negative. Integer, decimal/numeric and (for time units only) double values are
 // supported; any other type raises std::runtime_error.
2198  switch (t) {
2199  case kTINYINT:
2200  return d.tinyintval < 0;
2201  case kSMALLINT:
2202  return d.smallintval < 0;
2203  case kINT:
2204  return d.intval < 0;
2205  case kDOUBLE: {
2206  // the only case that double type is used is for handling time interval
2207  // i.e., represent tiny time units like nanosecond and microsecond as the
2208  // equivalent time value with SECOND time unit
2209  CHECK(is_time_unit);
2210  return d.doubleval < 0;
2211  }
 // decimal, numeric and bigint values are all read through `bigintval`
2212  case kDECIMAL:
2213  case kNUMERIC:
2214  case kBIGINT:
2215  return d.bigintval < 0;
2216  default: {
2217  throw std::runtime_error(
2218  "We currently only support integer-type literal expression as a window "
2219  "frame bound expression");
2220  }
2221  }
2222 }
2223 
2224 } // namespace
2225 
2226 // this function returns three elements as a tuple as follows:
2227 // 1) `bound_expr` is invalid
2228 // 2) `bound_expr` has a negative constant
2229 // 3) a translated bound expr which has `Analyzer::Expr*` type
2230 std::tuple<bool, bool, std::shared_ptr<Analyzer::Expr>>
2232  bool negative_constant = false;
2233  if (dynamic_cast<const RexOperator*>(bound_expr)) {
2234  auto translated_expr = translateScalarRex(bound_expr);
2235  const auto bin_oper = dynamic_cast<const Analyzer::BinOper*>(translated_expr.get());
2236  auto time_literal_expr =
2237  dynamic_cast<const Analyzer::Constant*>(bin_oper->get_left_operand());
2238  CHECK(time_literal_expr);
2239  negative_constant =
2240  is_negative_framing_bound(time_literal_expr->get_type_info().get_type(),
2241  time_literal_expr->get_constval(),
2242  true);
2243  return std::make_tuple(false, negative_constant, translated_expr);
2244  } else if (dynamic_cast<const RexLiteral*>(bound_expr)) {
2245  auto translated_expr = translateScalarRex(bound_expr);
2246  if (auto literal_expr =
2247  dynamic_cast<const Analyzer::Constant*>(translated_expr.get())) {
2248  negative_constant = is_negative_framing_bound(
2249  literal_expr->get_type_info().get_type(), literal_expr->get_constval());
2250  return std::make_tuple(false, negative_constant, translated_expr);
2251  }
2252  }
2253  return std::make_tuple(true, negative_constant, nullptr);
2254 }
2255 
// Translates a Calcite window function call into an Analyzer::WindowFunction.
//
// Operands, partition keys, order keys and the collation are translated first; the
// frame bounds are then classified and validated, and finally per-function argument
// checks are applied before the expression is built. Invalid or unsupported
// frame / argument combinations raise std::runtime_error.
//
// NOTE(review): several lines of this function are not visible in this listing (the
// initializers of `has_framing_clause` and `frame_mode`, the declaration of
// `colvar_set`, and the `case` labels of the switch); the comments below describe
// only the visible code.
2256 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
2257  const RexWindowFunctionOperator* rex_window_function) const {
2258  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2259  for (size_t i = 0; i < rex_window_function->size(); ++i) {
2260  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
2261  }
2262  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
2263  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
2264  partition_keys.push_back(translateScalarRex(partition_key.get()));
2265  }
2266  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
2267  for (const auto& order_key : rex_window_function->getOrderKeys()) {
2268  order_keys.push_back(translateScalarRex(order_key.get()));
2269  }
2270  std::vector<Analyzer::OrderEntry> collation =
2271  translate_collation(rex_window_function->getCollation());
2272 
2273  auto ti = rex_window_function->getType();
2274  auto window_func_kind = rex_window_function->getKind();
2275  if (window_function_is_value(window_func_kind)) {
2276  CHECK_GE(args.size(), 1u);
2277  if (!window_function_is_value_with_frame(window_func_kind)) {
2278  // value window functions w/ frame have logic to access argument's typeinfo
2279  // during codegen, i.e., codegenWindowNavigationFunctionOnFrame(...)
2280  // but not for non-framed value window function, so we use their arg's typeinfo
2281  // as window function's typeinfo
2282  ti = args.front()->get_type_info();
2283  }
2284  // set value type window functions' nullability
2285  ti.set_notnull(false);
2286  }
2287 
2288  bool negative_constant = false;
2289  bool detect_invalid_frame_start_bound_expr = false;
2290  bool detect_invalid_frame_end_bound_expr = false;
2291  auto& frame_start_bound = rex_window_function->getFrameStartBound();
2292  auto& frame_end_bound = rex_window_function->getFrameEndBound();
 // NOTE(review): `has_end_bound_frame_expr` is never assigned again in the visible
 // code, so the checks guarded by it further below cannot fire -- confirm whether it
 // should be set when the end bound carries an expression.
2293  bool has_end_bound_frame_expr = false;
2294  std::shared_ptr<Analyzer::Expr> frame_start_bound_expr;
2295  SqlWindowFrameBoundType frame_start_bound_type =
2296  determine_frame_bound_type(frame_start_bound);
2297  std::shared_ptr<Analyzer::Expr> frame_end_bound_expr;
2298  SqlWindowFrameBoundType frame_end_bound_type =
2299  determine_frame_bound_type(frame_end_bound);
2300  bool has_framing_clause =
2302  auto frame_mode = rex_window_function->isRows()
2305  if (order_keys.empty()) {
2306  if (frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2307  frame_end_bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
2308  // Calcite sets UNBOUNDED PRECEDING ~ UNBOUNDED_FOLLOWING as its default frame
2309  // bound if the window context has no order by clause regardless of the existence
2310  // of user-given window frame bound but at this point we have no way to recognize
2311  // the absence of the frame definition of this window context
2312  has_framing_clause = false;
2313  }
2314  } else {
 // With an ORDER BY clause, translate the bound expressions (if any) and validate them.
2315  if (frame_start_bound.bound_expr) {
2316  std::tie(detect_invalid_frame_start_bound_expr,
2317  negative_constant,
2318  frame_start_bound_expr) =
2319  translateFrameBoundExpr(frame_start_bound.bound_expr.get());
2320  }
2321 
 // NOTE(review): this assignment overwrites the `negative_constant` obtained from the
 // start bound above, so a negative start bound combined with a non-negative end
 // bound would not be reported below -- confirm whether the flags should be OR-ed.
2322  if (frame_end_bound.bound_expr) {
2323  std::tie(
2324  detect_invalid_frame_end_bound_expr, negative_constant, frame_end_bound_expr) =
2325  translateFrameBoundExpr(frame_end_bound.bound_expr.get());
2326  }
2327 
2328  // currently we only support literal expression as frame bound expression
2329  if (detect_invalid_frame_start_bound_expr || detect_invalid_frame_end_bound_expr) {
2330  throw std::runtime_error(
2331  "We currently only support literal expression as a window frame bound "
2332  "expression");
2333  }
2334 
2335  // note that Calcite already has frame-bound constraint checking logic, but we
2336  // also check various invalid cases for safety
2337  if (negative_constant) {
2338  throw std::runtime_error(
2339  "A constant expression for window framing should have nonnegative value.");
2340  }
2341 
 // Time-interval bounds are rewritten relative to the first order key.
2342  if (frame_start_bound_expr &&
2343  frame_start_bound_expr->get_type_info().is_timeinterval()) {
2344  frame_start_bound_expr = translateIntervalExprForWindowFraming(
2345  order_keys.front(),
2346  frame_start_bound_type == SqlWindowFrameBoundType::EXPR_PRECEDING,
2347  frame_start_bound_expr.get());
2348  }
2349 
2350  if (frame_end_bound_expr && frame_end_bound_expr->get_type_info().is_timeinterval()) {
2351  frame_end_bound_expr = translateIntervalExprForWindowFraming(
2352  order_keys.front(),
2353  frame_end_bound_type == SqlWindowFrameBoundType::EXPR_PRECEDING,
2354  frame_end_bound_expr.get());
2355  }
2356  }
2357 
 // Reject frames whose start bound lies after their end bound.
2358  if (frame_start_bound.following) {
2359  if (frame_end_bound.is_current_row) {
2360  throw std::runtime_error(
2361  "Window framing starting from following row cannot end with current row.");
2362  } else if (has_end_bound_frame_expr && frame_end_bound.preceding) {
2363  throw std::runtime_error(
2364  "Window framing starting from following row cannot have preceding rows.");
2365  }
2366  }
2367 
2368  if (frame_start_bound.is_current_row && frame_end_bound.preceding &&
2369  !frame_end_bound.unbounded && has_end_bound_frame_expr) {
2370  throw std::runtime_error(
2371  "Window framing starting from current row cannot have preceding rows.");
2372  }
2373 
 // An expression-less frame from UNBOUNDED PRECEDING to CURRENT ROW is handled as if
 // no framing clause had been given.
2374  if (!frame_start_bound_expr &&
2375  frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2376  !frame_end_bound_expr &&
2377  frame_end_bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
2378  has_framing_clause = false;
2379  VLOG(1) << "Ignore range framing mode with a frame bound between "
2380  "UNBOUNDED_PRECEDING and CURRENT_ROW";
2381  }
2382 
2383  if (has_framing_clause) {
2385  if (order_keys.size() != 1) {
2386  throw std::runtime_error(
2387  "Window framing with range mode requires a single order-by column");
2388  }
2389  std::set<const Analyzer::ColumnVar*,
2390  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2392  order_keys.front()->collect_column_var(colvar_set, false);
 // Framing is dropped when any column of the order key is not an integer,
 // floating-point or time column.
2393  for (auto cv : colvar_set) {
2394  if (!(cv->get_type_info().is_integer() || cv->get_type_info().is_fp() ||
2395  cv->get_type_info().is_time())) {
2396  has_framing_clause = false;
2397  VLOG(1) << "Range framing mode with non-number type ordering column is not "
2398  "supported yet, skip window framing";
2399  }
2400  }
2401  }
2402  }
2403 
 // Per-function argument validation; `need_order_by_clause` and `need_frame_def`
 // record requirements that are enforced after the switch.
2404  std::string const func_name = toString(window_func_kind);
2405  auto const num_args = args.size();
2406  bool need_order_by_clause = false;
2407  bool need_frame_def = false;
2408  switch (window_func_kind) {
2410  if (has_framing_clause && args.empty()) {
2411  args.push_back(
2412  makeExpr<Analyzer::Constant>(g_bigint_count ? kBIGINT : kINT, true));
2413  }
2414  break;
2415  }
2418  need_order_by_clause = true;
2419  need_frame_def = true;
2420  if (num_args != 2) {
2421  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2422  }
 // An int constant 1 is appended as an additional (third) argument.
2423  Datum d;
2424  d.intval = 1;
2425  args.push_back(makeExpr<Analyzer::Constant>(kINT, false, d));
2426  const auto target_expr_cv =
2427  dynamic_cast<const Analyzer::ColumnVar*>(args.front().get());
2428  if (!target_expr_cv) {
2429  throw std::runtime_error("Currently, " + func_name +
2430  " only allows a column reference as its first argument");
2431  }
2432  const auto target_ti = target_expr_cv->get_type_info();
2433  if (target_ti.is_dict_encoded_string()) {
2434  // Calcite does not represent a window function having dictionary encoded text
2435  // type as its output properly, so we need to set its output type manually
2436  ti.set_compression(kENCODING_DICT);
2437  ti.set_comp_param(target_expr_cv->get_type_info().get_comp_param());
2438  ti.setStringDictKey(target_expr_cv->get_type_info().getStringDictKey());
2439  ti.set_fixed_size();
2440  }
2441  const auto target_offset_cv =
2442  dynamic_cast<const Analyzer::Constant*>(args[1].get());
 // NOTE(review): the condition below re-tests `target_expr_cv`, which is already known
 // to be non-null here, while `target_offset_cv` is dereferenced without a null
 // check; presumably `!target_offset_cv` was intended -- confirm.
2443  if (!target_expr_cv ||
2444  is_negative_framing_bound(target_offset_cv->get_type_info().get_type(),
2445  target_offset_cv->get_constval())) {
2446  throw std::runtime_error(
2447  "Currently, " + func_name +
2448  " only allows non-negative constant as its second argument");
2449  }
2450  break;
2451  }
2454  if (num_args != 1) {
2455  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2456  }
2457  need_order_by_clause = true;
2458  need_frame_def = true;
2459  break;
2462  if (has_framing_clause) {
2463  throw std::runtime_error(func_name + " does not support window framing clause");
2464  }
2465  auto const input_expr_ti = args.front()->get_type_info();
2466  if (input_expr_ti.is_string()) {
2467  throw std::runtime_error(func_name + " not supported on " +
2468  input_expr_ti.get_type_name() + " type yet");
2469  }
2470  need_order_by_clause = true;
 // The input argument is appended as an ordering column unless an order key with the
 // same string representation already exists.
2471  std::string const arg_str{args.front()->toString()};
2472  bool needs_inject_input_arg_ordering =
2473  !std::any_of(order_keys.cbegin(),
2474  order_keys.cend(),
2475  [&arg_str](std::shared_ptr<Analyzer::Expr> const& expr) {
2476  return boost::equals(arg_str, expr->toString());
2477  });
2478  if (needs_inject_input_arg_ordering) {
2479  VLOG(1) << "Inject " << args.front()->toString() << " as ordering column of the "
2480  << func_name << " function";
2481  order_keys.push_back(args.front());
2482  // forward_fill can fill null values if it is ordered with NULLS LAST
2483  // in contrast, we make NULLS FIRST ordering for the backward_fill function
2484  collation.emplace_back(collation.size() + 1,
2485  false,
2486  window_func_kind != SqlWindowFunctionKind::FORWARD_FILL);
2487  }
2488  break;
2489  }
2492  // todo (yoonmin) : args.size() will be three if we support default value
2493  if (num_args != 2) {
2494  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2495  }
2496  if (window_func_kind == SqlWindowFunctionKind::NTH_VALUE_IN_FRAME) {
2497  need_order_by_clause = true;
2498  need_frame_def = true;
2499  }
2500  if (!args[1]) {
2501  throw std::runtime_error(func_name +
2502  " must have a positional argument expression.");
2503  }
 // The positional argument must be a positive integer constant; it is decremented
 // in place so that it becomes a zero-based position.
2504  bool has_valid_arg = false;
2505  if (args[1]->get_type_info().is_integer()) {
2506  if (auto* n_value_ptr = dynamic_cast<Analyzer::Constant*>(args[1].get())) {
2507  if (0 < n_value_ptr->get_constval().intval) {
2508  // i.e., having N larger than the partition size
2509  // set the proper N to match the zero-start index pos
2510  auto d = n_value_ptr->get_constval();
2511  d.intval -= 1;
2512  n_value_ptr->set_constval(d);
2513  has_valid_arg = true;
2514  }
2515  }
2516  }
2517  if (!has_valid_arg) {
2518  throw std::runtime_error("The positional argument of the " + func_name +
2519  " must be a positive integer constant.");
2520  }
2521  break;
2522  }
2524  if (order_keys.empty()) {
2525  throw std::runtime_error(
2526  func_name + " requires an ORDER BY sub-clause within the window clause");
2527  }
2528  if (has_framing_clause) {
2529  LOG(INFO)
2530  << window_func_kind
2531  << " must use a pre-defined window frame range (e.g., ROWS BETWEEN "
2532  "UNBOUNDED PRECEDING AND CURRENT ROW). "
2533  "Thus, we skip the user-defined window frame for this window function";
2534  }
 // Force the pre-defined frame UNBOUNDED PRECEDING .. CURRENT ROW.
2535  has_framing_clause = true;
2537  frame_start_bound_type = SqlWindowFrameBoundType::UNBOUNDED_PRECEDING;
2538  frame_end_bound_type = SqlWindowFrameBoundType::CURRENT_ROW;
2539  break;
2540  default:;
2541  }
2542 
2543  if (need_order_by_clause && order_keys.empty()) {
2544  throw std::runtime_error(func_name + " requires an ORDER BY clause");
2545  }
2546 
2547  if (need_frame_def && !has_framing_clause) {
2548  throw std::runtime_error(func_name + " requires window frame definition");
2549  }
2550 
 // Without a framing clause the frame bounds are reset to UNKNOWN / no expression.
2551  if (!has_framing_clause) {
2552  frame_start_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2553  frame_end_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2554  frame_start_bound_expr = nullptr;
2555  frame_end_bound_expr = nullptr;
2556  }
2557 
2558  return makeExpr<Analyzer::WindowFunction>(
2559  ti,
2560  rex_window_function->getKind(),
2561  args,
2562  partition_keys,
2563  order_keys,
2564  has_framing_clause ? frame_mode : Analyzer::WindowFunction::FrameBoundType::NONE,
2565  makeExpr<Analyzer::WindowFrame>(frame_start_bound_type, frame_start_bound_expr),
2566  makeExpr<Analyzer::WindowFrame>(frame_end_bound_type, frame_end_bound_expr),
2567  collation);
2568 }
2569 
2571  std::shared_ptr<Analyzer::Expr> order_key,
2572  bool for_preceding_bound,
2573  const Analyzer::Expr* expr) const {
2574  // translate time interval expression and prepare appropriate frame bound expression:
2575  // a) manually compute time unit datum: time type
2576  // b) use dateadd expression: date and timestamp
2577  const auto frame_bound_expr = dynamic_cast<const Analyzer::BinOper*>(expr);
2578  CHECK(frame_bound_expr);
2579  CHECK_EQ(frame_bound_expr->get_optype(), kMULTIPLY);
2580  const auto order_key_ti = order_key->get_type_info();
2581  const auto frame_bound_ti = frame_bound_expr->get_type_info();
2582  const auto time_val_expr =
2583  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_left_operand());
2584  const auto time_unit_val_expr =
2585  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_right_operand());
2586  ExtractField time_unit =
2587  determineTimeUnit(frame_bound_ti.get_type(), time_unit_val_expr);
2588  bool invalid_time_unit_type = false;
2589  bool invalid_frame_bound_expr_type = false;
2590  Datum d;
2591  auto prepare_time_value_datum = [&d,
2592  &invalid_frame_bound_expr_type,
2593  &time_val_expr,
2594  &for_preceding_bound](bool is_timestamp_second) {
2595  // currently, Calcite only accepts interval with second, so to represent
2596  // smaller time units like millisecond, we have to use decimal point like
2597  // INTERVAL 0.003 SECOND (for millisecond)
2598  // thus, depending on what time unit we want to represent, Calcite analyzes
2599  // the time value to one of following two types: integer and decimal (and
2600  // numeric) types
2601  switch (time_val_expr->get_type_info().get_type()) {
2602  case kTINYINT: {
2603  d.bigintval = time_val_expr->get_constval().tinyintval;
2604  break;
2605  }
2606  case kSMALLINT: {
2607  d.bigintval = time_val_expr->get_constval().smallintval;
2608  break;
2609  }
2610  case kINT: {
2611  d.bigintval = time_val_expr->get_constval().intval;
2612  break;
2613  }
2614  case kBIGINT: {
2615  d.bigintval = time_val_expr->get_constval().bigintval;
2616  break;
2617  }
2618  case kDECIMAL:
2619  case kNUMERIC: {
2620  if (!is_timestamp_second) {
2621  // date and time type only use integer type as their time value
2622  invalid_frame_bound_expr_type = true;
2623  break;
2624  }
2625  d.bigintval = time_val_expr->get_constval().bigintval;
2626  break;
2627  }
2628  case kDOUBLE: {
2629  if (!is_timestamp_second) {
2630  // date and time type only use integer type as their time value
2631  invalid_frame_bound_expr_type = true;
2632  break;
2633  }
2634  d.bigintval = time_val_expr->get_constval().doubleval *
2635  pow(10, time_val_expr->get_type_info().get_scale());
2636  break;
2637  }
2638  default: {
2639  invalid_frame_bound_expr_type = true;
2640  break;
2641  }
2642  }
2643  if (for_preceding_bound) {
2644  d.bigintval *= -1;
2645  }
2646  };
2647 
2648  switch (order_key_ti.get_type()) {
2649  case kTIME: {
2650  if (time_val_expr->get_type_info().is_integer()) {
2651  if (time_unit == kSECOND || time_unit == kMINUTE || time_unit == kHOUR) {
2652  const auto time_multiplier = determineTimeValMultiplierForTimeType(
2653  frame_bound_ti.get_type(), time_unit_val_expr);
2654  switch (time_val_expr->get_type_info().get_type()) {
2655  case kTINYINT: {
2656  d.bigintval = time_val_expr->get_constval().tinyintval * time_multiplier;
2657  break;
2658  }
2659  case kSMALLINT: {
2660  d.bigintval = time_val_expr->get_constval().smallintval * time_multiplier;
2661  break;
2662  }
2663  case kINT: {
2664  d.bigintval = time_val_expr->get_constval().intval * time_multiplier;
2665  break;
2666  }
2667  case kBIGINT: {
2668  d.bigintval = time_val_expr->get_constval().bigintval * time_multiplier;
2669  break;
2670  }
2671  default: {
2672  UNREACHABLE();
2673  break;
2674  }
2675  }
2676  } else {
2677  invalid_frame_bound_expr_type = true;
2678  }
2679  } else {
2680  invalid_time_unit_type = true;
2681  }
2682  if (invalid_frame_bound_expr_type) {
2683  throw std::runtime_error(
2684  "Invalid time unit is used to define window frame bound expression for " +
2685  order_key_ti.get_type_name() + " type");
2686  } else if (invalid_time_unit_type) {
2687  throw std::runtime_error(
2688  "Window frame bound expression has an invalid type for " +
2689  order_key_ti.get_type_name() + " type");
2690  }
2691  return std::make_shared<Analyzer::Constant>(kBIGINT, false, d);
2692  }
2693  case kDATE: {
2695  if (time_val_expr->get_type_info().is_integer()) {
2696  switch (time_unit) {
2697  case kDAY: {
2698  daField = to_dateadd_field("day");
2699  break;
2700  }
2701  case kMONTH: {
2702  daField = to_dateadd_field("month");
2703  break;
2704  }
2705  case kYEAR: {
2706  daField = to_dateadd_field("year");
2707  break;
2708  }
2709  default: {
2710  invalid_frame_bound_expr_type = true;
2711  break;
2712  }
2713  }
2714  } else {
2715  invalid_time_unit_type = true;
2716  }
2717  if (invalid_frame_bound_expr_type) {
2718  throw std::runtime_error(
2719  "Invalid time unit is used to define window frame bound expression for " +
2720  order_key_ti.get_type_name() + " type");
2721  } else if (invalid_time_unit_type) {
2722  throw std::runtime_error(
2723  "Window frame bound expression has an invalid type for " +
2724  order_key_ti.get_type_name() + " type");
2725  }
2727  prepare_time_value_datum(false);
2728  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2729  const int dim = order_key_ti.get_dimension();
2730  return makeExpr<Analyzer::DateaddExpr>(
2731  SQLTypeInfo(kTIMESTAMP, dim, 0, false), daField, cast_number_units, order_key);
2732  }
2733  case kTIMESTAMP: {
2735  switch (time_unit) {
2736  case kSECOND: {
2737  switch (time_val_expr->get_type_info().get_scale()) {
2738  case 0: {
2739  daField = to_dateadd_field("second");
2740  break;
2741  }
2742  case 3: {
2743  daField = to_dateadd_field("millisecond");
2744  break;
2745  }
2746  case 6: {
2747  daField = to_dateadd_field("microsecond");
2748  break;
2749  }
2750  case 9: {
2751  daField = to_dateadd_field("nanosecond");
2752  break;
2753  }
2754  default:
2755  UNREACHABLE();
2756  break;
2757  }
2758  prepare_time_value_datum(true);
2759  break;
2760  }
2761  case kMINUTE: {
2762  daField = to_dateadd_field("minute");
2763  prepare_time_value_datum(false);
2764  break;
2765  }
2766  case kHOUR: {
2767  daField = to_dateadd_field("hour");
2768  prepare_time_value_datum(false);
2769  break;
2770  }
2771  case kDAY: {
2772  daField = to_dateadd_field("day");
2773  prepare_time_value_datum(false);
2774  break;
2775  }
2776  case kMONTH: {
2777  daField = to_dateadd_field("month");
2778  prepare_time_value_datum(false);
2779  break;
2780  }
2781  case kYEAR: {
2782  daField = to_dateadd_field("year");
2783  prepare_time_value_datum(false);
2784  break;
2785  }
2786  default: {
2787  invalid_time_unit_type = true;
2788  break;
2789  }
2790  }
2791  if (!invalid_time_unit_type) {
2793  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2794  const int dim = order_key_ti.get_dimension();
2795  return makeExpr<Analyzer::DateaddExpr>(SQLTypeInfo(kTIMESTAMP, dim, 0, false),
2796  daField,
2797  cast_number_units,
2798  order_key);
2799  }
2800  return nullptr;
2801  }
2802  default: {
2803  UNREACHABLE();
2804  break;
2805  }
2806  }
2807  if (invalid_frame_bound_expr_type) {
2808  throw std::runtime_error(
2809  "Invalid time unit is used to define window frame bound expression for " +
2810  order_key_ti.get_type_name() + " type");
2811  } else if (invalid_time_unit_type) {
2812  throw std::runtime_error("Window frame bound expression has an invalid type for " +
2813  order_key_ti.get_type_name() + " type");
2814  }
2815  return nullptr;
2816 }
2817 
2819  const RexFunctionOperator* rex_function) const {
2820  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2821  for (size_t i = 0; i < rex_function->size(); ++i) {
2822  args.push_back(translateScalarRex(rex_function->getOperand(i)));
2823  }
2824  return args;
2825 }
2826 
2828  const std::shared_ptr<Analyzer::Expr> qual_expr) {
2829  CHECK(qual_expr);
2830  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2831  if (!bin_oper) {
2832  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2833  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
2834  }
2835 
2836  if (bin_oper->get_optype() == kAND) {
2837  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
2838  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
2839  auto simple_quals = lhs_cf.simple_quals;
2840  simple_quals.insert(
2841  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
2842  auto quals = lhs_cf.quals;
2843  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
2844  return {simple_quals, quals};
2845  }
2846  int rte_idx{0};
2847  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
2848  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
2849  : QualsConjunctiveForm{{}, {qual_expr}};
2850 }
2851 
2852 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
2853  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
2854  CHECK(qual_expr);
2855  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2856  if (!bin_oper) {
2857  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2858  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
2859  }
2860  if (bin_oper->get_optype() == kOR) {
2861  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
2862  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
2863  auto quals = lhs_df;
2864  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
2865  return quals;
2866  }
2867  return {qual_expr};
2868 }
2869 
2870 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
2871  const RexFunctionOperator* rex_function) const {
2872  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
2873  Therefore any string having fractional seconds more 3 places after the decimal
2874  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
2875  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
2876  calcite and translating them to generate our own casts.
2877  */
2878  CHECK_EQ(size_t(1), rex_function->size());
2879  const auto operand = translateScalarRex(rex_function->getOperand(0));
2880  const auto& operand_ti = operand->get_type_info();
2881  const auto& target_ti = rex_function->getType();
2882  if (!operand_ti.is_string()) {
2883  throw std::runtime_error(
2884  "High precision timestamp cast argument must be a string. Input type is: " +
2885  operand_ti.get_type_name());
2886  } else if (!target_ti.is_high_precision_timestamp()) {
2887  throw std::runtime_error(
2888  "Cast target type should be high precision timestamp. Input type is: " +
2889  target_ti.get_type_name());
2890  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
2891  throw std::runtime_error(
2892  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
2893  std::to_string(target_ti.get_dimension()) + ")");
2894  } else {
2895  return operand->add_cast(target_ti);
2896  }
2897 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
int8_t tinyintval
Definition: Datum.h:73
Defines data structures for the semantic analysis phase of query processing.
size_t g_watchdog_in_clause_max_num_elem_non_bitmap
Definition: Execute.cpp:85
Definition: sqldefs.h:74
SqlWindowFrameBoundType
Definition: sqldefs.h:202
SqlWindowFrameBoundType determine_frame_bound_type(const RexWindowFunctionOperator::RexWindowBound &bound)
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:440
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void set_compression(EncodingType c)
Definition: sqltypes.h:481
SQLAgg
Definition: sqldefs.h:76
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SqlStringOpKind name_to_string_op_kind(const std::string &func_name)
Definition: sqldefs.h:460
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:241
std::shared_ptr< Analyzer::Expr > translateCurrentTimestamp() const
std::shared_ptr< Analyzer::Expr > translateBinaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
SQLAgg getKind() const
Definition: RelAlgDag.h:799
Definition: sqltypes.h:76
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:215
SQLTypes
Definition: sqltypes.h:65
static constexpr int64_t kSecsPerHour
size_t getOperand(size_t idx) const
Definition: RelAlgDag.h:805
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
Definition: RelAlgDag.h:445
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
Definition: Analyzer.h:222
static constexpr int64_t kSecsPerMin
std::shared_ptr< Analyzer::Expr >(RelAlgTranslator::*)(RexScalar const *) const Handler
SQLQualifier
Definition: sqldefs.h:74
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:166
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1490
#define LOG(tag)
Definition: Logger.h:285
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:378
size_t size() const
Definition: RelAlgDag.h:364
static constexpr int64_t kMilliSecsPerDay
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:366
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
const std::vector< SortField > & getCollation() const
Definition: RelAlgDag.h:670
SQLOps
Definition: sqldefs.h:31
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
int8_t boolval
Definition: Datum.h:72
static bool isFramingAvailableWindowFunc(SqlWindowFunctionKind kind)
Definition: Analyzer.h:2966
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr, const Executor *executor=nullptr)
Definition: ParserNode.cpp:380
Definition: sqldefs.h:40
#define UNREACHABLE()
Definition: Logger.h:338
#define CHECK_GE(x, y)
Definition: Logger.h:306
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:51
Definition: sqldefs.h:32
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:435
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:184
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:43
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
bool operator()(IndexedHandler const &pair) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
bool is_number() const
Definition: sqltypes.h:576
#define CHECK_GT(x, y)
Definition: Logger.h:305
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< const RexScalar > bound_expr
Definition: RelAlgDag.h:581
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
int32_t intval
Definition: Datum.h:75
bool is_time() const
Definition: sqltypes.h:579
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static constexpr int64_t kMilliSecsPerMin
bool g_enable_string_functions
static constexpr int64_t kMilliSecsPerSec
std::shared_ptr< Analyzer::Expr > translateBoundingBoxIntersectOper(const RexOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:33
robin_hood::unordered_map< RexScalar const *, std::shared_ptr< Analyzer::Expr > > cache_
unsigned getIndex() const
Definition: RelAlgDag.h:174
Supported runtime functions management and retrieval.
future< Result > async(Fn &&fn, Args &&...args)
static SysCatalog & instance()
Definition: SysCatalog.h:343
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
Definition: RelAlgDag.h:376
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:29
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void set_fixed_size()
Definition: sqltypes.h:479
DateaddField
Definition: DateAdd.h:42
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:302
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateStringOper(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:796
size_t determineTimeValMultiplierForTimeType(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
void set_scale(int s)
Definition: sqltypes.h:475
int64_t bigintval
Definition: Datum.h:76
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > getQuantifiedRhs(const RexScalar *) const
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: RelAlgDag.h:433
std::shared_ptr< Analyzer::Expr > translateCurrentTime() const
bool g_bigint_count
Definition: sqldefs.h:39
bool is_distinct_supported(SQLAgg const agg_kind)
bool g_enable_watchdog
Definition: sqldefs.h:74
int16_t smallintval
Definition: Datum.h:74
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
DatetruncField to_datediff_field(const std::string &field)
bool is_boolean() const
Definition: sqltypes.h:582
std::array< IndexedHandler, sizeof...(Ts)> makeHandlers()
const RexWindowBound & getFrameEndBound() const
Definition: RelAlgDag.h:674
std::tuple< bool, bool, std::shared_ptr< Analyzer::Expr > > translateFrameBoundExpr(const RexScalar *bound_expr) const
std::shared_ptr< Analyzer::Expr > translate(const RexScalar *rex) const
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
Argument type based extension function binding.
std::string * stringval
Definition: Datum.h:81
std::shared_ptr< Analyzer::Expr > translatePCAProject(const RexFunctionOperator *) const
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:25
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const shared::StringDictKey &dest_dict_key, const std::vector< int32_t > &source_ids, const shared::StringDictKey &source_dict_key, const int32_t dest_generation)
Definition: sqldefs.h:36
void set_comp_param(int p)
Definition: sqltypes.h:482
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:79
Definition: sqltypes.h:80
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
size_t g_watchdog_in_clause_max_num_input_rows
Definition: Execute.cpp:87
Definition: sqldefs.h:42
Definition: sqldefs.h:74
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:643
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &stringval, const bool is_null)
Definition: ParserNode.cpp:147
std::shared_ptr< Analyzer::Expr > translateIntervalExprForWindowFraming(std::shared_ptr< Analyzer::Expr > order_key, bool for_preceding_bound, const Analyzer::Expr *expr) const
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:304
bool is_negative_framing_bound(const SQLTypes t, const Datum &d, bool is_time_unit=false)
const RexWindowBound & getFrameStartBound() const
Definition: RelAlgDag.h:672
std::shared_ptr< Analyzer::Expr > translateUnaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:701
std::pair< std::type_index, Handler > IndexedHandler
void setStringDictKey(const shared::StringDictKey &dict_key)
Definition: sqltypes.h:1063
static RelRexToStringConfig defaults()
Definition: RelAlgDag.h:78
Datum get_constval() const
Definition: Analyzer.h:348
std::shared_ptr< Analyzer::Expr > translateMLPredict(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
Definition: RelAlgDag.h:641
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:1056
Definition: sqltypes.h:68
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:191
ExtractField
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
bool window_function_is_value_with_frame(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:47
size_t g_in_clause_num_elem_skip_bitmap
Definition: Execute.cpp:88
static const StringDictKey kTransientDictKey
Definition: DbObjectKeys.h:45
Definition: sqldefs.h:55
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:227
bool isDistinct() const
Definition: RelAlgDag.h:801
void set_notnull(bool n)
Definition: sqltypes.h:477
static constexpr int64_t kMilliSecsPerHour
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:653
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateWidthBucket(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
Definition: RelAlgDag.h:803
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:186
bool g_cluster
Definition: sqldefs.h:35
bool isRows() const
Definition: RelAlgDag.h:676
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:72
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
Definition: RelAlgDag.h:506
bool is_string() const
Definition: sqltypes.h:561
std::shared_ptr< Analyzer::Expr > translateCurrentDate() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
size_t g_watchdog_in_clause_max_num_elem_bitmap
Definition: Execute.cpp:86
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
int cpu_threads()
Definition: thread_count.h:25
const bool just_explain_
Definition: Datum.h:71
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
Definition: RelAlgDag.h:865
bool is_decimal() const
Definition: sqltypes.h:570
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1581
std::shared_ptr< Analyzer::Expr > translateBinaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:41
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
Definition: sqldefs.h:86
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
#define VLOG(n)
Definition: Logger.h:388
Type timer_start()
Definition: measure.h:42
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:473
std::shared_ptr< Analyzer::Expr > translateGeoBoundingBoxIntersectOper(const RexOperator *) const
#define IS_COMPARISON(X)
Definition: sqldefs.h:61
double doubleval
Definition: Datum.h:78
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:180
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
ExtractField determineTimeUnit(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:470