OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringOpsIR.cpp File Reference
#include "CodeGenerator.h"
#include "Execute.h"
#include "../Shared/funcannotations.h"
#include "../Shared/sqldefs.h"
#include "Parser/ParserNode.h"
#include "QueryEngine/ExpressionRewrite.h"
#include "StringOps/StringOps.h"
#include <boost/locale/conversion.hpp>
+ Include dependency graph for StringOpsIR.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{StringOpsIR.cpp}
 

Macros

#define DEF_APPLY_NUMERIC_STRING_OPS(value_type, value_name)
 
#define DEF_APPLY_MULTI_INPUT_NUMERIC_STRING_OPS(value_type, value_name)
 
#define DEF_CONVERT_TO_STRING_AND_ENCODE(value_type, value_name)
 

Functions

RUNTIME_EXPORT StringView string_decode (int8_t *chunk_iter_, int64_t pos)
 
RUNTIME_EXPORT StringView string_decompress (const int32_t string_id, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t string_compress (const StringView string_view, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t apply_string_ops_and_encode (const char *str_ptr, const int32_t str_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t apply_multi_input_string_ops_and_encode (const char *str1_ptr, const int32_t str1_len, const char *str2_ptr, const int32_t str2_len, const int64_t string_ops_handle, const int64_t string_dict_handle)
 
RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict (const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
 
RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict (const int32_t string_id, const int64_t source_string_dict_handle, const int64_t dest_string_dict_handle)
 
int32_t write_string_to_proxy (const std::string &str, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_bool (const int8_t operand, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_decimal (const int64_t operand, const int32_t precision, const int32_t scale, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_time (const int64_t operand, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_timestamp (const int64_t operand, const int32_t dimension, const int64_t string_dict_handle)
 
RUNTIME_EXPORT ALWAYS_INLINE
int32_t 
convert_to_string_and_encode_date (const int64_t operand, const int64_t string_dict_handle)
 
std::vector
< StringOps_Namespace::StringOpInfo >
getStringOpInfos (const Analyzer::StringOper *expr)
 
std::unique_ptr
< StringDictionaryTranslationMgr >
translate_dict_strings (const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
 
void pre_translate_string_ops (const Analyzer::StringOper *string_oper, Executor *executor)
 
std::vector< int32_t > anonymous_namespace{StringOpsIR.cpp}::get_compared_ids (const StringDictionaryProxy *dict, const SQLOps compare_operator, const std::string &pattern)
 

Macro Definition Documentation

#define DEF_APPLY_MULTI_INPUT_NUMERIC_STRING_OPS (   value_type,
  value_name 
)
Value:
extern "C" RUNTIME_EXPORT ALWAYS_INLINE value_type \
apply_multi_input_numeric_string_ops_##value_name( \
const char* str1_ptr, \
const int32_t str1_len, \
const char* str2_ptr, \
const int32_t str2_len, \
const int64_t string_ops_handle) { \
const std::string_view raw_str1(str1_ptr, str1_len); \
const std::string_view raw_str2(str2_ptr, str2_len); \
auto string_ops = \
reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle); \
const auto result_datum = string_ops->numericEval(raw_str1, raw_str2); \
return result_datum.value_name##val; \
}
#define RUNTIME_EXPORT
#define ALWAYS_INLINE

Definition at line 148 of file StringOpsIR.cpp.

#define DEF_APPLY_NUMERIC_STRING_OPS (   value_type,
  value_name 
)
Value:
extern "C" RUNTIME_EXPORT ALWAYS_INLINE value_type \
apply_numeric_string_ops_##value_name( \
const char* str_ptr, const int32_t str_len, const int64_t string_ops_handle) { \
const std::string_view raw_str(str_ptr, str_len); \
auto string_ops = \
reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle); \
const auto result_datum = string_ops->numericEval(raw_str); \
return result_datum.value_name##val; \
}
#define RUNTIME_EXPORT
#define ALWAYS_INLINE

Definition at line 127 of file StringOpsIR.cpp.

#define DEF_CONVERT_TO_STRING_AND_ENCODE (   value_type,
  value_name 
)
Value:
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t \
convert_to_string_and_encode_##value_name(const value_type operand, \
const int64_t string_dict_handle) { \
return write_string_to_proxy(std::to_string(operand), string_dict_handle); \
}
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
std::string to_string(char const *&&v)
#define RUNTIME_EXPORT
#define ALWAYS_INLINE

Definition at line 177 of file StringOpsIR.cpp.

Function Documentation

RUNTIME_EXPORT int32_t apply_multi_input_string_ops_and_encode ( const char *  str1_ptr,
const int32_t  str1_len,
const char *  str2_ptr,
const int32_t  str2_len,
const int64_t  string_ops_handle,
const int64_t  string_dict_handle 
)

Definition at line 77 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

82  {
83  std::string_view raw_str1(str1_ptr, str1_len);
84  std::string_view raw_str2(str2_ptr, str2_len);
85  auto string_ops =
86  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
87  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
88  const auto result_str = string_ops->multi_input_eval(raw_str1, raw_str2);
89  if (result_str.empty()) {
90  return inline_int_null_value<int32_t>();
91  }
92  return string_dict_proxy->getOrAddTransient(result_str);
93 }
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

RUNTIME_EXPORT int32_t apply_string_ops_and_encode ( const char *  str_ptr,
const int32_t  str_len,
const int64_t  string_ops_handle,
const int64_t  string_dict_handle 
)

Definition at line 61 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

64  {
65  std::string raw_str(str_ptr, str_len);
66  auto string_ops =
67  reinterpret_cast<const StringOps_Namespace::StringOps*>(string_ops_handle);
68  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
69  const auto result_str = string_ops->operator()(raw_str);
70  if (result_str.empty()) {
71  return inline_int_null_value<int32_t>();
72  }
73  return string_dict_proxy->getOrAddTransient(result_str);
74 }
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_bool ( const int8_t  operand,
const int64_t  string_dict_handle 
)

Definition at line 194 of file StringOpsIR.cpp.

References write_string_to_proxy().

195  {
196  return write_string_to_proxy(operand == 1 ? "true" : "false", string_dict_handle);
197 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_date ( const int64_t  operand,
const int64_t  string_dict_handle 
)

Definition at line 231 of file StringOpsIR.cpp.

References shared::formatDate(), and write_string_to_proxy().

232  {
233  constexpr size_t buf_size = 64;
234  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
235  shared::formatDate(buf, buf_size, operand);
236  return write_string_to_proxy(buf, string_dict_handle);
237 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:29

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_decimal ( const int64_t  operand,
const int32_t  precision,
const int32_t  scale,
const int64_t  string_dict_handle 
)

Definition at line 200 of file StringOpsIR.cpp.

References shared::power10inv(), and write_string_to_proxy().

203  {
204  constexpr size_t buf_size = 64;
205  char buf[buf_size]; // Holds the formatted decimal string written by snprintf below.
206  const double v = static_cast<double>(operand) * shared::power10inv(scale);
207  snprintf(buf, buf_size, "%*.*f", precision, scale, v);
208  return write_string_to_proxy(buf, string_dict_handle);
209 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
double power10inv(unsigned const x)
Definition: misc.h:291

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_time ( const int64_t  operand,
const int64_t  string_dict_handle 
)

Definition at line 212 of file StringOpsIR.cpp.

References shared::formatHMS(), and write_string_to_proxy().

213  {
214  constexpr size_t buf_size = 64;
215  char buf[buf_size];
216  shared::formatHMS(buf, buf_size, operand);
217  return write_string_to_proxy(buf, string_dict_handle);
218 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:98

+ Here is the call graph for this function:

RUNTIME_EXPORT ALWAYS_INLINE int32_t convert_to_string_and_encode_timestamp ( const int64_t  operand,
const int32_t  dimension,
const int64_t  string_dict_handle 
)

Definition at line 221 of file StringOpsIR.cpp.

References shared::formatDateTime(), and write_string_to_proxy().

223  {
224  constexpr size_t buf_size = 64;
225  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
226  shared::formatDateTime(buf, buf_size, operand, dimension);
227  return write_string_to_proxy(buf, string_dict_handle);
228 }
int32_t write_string_to_proxy(const std::string &str, const int64_t string_dict_handle)
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension, bool use_iso_format)
Definition: misc.cpp:47

+ Here is the call graph for this function:

std::vector<StringOps_Namespace::StringOpInfo> getStringOpInfos ( const Analyzer::StringOper *  expr)

Definition at line 277 of file StringOpsIR.cpp.

References CHECK, and Analyzer::StringOper::getChainedStringOpExprs().

Referenced by CodeGenerator::codegenPerRowStringOper(), and translate_dict_strings().

278  {
279  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
280  auto chained_string_op_exprs = expr->getChainedStringOpExprs();
281  if (chained_string_op_exprs.empty()) {
282  // Likely will change the below to a CHECK but until we have more confidence
283  // that all potential query patterns have nodes that might contain string ops folded,
284  // leaving as an error for now
285  throw std::runtime_error(
286  "Expected folded string operator but found operator unfolded.");
287  }
288  // Consider encapsulating below in an Analyzer::StringOper method to dedup
289  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
290  auto chained_string_op =
291  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
292  CHECK(chained_string_op);
293  StringOps_Namespace::StringOpInfo string_op_info(chained_string_op->get_kind(),
294  chained_string_op->get_type_info(),
295  chained_string_op->getLiteralArgs());
296  string_op_infos.emplace_back(string_op_info);
297  }
298  return string_op_infos;
299 }
Expression class for string functions The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1601
#define CHECK(condition)
Definition: Logger.h:291
std::vector< std::shared_ptr< Analyzer::Expr > > getChainedStringOpExprs() const
Definition: Analyzer.h:1700

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t intersect_translate_string_id_to_other_dict ( const int32_t  string_id,
const int64_t  source_string_dict_handle,
const int64_t  dest_string_dict_handle 
)

Definition at line 96 of file StringOpsIR.cpp.

References StringDictionaryProxy::getString().

98  {
99  const auto source_string_dict_proxy =
100  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
101  auto dest_string_dict_proxy =
102  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
103  // Can we have StringDictionaryProxy::getString return a reference?
104  const auto source_str = source_string_dict_proxy->getString(string_id);
105  if (source_str.empty()) {
106  return inline_int_null_value<int32_t>();
107  }
108  return dest_string_dict_proxy->getIdOfString(source_str);
109 }
std::string getString(int32_t string_id) const

+ Here is the call graph for this function:

void pre_translate_string_ops ( const Analyzer::StringOper *  string_oper,
Executor *  executor 
)

Definition at line 650 of file StringOpsIR.cpp.

References CHECK, CHECK_GT, CHECK_NE, CPU, Analyzer::Expr::get_type_info(), Analyzer::StringOper::getArg(), Analyzer::StringOper::getArity(), TRANSIENT_DICT_ID, and translate_dict_strings().

Referenced by CodeGenerator::codegenDictLike(), and CodeGenerator::codegenDictRegexp().

651  {
652  // If here we are operating on top of one or more string functions, i.e. LOWER(str),
653  // and before running the dictionary LIKE/ILIKE or REGEXP_LIKE,
654  // we need to translate the strings first.
655 
656  // This approach is a temporary solution until we can implement the next stage
657  // of the string translation project, which will broaden the StringOper class to include
658  operations that operate on strings but do not necessarily return strings like
659  LIKE/ILIKE/REGEXP_LIKE/CHAR_LENGTH. At this point these aforementioned operators,
660  // including LIKE/ILIKE, will just become part of a StringOps chain (which will also
661  // avoid the overhead of serializing the transformed raw strings from previous string
662  // opers to the dictionary to only read back out and perform LIKE/ILIKE.)
663  CHECK_GT(string_oper->getArity(), 0UL);
664  const auto& string_oper_primary_arg_ti = string_oper->getArg(0)->get_type_info();
665  CHECK(string_oper_primary_arg_ti.is_dict_encoded_string());
666  CHECK_NE(string_oper_primary_arg_ti.getStringDictKey().dict_id, TRANSIENT_DICT_ID);
667  // Note the actual translation below will be cached by RowSetMemOwner
668  translate_dict_strings(string_oper, ExecutorDeviceType::CPU, executor);
669 }
std::unique_ptr< StringDictionaryTranslationMgr > translate_dict_strings(const Analyzer::StringOper *expr, const ExecutorDeviceType device_type, Executor *executor)
size_t getArity() const
Definition: Analyzer.h:1674
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
#define CHECK_GT(x, y)
Definition: Logger.h:305
#define CHECK_NE(x, y)
Definition: Logger.h:302
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
#define CHECK(condition)
Definition: Logger.h:291
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1688

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t string_compress ( const StringView  string_view,
const int64_t  string_dict_handle 
)

Definition at line 50 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient(), and StringView::stringView().

51  {
52  std::string_view const sv = string_view.stringView();
53  if (sv.empty()) {
54  return inline_int_null_value<int32_t>();
55  }
56  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
57  return string_dict_proxy->getOrAddTransient(sv);
58 }
std::string_view stringView() const
Definition: Datum.h:46
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

RUNTIME_EXPORT StringView string_decode ( int8_t *  chunk_iter_,
int64_t  pos 
)

Definition at line 28 of file StringOpsIR.cpp.

References CHECK, ChunkIter_get_nth(), VarlenDatum::is_null, VarlenDatum::length, and VarlenDatum::pointer.

28  {
29  auto chunk_iter = reinterpret_cast<ChunkIter*>(chunk_iter_);
30  VarlenDatum vd;
31  bool is_end;
32  ChunkIter_get_nth(chunk_iter, pos, false, &vd, &is_end);
33  CHECK(!is_end);
34  return vd.is_null ? StringView{nullptr, 0u}
35  : StringView{reinterpret_cast<char const*>(vd.pointer), vd.length};
36 }
bool is_null
Definition: Datum.h:59
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
int8_t * pointer
Definition: Datum.h:58
#define CHECK(condition)
Definition: Logger.h:291
size_t length
Definition: Datum.h:57

+ Here is the call graph for this function:

RUNTIME_EXPORT StringView string_decompress ( const int32_t  string_id,
const int64_t  string_dict_handle 
)

Definition at line 38 of file StringOpsIR.cpp.

References CHECK, StringDictionaryProxy::getStringBytes(), and NULL_INT.

39  {
40  if (string_id == NULL_INT) {
41  return {nullptr, 0};
42  }
43  auto string_dict_proxy =
44  reinterpret_cast<const StringDictionaryProxy*>(string_dict_handle);
45  auto string_bytes = string_dict_proxy->getStringBytes(string_id);
46  CHECK(string_bytes.first);
47  return {string_bytes.first, string_bytes.second};
48 }
std::pair< const char *, size_t > getStringBytes(int32_t string_id) const noexcept
#define NULL_INT
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::unique_ptr<StringDictionaryTranslationMgr> translate_dict_strings ( const Analyzer::StringOper *  expr,
const ExecutorDeviceType  device_type,
Executor *  executor 
)

Definition at line 490 of file StringOpsIR.cpp.

References CHECK, Data_Namespace::CPU_LEVEL, Analyzer::Expr::get_type_info(), Analyzer::StringOper::getArg(), SQLTypeInfo::getStringDictKey(), getStringOpInfos(), GPU, and Data_Namespace::GPU_LEVEL.

Referenced by CodeGenerator::codegen(), and pre_translate_string_ops().

493  {
494  const auto& expr_ti = expr->get_type_info();
495  const auto& primary_input_expr_ti = expr->getArg(0)->get_type_info();
496  const auto& dict_id = primary_input_expr_ti.getStringDictKey();
497  const auto string_op_infos = getStringOpInfos(expr);
498  CHECK(string_op_infos.size());
499 
500  if (string_op_infos.back().getReturnType().is_dict_encoded_string()) {
501  // string->string translation
502  auto string_dictionary_translation_mgr =
503  std::make_unique<StringDictionaryTranslationMgr>(
504  dict_id,
505  dict_id,
506  false, // translate_intersection_only
507  expr_ti,
508  string_op_infos,
511  executor->deviceCount(device_type),
512  executor,
513  executor->getDataMgr(),
514  false /* delay_translation */);
515  return string_dictionary_translation_mgr;
516  } else {
517  // string->numeric translation
518  auto string_dictionary_translation_mgr =
519  std::make_unique<StringDictionaryTranslationMgr>(
520  dict_id,
521  expr_ti,
522  string_op_infos,
525  executor->deviceCount(device_type),
526  executor,
527  executor->getDataMgr(),
528  false /* delay_translation */);
529  return string_dictionary_translation_mgr;
530  }
531 }
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::vector< StringOps_Namespace::StringOpInfo > getStringOpInfos(const Analyzer::StringOper *expr)
#define CHECK(condition)
Definition: Logger.h:291
const Expr * getArg(const size_t i) const
Definition: Analyzer.h:1688
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1057

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

RUNTIME_EXPORT int32_t union_translate_string_id_to_other_dict ( const int32_t  string_id,
const int64_t  source_string_dict_handle,
const int64_t  dest_string_dict_handle 
)

Definition at line 112 of file StringOpsIR.cpp.

References StringDictionaryProxy::getString().

114  {
115  const auto source_string_dict_proxy =
116  reinterpret_cast<StringDictionaryProxy*>(source_string_dict_handle);
117  auto dest_string_dict_proxy =
118  reinterpret_cast<StringDictionaryProxy*>(dest_string_dict_handle);
119  // Can we have StringDictionaryProxy::getString return a reference?
120  const auto source_str = source_string_dict_proxy->getString(string_id);
121  if (source_str.empty()) {
122  return inline_int_null_value<int32_t>();
123  }
124  return dest_string_dict_proxy->getOrAddTransient(source_str);
125 }
std::string getString(int32_t string_id) const

+ Here is the call graph for this function:

int32_t write_string_to_proxy ( const std::string &  str,
const int64_t  string_dict_handle 
)
inline

Definition at line 168 of file StringOpsIR.cpp.

References StringDictionaryProxy::getOrAddTransient().

Referenced by convert_to_string_and_encode_bool(), convert_to_string_and_encode_date(), convert_to_string_and_encode_decimal(), convert_to_string_and_encode_time(), and convert_to_string_and_encode_timestamp().

169  {
170  if (str.empty()) {
171  return inline_int_null_value<int32_t>();
172  }
173  auto string_dict_proxy = reinterpret_cast<StringDictionaryProxy*>(string_dict_handle);
174  return string_dict_proxy->getOrAddTransient(str);
175 }
int32_t getOrAddTransient(const std::string &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: