33 #include <string_view>
37 namespace StringOps_Namespace {
// A string value paired with a null flag.
// NOTE(review): the member declarations and the closing brace of this struct are
// not visible in this excerpt; `str` and `is_null` are known only from the
// initializer lists below.
39 struct NullableStrType {
// Builds from a std::string; an empty string is flagged as null.
40 NullableStrType(
const std::string& str) : str(str),
is_null(str.empty()) {}
// Builds from a string_view (used to initialize str); an empty view is flagged as null.
41 NullableStrType(
const std::string_view sv) : str(sv),
is_null(sv.empty()) {}
// Returns the value and its null flag as a (string, bool) pair.
44 std::pair<std::string, bool> toPair()
const {
return {str,
is_null}; }
// Interior of the StringOp base type (named by the destructor below). The type's
// opening line and parts of its constructors are not visible in this excerpt.
// Constructor fragment: remembers whether a literal was supplied and, when one
// was, wraps its value in a NullableStrType.
53 const std::optional<std::string>& var_str_optional_literal)
56 , has_var_str_literal_(var_str_optional_literal.has_value())
57 , var_str_literal_(!var_str_optional_literal.has_value()
59 : NullableStrType(var_str_optional_literal.value())) {}
// Second constructor fragment: same literal handling, and also stores return_ti.
63 const std::optional<std::string>& var_str_optional_literal)
65 , return_ti_(return_ti)
66 , has_var_str_literal_(var_str_optional_literal.has_value())
67 , var_str_literal_(!var_str_optional_literal.has_value()
69 : NullableStrType(var_str_optional_literal.value())) {}
// Virtual, defaulted destructor.
71 virtual ~StringOp() =
default;
// Single-string evaluation; pure virtual, so every concrete subtype must provide it.
73 virtual NullableStrType operator()(std::string
const&)
const = 0;
// Two-string evaluation. This default streams an error message to UNREACHABLE()
// and then returns a default-constructed NullableStrType.
75 virtual NullableStrType operator()(
const std::string& str1,
76 const std::string& str2)
const {
77 UNREACHABLE() <<
"operator(str1, str2) not allowed for this method";
79 return NullableStrType();
// Zero-argument evaluation on the stored literal: a null literal is returned
// as-is; otherwise the single-string overload is applied to the literal's text.
82 virtual NullableStrType operator()()
const {
83 if (var_str_literal_.is_null) {
84 return var_str_literal_;
86 CHECK(hasVarStringLiteral());
87 return operator()(var_str_literal_.str);
// Single-string numeric evaluation. This default streams an error message to
// UNREACHABLE(); the remainder of the body is not visible in this excerpt.
90 virtual Datum numericEval(
const std::string_view str)
const {
91 UNREACHABLE() <<
"numericEval not allowed for this method";
// Two-string numeric evaluation; same default as the single-string overload.
96 virtual Datum numericEval(
const std::string_view str1,
97 const std::string_view str2)
const {
98 UNREACHABLE() <<
"numericEval not allowed for this method";
// Zero-argument numeric evaluation on the stored literal. The null-literal
// branch body is not visible here; otherwise the single-string overload is
// applied to the literal's text.
103 virtual Datum numericEval()
const {
104 if (var_str_literal_.is_null) {
107 CHECK(hasVarStringLiteral());
108 return numericEval(var_str_literal_.str);
// Returns the stored return type descriptor.
111 virtual const SQLTypeInfo& getReturnType()
const {
return return_ti_; }
// Returns the stored literal's text; CHECKs that a literal was supplied.
113 const std::string& getVarStringLiteral()
const {
114 CHECK(hasVarStringLiteral());
115 return var_str_literal_.str;
// True when a literal was supplied at construction.
118 bool hasVarStringLiteral()
const {
return has_var_str_literal_; }
// Declares a static helper returning a boost::regex from an operator name, a
// pattern, a parameter string and a sub-match flag; the definition is not
// visible in this excerpt.
121 static boost::regex generateRegex(
const std::string& op_name,
122 const std::string& regex_pattern,
123 const std::string& regex_params,
124 const bool supports_sub_matches);
// Whether a literal was supplied, and the literal itself.
128 const bool has_var_str_literal_{
false};
129 const NullableStrType var_str_literal_;
// StringOp subtype that overrides both the single-string operator() and the
// single-string numericEval(); the definitions and most of the constructor are
// not visible in this excerpt.
// NOTE(review): presumably implements a string-to-type cast — confirm against the definitions.
132 struct TryStringCast :
public StringOp {
135 const std::optional<std::string>& var_str_optional_literal)
138 NullableStrType operator()(
const std::string& str)
const override;
139 Datum numericEval(
const std::string_view str)
const override;
// StringOp subtype carrying a search string and a start offset; overrides the
// single-string operator() and numericEval() (definitions not visible here).
142 struct Position :
public StringOp {
// Overload taking only the search string; how start_ is initialized here is
// not visible in this excerpt.
144 Position(
const std::optional<std::string>& var_str_optional_literal,
145 const std::string& search_str)
148 var_str_optional_literal)
149 , search_str_(search_str)
// Overload that also uses a start value (its parameter line is not visible):
// a positive start is stored as start - 1, any other value is stored unchanged.
152 Position(
const std::optional<std::string>& var_str_optional_literal,
153 const std::string& search_str,
157 var_str_optional_literal)
158 , search_str_(search_str)
159 , start_(start > 0 ? start - 1 : start) {}
161 NullableStrType operator()(
const std::string& str)
const override;
162 Datum numericEval(
const std::string_view str)
const override;
// Search text and adjusted start offset.
165 const std::string search_str_;
166 const int64_t start_;
// StringOp subtype holding a second string literal; overrides operator() and
// both the one- and two-string numericEval() overloads (definitions not
// visible in this excerpt).
169 struct JarowinklerSimilarity :
public StringOp {
// Overload that stores a comparison literal.
170 JarowinklerSimilarity(
const std::optional<std::string>& var_str_optional_literal,
171 const std::string& str_literal)
174 var_str_optional_literal)
175 , str_literal_(str_literal) {}
// Overload without a comparison literal; its initializer list is not visible here.
177 JarowinklerSimilarity(
const std::optional<std::string>& var_str_optional_literal)
180 NullableStrType operator()(
const std::string& str)
const override;
182 Datum numericEval(
const std::string_view str)
const override;
183 Datum numericEval(
const std::string_view str1,
184 const std::string_view str2)
const override;
// The stored comparison literal.
186 const std::string str_literal_;
// StringOp subtype holding a second string literal; overrides operator() and
// both the one- and two-string numericEval() overloads (definitions not
// visible in this excerpt).
189 struct LevenshteinDistance :
public StringOp {
// Overload that stores a comparison literal.
190 LevenshteinDistance(
const std::optional<std::string>& var_str_optional_literal,
191 const std::string& str_literal)
194 var_str_optional_literal)
195 , str_literal_(str_literal) {}
// Overload without a comparison literal; its initializer list is not visible here.
197 LevenshteinDistance(
const std::optional<std::string>& var_str_optional_literal)
200 NullableStrType operator()(
const std::string& str)
const override;
202 Datum numericEval(
const std::string_view str)
const override;
203 Datum numericEval(
const std::string_view str1,
204 const std::string_view str2)
const override;
// The stored comparison literal.
206 const std::string str_literal_;
// StringOp subtype that overrides the single-string operator() and the
// single-string numericEval(); definitions are not visible in this excerpt.
209 struct Hash :
public StringOp {
// Takes only the optional literal; the initializer list is not visible here.
211 Hash(
const std::optional<std::string>& var_str_optional_literal)
214 NullableStrType operator()(
const std::string& str)
const override;
215 Datum numericEval(
const std::string_view str)
const override;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably lower-cases its input — the definition is not
// visible in this excerpt.
218 struct Lower :
public StringOp {
219 Lower(
const std::optional<std::string>& var_str_optional_literal)
222 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably upper-cases its input — the definition is not
// visible in this excerpt.
225 struct Upper :
public StringOp {
226 Upper(
const std::optional<std::string>& var_str_optional_literal)
228 NullableStrType operator()(
const std::string& str)
const override;
/**
 * @brief Builds a 256-entry membership bitmap from the bytes of a string.
 *
 * Bit i of the result is set when a byte with unsigned value i occurs in
 * chars_to_set.
 *
 * @param chars_to_set Bytes to mark in the bitmap; may be empty.
 * @return Bitmap with one bit set per distinct byte value in chars_to_set.
 */
inline std::bitset<256> build_char_bitmap(const std::string& chars_to_set) {
  std::bitset<256> char_bitmap;
  for (const char str_char : chars_to_set) {
    // Index by the unsigned byte value. Where plain char is signed, a byte
    // >= 0x80 is negative and would convert to a huge size_t, making
    // std::bitset::set throw std::out_of_range.
    char_bitmap.set(static_cast<unsigned char>(str_char));
  }
  return char_bitmap;
}
// StringOp subtype that overrides the single-string operator() (definition not
// visible in this excerpt). At construction it builds delimiter_bitmap_ by
// passing the static delimiter_chars list to build_char_bitmap.
// NOTE(review): the delimiter_chars raw string literal below is split across
// lines in this excerpt (the R prefix is separated from its quote) — confirm
// against the original header.
239 struct InitCap :
public StringOp {
240 InitCap(
const std::optional<std::string>& var_str_optional_literal)
242 , delimiter_bitmap_(build_char_bitmap(InitCap::delimiter_chars)) {}
244 NullableStrType operator()(
const std::string& str)
const override;
247 static constexpr
char const* delimiter_chars = R
"(!?@"^#$&~_,.:;+-*%/|\[](){}<>)";
248 const std::bitset<256> delimiter_bitmap_;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably reverses its input — the definition is not visible
// in this excerpt.
251 struct Reverse :
public StringOp {
252 Reverse(
const std::optional<std::string>& var_str_optional_literal)
255 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype parameterized by a repeat count n; overrides the
// single-string operator() (definition not visible in this excerpt).
258 struct Repeat :
public StringOp {
// A negative n initializes n_ to 0. The constructor body throws
// std::runtime_error with the message below; the condition guarding the throw
// is not visible in this excerpt.
260 Repeat(
const std::optional<std::string>& var_str_optional_literal,
const int64_t
n)
262 , n_(n >= 0 ? n : 0UL) {
264 throw std::runtime_error(
"Number of repeats must be >= 0");
268 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype holding a string literal and an ordering flag; overrides
// both the one-string and two-string operator() (definitions not visible in
// this excerpt).
274 struct Concat :
public StringOp {
// Overload storing the literal and the caller-supplied ordering flag.
275 Concat(
const std::optional<std::string>& var_str_optional_literal,
276 const std::string& str_literal,
277 const bool reverse_order)
279 var_str_optional_literal)
280 , str_literal_(str_literal)
281 , reverse_order_(reverse_order) {}
// Overload without a literal; reverse_order_ is set to false.
283 Concat(
const std::optional<std::string>& var_str_optional_literal)
285 , reverse_order_(
false) {}
287 NullableStrType operator()(
const std::string& str)
const override;
289 NullableStrType operator()(
const std::string& str1,
290 const std::string& str2)
const override;
// Stored literal and ordering flag.
292 const std::string str_literal_;
293 const bool reverse_order_;
// StringOp subtype configured with a pad mode, a target length and a padding
// string; overrides the single-string operator() and declares lpad/rpad
// helpers (definitions not visible in this excerpt).
296 struct Pad :
public StringOp {
298 enum class PadMode {
LEFT, RIGHT };
// The pad mode is derived from op_kind via Pad::op_kind_to_pad_mode (the
// op_kind parameter line is not visible here). An empty padding_string falls
// back to " " for padding_string_ and ' ' for padding_char_.
// NOTE(review): padding_string_length_ is taken from the padding_string
// argument, so it is 0 when the argument is empty even though padding_string_
// becomes " " — confirm lpad/rpad tolerate a zero length.
// NOTE(review): padded_length is cast to size_t without a sign check — confirm
// callers never pass a negative value.
300 Pad(
const std::optional<std::string>& var_str_optional_literal,
302 const int64_t padded_length,
303 const std::string& padding_string)
304 : StringOp(op_kind, var_str_optional_literal)
305 , pad_mode_(Pad::op_kind_to_pad_mode(op_kind))
306 , padded_length_(static_cast<size_t>(padded_length))
307 , padding_string_(padding_string.empty() ?
" " : padding_string)
308 , padding_string_length_(padding_string.size())
309 , padding_char_(padding_string.empty() ?
' ' : padding_string[0]) {}
311 NullableStrType operator()(
const std::string& str)
const override;
// Helper declarations; each takes a string and returns a string.
314 std::string lpad(
const std::string& str)
const;
316 std::string rpad(
const std::string& str)
const;
// Configuration captured at construction.
320 const PadMode pad_mode_;
321 const size_t padded_length_;
322 const std::string padding_string_;
323 const size_t padding_string_length_;
324 const char padding_char_;
// StringOp subtype configured with a trim mode and a bitmap of characters to
// trim; overrides the single-string operator() (definition not visible in this
// excerpt).
327 struct Trim :
public StringOp {
329 enum class TrimMode {
LEFT, RIGHT, BOTH };
// The trim mode is derived from op_kind via Trim::op_kind_to_trim_mode (the
// op_kind parameter line is not visible here). The bitmap is built from
// trim_chars, or from " " when trim_chars is empty.
331 Trim(
const std::optional<std::string>& var_str_optional_literal,
333 const std::string& trim_chars)
334 : StringOp(op_kind, var_str_optional_literal)
335 , trim_mode_(Trim::op_kind_to_trim_mode(op_kind))
336 , trim_char_bitmap_(build_char_bitmap(trim_chars.empty() ?
" " : trim_chars)) {}
338 NullableStrType operator()(
const std::string& str)
const override;
// Configuration captured at construction.
343 const TrimMode trim_mode_;
344 const std::bitset<256> trim_char_bitmap_;
// StringOp subtype configured with a start offset and a length; overrides the
// single-string operator() (definition not visible in this excerpt). The start
// parameter lines of both constructors are not visible here.
347 struct Substring :
public StringOp {
// Overload without a length: a positive start is stored as start - 1, other
// values unchanged; length_ is set to std::string::npos.
353 Substring(
const std::optional<std::string>& var_str_optional_literal,
356 , start_(start > 0 ? start - 1 : start)
357 , length_(std::string::npos) {}
// Overload with a length: same start adjustment; a negative length is stored as 0.
363 Substring(
const std::optional<std::string>& var_str_optional_literal,
365 const int64_t length)
367 , start_(start > 0 ? start - 1 : start)
368 , length_(static_cast<size_t>(length >= 0 ? length : 0)) {}
370 NullableStrType operator()(
const std::string& str)
const override;
// Adjusted start offset and length.
373 const int64_t start_;
374 const size_t length_;
// StringOp subtype configured with an insert string, a start offset and a
// replacement length; overrides the single-string operator() (definition not
// visible in this excerpt). The start parameter lines are not visible here.
377 struct Overlay :
public StringOp {
// Overload without an explicit replacement length: a positive start is stored
// as start - 1, other values unchanged; replacement_length_ is the size of
// insert_str_ (declared before replacement_length_ below, so it is already
// initialized when read).
378 Overlay(
const std::optional<std::string>& var_str_optional_literal,
379 const std::string& insert_str,
382 , insert_str_(insert_str)
383 , start_(start > 0 ? start - 1 : start)
384 , replacement_length_(insert_str_.size()) {}
// Overload with an explicit replacement length; a negative value is stored as 0.
386 Overlay(
const std::optional<std::string>& var_str_optional_literal,
387 const std::string& insert_str,
389 const int64_t replacement_length)
391 , insert_str_(insert_str)
392 , start_(start > 0 ? start - 1 : start)
393 , replacement_length_(
394 static_cast<size_t>(replacement_length >= 0 ? replacement_length : 0)) {}
396 NullableStrType operator()(
const std::string& base_str)
const override;
// Configuration captured at construction.
399 const std::string insert_str_;
400 const int64_t start_;
401 const size_t replacement_length_;
// StringOp subtype configured with a pattern string and a replacement string;
// overrides the single-string operator() (definition not visible in this
// excerpt).
404 struct Replace :
public StringOp {
// Stores both strings and caches their sizes.
405 Replace(
const std::optional<std::string>& var_str_optional_literal,
406 const std::string& pattern_str,
407 const std::string& replacement_str)
409 , pattern_str_(pattern_str)
410 , replacement_str_(replacement_str)
411 , pattern_str_len_(pattern_str.size())
412 , replacement_str_len_(replacement_str.size()) {}
414 NullableStrType operator()(
const std::string& str)
const override;
// Stored strings and their cached lengths.
416 const std::string pattern_str_;
417 const std::string replacement_str_;
418 const size_t pattern_str_len_;
419 const size_t replacement_str_len_;
// StringOp subtype configured with a delimiter and a part index; overrides the
// single-string operator() (definition not visible in this excerpt).
422 struct SplitPart :
public StringOp {
// split_part of 0 is stored as 1; any other value is stored as its absolute
// value. reverse_ records whether split_part was negative (its declaration is
// not visible in this excerpt). The delimiter's size is cached.
423 SplitPart(
const std::optional<std::string>& var_str_optional_literal,
424 const std::string& delimiter,
425 const int64_t split_part)
427 , delimiter_(delimiter)
428 , split_part_(split_part == 0 ? 1UL : std::abs(split_part))
429 , delimiter_length_(delimiter.size())
430 , reverse_(split_part < 0) {}
432 NullableStrType operator()(
const std::string& str)
const override;
// Configuration captured at construction.
436 const std::string delimiter_;
437 const size_t split_part_;
438 const size_t delimiter_length_;
// StringOp subtype configured with a regex, a start position, an occurrence
// index and sub-match information; overrides the single-string operator()
// (definition not visible in this excerpt).
442 struct RegexpSubstr :
public StringOp {
// Keeps the pattern text, calls StringOp::generateRegex with the operator name
// "REGEXP_SUBSTR" and a sub-match flag of true, and stores positive start_pos
// and occurrence values minus one (other values unchanged).
// NOTE(review): set_sub_match_info is called with regex_params, while its
// declaration below names the first parameter regex_pattern — confirm which
// is intended.
444 RegexpSubstr(
const std::optional<std::string>& var_str_optional_literal,
445 const std::string& regex_pattern,
446 const int64_t start_pos,
447 const int64_t occurrence,
448 const std::string& regex_params,
449 const int64_t sub_match_group_idx)
451 , regex_pattern_str_(regex_pattern)
453 StringOp::generateRegex(
"REGEXP_SUBSTR", regex_pattern, regex_params,
true))
454 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos)
455 , occurrence_(occurrence > 0 ? occurrence - 1 : occurrence)
456 , sub_match_info_(set_sub_match_info(regex_params, sub_match_group_idx)) {}
458 NullableStrType operator()(
const std::string& str)
const override;
// Static helper declarations; definitions are not visible in this excerpt.
461 static std::string get_sub_match(
const boost::smatch& match,
462 const std::pair<bool, int64_t> sub_match_info);
464 static std::pair<bool, int64_t> set_sub_match_info(
const std::string& regex_pattern,
465 const int64_t sub_match_group_idx);
// Configuration captured at construction.
467 const std::string regex_pattern_str_;
468 const boost::regex regex_pattern_;
469 const int64_t start_pos_;
470 const int64_t occurrence_;
471 const std::pair<bool, int64_t> sub_match_info_;
// StringOp subtype configured with a regex, a replacement string, a start
// position and an occurrence value; overrides the single-string operator()
// (definition not visible in this excerpt).
474 struct RegexpReplace :
public StringOp {
// Keeps the pattern text, calls StringOp::generateRegex with the operator name
// "REGEXP_REPLACE" and a sub-match flag of false, stores a positive start_pos
// minus one (other values unchanged), and stores occurrence unchanged.
476 RegexpReplace(
const std::optional<std::string>& var_str_optional_literal,
477 const std::string& regex_pattern,
478 const std::string& replacement,
479 const int64_t start_pos,
480 const int64_t occurrence,
481 const std::string& regex_params)
483 , regex_pattern_str_(regex_pattern)
485 StringOp::generateRegex(
"REGEXP_REPLACE", regex_pattern, regex_params,
false))
486 , replacement_(replacement)
487 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos)
488 , occurrence_(occurrence) {}
490 NullableStrType operator()(
const std::string& str)
const override;
// Static helper declaration returning a pair of size_t values; the meaning of
// the pair is not visible in this excerpt.
493 static std::pair<size_t, size_t> get_nth_regex_match(
const std::string& str,
494 const size_t start_pos,
495 const boost::regex& regex_pattern,
496 const int64_t occurrence);
// Configuration captured at construction.
498 const std::string regex_pattern_str_;
499 const boost::regex regex_pattern_;
500 const std::string replacement_;
501 const int64_t start_pos_;
502 const int64_t occurrence_;
// StringOp subtype configured with a regex and a start position; overrides the
// single-string operator() and numericEval() (definitions not visible in this
// excerpt).
505 struct RegexpCount :
public StringOp {
// Keeps the pattern text, calls StringOp::generateRegex with the operator name
// "REGEXP_COUNT" and a sub-match flag of true, and stores a positive start_pos
// minus one (other values unchanged).
507 RegexpCount(
const std::optional<std::string>& var_str_optional_literal,
508 const std::string& regex_pattern,
509 const int64_t start_pos,
510 const std::string& regex_params)
513 var_str_optional_literal)
514 , regex_pattern_str_(regex_pattern)
516 StringOp::generateRegex(
"REGEXP_COUNT", regex_pattern, regex_params,
true))
517 , start_pos_(start_pos > 0 ? start_pos - 1 : start_pos) {}
519 NullableStrType operator()(
const std::string& str)
const override;
520 Datum numericEval(
const std::string_view str)
const override;
// Configuration captured at construction.
523 const std::string regex_pattern_str_;
524 const boost::regex regex_pattern_;
525 const int64_t start_pos_;
// StringOp subtype configured from a JSON path; overrides the single-string
// operator() (definition not visible in this excerpt).
546 struct JsonValue :
public StringOp {
// Derives the parse mode and the list of keys from json_path using the two
// static helpers declared below.
548 JsonValue(
const std::optional<std::string>& var_str_optional_literal,
549 const std::string& json_path)
551 , json_parse_mode_(parse_json_parse_mode(json_path))
552 , json_keys_(parse_json_path(json_path)) {}
554 NullableStrType operator()(
const std::string& str)
const override;
// Kinds of path component, and the two parse modes.
557 enum class JsonKeyKind { JSON_OBJECT, JSON_ARRAY };
558 enum class JsonParseMode { PARSE_MODE_LAX, PARSE_MODE_STRICT };
// Fields and constructors of the JsonKey type (its opening line and the
// array_key declaration are not visible in this excerpt): a string argument
// yields a JSON_OBJECT key, a size_t argument yields a JSON_ARRAY key.
561 JsonKeyKind key_kind;
562 std::string object_key;
566 JsonKey(
const std::string& object_key)
567 : key_kind(JsonKeyKind::JSON_OBJECT), object_key(object_key) {}
568 JsonKey(
const size_t array_key)
569 : key_kind(JsonKeyKind::JSON_ARRAY), array_key(array_key) {}
// Static parsing helper declarations; definitions are not visible in this excerpt.
572 static JsonParseMode parse_json_parse_mode(std::string_view json_path);
573 static std::vector<JsonKey> parse_json_path(
const std::string& json_path);
// In lax mode returns a default-constructed NullableStrType; otherwise throws
// std::runtime_error whose message includes json_str.
574 inline NullableStrType handle_parse_error(
const std::string&
json_str)
const {
575 if (json_parse_mode_ == JsonParseMode::PARSE_MODE_LAX) {
576 return NullableStrType();
578 throw std::runtime_error(
"Could not parse: " + json_str +
".");
// Same lax/strict split for a missing key or value; the tail of the throw
// expression is not visible in this excerpt.
582 inline NullableStrType handle_key_error(
const std::string& json_str)
const {
583 if (json_parse_mode_ == JsonParseMode::PARSE_MODE_LAX) {
584 return NullableStrType();
586 throw std::runtime_error(
"Key not found or did not contain value in: " + json_str +
// Compile-time flag, currently false; where it is consulted is not visible here.
590 static constexpr
bool allow_strict_json_parsing{
false};
// Configuration captured at construction.
591 const JsonParseMode json_parse_mode_;
593 const std::vector<JsonKey> json_keys_;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably Base64-encodes its input — the definition is not
// visible in this excerpt.
596 struct Base64Encode :
public StringOp {
597 Base64Encode(
const std::optional<std::string>& var_str_optional_literal)
600 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably Base64-decodes its input — the definition is not
// visible in this excerpt.
603 struct Base64Decode :
public StringOp {
604 Base64Decode(
const std::optional<std::string>& var_str_optional_literal)
607 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably URL-encodes its input — the definition is not
// visible in this excerpt.
610 struct UrlEncode :
public StringOp {
611 UrlEncode(
const std::optional<std::string>& var_str_optional_literal)
614 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype that overrides only the single-string operator().
// NOTE(review): presumably URL-decodes its input — the definition is not
// visible in this excerpt.
617 struct UrlDecode :
public StringOp {
618 UrlDecode(
const std::optional<std::string>& var_str_optional_literal)
621 NullableStrType operator()(
const std::string& str)
const override;
// StringOp subtype whose single-string operator() ignores its argument and
// returns a default-constructed NullableStrType. The constructor (only partly
// visible in this excerpt) stores op_kind in op_kind_.
624 struct NullOp :
public StringOp {
626 const std::optional<std::string>& var_str_optional_literal,
629 , op_kind_(op_kind) {}
631 NullableStrType operator()(
const std::string& str)
const override {
632 return NullableStrType();
// Declares a function that takes a StringOpInfo and returns an owning pointer
// to a const StringOp; the definition is not visible in this excerpt.
638 std::unique_ptr<const StringOp>
gen_string_op(
const StringOpInfo& string_op_info);
// Trailing parameter list of another declaration taking a StringOpInfo; its
// return type and name are not visible in this excerpt.
641 const StringOpInfo& string_op_info);
// Interior of the StringOps type (its opening line is not visible in this
// excerpt): an owned list of StringOp objects plus a cached count.
// Default constructor: builds the list from an empty info list and sets the count to 0.
647 StringOps() : string_ops_(genStringOpsFromOpInfos({})), num_ops_(0UL) {}
// Builds the list from string_op_infos and records its size as the count.
649 StringOps(
const std::vector<StringOpInfo>& string_op_infos)
650 : string_ops_(genStringOpsFromOpInfos(string_op_infos))
651 , num_ops_(string_op_infos.size()) {}
// Evaluation entry points; definitions are not visible in this excerpt.
653 std::string operator()(
const std::string& str)
const;
655 std::string multi_input_eval(
const std::string_view str1,
656 const std::string_view str2)
const;
// NOTE(review): the returned view presumably refers into sv_storage — confirm
// against the definition before relying on its lifetime.
658 std::string_view operator()(
const std::string_view sv, std::string& sv_storage)
const;
660 Datum numericEval(
const std::string_view str)
const;
661 Datum numericEval(
const std::string_view str1,
const std::string_view str2)
const;
// Returns the cached operation count.
663 size_t size()
const {
return num_ops_; }
// Declares the helper used by both constructors to build the owned list.
666 std::vector<std::unique_ptr<const StringOp>> genStringOpsFromOpInfos(
667 const std::vector<StringOpInfo>& string_op_infos)
const;
// Owned operations and their count.
669 const std::vector<std::unique_ptr<const StringOp>> string_ops_;
670 const size_t num_ops_;
Datum apply_numeric_op_to_literals(const StringOpInfo &string_op_info)
const std::string json_str(const rapidjson::Value &obj) noexcept
Constants for Builtin SQL Types supported by HEAVY.AI.
CONSTEXPR DEVICE bool is_null(const T &value)
std::pair< std::string, bool > apply_string_op_to_literals(const StringOpInfo &string_op_info)
bool g_enable_smem_group_by true
Datum NullDatum(const SQLTypeInfo &ti)
bool g_enable_watchdog false
Common Enum definitions for SQL processing.
std::unique_ptr< const StringOp > gen_string_op(const StringOpInfo &string_op_info)