20 #include <rapidjson/document.h>
21 #include <boost/algorithm/string/predicate.hpp>
23 namespace StringOps_Namespace {
// Builds the boost::regex used by a regex string op from `regex_pattern` and the option
// characters in `regex_params`. (This listing is partial: the switch over `c` and the
// final case-sensitive return statement's first lines are not shown.)
// Throws std::runtime_error when:
//   - the 'e' (sub-matches) option is rejected because `supports_sub_matches` is false,
//   - an unrecognized option character is seen (message depends on `supports_sub_matches`),
//   - neither 'c' nor 'i' was given, or both were given.
// Every returned regex uses the `extended` and `optimize` flags; `icase` is added when
// case-insensitivity was requested.
25 boost::regex StringOp::generateRegex(
const std::string& op_name,
26 const std::string& regex_pattern,
27 const std::string& regex_params,
28 const bool supports_sub_matches) {
29 bool is_case_sensitive =
false;
30 bool is_case_insensitive =
false;
32 for (
const auto& c : regex_params) {
35 is_case_sensitive =
true;
38 is_case_insensitive =
true;
41 if (!supports_sub_matches) {
42 throw std::runtime_error(op_name +
43 " does not support 'e' (sub-matches) option.");
50 if (supports_sub_matches) {
51 throw std::runtime_error(
"Unrecognized regex parameter for " + op_name +
52 ", expected either 'c' 'i', or 'e'.");
54 throw std::runtime_error(
"Unrecognized regex parameter for " + op_name +
55 ", expected either 'c' or 'i'.");
// Exactly one of 'c' / 'i' must have been supplied.
59 if (!is_case_sensitive && !is_case_insensitive) {
60 throw std::runtime_error(op_name +
61 " params must either specify case-sensitivity ('c') or "
62 "case-insensitivity ('i').");
64 if (is_case_sensitive && is_case_insensitive) {
65 throw std::runtime_error(op_name +
66 " params cannot specify both case-sensitivity ('c') and "
67 "case-insensitivity ('i').");
69 if (is_case_insensitive) {
70 return boost::regex(regex_pattern,
71 boost::regex_constants::extended |
72 boost::regex_constants::optimize |
73 boost::regex_constants::icase);
77 boost::regex_constants::extended | boost::regex_constants::optimize);
// String-output overload of TryStringCast. It is not a valid entry point: it trips
// UNREACHABLE() with a diagnostic; the default-constructed NullableStrType return follows.
81 NullableStrType TryStringCast::operator()(
const std::string& str)
const {
82 UNREACHABLE() <<
"Invalid string output for TryStringCast";
83 return NullableStrType();
// Numeric evaluation of TryStringCast on `str`. Only the signature and the
// catch (std::runtime_error&) clause are visible in this listing; the try body and the
// handler body are not shown.
// NOTE(review): presumably a failed conversion is turned into a null Datum here — confirm.
86 Datum TryStringCast::numericEval(
const std::string_view str)
const {
94 }
catch (std::runtime_error& e) {
// String-output overload of Position. Not a valid entry point: trips UNREACHABLE()
// with a diagnostic (the statement after it is not shown in this listing).
99 NullableStrType Position::operator()(
const std::string& str)
const {
100 UNREACHABLE() <<
"Invalid string output for Position";
// Finds search_str_ in `str`, starting at start_, and stores the 1-based position of the
// match in return_datum.bigintval. A negative start_ is wrapped by adding the string
// length, i.e. it counts back from the end of `str`.
// NOTE(review): the value produced when find() returns npos is not visible in this listing.
// NOTE(review): if start_ < -str_len, wrapped_start stays negative and is converted to a
// very large unsigned offset when passed to find() — confirm that is acceptable.
104 Datum Position::numericEval(
const std::string_view str)
const {
108 const int64_t str_len = str.size();
109 const int64_t wrapped_start = start_ >= 0 ? start_ : str_len + start_;
111 const auto search_index = str.find(search_str_, wrapped_start);
112 if (search_index == std::string::npos) {
// find() is 0-based; the reported position is 1-based.
115 return_datum.
bigintval =
static_cast<int64_t
>(search_index) + 1;
/**
 * @brief Computes the Jaro similarity of two strings.
 *
 * Characters of s1 and s2 "match" when they are equal and their positions differ by at
 * most the match distance, max(|s1|, |s2|) / 2 - 1. Transpositions are matched characters
 * that appear in a different order in the two strings.
 *
 * @param s1 First string.
 * @param s2 Second string.
 * @return Similarity in [0.0, 1.0]; 0.0 if either string is empty or no characters match.
 */
double compute_jaro_score(std::string_view s1, std::string_view s2) {
  const int s1_len = static_cast<int>(s1.size());
  const int s2_len = static_cast<int>(s2.size());

  if (s1_len == 0 || s2_len == 0) {
    return 0.0;
  }

  // Clamp to zero: when the longer string has length 1 the raw formula yields -1, which
  // made the search window empty and scored identical one-character strings as 0.
  const int match_distance = std::max(std::max(s1_len, s2_len) / 2 - 1, 0);
  std::vector<bool> s1_match(s1_len, false);
  std::vector<bool> s2_match(s2_len, false);

  int matches = 0;
  int transpositions = 0;

  // Pass 1: greedily pair each character of s1 with the first unmatched equal character
  // of s2 inside the match window.
  for (int i = 0; i < s1_len; ++i) {
    const int start = std::max(0, i - match_distance);
    const int end = std::min(i + match_distance + 1, s2_len);

    for (int j = start; j < end; ++j) {
      if (s2_match[j]) {
        continue;
      }
      if (s1[i] != s2[j]) {
        continue;
      }
      s1_match[i] = true;
      s2_match[j] = true;
      ++matches;
      break;
    }
  }

  if (matches == 0) {
    return 0.0;
  }

  // Pass 2: walk the matched characters of both strings in order and count the positions
  // at which they disagree (each transposition is counted twice, hence the / 2.0 below).
  int k = 0;
  for (int i = 0; i < s1_len; ++i) {
    if (!s1_match[i]) {
      continue;
    }
    while (!s2_match[k]) {
      ++k;
    }
    if (s1[i] != s2[k]) {
      ++transpositions;
    }
    ++k;
  }

  const double score = ((matches / static_cast<double>(s1_len)) +
                        (matches / static_cast<double>(s2_len)) +
                        ((matches - transpositions / 2.0) / matches)) /
                       3.0;
  return score;
}
// Fragment of the Jaro-Winkler scoring routine (header and several lines are not shown).
// Visible behaviour: `n` is the minimum over an initializer list that includes both string
// lengths (remaining entries not shown); characters of s1 and s2 are compared at index `l`;
// the final score is jaro_score + winkler_adjustment, scaled by 100 before being returned.
189 int n = std::min({
static_cast<int>(s1.size()),
190 static_cast<int>(s2.size()),
194 if (s1[l] != s2[l]) {
200 double jaro_winkler_score = jaro_score + winkler_adjustment;
202 return jaro_winkler_score * 100;
// String-output overload of JarowinklerSimilarity. Not a valid entry point: trips
// UNREACHABLE() with a diagnostic (the statement after it is not shown in this listing).
205 NullableStrType JarowinklerSimilarity::operator()(
const std::string& str)
const {
206 UNREACHABLE() <<
"Invalid string output for Jarowinkler Similarity";
// Single-input numeric evaluation: stores jaro_winkler_score, rounded to the nearest
// integer, in return_datum.bigintval. How the score is computed (and against which
// second string) is not visible in this listing.
210 Datum JarowinklerSimilarity::numericEval(
const std::string_view str)
const {
216 return_datum.
bigintval =
static_cast<int64_t
>(std::round(jaro_winkler_score));
// Two-input numeric evaluation: stores jaro_winkler_score, rounded to the nearest integer,
// in return_datum.bigintval. Empty inputs take a separate branch whose body is not shown.
220 Datum JarowinklerSimilarity::numericEval(
const std::string_view str1,
221 const std::string_view str2)
const {
222 if (str1.empty() || str2.empty()) {
227 return_datum.
bigintval =
static_cast<int64_t
>(std::round(jaro_winkler_score));
// Edit-distance kernel over a full (len1 + 1) x (len2 + 1) table `d` whose cell type is T
// (the function header and the table-border initialisation bodies are not shown).
// Each interior cell is the minimum of: the cell above + 1, a second candidate on a line
// not shown here, and the diagonal cell + 0/1 depending on whether s1[i-1] == s2[j-1].
// The result is the bottom-right cell, d[len1][len2].
254 template <
typename T>
256 const size_t len1 = s1.size(), len2 = s2.size();
257 std::vector<std::vector<T>> d(len1 + 1, std::vector<T>(len2 + 1));
260 for (
size_t i = 1; i <= len1; ++i) {
263 for (
size_t i = 1; i <= len2; ++i) {
267 for (
size_t i = 1; i <= len1; ++i) {
268 for (
size_t j = 1; j <= len2; ++j) {
269 d[i][j] = std::min({d[i - 1][j] + 1,
271 d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0 : 1)});
275 return d[len1][len2];
// Dispatches to compute_levenshtein_distance_template with a table cell type chosen from
// the longer input's length: uint8_t on the first branch (its condition is not shown),
// uint16_t below 65536, uint32_t below the uint32_t maximum, otherwise uint64_t.
279 const size_t max_len = std::max(s1.size(), s2.size());
282 return compute_levenshtein_distance_template<uint8_t>(s1, s2);
283 }
else if (max_len < 65536) {
284 return compute_levenshtein_distance_template<uint16_t>(s1, s2);
285 }
else if (max_len < std::numeric_limits<uint32_t>::max()) {
286 return compute_levenshtein_distance_template<uint32_t>(s1, s2);
288 return compute_levenshtein_distance_template<uint64_t>(s1, s2);
// String-output overload of LevenshteinDistance. Not a valid entry point: trips
// UNREACHABLE() with a diagnostic (the statement after it is not shown in this listing).
292 NullableStrType LevenshteinDistance::operator()(
const std::string& str)
const {
293 UNREACHABLE() <<
"Invalid string output for Levenshtein Distance";
// Single-input numeric evaluation: stores levenshtein_distance (passed through std::round)
// in return_datum.bigintval. How the distance is obtained is not visible in this listing.
297 Datum LevenshteinDistance::numericEval(
const std::string_view str)
const {
303 return_datum.
bigintval =
static_cast<int64_t
>(std::round(levenshtein_distance));
// Two-input numeric evaluation: stores levenshtein_distance (passed through std::round)
// in return_datum.bigintval. Empty inputs take a separate branch whose body is not shown.
307 Datum LevenshteinDistance::numericEval(
const std::string_view str1,
308 const std::string_view str2)
const {
309 if (str1.empty() || str2.empty()) {
314 return_datum.
bigintval =
static_cast<int64_t
>(std::round(levenshtein_distance));
// String-output overload of Hash; its body is not visible in this listing.
// NOTE(review): the sibling numeric-only ops trip UNREACHABLE() in this overload —
// presumably this one does too; confirm.
318 NullableStrType Hash::operator()(
const std::string& str)
const {
// Hashes `str` with a multiplicative rolling hash: the accumulator starts at 1 and, for
// each byte, is multiplied by 997 and incremented by the byte's unsigned value. The
// accumulator is uint64_t, so overflow wraps. The result is stored in
// return_datum.bigintval after a cast to int64_t (so it may be negative).
323 Datum Hash::numericEval(
const std::string_view str)
const {
327 uint64_t str_hash = 1;
329 for (
size_t i = 0; i < str.size(); ++i) {
330 str_hash = str_hash * 997u +
static_cast<unsigned char>(str[i]);
333 return_datum.
bigintval =
static_cast<int64_t
>(str_hash);
// Lower-cases a copy of `str` in place, byte by byte, via std::tolower. The lambda takes
// unsigned char, so negative char values are never handed to std::tolower.
// (The call that receives these arguments sits on a line not shown in this listing.)
338 NullableStrType Lower::operator()(
const std::string& str)
const {
339 std::string output_str(str);
341 output_str.begin(), output_str.end(), output_str.begin(), [](
unsigned char c) {
342 return std::tolower(c);
// Upper-cases a copy of `str` in place, byte by byte, via std::toupper. The lambda takes
// unsigned char, so negative char values are never handed to std::toupper.
// (The call that receives these arguments sits on a line not shown in this listing.)
347 NullableStrType Upper::operator()(
const std::string& str)
const {
348 std::string output_str(str);
350 output_str.begin(), output_str.end(), output_str.begin(), [](
unsigned char c) {
351 return std::toupper(c);
// Walks a copy of `str`, tracking whether the previous character was a separator. A
// character is a separator when isspace() accepts it or delimiter_bitmap_ flags its byte
// value. The start of the string counts as "after a separator" (flag starts as true).
// The case-conversion statements themselves are on lines not shown in this listing.
// NOTE(review): isspace(c) receives a plain char here, whereas Lower/Upper take
// unsigned char; a negative char value passed to isspace is undefined behaviour — confirm
// whether non-ASCII input can reach this op.
356 NullableStrType InitCap::operator()(
const std::string& str)
const {
357 std::string output_str(str);
358 bool last_char_whitespace =
true;
359 for (
auto& c : output_str) {
360 if (isspace(c) || delimiter_bitmap_[reinterpret_cast<const uint8_t&>(c)]) {
361 last_char_whitespace =
true;
364 if (last_char_whitespace) {
366 last_char_whitespace =
false;
// Builds a byte-wise reversed copy of `str` from its reverse iterators
// (the return statement is not shown in this listing).
374 NullableStrType Reverse::operator()(
const std::string& str)
const {
375 const std::string reversed_str = std::string(str.rbegin(), str.rend());
// Builds `str` repeated n_ times. Capacity for the whole result (str.size() * n_) is
// reserved up front so the loop does not reallocate; the per-iteration append and the
// return statement are not shown in this listing.
379 NullableStrType Repeat::operator()(
const std::string& str)
const {
380 std::string repeated_str;
381 repeated_str.reserve(str.size() * n_);
382 for (
size_t r = 0; r < n_; ++r) {
// Concatenates `str` with the stored literal str_literal_. When reverse_order_ is set the
// literal comes first; otherwise `str` comes first.
388 NullableStrType Concat::operator()(
const std::string& str)
const {
389 return reverse_order_ ? str_literal_ + str : str + str_literal_;
// Two-input overload of Concat; its body is not visible in this listing.
// NOTE(review): presumably returns str1 followed by str2 — confirm.
392 NullableStrType Concat::operator()(
const std::string& str1,
393 const std::string& str2)
const {
// Dispatches on pad_mode_: PadMode::LEFT goes to lpad(), anything else goes to rpad().
397 NullableStrType Pad::operator()(
const std::string& str)
const {
398 return pad_mode_ == Pad::PadMode::LEFT ? lpad(str) : rpad(str);
// Left-pads `str` to padded_length_ characters.
// - If `str` is already at least padded_length_ long, it is cut to its FIRST
//   padded_length_ characters (no padding added).
// - With a one-character padding string, the fill is padding_char_ repeated.
// - Otherwise the fill cycles through padding_string_ from its first character.
401 std::string Pad::lpad(
const std::string& str)
const {
402 const auto str_len = str.size();
403 const size_t chars_to_fill = str_len < padded_length_ ? padded_length_ - str_len : 0UL;
404 if (chars_to_fill == 0UL) {
405 return str.substr(0, padded_length_);
409 if (padding_string_length_ == 1UL) {
410 return std::string(chars_to_fill, padding_char_) + str;
413 std::string fitted_padding_str;
414 fitted_padding_str.reserve(chars_to_fill);
415 for (
size_t i = 0; i < chars_to_fill; ++i) {
416 fitted_padding_str.push_back(padding_string_[i % padding_string_length_]);
418 return fitted_padding_str + str;
// Right-pads `str` to padded_length_ characters.
// - If `str` is already at least padded_length_ long, it is cut to its LAST
//   padded_length_ characters (characters are dropped from the front). Note this differs
//   from lpad(), which keeps the first characters.
//   NOTE(review): confirm keeping the tail is the intended RPAD truncation behaviour.
// - With a one-character padding string, the fill is padding_char_ repeated.
// - Otherwise the fill cycles through padding_string_ from its first character.
421 std::string Pad::rpad(
const std::string& str)
const {
422 const auto str_len = str.size();
423 const size_t chars_to_fill = str_len < padded_length_ ? padded_length_ - str_len : 0UL;
424 if (chars_to_fill == 0UL) {
425 return str.substr(str_len - padded_length_, std::string::npos);
429 if (padding_string_length_ == 1UL) {
430 return str + std::string(chars_to_fill, padding_char_);
433 std::string fitted_padding_str;
434 fitted_padding_str.reserve(chars_to_fill);
435 for (
size_t i = 0; i < chars_to_fill; ++i) {
436 fitted_padding_str.push_back(padding_string_[i % padding_string_length_]);
438 return str + fitted_padding_str;
// Fragment of a mapping that yields a PadMode (function header and case labels are not
// shown): it returns LEFT, then RIGHT, then LEFT again.
// NOTE(review): the final LEFT is presumably a fallback after an unreachable/default
// branch — confirm against the full source.
444 return PadMode::LEFT;
446 return PadMode::RIGHT;
450 return PadMode::LEFT;
454 NullableStrType Trim::operator()(
const std::string& str)
const {
455 const auto str_len = str.size();
456 size_t trim_begin = 0;
457 if (trim_mode_ == TrimMode::LEFT || trim_mode_ == TrimMode::BOTH) {
458 while (trim_begin < str_len &&
459 trim_char_bitmap_[reinterpret_cast<const uint8_t&>(str[trim_begin])]) {
463 size_t trim_end = str_len - 1;
464 if (trim_mode_ == TrimMode::RIGHT || trim_mode_ == TrimMode::BOTH) {
465 while (trim_end > trim_begin &&
466 trim_char_bitmap_[reinterpret_cast<const uint8_t&>(str[trim_end])]) {
470 if (trim_begin == 0 && trim_end == str_len - 1) {
473 return str.substr(trim_begin, trim_end + 1 - trim_begin);
// Maps a SqlStringOpKind to a TrimMode. The case labels are not shown in this listing;
// the visible returns are BOTH, LEFT, RIGHT, and a trailing BOTH.
// NOTE(review): the trailing BOTH is presumably a fallback after an unreachable/default
// branch — confirm against the full source.
476 Trim::TrimMode Trim::op_kind_to_trim_mode(
const SqlStringOpKind op_kind) {
479 return Trim::TrimMode::BOTH;
481 return Trim::TrimMode::LEFT;
483 return Trim::TrimMode::RIGHT;
487 return Trim::TrimMode::BOTH;
// Returns up to length_ characters of `str` beginning at start_.
// A negative start_ is wrapped by adding the string length (counts from the end); the
// wrapped value is then clamped into [0, str_len] so substr() cannot throw.
491 NullableStrType Substring::operator()(
const std::string& str)
const {
494 const int64_t str_len = str.size();
495 const int64_t wrapped_start = start_ >= 0 ? start_ : str_len + start_;
496 const size_t capped_start =
497 wrapped_start > str_len ? str_len : (wrapped_start < 0 ? 0 : wrapped_start);
498 return str.substr(capped_start, length_);
// Builds: base_str[0, capped_start) + insert_str_ + the tail of base_str that begins
// replacement_length_ characters after the start position (capped at the string length).
// A negative start_ is wrapped by adding the string length, and the prefix boundary is
// clamped into [0, str_len]. The return statement is not shown in this listing.
// NOTE(review): remainder_start is computed from wrapped_start (signed, unclamped) rather
// than capped_start; when wrapped_start is negative the sum is evaluated in unsigned
// arithmetic if replacement_length_ is size_t — confirm the result is as intended.
501 NullableStrType Overlay::operator()(
const std::string& base_str)
const {
504 const int64_t str_len = base_str.size();
505 const int64_t wrapped_start = start_ >= 0 ? start_ : str_len + start_;
506 const size_t capped_start =
507 wrapped_start > str_len ? str_len : (wrapped_start < 0 ? 0 : wrapped_start);
508 std::string replaced_str = base_str.substr(0, capped_start);
509 replaced_str += insert_str_;
510 const size_t remainder_start =
511 std::min(wrapped_start + replacement_length_,
size_t(str_len));
512 const size_t remainder_length =
static_cast<size_t>(str_len) - remainder_start;
513 replaced_str += base_str.substr(remainder_start, remainder_length);
// Replaces occurrences of pattern_str_ in a copy of `str` with replacement_str_.
// The loop construct and the return are on lines not shown; visible per-iteration steps:
// find the next occurrence from search_start_index, stop handling when find() yields
// npos, replace pattern_str_len_ characters, then advance the search index past the
// inserted replacement so replaced text is not scanned again.
517 NullableStrType Replace::operator()(
const std::string& str)
const {
518 std::string replaced_str(str);
520 size_t search_start_index = 0;
522 search_start_index = replaced_str.find(pattern_str_, search_start_index);
523 if (search_start_index == std::string::npos) {
526 replaced_str.replace(search_start_index, pattern_str_len_, replacement_str_);
527 search_start_index += replacement_str_len_;
// Returns the split_part_-th field of `str` when split on delimiter_, scanning from the
// end (rfind) when reverse_ is set and from the front (find) otherwise.
// The do/while loop advances delimiter_pos until either no further delimiter exists or
// split_part_ delimiters have been counted; last_delimiter_pos keeps the previous position
// so the field between the two can be cut out with substr().
// A null NullableStrType is returned when the string has too few delimiters for the
// requested part. The empty-delimiter branch and the branch at line 551 have bodies that
// are not shown in this listing.
// NOTE(review): in reverse mode `delimiter_pos - 1UL` wraps to npos when delimiter_pos is
// 0, which makes rfind() search from the end again — confirm behaviour for strings that
// begin with the delimiter.
532 NullableStrType SplitPart::operator()(
const std::string& str)
const {
536 if (delimiter_ ==
"") {
540 const size_t str_len = str.size();
541 size_t delimiter_pos = reverse_ ? str_len : 0UL;
542 size_t last_delimiter_pos;
543 size_t delimiter_idx = 0UL;
546 last_delimiter_pos = delimiter_pos;
547 delimiter_pos = reverse_ ? str.rfind(delimiter_, delimiter_pos - 1UL)
548 : str.find(delimiter_, delimiter_pos + delimiter_length_);
549 }
while (delimiter_pos != std::string::npos && ++delimiter_idx < split_part_);
551 if (delimiter_idx == 0UL && split_part_ == 1UL) {
557 if (delimiter_pos == std::string::npos &&
558 (delimiter_idx < split_part_ - 1UL || delimiter_idx < 1UL)) {
560 return NullableStrType();
564 const size_t substr_start =
565 delimiter_pos == std::string::npos ? 0UL : delimiter_pos + delimiter_length_;
566 return str.substr(substr_start, last_delimiter_pos - substr_start);
568 const size_t substr_start =
569 split_part_ == 1UL ? 0UL : last_delimiter_pos + delimiter_length_;
570 return str.substr(substr_start, delimiter_pos - substr_start);
// Regex replacement starting at start_pos_ (negative values count from the end; the
// position is clamped into [0, str_len]).
// - occurrence_ == 0: every match from the start position onward is replaced; the
//   untouched prefix str[0, wrapped_start) is prepended to the result.
// - otherwise: only the selected occurrence is replaced. Positive occurrences are
//   converted from 1-based to 0-based before calling get_nth_regex_match(); if no such
//   match exists (npos) a separate branch runs whose body is not shown here.
//   The final string is prefix + replaced match + suffix.
574 NullableStrType RegexpReplace::operator()(
const std::string& str)
const {
575 const int64_t str_len = str.size();
576 const int64_t pos = start_pos_ < 0 ? str_len + start_pos_ : start_pos_;
577 const size_t wrapped_start = std::clamp(pos, int64_t(0), str_len);
578 if (occurrence_ == 0L) {
580 std::string::const_iterator replace_start(str.cbegin() + wrapped_start);
581 boost::regex_replace(std::back_inserter(result),
586 return str.substr(0UL, wrapped_start) +
result;
588 const auto occurrence_match_pos = RegexpReplace::get_nth_regex_match(
592 occurrence_ > 0 ? occurrence_ - 1 : occurrence_);
593 if (occurrence_match_pos.first == std::string::npos) {
598 std::string::const_iterator replace_start(str.cbegin() + occurrence_match_pos.first);
599 std::string::const_iterator replace_end(str.cbegin() + occurrence_match_pos.second);
600 std::string replaced_match;
601 boost::regex_replace(std::back_inserter(replaced_match),
606 return str.substr(0UL, occurrence_match_pos.first) + replaced_match +
607 str.substr(occurrence_match_pos.second, std::string::npos);
// Locates one regex match in `str`, searching from `start_pos`, and returns its
// [begin, end) offsets within `str`.
// `occurrence` is 0-based when non-negative (returned as soon as that match is reached).
// A negative `occurrence` counts back from the last match, which is why all match
// positions are collected first and the index is wrapped after the scan.
// Returns {npos, npos} when the wrapped index is out of range.
611 std::pair<size_t, size_t> RegexpReplace::get_nth_regex_match(
612 const std::string& str,
613 const size_t start_pos,
614 const boost::regex& regex_pattern,
615 const int64_t occurrence) {
616 std::vector<std::pair<size_t, size_t>> regex_match_positions;
617 std::string::const_iterator search_start(str.cbegin() + start_pos);
619 int64_t match_idx = 0;
620 size_t string_pos = start_pos;
621 while (boost::regex_search(search_start, str.cend(), match, regex_pattern)) {
// string_pos is advanced to the end of this match, measured from the start of `str`.
622 string_pos += match.position(
size_t(0)) + match.length(0);
623 regex_match_positions.emplace_back(
624 std::make_pair(string_pos - match.length(0), string_pos));
625 if (match_idx++ == occurrence) {
626 return regex_match_positions.back();
629 match.suffix().first;
635 const int64_t wrapped_match = occurrence >= 0 ? occurrence : match_idx + occurrence;
636 if (wrapped_match < 0 || wrapped_match >= match_idx) {
638 return std::make_pair(std::string::npos, std::string::npos);
640 return regex_match_positions[wrapped_match];
// Returns the text of one regex match in `str`, searching from start_pos_ (negative values
// count from the end; clamped into [0, str_len]).
// A non-negative occurrence_ is returned as soon as that match is reached; a negative one
// is resolved after the scan by wrapping against the number of matches found. An
// out-of-range occurrence yields a null NullableStrType. When sub_match_info_.first is
// set, the selected capture group is returned via get_sub_match() instead of the whole
// match.
// NOTE(review): on the post-loop path (negative occurrence_) get_sub_match() is handed
// `match` after the final, failed regex_search, not the match at wrapped_match — confirm
// whether sub-match extraction is correct for negative occurrences.
643 NullableStrType RegexpSubstr::operator()(
const std::string& str)
const {
644 const int64_t str_len = str.size();
645 const int64_t pos = start_pos_ < 0 ? str_len + start_pos_ : start_pos_;
646 const size_t wrapped_start = std::clamp(pos, int64_t(0), str_len);
647 int64_t match_idx = 0;
649 std::vector<std::string> regex_matches;
650 std::string::const_iterator search_start(str.cbegin() + wrapped_start);
652 while (boost::regex_search(search_start, str.cend(), match, regex_pattern_)) {
653 if (match_idx++ == occurrence_) {
654 if (sub_match_info_.first) {
655 return RegexpSubstr::get_sub_match(match, sub_match_info_);
657 return NullableStrType(match[0]);
659 regex_matches.emplace_back(match[0]);
661 match.suffix().first;
663 const int64_t wrapped_match = occurrence_ >= 0 ? occurrence_ : match_idx + occurrence_;
664 if (wrapped_match < 0 || wrapped_match >= match_idx) {
665 return NullableStrType();
667 if (sub_match_info_.first) {
668 return RegexpSubstr::get_sub_match(match, sub_match_info_);
670 return regex_matches[wrapped_match];
// Returns one capture group of `match`. sub_match_info.second is a 0-based group index;
// negative values count back from the last group. match[0] is the whole match, hence the
// "- 1" when counting groups and the "+ 1" when indexing. The out-of-range branch has a
// body that is not shown in this listing.
673 std::string RegexpSubstr::get_sub_match(
const boost::smatch& match,
674 const std::pair<bool, int64_t> sub_match_info) {
675 const int64_t num_sub_matches = match.size() - 1;
676 const int64_t wrapped_sub_match = sub_match_info.second >= 0
677 ? sub_match_info.second
678 : num_sub_matches + sub_match_info.second;
679 if (wrapped_sub_match < 0 || wrapped_sub_match >= num_sub_matches) {
682 return match[wrapped_sub_match + 1];
// Produces the (enabled, group index) pair consumed by get_sub_match().
// Sub-match extraction is enabled only when the first argument contains the character
// "e". A positive sub_match_group_idx is converted from 1-based to 0-based; zero and
// negative values are passed through unchanged.
// NOTE(review): the first parameter is named regex_pattern but is searched for the 'e'
// option flag — presumably callers pass the regex params string; confirm.
685 std::pair<bool, int64_t> RegexpSubstr::set_sub_match_info(
686 const std::string& regex_pattern,
687 const int64_t sub_match_group_idx) {
688 if (regex_pattern.find(
"e", 0UL) == std::string::npos) {
689 return std::make_pair(
false, 0UL);
691 return std::make_pair(
692 true, sub_match_group_idx > 0L ? sub_match_group_idx - 1 : sub_match_group_idx);
// String-output overload of RegexpCount. Not a valid entry point: trips UNREACHABLE()
// with a diagnostic (the statement after it is not shown in this listing).
695 NullableStrType RegexpCount::operator()(
const std::string& str)
const {
696 UNREACHABLE() <<
"Invalid string output for RegexpCount";
// Counts the matches of regex_pattern_ in `str_view` from start_pos_ onward (negative
// values count from the end; the position is clamped into [0, str_len]). Matches are
// enumerated with boost::cregex_iterator over the raw character range and counted with
// std::distance. The empty-input branch and the code that stores num_matches are on lines
// not shown in this listing.
700 Datum RegexpCount::numericEval(
const std::string_view str_view)
const {
701 if (str_view.empty()) {
706 const int64_t str_len = str_view.size();
707 const int64_t pos = start_pos_ < 0 ? str_len + start_pos_ : start_pos_;
708 const size_t wrapped_start = std::clamp(pos, int64_t(0), str_len);
709 auto search_start = str_view.data() + wrapped_start;
710 auto search_end = str_view.data() + str_len;
711 boost::cregex_iterator iter(search_start, search_end, regex_pattern_);
712 boost::cregex_iterator end;
714 int64_t num_matches = std::distance(iter, end);
// Determines the parse mode from the text that precedes the '$' in `json_path`.
// - '$' at position 0 (no prefix): lax mode.
// - no '$' at all: throws std::runtime_error.
// - prefix "lax " (case-insensitive): lax mode.
// - prefix "strict " (case-insensitive): strict mode if allow_strict_json_parsing is
//   enabled at compile time, otherwise throws std::runtime_error.
// - any other prefix: throws std::runtime_error.
721 JsonValue::JsonParseMode JsonValue::parse_json_parse_mode(std::string_view json_path) {
722 size_t const string_pos = json_path.find(
'$');
723 if (string_pos == 0) {
725 return JsonValue::JsonParseMode::PARSE_MODE_LAX;
726 }
else if (string_pos == std::string::npos) {
727 throw std::runtime_error(
"JSON search path must include a '$' literal.");
729 std::string_view
const prefix = json_path.substr(0, string_pos);
730 if (boost::iequals(prefix, std::string_view(
"lax "))) {
731 return JsonValue::JsonParseMode::PARSE_MODE_LAX;
732 }
else if (boost::iequals(prefix, std::string_view(
"strict "))) {
733 if constexpr (JsonValue::allow_strict_json_parsing) {
734 return JsonValue::JsonParseMode::PARSE_MODE_STRICT;
736 throw std::runtime_error(
"Strict parsing not currently supported for JSON_VALUE.");
739 throw std::runtime_error(
"Issue parsing JSON_VALUE Parse Mode.");
// Parses the part of `json_path` after '$' into a sequence of JsonKey entries.
// Each step must match key_regex at the current position: `.name`, `."quoted name"`, or
// `[digits]`. Capture group 3 is an unquoted object key, group 4 a quoted object key and
// group 5 an array index. A key whose first character is alphabetic becomes an object
// key; otherwise it is converted with std::stoi and becomes an array key.
// Throws std::runtime_error when '$' is missing, when a match does not start at the
// current position, when no keys are found, or when unparsed text remains.
// key_regex is heap-allocated once and bound to a static reference, so it is never
// destroyed; the static_assert documents that the reference itself needs no destructor.
// NOTE(review): std::stoi can throw std::out_of_range for very large indexes, which is
// not a std::runtime_error — confirm callers tolerate that.
743 std::vector<JsonValue::JsonKey> JsonValue::parse_json_path(
const std::string& json_path) {
745 size_t string_pos = json_path.find(
"$");
746 if (string_pos == std::string::npos) {
747 throw std::runtime_error(
"JSON search path must begin with '$' literal.");
752 static const auto& key_regex = *
new boost::regex(
753 R
"~(^(\.(([[:alpha:]][[:alnum:]_-]*)|"([[:alpha:]][ [:alnum:]_-]*)"))|\[([[:digit:]]+)\])~",
754 boost::regex_constants::extended | boost::regex_constants::optimize);
755 static_assert(std::is_trivially_destructible_v<decltype(key_regex)>);
757 std::string::const_iterator search_start(json_path.cbegin() + string_pos);
759 std::vector<JsonKey> json_keys;
760 while (boost::regex_search(search_start, json_path.cend(), match, key_regex)) {
762 if (match.position(
size_t(0)) != 0L) {
764 throw std::runtime_error(
"JSON search path parsing error: '" + json_path +
"'");
766 size_t matching_expr = 0;
767 if (match[3].matched) {
770 }
else if (match[4].matched) {
773 }
else if (match[5].matched) {
778 string_pos += match.length(0);
780 const std::string key_match(match[matching_expr].first, match[matching_expr].second);
782 if (isalpha(key_match[0])) {
784 json_keys.emplace_back(JsonKey(key_match));
787 json_keys.emplace_back(JsonKey(std::stoi(key_match)));
790 match.suffix().first;
792 if (json_keys.empty()) {
793 throw std::runtime_error(
"No keys found in JSON search path.");
795 if (string_pos < json_path.size()) {
796 throw std::runtime_error(
"JSON path parsing error.");
// Parses `str` as JSON with rapidjson, walks it along json_keys_, and returns the value
// found there as a string.
// - Parse failure, a missing object member, a non-object/non-array where one is needed,
//   or an out-of-range array index: handled by handle_parse_error()/handle_key_error()
//   when allow_strict_json_parsing is enabled at compile time, otherwise a null
//   NullableStrType is returned.
// - String values are returned as-is; booleans become "true"/"false"; JSON null yields a
//   null NullableStrType. The number-to-string conversions (double / int64 / uint64
//   branches) are on lines not shown in this listing.
// - Any other value kind goes through the same strict/lax error handling as above.
801 NullableStrType JsonValue::operator()(
const std::string& str)
const {
802 rapidjson::Document document;
803 if (document.Parse(str.c_str()).HasParseError()) {
804 if constexpr (JsonValue::allow_strict_json_parsing) {
805 return handle_parse_error(str);
807 return NullableStrType();
810 rapidjson::Value& json_val = document;
811 for (
const auto& json_key : json_keys_) {
812 switch (json_key.key_kind) {
813 case JsonKeyKind::JSON_OBJECT: {
814 if (!json_val.IsObject() || !json_val.HasMember(json_key.object_key)) {
815 if constexpr (JsonValue::allow_strict_json_parsing) {
816 return handle_key_error(str);
818 return NullableStrType();
821 json_val = json_val[json_key.object_key];
824 case JsonKeyKind::JSON_ARRAY: {
825 if (!json_val.IsArray() || json_val.Size() <= json_key.array_key) {
826 if constexpr (JsonValue::allow_strict_json_parsing) {
827 return handle_key_error(str);
829 return NullableStrType();
832 json_val = json_val[json_key.array_key];
838 if (json_val.IsString()) {
839 return NullableStrType(std::string(json_val.GetString()));
840 }
else if (json_val.IsNumber()) {
841 if (json_val.IsDouble()) {
843 }
else if (json_val.IsInt64()) {
845 }
else if (json_val.IsUint64()) {
851 if constexpr (JsonValue::allow_strict_json_parsing) {
852 return handle_key_error(str);
854 return NullableStrType();
857 }
else if (json_val.IsBool()) {
858 return NullableStrType(std::string(json_val.IsTrue() ?
"true" :
"false"));
859 }
else if (json_val.IsNull()) {
860 return NullableStrType();
864 if constexpr (JsonValue::allow_strict_json_parsing) {
865 return handle_key_error(str);
867 return NullableStrType();
// Base64-encodes `str`; the body is not visible in this listing.
// NOTE(review): presumably delegates to encode_base64() — confirm.
872 NullableStrType Base64Encode::operator()(
const std::string& str)
const {
// Base64-decodes `str`; the body is not visible in this listing.
// NOTE(review): presumably delegates to decode_base64() — confirm.
876 NullableStrType Base64Decode::operator()(
const std::string& str)
const {
// Body fragment of a character predicate (header not shown): true for characters that
// pass std::isalnum and for '-', '.', '_' and '~'.
// NOTE(review): presumably this is is_normal(), i.e. the characters UrlEncode emits
// unescaped — confirm.
883 return std::isalnum(c) || c ==
'-' || c ==
'.' || c ==
'_' || c ==
'~';
// Fragment of a '%'-counting helper (header, loop body and return not shown). The scan
// only runs for strings longer than two characters and stops two characters before the
// end, i.e. it only visits positions that still have two characters after them.
893 size_t n_percents = 0u;
894 if (2u < str.size()) {
895 for (
size_t i = 0u; i < str.size() - 2u; ++i) {
// Body fragment of the hex-digit-to-value helper (header not shown): characters at or
// above 'A' are upper-cased and mapped so that 'A' becomes 10; all others are mapped by
// subtracting '0'. No validation is performed on characters outside [0-9A-Fa-f].
907 return 'A' <= hex ? std::toupper(hex) + (10 -
'A') : hex -
'0';
// URL-encodes `str`. Characters that need no escaping are copied, a space becomes '+', and
// every other byte becomes '%' followed by its two upper-case hex digits (high nibble
// first, looked up in `tr`).
// The output buffer is reserved up front: one character per "singular" input character
// (per is_singular) and three per escaped one. The first branch condition and the return
// are on lines not shown in this listing.
914 NullableStrType UrlEncode::operator()(
const std::string& str)
const {
915 constexpr
char const* tr =
"0123456789ABCDEF";
917 size_t const n_singular = std::count_if(str.begin(), str.end(),
is_singular);
919 encoded.reserve(str.size() + 2u * (str.size() - n_singular));
920 for (
char const c : str) {
922 encoded.append(1u, c);
923 }
else if (c ==
' ') {
924 encoded.append(1u,
'+');
926 encoded.append(1u,
'%');
927 encoded.append(1u, tr[(c >> 4) & 0xf]);
928 encoded.append(1u, tr[c & 0xf]);
// URL-decodes `str`. A '%' that still has two characters after it is replaced by the byte
// whose high and low nibbles come from those two characters (via nibble()); '+' becomes a
// space; everything else is copied unchanged. A '%' too close to the end of the string
// falls through to the copy branch.
// The output buffer is reserved as the input size minus two characters per counted '%'.
// The index advance past the two hex digits and the return are on lines not shown here.
935 NullableStrType UrlDecode::operator()(
const std::string& str)
const {
938 decoded.reserve(str.size() - 2u * n_percents);
939 for (
size_t i = 0u; i < str.size(); ++i) {
940 if (str[i] ==
'%' && i + 2u < str.size()) {
941 decoded.append(1u,
nibble(str[i + 1u]) << 4 ^
nibble(str[i + 2u]));
943 }
else if (str[i] ==
'+') {
944 decoded.append(1u,
' ');
946 decoded.append(1u, str[i]);
// Applies every op in string_ops_ to `str` in order, feeding each op's output into the
// next, and returns the final string. The chain is checked for a null result before the
// first op and after every op; the bodies of those null branches are not shown in this
// listing.
952 std::string StringOps::operator()(
const std::string& str)
const {
953 NullableStrType modified_str(str);
954 if (modified_str.is_null) {
957 for (
const auto& string_op : string_ops_) {
958 modified_str = string_op->operator()(modified_str.str);
959 if (modified_str.is_null) {
963 return modified_str.str;
// Two-input variant of the op chain: each op is called with the running first string and
// the (unchanged) second string, and its result replaces the first string. Null inputs
// and null intermediate results take branches whose bodies are not shown in this listing.
966 std::string StringOps::multi_input_eval(
const std::string_view str1,
967 const std::string_view str2)
const {
968 NullableStrType modified_str1(str1);
969 NullableStrType modified_str2(str2);
970 if (modified_str1.is_null || modified_str2.is_null) {
973 for (
const auto& string_op : string_ops_) {
974 modified_str1 = string_op->operator()(modified_str1.str, modified_str2.str);
975 if (modified_str1.is_null) {
979 return modified_str1.str;
// string_view variant of the op chain: applies every op in order and copies the final
// string into the caller-provided `sv_storage`. The returned view (return statement not
// shown) is presumably backed by that storage — confirm. The null-result branch body is
// not shown in this listing.
982 std::string_view StringOps::operator()(
const std::string_view sv,
983 std::string& sv_storage)
const {
985 NullableStrType nullable_str(sv);
986 for (
const auto& string_op : string_ops_) {
987 nullable_str = string_op->operator()(nullable_str.str);
988 if (nullable_str.is_null) {
992 sv_storage = nullable_str.str;
// Evaluates a chain whose last op produces a number: all ops except the last are applied
// as string ops, then the last op's numericEval() is called on the resulting string.
// With a single op, numericEval() is called on `str` directly. The null-intermediate
// branch body is not shown in this listing.
// NOTE(review): string_ops_.size() - 1 wraps if string_ops_ is empty — presumably callers
// guarantee at least one op; confirm.
996 Datum StringOps::numericEval(
const std::string_view str)
const {
997 const auto num_string_producing_ops = string_ops_.size() - 1;
998 if (num_string_producing_ops == 0UL) {
1001 return string_ops_.back()->numericEval(str);
1003 NullableStrType modified_str(str);
1004 for (
size_t string_op_idx = 0; string_op_idx < num_string_producing_ops;
1006 const auto& string_op = string_ops_[string_op_idx];
1007 modified_str = string_op->operator()(modified_str.str);
1008 if (modified_str.is_null) {
1012 return string_ops_.back()->numericEval(modified_str.str);
// Two-input numeric evaluation. Chaining is not supported here: the CHECK_EQ requires that
// there are no string-producing ops ahead of the final one, whose two-argument
// numericEval() is then called directly.
1015 Datum StringOps::numericEval(
const std::string_view str1,
1016 const std::string_view str2)
const {
1017 const auto num_string_producing_ops = string_ops_.size() - 1;
1021 CHECK_EQ(num_string_producing_ops, 0UL);
1022 return string_ops_.back()->numericEval(str1, str2);
// Builds the vector of StringOp objects for the given StringOpInfo descriptors. Capacity
// is reserved for one op per descriptor; the loop body and return are not shown in this
// listing.
1025 std::vector<std::unique_ptr<const StringOp>> StringOps::genStringOpsFromOpInfos(
1026 const std::vector<StringOpInfo>& string_op_infos)
const {
1029 std::vector<std::unique_ptr<const StringOp>> string_ops;
1030 string_ops.reserve(string_op_infos.size());
1031 for (
const auto& string_op_info : string_op_infos) {
// Fragment of the per-op factory (function header, switch statement and case labels are
// not shown in this listing). The visible pattern for every op kind is:
//   1. CHECK_* the number of non-variable literals the op expects,
//   2. fetch those literals from string_op_info via getIntLiteral()/getStringLiteral(),
//   3. return a std::make_unique of the concrete op, always passing
//      var_string_optional_literal first.
// Ops whose literal count may vary (Concat, Substring, Overlay, Position,
// JarowinklerSimilarity, LevenshteinDistance) select between two constructor forms.
// NOTE(review): Pad and Trim are created with make_unique<Pad>/<Trim> (no const) while
// every other op uses make_unique<const ...>; harmless if the return type is
// unique_ptr<const StringOp>, but inconsistent — confirm.
1040 std::optional<std::string> var_string_optional_literal;
1041 const auto op_kind = string_op_info.
getOpKind();
1045 return std::make_unique<const NullOp>(
1046 return_ti, var_string_optional_literal, op_kind);
1057 CHECK_EQ(num_non_variable_literals, 0UL);
1058 return std::make_unique<const Lower>(var_string_optional_literal);
1061 CHECK_EQ(num_non_variable_literals, 0UL);
1062 return std::make_unique<const Upper>(var_string_optional_literal);
1065 CHECK_EQ(num_non_variable_literals, 0UL);
1066 return std::make_unique<const InitCap>(var_string_optional_literal);
1069 CHECK_EQ(num_non_variable_literals, 0UL);
1070 return std::make_unique<const Reverse>(var_string_optional_literal);
1073 CHECK_EQ(num_non_variable_literals, 1UL);
1074 const auto num_repeats_literal = string_op_info.
getIntLiteral(1);
1075 return std::make_unique<const Repeat>(var_string_optional_literal,
1076 num_repeats_literal);
1080 CHECK_GE(num_non_variable_literals, 0UL);
1081 CHECK_LE(num_non_variable_literals, 1UL);
1082 if (num_non_variable_literals == 1UL) {
1085 return std::make_unique<const Concat>(var_string_optional_literal,
1089 return std::make_unique<const Concat>(var_string_optional_literal);
1094 CHECK_EQ(num_non_variable_literals, 2UL);
1095 const auto padded_length_literal = string_op_info.
getIntLiteral(1);
1097 return std::make_unique<Pad>(var_string_optional_literal,
1099 padded_length_literal,
1100 padding_string_literal);
1105 CHECK_EQ(num_non_variable_literals, 1UL);
1107 return std::make_unique<Trim>(
1108 var_string_optional_literal, op_kind, trim_chars_literal);
1111 CHECK_GE(num_non_variable_literals, 1UL);
1112 CHECK_LE(num_non_variable_literals, 2UL);
1113 const auto start_pos_literal = string_op_info.
getIntLiteral(1);
1115 if (has_length_literal) {
1116 const auto length_literal = string_op_info.
getIntLiteral(2);
1117 return std::make_unique<const Substring>(
1118 var_string_optional_literal, start_pos_literal, length_literal);
1120 return std::make_unique<const Substring>(var_string_optional_literal,
1125 CHECK_GE(num_non_variable_literals, 2UL);
1126 CHECK_LE(num_non_variable_literals, 3UL);
1128 const auto start_pos_literal = string_op_info.
getIntLiteral(2);
1130 if (has_length_literal) {
1131 const auto length_literal = string_op_info.
getIntLiteral(3);
1132 return std::make_unique<const Overlay>(var_string_optional_literal,
1133 replace_string_literal,
1137 return std::make_unique<const Overlay>(
1138 var_string_optional_literal, replace_string_literal, start_pos_literal);
1142 CHECK_GE(num_non_variable_literals, 2UL);
1143 CHECK_LE(num_non_variable_literals, 2UL);
1145 const auto replacement_string_literal = string_op_info.
getStringLiteral(2);
1146 return std::make_unique<const Replace>(var_string_optional_literal,
1147 pattern_string_literal,
1148 replacement_string_literal);
1151 CHECK_GE(num_non_variable_literals, 2UL);
1152 CHECK_LE(num_non_variable_literals, 2UL);
1154 const auto split_part_literal = string_op_info.
getIntLiteral(2);
1155 return std::make_unique<const SplitPart>(
1156 var_string_optional_literal, delimiter_literal, split_part_literal);
1159 CHECK_GE(num_non_variable_literals, 5UL);
1160 CHECK_LE(num_non_variable_literals, 5UL);
1163 const auto start_pos_literal = string_op_info.
getIntLiteral(3);
1164 const auto occurrence_literal = string_op_info.
getIntLiteral(4);
1166 return std::make_unique<const RegexpReplace>(var_string_optional_literal,
1168 replacement_literal,
1171 regex_params_literal);
1174 CHECK_GE(num_non_variable_literals, 5UL);
1175 CHECK_LE(num_non_variable_literals, 5UL);
1177 const auto start_pos_literal = string_op_info.
getIntLiteral(2);
1178 const auto occurrence_literal = string_op_info.
getIntLiteral(3);
1180 const auto sub_match_idx_literal = string_op_info.
getIntLiteral(5);
1181 return std::make_unique<const RegexpSubstr>(var_string_optional_literal,
1185 regex_params_literal,
1186 sub_match_idx_literal);
1189 CHECK_GE(num_non_variable_literals, 3UL);
1190 CHECK_LE(num_non_variable_literals, 3UL);
1192 const auto start_pos_literal = string_op_info.
getIntLiteral(2);
1194 return std::make_unique<const RegexpCount>(var_string_optional_literal,
1197 regex_params_literal);
1200 CHECK_EQ(num_non_variable_literals, 1UL);
1202 return std::make_unique<const JsonValue>(var_string_optional_literal,
1206 CHECK_EQ(num_non_variable_literals, 0UL);
1207 return std::make_unique<const Base64Encode>(var_string_optional_literal);
1210 CHECK_EQ(num_non_variable_literals, 0UL);
1211 return std::make_unique<const Base64Decode>(var_string_optional_literal);
1214 CHECK_EQ(num_non_variable_literals, 0UL);
1215 return std::make_unique<const UrlEncode>(var_string_optional_literal);
1218 CHECK_EQ(num_non_variable_literals, 0UL);
1219 return std::make_unique<const UrlDecode>(var_string_optional_literal);
1222 CHECK_EQ(num_non_variable_literals, 0UL);
1223 return std::make_unique<const TryStringCast>(return_ti,
1224 var_string_optional_literal);
1227 CHECK_GE(num_non_variable_literals, 1UL);
1228 CHECK_LE(num_non_variable_literals, 2UL);
1231 if (has_start_pos_literal) {
1232 const auto start_pos_literal = string_op_info.
getIntLiteral(2);
1233 return std::make_unique<const Position>(
1234 var_string_optional_literal, search_literal, start_pos_literal);
1236 return std::make_unique<const Position>(var_string_optional_literal,
1241 CHECK_GE(num_non_variable_literals, 0UL);
1242 CHECK_LE(num_non_variable_literals, 1UL);
1243 if (num_non_variable_literals == 1UL) {
1245 return std::make_unique<const JarowinklerSimilarity>(var_string_optional_literal,
1248 return std::make_unique<const JarowinklerSimilarity>(var_string_optional_literal);
1252 CHECK_GE(num_non_variable_literals, 0UL);
1253 CHECK_LE(num_non_variable_literals, 1UL);
1254 if (num_non_variable_literals == 1UL) {
1256 return std::make_unique<const LevenshteinDistance>(var_string_optional_literal,
1259 return std::make_unique<const LevenshteinDistance>(var_string_optional_literal);
1263 CHECK_EQ(num_non_variable_literals, 0UL);
1264 return std::make_unique<const Hash>(var_string_optional_literal);
// Fragment of the literal-folding helper for string-producing ops (header and the branch
// condition are not shown). One path returns the pair (empty string, true); the other
// invokes the generated op with no arguments and converts its result with toPair().
// NOTE(review): the `true` is presumably an is-null flag — confirm.
1276 const std::string null_str{
""};
1277 return std::make_pair(null_str,
true);
1280 return string_op->operator()().toPair();
// Fragment of the literal-folding helper for numeric ops (header not shown): evaluates
// the generated op's no-argument numericEval() and returns its Datum.
1286 return string_op->numericEval();
const SQLTypeInfo & getReturnType() const
Datum apply_numeric_op_to_literals(const StringOpInfo &string_op_info)
T compute_levenshtein_distance_template(std::string_view s1, std::string_view s2)
bool is_singular(char const c)
size_t numLiterals() const
bool intLiteralArgAtIdxExists(const size_t index) const
double compute_jaro_winkler_score(std::string_view s1, std::string_view s2)
int64_t getIntLiteral(const size_t index) const
size_t numNonVariableLiterals() const
bool hasNullLiteralArg() const
bool hasVarStringLiteral() const
std::pair< std::string, bool > apply_string_op_to_literals(const StringOpInfo &string_op_info)
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
OUTPUT transform(INPUT const &input, FUNC const &func)
Datum NullDatum(const SQLTypeInfo &ti)
constexpr int winkler_k_prefix_length
double compute_jaro_score(std::string_view s1, std::string_view s2)
int64_t compute_levenshtein_distance(std::string_view s1, std::string_view s2)
int nibble(char const hex)
std::string getStringLiteral(const size_t index) const
std::string decode_base64(const std::string &val, bool trim_nulls)
static std::string encode_base64(const std::string &val)
const SqlStringOpKind & getOpKind() const
size_t count_percents(std::string const &str)
constexpr double winkler_k_scaling_factor
std::unique_ptr< const StringOp > gen_string_op(const StringOpInfo &string_op_info)
bool is_normal(char const c)