OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringTestTableFunctions.cpp File Reference
#include "TableFunctionsTesting.h"
#include <iostream>
#include <string>
+ Include dependency graph for StringTestTableFunctions.cpp:

Go to the source code of this file.

Functions

EXTENSION_NOINLINE_HOST int32_t ct_binding_str_length__cpu_ (const Column< TextEncodingDict > &input_str, Column< TextEncodingDict > &out_str, Column< int64_t > &out_size)
 
EXTENSION_NOINLINE_HOST int32_t ct_binding_str_equals__cpu_ (const ColumnList< TextEncodingDict > &input_strings, Column< TextEncodingDict > &string_if_equal, Column< bool > &strings_are_equal)
 
EXTENSION_NOINLINE_HOST int32_t ct_substr__cpu_ (TableFunctionManager &mgr, const Column< TextEncodingDict > &input_str, const Column< int > &pos, const Column< int > &len, Column< TextEncodingDict > &output_substr)
 
EXTENSION_NOINLINE_HOST int32_t ct_string_concat__cpu_ (TableFunctionManager &mgr, const ColumnList< TextEncodingDict > &input_strings, const TextEncodingNone &separator, Column< TextEncodingDict > &concatted_string)
 
EXTENSION_NOINLINE_HOST int32_t ct_synthesize_new_dict__cpu_ (TableFunctionManager &mgr, const int64_t num_strings, Column< TextEncodingDict > &new_dict_col)
 
EXTENSION_NOINLINE int32_t ct_hamming_distance (const TextEncodingNone &str1, const TextEncodingNone &str2, Column< int32_t > &hamming_distance)
 
template<typename T >
TEMPLATE_NOINLINE int32_t ct_get_string_chars__template (const Column< T > &indices, const TextEncodingNone &str, const int32_t multiplier, Column< int32_t > &idx, Column< int8_t > &char_bytes)
 
template TEMPLATE_NOINLINE int32_t ct_get_string_chars__template (const Column< int16_t > &indices, const TextEncodingNone &str, const int32_t multiplier, Column< int32_t > &idx, Column< int8_t > &char_bytes)
 
template TEMPLATE_NOINLINE int32_t ct_get_string_chars__template (const Column< int32_t > &indices, const TextEncodingNone &str, const int32_t multiplier, Column< int32_t > &idx, Column< int8_t > &char_bytes)
 
EXTENSION_NOINLINE_HOST int32_t ct_string_to_chars__cpu_ (const TextEncodingNone &input, Column< int32_t > &char_idx, Column< int8_t > &char_bytes)
 

Function Documentation

EXTENSION_NOINLINE_HOST int32_t ct_binding_str_equals__cpu_ ( const ColumnList< TextEncodingDict > &  input_strings,
Column< TextEncodingDict > &  string_if_equal,
Column< bool > &  strings_are_equal 
)

Definition at line 40 of file StringTestTableFunctions.cpp.

References ColumnList< TextEncodingDict >::numCols(), set_output_row_size(), Column< TextEncodingDict >::setNull(), and ColumnList< TextEncodingDict >::size().

// Body of ct_binding_str_equals__cpu_: for every row, compares the string of each
// input column against the string of column 0, writes the verdict to
// strings_are_equal, and copies the column-0 entry into string_if_equal when all
// columns match (null otherwise). Returns the number of input rows.
42  {
43  const int64_t num_rows = input_strings.size();
44  const int64_t num_cols = input_strings.numCols();
45  set_output_row_size(num_rows);
46  for (int64_t r = 0; r < num_rows; r++) {
47  bool are_equal = true;
// NOTE(review): all writes for row r live inside this branch, so with zero input
// columns neither output column is assigned for the row — confirm that is intended.
48  if (num_cols > 0) {
49  std::string first_str = input_strings[0].getString(r);
// Stop at the first column whose string differs from column 0.
50  for (int64_t c = 1; c != num_cols; ++c) {
51  if (input_strings[c].getString(r) != first_str) {
52  are_equal = false;
53  break;
54  }
55  }
56  strings_are_equal[r] = are_equal;
// The `num_cols > 0` test below is already guaranteed by the enclosing branch.
57  if (are_equal && num_cols > 0) {
58  string_if_equal[r] = input_strings[0][r];
59  } else {
60  string_if_equal.setNull(r);
61  }
62  }
63  }
64  return num_rows;
65 }
EXTENSION_NOINLINE_HOST void set_output_row_size(int64_t num_rows)
DEVICE int64_t size() const
DEVICE void setNull(int64_t index)
DEVICE int64_t numCols() const

+ Here is the call graph for this function:

EXTENSION_NOINLINE_HOST int32_t ct_binding_str_length__cpu_ ( const Column< TextEncodingDict > &  input_str,
Column< TextEncodingDict > &  out_str,
Column< int64_t > &  out_size 
)

Definition at line 26 of file StringTestTableFunctions.cpp.

References Column< TextEncodingDict >::getString(), set_output_row_size(), Column< T >::size(), and Column< TextEncodingDict >::size().

// Body of ct_binding_str_length__cpu_: passes each input entry through to out_str
// unchanged and writes the length of its string to out_size. Returns the number of
// input rows.
28  {
29  const int64_t num_rows = input_str.size();
30  set_output_row_size(num_rows);
31  for (int64_t i = 0; i < num_rows; i++) {
32  out_str[i] = input_str[i];
// The string is fetched only to measure its size.
33  const std::string str = input_str.getString(i);
34  out_size[i] = str.size();
35  }
36  return num_rows;
37 }
DEVICE const std::string getString(int64_t index) const
EXTENSION_NOINLINE_HOST void set_output_row_size(int64_t num_rows)
DEVICE int64_t size() const
DEVICE int64_t size() const

+ Here is the call graph for this function:

template<typename T >
TEMPLATE_NOINLINE int32_t ct_get_string_chars__template ( const Column< T > &  indices,
const TextEncodingNone &  str,
const int32_t  multiplier,
Column< int32_t > &  idx,
Column< int8_t > &  char_bytes 
)

Definition at line 152 of file StringTestTableFunctions.cpp.

References TextEncodingNone::size(), and Column< T >::size().

// Body of ct_get_string_chars__template: for each output row i, copies indices[i]
// into idx and the byte str[i % str_len] into char_bytes, so the string repeats
// cyclically when there are more rows than characters. Returns 0 without writing
// anything when multiplier != 1, otherwise the number of output rows.
156  {
157  const int32_t str_len = str.size();
158  // Note: we assume RowMultiplier is 1 for this test, was to make running on
159  // GPU easy Todo: Provide Constant RowMultiplier interface
160  if (multiplier != 1) {
161  return 0;
162  }
163  const int32_t num_input_rows = indices.size();
164  const int32_t num_output_rows = num_input_rows * multiplier;
165 
// When compiled with nvcc each thread starts at its global thread index and
// advances by the total number of launched threads (grid-stride loop); the host
// build walks every row sequentially.
166 #ifdef __CUDACC__
167  const int32_t start = threadIdx.x + blockDim.x * blockIdx.x;
168  const int32_t step = blockDim.x * gridDim.x;
169 #else
170  const int32_t start = 0;
171  const int32_t step = 1;
172 #endif
173 
174  for (int32_t i = start; i < num_output_rows; i += step) {
// NOTE(review): `i % num_output_rows` is a no-op because the loop bound already
// keeps i < num_output_rows; `i % num_input_rows` may have been intended — confirm.
175  idx[i] = indices[i % num_output_rows];
// NOTE(review): if str_len is 0 this modulo divides by zero — confirm callers
// never pass an empty string.
176  char_bytes[i] = str[i % str_len]; // index < str_len ? str[i] : 0;
177  }
178  return num_output_rows;
179 }
DEVICE int64_t size() const
DEVICE ALWAYS_INLINE int64_t size() const
Definition: heavydbTypes.h:688

+ Here is the call graph for this function:

template TEMPLATE_NOINLINE int32_t ct_get_string_chars__template ( const Column< int16_t > &  indices,
const TextEncodingNone &  str,
const int32_t  multiplier,
Column< int32_t > &  idx,
Column< int8_t > &  char_bytes 
)
template TEMPLATE_NOINLINE int32_t ct_get_string_chars__template ( const Column< int32_t > &  indices,
const TextEncodingNone &  str,
const int32_t  multiplier,
Column< int32_t > &  idx,
Column< int8_t > &  char_bytes 
)
EXTENSION_NOINLINE int32_t ct_hamming_distance ( const TextEncodingNone &  str1,
const TextEncodingNone &  str2,
Column< int32_t > &  hamming_distance 
)

Definition at line 125 of file StringTestTableFunctions.cpp.

References Column< T >::ptr_, and TextEncodingNone::size().

// Body of ct_hamming_distance: counts the positions at which str1 and str2 differ,
// looking only at the first min(str1.size(), str2.size()) characters, and stores
// the count in the single output row. Always returns 1.
127  {
// Characters beyond the shorter string are ignored, so a length difference does
// not contribute to the reported distance.
128  const int32_t str_len = str1.size() <= str2.size() ? str1.size() : str2.size();
129 
// When compiled with nvcc each thread starts at its global thread index and
// advances by the total number of launched threads; the host build walks every
// character sequentially.
130 #ifdef __CUDACC__
131  const int32_t start = threadIdx.x + blockDim.x * blockIdx.x;
132  const int32_t step = blockDim.x * gridDim.x;
133  int32_t* output_ptr = hamming_distance.ptr_;
134 #else
135  const int32_t start = 0;
136  const int32_t step = 1;
137 #endif
138 
139  int32_t num_chars_unequal = 0;
140  for (int32_t i = start; i < str_len; i += step) {
141  num_chars_unequal += (str1[i] != str2[i]) ? 1 : 0;
142  }
// Each GPU thread adds its partial count to the shared output cell; the host
// build assigns the total directly.
// NOTE(review): the atomicAdd path only accumulates, so it presumably relies on
// the output buffer being zero before launch — confirm against the caller.
143 #ifdef __CUDACC__
144  atomicAdd(output_ptr, num_chars_unequal);
145 #else
146  hamming_distance[0] = num_chars_unequal;
147 #endif
148  return 1;
149 }
DEVICE ALWAYS_INLINE int64_t size() const
Definition: heavydbTypes.h:688

+ Here is the call graph for this function:

EXTENSION_NOINLINE_HOST int32_t ct_string_concat__cpu_ ( TableFunctionManager &  mgr,
const ColumnList< TextEncodingDict > &  input_strings,
const TextEncodingNone &  separator,
Column< TextEncodingDict > &  concatted_string 
)

Definition at line 85 of file StringTestTableFunctions.cpp.

References Column< TextEncodingDict >::getOrAddTransient(), TextEncodingNone::getString(), ColumnList< TextEncodingDict >::numCols(), TableFunctionManager::set_output_row_size(), Column< TextEncodingDict >::setNull(), and ColumnList< TextEncodingDict >::size().

// Body of ct_string_concat__cpu_: for every row, joins the strings of all input
// columns in column order with `separator` between them, obtains an entry for the
// joined text via getOrAddTransient on the output column, and stores it in that
// row. With zero input columns every output row is set to null. Returns the number
// of input rows.
88  {
89  const int64_t num_rows = input_strings.size();
90  const int64_t num_cols = input_strings.numCols();
// Materialize the separator once, outside the row loop.
91  const std::string separator_str{separator.getString()};
92  mgr.set_output_row_size(num_rows);
93  for (int64_t row_idx = 0; row_idx < num_rows; row_idx++) {
94  if (num_cols > 0) {
// Seed with column 0, then append "<separator><column k>" for each later column.
95  std::string concatted_output{input_strings[0].getString(row_idx)};
96  for (int64_t col_idx = 1; col_idx < num_cols; ++col_idx) {
97  concatted_output += separator_str;
98  concatted_output += input_strings[col_idx].getString(row_idx);
99  }
100  const TextEncodingDict concatted_str_id =
101  concatted_string.getOrAddTransient(concatted_output);
102  concatted_string[row_idx] = concatted_str_id;
103  } else {
104  concatted_string.setNull(row_idx);
105  }
106  }
107  return num_rows;
108 }
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
std::string getString() const
Definition: heavydbTypes.h:641
DEVICE int64_t size() const
DEVICE void setNull(int64_t index)
DEVICE int64_t numCols() const
DEVICE const TextEncodingDict getOrAddTransient(const std::string &str)

+ Here is the call graph for this function:

EXTENSION_NOINLINE_HOST int32_t ct_string_to_chars__cpu_ ( const TextEncodingNone &  input,
Column< int32_t > &  char_idx,
Column< int8_t > &  char_bytes 
)

Definition at line 200 of file StringTestTableFunctions.cpp.

References TextEncodingNone::getString(), and set_output_row_size().

// Body of ct_string_to_chars__cpu_: emits one output row per character of `input`,
// writing the character position to char_idx and the character itself to
// char_bytes. Returns the string size.
202  {
203  const std::string str{input.getString()};
204  const int64_t str_size(str.size());
205  set_output_row_size(str_size);
// NOTE(review): the loop index is int32_t while str_size is int64_t; a string
// longer than INT32_MAX would overflow the index — likely irrelevant for a test
// function, but confirm.
206  for (int32_t i = 0; i < str_size; ++i) {
207  char_idx[i] = i;
208  char_bytes[i] = str[i];
209  }
210  return str_size;
211 }
EXTENSION_NOINLINE_HOST void set_output_row_size(int64_t num_rows)
std::string getString() const
Definition: heavydbTypes.h:641

+ Here is the call graph for this function:

EXTENSION_NOINLINE_HOST int32_t ct_substr__cpu_ ( TableFunctionManager &  mgr,
const Column< TextEncodingDict > &  input_str,
const Column< int > &  pos,
const Column< int > &  len,
Column< TextEncodingDict > &  output_substr 
)

Definition at line 68 of file StringTestTableFunctions.cpp.

References Column< TextEncodingDict >::getOrAddTransient(), Column< TextEncodingDict >::getString(), TableFunctionManager::set_output_row_size(), Column< TextEncodingDict >::size(), and substring().

// Body of ct_substr__cpu_: for every row, takes the substring of the input string
// selected by that row's `pos` and `len` values, obtains an entry for it via
// getOrAddTransient on the output column, and stores it in that row. Returns the
// number of input rows.
72  {
73  const int64_t num_rows = input_str.size();
74  mgr.set_output_row_size(num_rows);
75  for (int64_t row_idx = 0; row_idx < num_rows; row_idx++) {
76  const std::string input_string{input_str.getString(row_idx)};
// NOTE(review): pos/len are passed to std::string::substr unvalidated; substr
// throws std::out_of_range when pos exceeds the string size (a negative pos
// converts to a huge unsigned value) — confirm inputs are constrained upstream.
77  const std::string substring = input_string.substr(pos[row_idx], len[row_idx]);
78  const TextEncodingDict substr_id = output_substr.getOrAddTransient(substring);
79  output_substr[row_idx] = substr_id;
80  }
81  return num_rows;
82 }
DEVICE const std::string getString(int64_t index) const
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
DEVICE int64_t size() const
DEVICE const TextEncodingDict getOrAddTransient(const std::string &str)
std::pair< std::string_view, const char * > substring(const std::string &str, size_t substr_length)
return substring of str with postfix if str.size() > substr_length

+ Here is the call graph for this function:

EXTENSION_NOINLINE_HOST int32_t ct_synthesize_new_dict__cpu_ ( TableFunctionManager &  mgr,
const int64_t  num_strings,
Column< TextEncodingDict > &  new_dict_col 
)

Definition at line 111 of file StringTestTableFunctions.cpp.

References Column< TextEncodingDict >::getOrAddTransient(), TableFunctionManager::set_output_row_size(), and to_string().

// Body of ct_synthesize_new_dict__cpu_: fills the output column with num_strings
// generated strings "String_0", "String_1", ..., obtaining each entry via
// getOrAddTransient on the output column. Returns num_strings.
113  {
114  mgr.set_output_row_size(num_strings);
// NOTE(review): the counter is int32_t while num_strings is int64_t; a request
// above INT32_MAX would overflow the counter — confirm the expected range.
115  for (int32_t s = 0; s < num_strings; ++s) {
116  const std::string new_string = "String_" + std::to_string(s);
117  const int32_t string_id = new_dict_col.getOrAddTransient(new_string);
118  new_dict_col[s] = string_id;
119  }
120  return num_strings;
121 }
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
std::string to_string(char const *&&v)
DEVICE const TextEncodingDict getOrAddTransient(const std::string &str)

+ Here is the call graph for this function: