OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionsText.hpp File Reference
#include <cstring>
#include "Shared/toString.h"
#include "heavydbTypes.h"
+ Include dependency graph for ExtensionFunctionsText.hpp:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

std::vector< std::string > __strtok_to_array (const std::string &text, const std::string &delimiters)
 
EXTENSION_NOINLINE Array
< TextEncodingDict
strtok_to_array (RowFunctionManager &mgr, TextEncodingNone &text, TextEncodingNone &delimiters)
 
EXTENSION_NOINLINE Array
< TextEncodingDict
strtok_to_array__1 (RowFunctionManager &mgr, TextEncodingDict text, TextEncodingNone &delimiters)
 

Function Documentation

std::vector<std::string> __strtok_to_array ( const std::string &  text,
const std::string &  delimiters 
)

Definition at line 29 of file ExtensionFunctionsText.hpp.

Referenced by strtok_to_array(), and strtok_to_array__1().

/**
 * @brief Splits @p text into the non-empty tokens separated by any character
 *        contained in @p delimiters.
 *
 * Each character of @p delimiters is an independent single-character separator.
 * Runs of consecutive separators are collapsed, and leading/trailing separators
 * are skipped, so no empty token is ever produced.
 *
 * Unlike the C `strtok` routine, this implementation never writes to @p text
 * and keeps no hidden static state, so it does not depend on any state shared
 * between calls. Embedded NUL bytes are treated as ordinary characters.
 *
 * @param text       String to tokenize. Left unmodified.
 * @param delimiters Set of separator characters. If empty, a non-empty
 *                   @p text is returned as a single token.
 * @return Tokens in order of appearance; empty if @p text is empty or consists
 *         solely of separator characters.
 */
std::vector<std::string> __strtok_to_array(const std::string& text,
                                           const std::string& delimiters) {
  std::vector<std::string> tokens;

  // Position of the first character that is not a separator, i.e. the start
  // of the first token (npos when there is none).
  std::string::size_type token_begin = text.find_first_not_of(delimiters);
  while (token_begin != std::string::npos) {
    // First separator after the token start; npos means the token runs to
    // the end of the input.
    const std::string::size_type token_end = text.find_first_of(delimiters, token_begin);
    if (token_end == std::string::npos) {
      tokens.emplace_back(text, token_begin, std::string::npos);
      break;
    }
    tokens.emplace_back(text, token_begin, token_end - token_begin);
    // Skip the whole run of separators that follows the token.
    token_begin = text.find_first_not_of(delimiters, token_end);
  }

  return tokens;
}

+ Here is the caller graph for this function:

EXTENSION_NOINLINE Array<TextEncodingDict> strtok_to_array ( RowFunctionManager mgr,
TextEncodingNone text,
TextEncodingNone delimiters 
)

Definition at line 47 of file ExtensionFunctionsText.hpp.

References __strtok_to_array(), RowFunctionManager::getOrAddTransient(), TextEncodingNone::getString(), TextEncodingNone::isNull(), TRANSIENT_DICT_DB_ID, and TRANSIENT_DICT_ID.

49  {
50  /*
51  Rules
52  -----
53  * If either parameters is NULL => a NULL is returned
54  * An empty array is returned if tokenization produces no tokens
55 
56  Note
57  ----
58  <delimiters> argument is optional on snowflake but HeavyDB dont' support
59  default values on UDFs at the moment. See:
60  https://github.com/heavyai/heavydb-internal/pull/6651
61 
62  Examples
63  --------
64  > select strtok_to_array('a.b.c', '.');
65  {a, b, c}
66 
67  > select strtok_to_array('user@gmail.com', '.@')
68  {user, gmail, com}
69 
70  > select strtok_to_array('', '.')
71  NULL
72 
73  > select strtok_to_array('a.b.c', '')
74  NULL
75  */
76 
77  if (text.isNull() || delimiters.isNull()) {
78  return Array<TextEncodingDict>(0, true);
79  }
80 
81  const auto& vec = __strtok_to_array(text.getString(), delimiters.getString());
82  Array<TextEncodingDict> out_arr(vec.size());
83  for (size_t i = 0; i < vec.size(); ++i) {
84  out_arr[i] = mgr.getOrAddTransient(TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID, vec[i]);
85  }
86  return out_arr;
87 }
std::string getString() const
Definition: heavydbTypes.h:641
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
int32_t getOrAddTransient(int32_t db_id, int32_t dict_id, std::string str)
Definition: heavydbTypes.h:314
DEVICE ALWAYS_INLINE bool isNull() const
Definition: heavydbTypes.h:691
std::vector< std::string > __strtok_to_array(const std::string &text, const std::string &delimiters)

+ Here is the call graph for this function:

EXTENSION_NOINLINE Array<TextEncodingDict> strtok_to_array__1 ( RowFunctionManager mgr,
TextEncodingDict  text,
TextEncodingNone delimiters 
)

Definition at line 90 of file ExtensionFunctionsText.hpp.

References __strtok_to_array(), GET_DICT_DB_ID, GET_DICT_ID, RowFunctionManager::getOrAddTransient(), RowFunctionManager::getString(), TextEncodingNone::getString(), TextEncodingDict::isNull(), TextEncodingNone::isNull(), TRANSIENT_DICT_DB_ID, and TRANSIENT_DICT_ID.

92  {
93  if (text.isNull() || delimiters.isNull()) {
94  return Array<TextEncodingDict>(0, true);
95  }
96 
97  std::string str = mgr.getString(GET_DICT_DB_ID(mgr, 0), GET_DICT_ID(mgr, 0), text);
98  const auto& vec = __strtok_to_array(str, delimiters.getString());
99  Array<TextEncodingDict> out_arr(vec.size());
100  for (size_t i = 0; i < vec.size(); ++i) {
101  out_arr[i] = mgr.getOrAddTransient(TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID, vec[i]);
102  }
103  return out_arr;
104 }
std::string getString() const
Definition: heavydbTypes.h:641
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
#define GET_DICT_ID(mgr, arg_idx)
Definition: heavydbTypes.h:141
DEVICE ALWAYS_INLINE bool isNull() const
Definition: heavydbTypes.h:232
#define GET_DICT_DB_ID(mgr, arg_idx)
Definition: heavydbTypes.h:139
int32_t getOrAddTransient(int32_t db_id, int32_t dict_id, std::string str)
Definition: heavydbTypes.h:314
DEVICE ALWAYS_INLINE bool isNull() const
Definition: heavydbTypes.h:691
std::string getString(int32_t db_id, int32_t dict_id, int32_t string_id)
Definition: heavydbTypes.h:299
std::vector< std::string > __strtok_to_array(const std::string &text, const std::string &delimiters)

+ Here is the call graph for this function: