OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExpressionParser.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file ExpressionParser.cpp
19  * @brief General Expression Parser using muparserx
20  *
21  */
22 
24 
25 #include <regex>
26 
27 #if defined(_MSC_VER)
28 #include <codecvt>
29 #include <locale>
30 #endif
31 
32 #include <boost/algorithm/string.hpp>
33 #include <boost/filesystem.hpp>
34 
35 #include <muparserx/mpParser.h>
36 
37 #include "Logger/Logger.h"
38 #include "Shared/StringTransform.h"
39 
40 namespace import_export {
41 
42 namespace {
43 
44 std::string ms_to_ss(const mup::string_type& s) {
45 #if defined(_MSC_VER)
46  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
47  return converter.to_bytes(s);
48 #else
49  return s;
50 #endif
51 }
52 
53 mup::string_type ss_to_ms(const std::string& s) {
54 #if defined(_MSC_VER)
55  std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
56  return converter.from_bytes(s);
57 #else
58  return s;
59 #endif
60 }
61 
// Throw a mup::ParserError with code ecINVALID_TYPE unless args[arg] has the
// type code t2. The error records the actual type (Type1), the expected type
// (Type2) and the identifier returned by GetIdent().
//
// Must be expanded where `args` and GetIdent() are in scope (i.e. inside an
// Eval() override of a muparserx callback). The do/while(0) wrapper makes the
// expansion a single statement, so it is safe in unbraced if/else bodies.
#define VALIDATE_ARG_TYPE(arg, t2)          \
  do {                                      \
    if (args[(arg)]->GetType() != (t2)) {   \
      mup::ErrorContext err;                \
      err.Errc = mup::ecINVALID_TYPE;       \
      err.Type1 = args[(arg)]->GetType();   \
      err.Type2 = (t2);                     \
      err.Ident = GetIdent();               \
      throw mup::ParserError(err);          \
    }                                       \
  } while (0)
71 
// Unconditionally throw a mup::ParserError with code ecINVALID_PARAMETER for
// argument number `arg`. The identifier reported in the error is
// GetIdent() followed by " (<what>)", where `what` is a narrow string.
//
// Must be expanded where GetIdent() is in scope. The do/while(0) wrapper keeps
// the local `err` out of the caller's scope and makes the expansion a single
// statement.
#define THROW_INVALID_PARAMETER(arg, what)                                        \
  do {                                                                            \
    mup::ErrorContext err;                                                        \
    err.Errc = mup::ecINVALID_PARAMETER;                                          \
    err.Arg = (arg);                                                              \
    err.Ident = GetIdent() + ss_to_ms(" (") + ss_to_ms(what) + ss_to_ms(")");     \
    throw mup::ParserError(err);                                                  \
  } while (0)
78 
// Unconditionally throw a mup::ParserError with code
// ecINVALID_NUMBER_OF_PARAMETERS, tagged with the identifier from GetIdent().
//
// Must be expanded where GetIdent() is in scope. The do/while(0) wrapper keeps
// the local `err` out of the caller's scope and makes the expansion a single
// statement.
#define THROW_INVALID_PARAMETER_COUNT()                \
  do {                                                 \
    mup::ErrorContext err;                             \
    err.Errc = mup::ecINVALID_NUMBER_OF_PARAMETERS;    \
    err.Ident = GetIdent();                            \
    throw mup::ParserError(err);                       \
  } while (0)
84 
85 class Function_substr : public mup::ICallback {
86  public:
87  Function_substr() : mup::ICallback(mup::cmFUNC, _T("substr"), -1) {}
88  const mup::char_type* GetDesc() const final {
89  return _T("return a substring of a string");
90  };
91  mup::IToken* Clone() const final { return new Function_substr(*this); };
92  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
93  if (argc < 2 || argc > 3) {
95  }
96  VALIDATE_ARG_TYPE(0, 's');
97  VALIDATE_ARG_TYPE(1, 'i');
98  if (argc == 3) {
99  VALIDATE_ARG_TYPE(2, 'i');
100  }
101  auto const text = args[0]->GetString();
102  auto const start = args[1]->GetInteger();
103  if (start < 1) {
104  THROW_INVALID_PARAMETER(1, "bad 'start'");
105  }
106  if (argc == 2) {
107  if (start > static_cast<int>(text.length())) {
108  THROW_INVALID_PARAMETER(1, "bad 'start'");
109  }
110  *ret = text.substr(start - 1, std::string::npos);
111  } else {
112  auto const count = args[2]->GetInteger();
113  if (count < 1) {
114  THROW_INVALID_PARAMETER(2, "bad 'count'");
115  } else if ((start - 1) + count > static_cast<int>(text.length())) {
116  THROW_INVALID_PARAMETER(2, "bad 'start'/'count'");
117  }
118  *ret = text.substr(start - 1, count);
119  }
120  }
121 };
122 
123 class Function_regex_match : public mup::ICallback {
124  public:
125  Function_regex_match() : mup::ICallback(mup::cmFUNC, _T("regex_match"), 2) {}
126  const mup::char_type* GetDesc() const final {
127  return _T("return a regex-matched section of a string");
128  };
129  mup::IToken* Clone() const final { return new Function_regex_match(*this); };
130  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
131  CHECK_EQ(argc, 2);
132  VALIDATE_ARG_TYPE(0, 's');
133  VALIDATE_ARG_TYPE(1, 's');
134  auto const text = ms_to_ss(args[0]->GetString());
135  auto const pattern = ms_to_ss(args[1]->GetString());
136  try {
137  std::regex regex(pattern, std::regex_constants::extended);
138  std::smatch match;
139  std::regex_match(text, match, regex);
140  if (match.size() != 2u) {
141  throw std::runtime_error("must have exactly one match");
142  }
143  *ret = ss_to_ms(match[1]);
144  } catch (std::runtime_error& e) {
145  THROW_INVALID_PARAMETER(2, e.what());
146  }
147  }
148 };
149 
150 class Function_split_part : public mup::ICallback {
151  public:
152  Function_split_part() : mup::ICallback(mup::cmFUNC, _T("split_part"), 3) {}
153  const mup::char_type* GetDesc() const final {
154  return _T("split a string by a given separator, then return the nth token");
155  };
156  mup::IToken* Clone() const final { return new Function_split_part(*this); };
157  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
158  CHECK_EQ(argc, 3);
159  VALIDATE_ARG_TYPE(0, 's');
160  VALIDATE_ARG_TYPE(1, 's');
161  VALIDATE_ARG_TYPE(2, 'i');
162  auto const text = ms_to_ss(args[0]->GetString());
163  auto const delimiter = ms_to_ss(args[1]->GetString());
164  auto n = args[2]->GetInteger();
165  try {
166  std::vector<std::string> tokens;
167  // split on exact delimiter (cannot use boost::split)
168  size_t start{0u}, end{0u};
169  while (end != std::string::npos) {
170  end = text.find(delimiter, start);
171  tokens.push_back(text.substr(start, end - start));
172  start = end + delimiter.length();
173  }
174  if (tokens.size() == 0u) {
175  throw std::runtime_error("failed to split");
176  }
177  int index{0};
178  if (n < 0) {
179  // reverse index (-1 = last token)
180  index = static_cast<int>(tokens.size()) + n;
181  } else {
182  // forward index (1 = first token)
183  index = n - 1;
184  }
185  if (index < 0 || index >= static_cast<int>(tokens.size())) {
186  throw std::runtime_error("bad token index");
187  }
188  *ret = ss_to_ms(tokens[index]);
189  } catch (std::runtime_error& e) {
190  THROW_INVALID_PARAMETER(1, e.what());
191  }
192  }
193 };
194 
195 class Function_int : public mup::ICallback {
196  public:
197  Function_int() : mup::ICallback(mup::cmFUNC, _T("int"), 1) {}
198  const mup::char_type* GetDesc() const final { return _T("cast a value to an int"); };
199  mup::IToken* Clone() const final { return new Function_int(*this); };
200  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
201  CHECK_EQ(argc, 1);
202  switch (args[0]->GetType()) {
203  case 'i':
204  *ret = args[0]->GetInteger();
205  break;
206  case 'f':
207  *ret = static_cast<mup::int_type>(args[0]->GetFloat());
208  break;
209  case 's':
210  *ret = static_cast<mup::int_type>(std::stoll(ms_to_ss(args[0]->GetString())));
211  break;
212  case 'b':
213  *ret = args[0]->GetBool() ? static_cast<mup::int_type>(1)
214  : static_cast<mup::int_type>(0);
215  break;
216  default: {
217  THROW_INVALID_PARAMETER(0, "unsupported type");
218  }
219  }
220  }
221 };
222 
223 class Function_float : public mup::ICallback {
224  public:
225  Function_float() : mup::ICallback(mup::cmFUNC, _T("float"), 1) {}
226  const mup::char_type* GetDesc() const final { return _T("cast a value to a float"); };
227  mup::IToken* Clone() const final { return new Function_float(*this); };
228  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
229  CHECK_EQ(argc, 1);
230  switch (args[0]->GetType()) {
231  case 'i':
232  *ret = static_cast<mup::float_type>(args[0]->GetInteger());
233  break;
234  case 'f':
235  *ret = args[0]->GetFloat();
236  break;
237  case 's':
238  *ret = static_cast<mup::float_type>(std::stod(ms_to_ss(args[0]->GetString())));
239  break;
240  default: {
241  THROW_INVALID_PARAMETER(0, "unsupported type");
242  }
243  }
244  }
245 };
246 
247 class Function_double : public mup::ICallback {
248  public:
249  Function_double() : mup::ICallback(mup::cmFUNC, _T("double"), 1) {}
250  const mup::char_type* GetDesc() const final { return _T("cast a value to a double"); };
251  mup::IToken* Clone() const final { return new Function_double(*this); };
252  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
253  CHECK_EQ(argc, 1);
254  switch (args[0]->GetType()) {
255  case 'i':
256  *ret = static_cast<mup::float_type>(args[0]->GetInteger());
257  break;
258  case 'f':
259  *ret = args[0]->GetFloat();
260  break;
261  case 's':
262  *ret = static_cast<mup::float_type>(std::stod(ms_to_ss(args[0]->GetString())));
263  break;
264  default: {
265  THROW_INVALID_PARAMETER(0, "unsupported type");
266  }
267  }
268  }
269 };
270 
271 class Function_str : public mup::ICallback {
272  public:
273  Function_str() : mup::ICallback(mup::cmFUNC, _T("str"), 1) {}
274  const mup::char_type* GetDesc() const final { return _T("cast a value to a string"); };
275  mup::IToken* Clone() const final { return new Function_str(*this); };
276  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
277  CHECK_EQ(argc, 1);
278  switch (args[0]->GetType()) {
279  case 'i':
280  case 'f':
281  *ret = args[0]->ToString();
282  break;
283  case 's':
284  *ret = args[0]->GetString();
285  break;
286  case 'b':
287  *ret = args[0]->GetBool() ? ss_to_ms("true") : ss_to_ms("false");
288  break;
289  default: {
290  THROW_INVALID_PARAMETER(0, "unsupported type");
291  }
292  }
293  }
294 };
295 
296 class Function_bool : public mup::ICallback {
297  public:
298  Function_bool() : mup::ICallback(mup::cmFUNC, _T("bool"), 1) {}
299  const mup::char_type* GetDesc() const final { return _T("cast a value to a boolean"); };
300  mup::IToken* Clone() const final { return new Function_bool(*this); };
301  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
302  CHECK_EQ(argc, 1);
303  switch (args[0]->GetType()) {
304  case 'i':
305  *ret = args[0]->GetInteger() != 0;
306  break;
307  case 's': {
308  auto const s = strip(to_lower(ms_to_ss(args[0]->GetString())));
309  if (s == "true" || s == "t" || s == "1") {
310  *ret = true;
311  } else if (s == "false" || s == "f" || s == "0") {
312  *ret = false;
313  } else {
314  THROW_INVALID_PARAMETER(0, s.c_str());
315  }
316  } break;
317  case 'b':
318  *ret = args[0]->GetBool();
319  break;
320  default: {
321  THROW_INVALID_PARAMETER(0, "unsupported type");
322  }
323  }
324  }
325 };
326 
327 class Operator_not : public mup::IOprtInfix {
328  public:
329  Operator_not() : mup::IOprtInfix(_T("not"), mup::prINFIX) {}
330  const mup::char_type* GetDesc() const final { return _T("bool inversion operator"); }
331  mup::IToken* Clone() const final { return new Operator_not(*this); }
332  void Eval(mup::ptr_val_type& ret, const mup::ptr_val_type* args, int argc) final {
333  CHECK_EQ(argc, 1);
334  VALIDATE_ARG_TYPE(0, 'b');
335  *ret = !(args[0]->GetBool());
336  }
337 };
338 
339 mup::Value evaluate(mup::ParserX* parser) {
340  mup::Value result;
341  try {
342  result = parser->Eval();
343  } catch (mup::ParserError& err) {
344  throw std::runtime_error("Parser Error: " + ms_to_ss(err.GetMsg()));
345  } catch (std::exception& err) {
346  throw std::runtime_error("Unexpected muparserx Error: " + std::string(err.what()));
347  }
348  return result;
349 }
350 
351 } // namespace
352 
354  delete parser;
355 }
356 
358  : parser_{new mup::ParserX(mup::pckCOMMON | mup::pckUNIT | mup::pckNON_COMPLEX |
359  mup::pckSTRING)} {
360  // custom operators and functions
361  parser_->DefineFun(new Function_substr());
362  parser_->DefineFun(new Function_regex_match());
363  parser_->DefineFun(new Function_split_part());
364  parser_->DefineFun(new Function_int());
365  parser_->DefineFun(new Function_float());
366  parser_->DefineFun(new Function_double());
367  parser_->DefineFun(new Function_str());
368  parser_->DefineFun(new Function_bool());
369  parser_->DefineInfixOprt(new Operator_not());
370 }
371 
373  const std::string& value) {
374  parser_->DefineConst(ss_to_ms(name), ss_to_ms(value));
375 }
376 
377 void ExpressionParser::setIntConstant(const std::string& name, const int value) {
378  parser_->DefineConst(ss_to_ms(name), static_cast<mup::int_type>(value));
379 }
380 
381 void ExpressionParser::setExpression(const std::string& expression) {
382  parser_->SetExpr(ss_to_ms(expression));
383 }
384 
386  auto result = evaluate(parser_.get());
387  if (result.GetType() != 's') {
388  throw std::runtime_error("Expression is not a string");
389  }
390  return ms_to_ss(result.GetString());
391 }
392 
394  auto result = evaluate(parser_.get());
395  if (result.GetType() != 'i') {
396  throw std::runtime_error("Expression is not an int");
397  }
398  return static_cast<int>(result.GetInteger());
399 }
400 
402  auto result = evaluate(parser_.get());
403  if (result.GetType() != 'f') {
404  throw std::runtime_error("Expression is not a float/double");
405  }
406  return static_cast<double>(result.GetFloat());
407 }
408 
410  auto result = evaluate(parser_.get());
411  if (result.GetType() != 'b') {
412  throw std::runtime_error("Expression is not a boolean");
413  }
414  return result.GetBool();
415 }
416 
417 } // namespace import_export
std::string to_lower(const std::string &str)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
#define VALIDATE_ARG_TYPE(arg, t2)
#define THROW_INVALID_PARAMETER_COUNT()
void setIntConstant(const std::string &name, const int value)
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final
std::unique_ptr< mup::ParserX, ParserDeleter > parser_
void setStringConstant(const std::string &name, const std::string &value)
#define THROW_INVALID_PARAMETER(arg, what)
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
void setExpression(const std::string &expression)
void Eval(mup::ptr_val_type &ret, const mup::ptr_val_type *args, int argc) final