OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CalciteAdapter.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CalciteAdapter.h"
18 
19 #include <boost/algorithm/string/predicate.hpp>
20 
21 #include "Logger/Logger.h"
22 #include "Shared/StringTransform.h"
24 
25 std::string pg_shim(std::string const& query) {
26  std::string result = query;
27  try {
28  static const auto& unnest_expr = *new boost::regex(
29  R"((\s+|,)(unnest)\s*\()", boost::regex::extended | boost::regex::icase);
30  static_assert(std::is_trivially_destructible_v<decltype(unnest_expr)>);
31  apply_shim(result, unnest_expr, [](std::string& result, const boost::smatch& what) {
32  result.replace(what.position(), what.length(), what[1] + "PG_UNNEST(");
33  });
34  } catch (const std::exception& e) {
35  // boost::regex throws an exception about the complexity of matching when
36  // the wrong type of quotes are used or they're mismatched. Let the query
37  // through unmodified
38  // this can be applied for all catch statements defined below
39  LOG(WARNING) << "Detect error while parsing PG_UNNEST: " << e.what();
40  return query;
41  }
42 
43  try {
44  static const auto& cast_true_expr =
45  *new boost::regex(R"(CAST\s*\(\s*'t'\s+AS\s+boolean\s*\))",
46  boost::regex::extended | boost::regex::icase);
47  static_assert(std::is_trivially_destructible_v<decltype(cast_true_expr)>);
48  apply_shim(
49  result, cast_true_expr, [](std::string& result, const boost::smatch& what) {
50  result.replace(what.position(), what.length(), "true");
51  });
52  } catch (const std::exception& e) {
53  LOG(WARNING) << "Detect error while parsing CAST AS BOOLEAN(TRUE): " << e.what();
54  return query;
55  }
56 
57  try {
58  static const auto& cast_false_expr =
59  *new boost::regex(R"(CAST\s*\(\s*'f'\s+AS\s+boolean\s*\))",
60  boost::regex::extended | boost::regex::icase);
61  static_assert(std::is_trivially_destructible_v<decltype(cast_false_expr)>);
62  apply_shim(
63  result, cast_false_expr, [](std::string& result, const boost::smatch& what) {
64  result.replace(what.position(), what.length(), "false");
65  });
66  } catch (const std::exception& e) {
67  LOG(WARNING) << "Detect error while parsing CAST AS BOOLEAN(FALSE): " << e.what();
68  return query;
69  }
70 
71  try {
72  static const auto& ilike_expr = *new boost::regex(
73  R"((\s+|\()((?!\()[^\s]+)\s+(not\s)?\s*ilike\s+('(?:[^']+|'')+')(\s+escape(\s+('[^']+')))?)",
74  boost::regex::perl | boost::regex::icase);
75  static_assert(std::is_trivially_destructible_v<decltype(ilike_expr)>);
76  apply_shim(result, ilike_expr, [](std::string& result, const boost::smatch& what) {
77  std::string esc = what[6];
78  result.replace(what.position(),
79  what.length(),
80  what[1] + what[3] + "PG_ILIKE(" + what[2] + ", " + what[4] +
81  (esc.empty() ? "" : ", " + esc) + ")");
82  });
83  } catch (const std::exception& e) {
84  LOG(WARNING) << "Detect error while parsing PG_ILIKE: " << e.what();
85  return query;
86  }
87 
88  try {
89  static const auto& regexp_expr = *new boost::regex(
90  R"((\s+)([^\s]+)\s+REGEXP\s+('(?:[^']+|'')+')(\s+escape(\s+('[^']+')))?)",
91  boost::regex::perl | boost::regex::icase);
92  static_assert(std::is_trivially_destructible_v<decltype(regexp_expr)>);
93  apply_shim(result, regexp_expr, [](std::string& result, const boost::smatch& what) {
94  std::string esc = what[6];
95  result.replace(what.position(),
96  what.length(),
97  what[1] + "REGEXP_LIKE(" + what[2] + ", " + what[3] +
98  (esc.empty() ? "" : ", " + esc) + ")");
99  });
100  } catch (const std::exception& e) {
101  LOG(WARNING) << "Detect error while parsing REGEXP_LIKE: " << e.what();
102  return query;
103  }
104 
105  try {
106  // Comparison operator needed to distinguish from other uses of ALL (e.g. UNION ALL)
107  static const auto& quant_expr =
108  *new boost::regex(R"(([<=>]\s*)(any|all)\s+([^(\s|;)]+))",
109  boost::regex::extended | boost::regex::icase);
110  static_assert(std::is_trivially_destructible_v<decltype(quant_expr)>);
111  apply_shim(result, quant_expr, [](std::string& result, const boost::smatch& what) {
112  auto const quant_fname = boost::iequals(what[2], "any") ? "PG_ANY(" : "PG_ALL(";
113  result.replace(
114  what.position(), what.length(), what[1] + quant_fname + what[3] + ')');
115  });
116  } catch (const std::exception& e) {
117  LOG(WARNING) << "Detect error while parsing PG_ANY|PG_ALL: " << e.what();
118  return query;
119  }
120 
121  try {
122  static const auto& immediate_cast_expr =
123  *new boost::regex(R"(TIMESTAMP\(([0369])\)\s+('[^']+'))",
124  boost::regex::extended | boost::regex::icase);
125  static_assert(std::is_trivially_destructible_v<decltype(immediate_cast_expr)>);
126  apply_shim(
127  result, immediate_cast_expr, [](std::string& result, const boost::smatch& what) {
128  result.replace(what.position(),
129  what.length(),
130  "CAST(" + what[2] + " AS TIMESTAMP(" + what[1] + "))");
131  });
132  } catch (const std::exception& e) {
133  LOG(WARNING) << "Detect error while parsing CAST AS TIMESTAMP: " << e.what();
134  return query;
135  }
136 
137  try {
138  static const auto& timestampadd_expr =
139  *new boost::regex(R"(DATE(ADD|DIFF|PART|_TRUNC)\s*\(\s*(\w+)\s*,)",
140  boost::regex::extended | boost::regex::icase);
141  static_assert(std::is_trivially_destructible_v<decltype(timestampadd_expr)>);
142  apply_shim(
143  result, timestampadd_expr, [](std::string& result, const boost::smatch& what) {
144  result.replace(
145  what.position(), what.length(), "DATE" + what[1] + "('" + what[2] + "',");
146  });
147  } catch (const std::exception& e) {
148  LOG(WARNING) << "Detect error while parsing DATE(ADD|DIFF|PART|_TRUNC): " << e.what();
149  return query;
150  }
151 
152  try {
153  static const auto& pg_extract_expr = *new boost::regex(
154  R"(PG_EXTRACT\s*\(\s*(\w+)\s*,)", boost::regex::extended | boost::regex::icase);
155  static_assert(std::is_trivially_destructible_v<decltype(pg_extract_expr)>);
156  apply_shim(
157  result, pg_extract_expr, [](std::string& result, const boost::smatch& what) {
158  result.replace(what.position(), what.length(), "PG_EXTRACT('" + what[1] + "',");
159  });
160 
161  static const auto& extract_expr_quoted =
162  *new boost::regex(R"(extract\s*\(\s*'(\w+)'\s+from\s+(.+)\))",
163  boost::regex::extended | boost::regex::icase);
164  static_assert(std::is_trivially_destructible_v<decltype(extract_expr_quoted)>);
165  apply_shim(
166  result, extract_expr_quoted, [](std::string& result, const boost::smatch& what) {
167  result.replace(what.position(),
168  what.length(),
169  "PG_EXTRACT('" + what[1] + "', " + what[2] + ")");
170  });
171 
172  static const auto& extract_expr =
173  *new boost::regex(R"(extract\s*\(\s*(\w+)\s+from\s+(.+)\))",
174  boost::regex::extended | boost::regex::icase);
175  static_assert(std::is_trivially_destructible_v<decltype(extract_expr)>);
176  apply_shim(result, extract_expr, [](std::string& result, const boost::smatch& what) {
177  result.replace(what.position(),
178  what.length(),
179  "PG_EXTRACT('" + what[1] + "', " + what[2] + ")");
180  });
181  } catch (const std::exception& e) {
182  LOG(WARNING) << "Detect error while parsing PG_EXTRACT: " << e.what();
183  return query;
184  }
185 
186  try {
187  static const auto& date_trunc_expr = *new boost::regex(
188  R"(([^_])date_trunc\s*)", boost::regex::extended | boost::regex::icase);
189  static_assert(std::is_trivially_destructible_v<decltype(date_trunc_expr)>);
190  apply_shim(
191  result, date_trunc_expr, [](std::string& result, const boost::smatch& what) {
192  result.replace(what.position(), what.length(), what[1] + "PG_DATE_TRUNC");
193  });
194  } catch (const std::exception& e) {
195  LOG(WARNING) << "Detect error while parsing PG_DATE_TRUNC: " << e.what();
196  return query;
197  }
198  try {
199  static const auto& timestampadd_expr_quoted =
200  *new boost::regex(R"(TIMESTAMP(ADD|DIFF)\s*\(\s*'(\w+)'\s*,)",
201  boost::regex::extended | boost::regex::icase);
202  static_assert(std::is_trivially_destructible_v<decltype(timestampadd_expr_quoted)>);
203  apply_shim(result,
204  timestampadd_expr_quoted,
205  [](std::string& result, const boost::smatch& what) {
206  result.replace(what.position(),
207  what.length(),
208  "DATE" + what[1] + "('" + what[2] + "',");
209  });
210  static const auto& timestampadd_expr =
211  *new boost::regex(R"(TIMESTAMP(ADD|DIFF)\s*\(\s*(\w+)\s*,)",
212  boost::regex::extended | boost::regex::icase);
213  static_assert(std::is_trivially_destructible_v<decltype(timestampadd_expr)>);
214  apply_shim(
215  result, timestampadd_expr, [](std::string& result, const boost::smatch& what) {
216  result.replace(
217  what.position(), what.length(), "DATE" + what[1] + "('" + what[2] + "',");
218  });
219  } catch (const std::exception& e) {
220  LOG(WARNING) << "Detect error while parsing TIMESTAMP(ADD|DIFF): " << e.what();
221  return query;
222  }
223  try {
224  static const auto& us_timestamp_cast_expr =
225  *new boost::regex(R"(CAST\s*\(\s*('[^']+')\s*AS\s*TIMESTAMP\(6\)\s*\))",
226  boost::regex::extended | boost::regex::icase);
227  static_assert(std::is_trivially_destructible_v<decltype(us_timestamp_cast_expr)>);
228  apply_shim(result,
229  us_timestamp_cast_expr,
230  [](std::string& result, const boost::smatch& what) {
231  result.replace(
232  what.position(), what.length(), "usTIMESTAMP(" + what[1] + ")");
233  });
234  } catch (const std::exception& e) {
235  LOG(WARNING) << "Detect error while parsing usTIMESTAMP: " << e.what();
236  return query;
237  }
238  try {
239  static const auto& ns_timestamp_cast_expr =
240  *new boost::regex(R"(CAST\s*\(\s*('[^']+')\s*AS\s*TIMESTAMP\(9\)\s*\))",
241  boost::regex::extended | boost::regex::icase);
242  static_assert(std::is_trivially_destructible_v<decltype(ns_timestamp_cast_expr)>);
243  apply_shim(result,
244  ns_timestamp_cast_expr,
245  [](std::string& result, const boost::smatch& what) {
246  result.replace(
247  what.position(), what.length(), "nsTIMESTAMP(" + what[1] + ")");
248  });
249  } catch (const std::exception& e) {
250  LOG(WARNING) << "Detect error while parsing nsTIMESTAMP: " << e.what();
251  return query;
252  }
253  try {
254  static const auto& corr_expr = *new boost::regex(
255  R"((\s+|,|\()(corr)\s*\()", boost::regex::extended | boost::regex::icase);
256  static_assert(std::is_trivially_destructible_v<decltype(corr_expr)>);
257  apply_shim(result, corr_expr, [](std::string& result, const boost::smatch& what) {
258  result.replace(what.position(), what.length(), what[1] + "CORRELATION(");
259  });
260  } catch (const std::exception& e) {
261  LOG(WARNING) << "Detect error while parsing CORRELATION: " << e.what();
262  return query;
263  }
264  try {
265  // the geography regex pattern is expensive and can sometimes run out of stack
266  // space on long queries. Treat it separately from the other shims.
267  static const auto& cast_to_geography_expr =
268  *new boost::regex(R"(CAST\s*\(\s*(((?!geography).)+)\s+AS\s+geography\s*\))",
269  boost::regex::perl | boost::regex::icase);
270  static_assert(std::is_trivially_destructible_v<decltype(cast_to_geography_expr)>);
271  apply_shim(result,
272  cast_to_geography_expr,
273  [](std::string& result, const boost::smatch& what) {
274  result.replace(
275  what.position(), what.length(), "CastToGeography(" + what[1] + ")");
276  });
277  } catch (const std::exception& e) {
278  LOG(WARNING) << "Detect error while parsing CastToGeography: " << e.what();
279  return query;
280  }
281  try {
282  static const auto& interval_subsecond_expr =
283  *new boost::regex(R"(interval\s+([0-9]+)\s+(millisecond|microsecond|nanosecond))",
284  boost::regex::extended | boost::regex::icase);
285  static_assert(std::is_trivially_destructible_v<decltype(interval_subsecond_expr)>);
286  apply_shim(
287  result,
288  interval_subsecond_expr,
289  [](std::string& result, const boost::smatch& what) {
290  std::string interval_str = what[1];
291  const std::string time_unit_str = to_lower(to_string(what[2]));
292  static const std::array<std::pair<std::string_view, size_t>, 3> precision_map{
293  std::make_pair("millisecond", 3),
294  std::make_pair("microsecond", 6),
295  std::make_pair("nanosecond", 9)};
296  static_assert(std::is_trivially_destructible_v<decltype(precision_map)>);
297  auto precision_it = std::find_if(
298  precision_map.cbegin(),
299  precision_map.cend(),
300  [&time_unit_str](const std::pair<std::string_view, size_t>& precision) {
301  return time_unit_str.compare(precision.first) == 0;
302  });
303  if (precision_it != precision_map.end()) {
304  std::ostringstream out;
305  const auto interval_time = std::strtod(interval_str.c_str(), nullptr);
306  double const scale = shared::power10(precision_it->second);
307  out << std::fixed << interval_time / scale;
308  interval_str = out.str();
309  result.replace(
310  what.position(), what.length(), "interval " + interval_str + " second");
311  }
312  });
313  } catch (const std::exception& e) {
314  LOG(WARNING) << "Detect error while parsing INTERVAL: " << e.what();
315  return query;
316  }
317  return result;
318 }
std::string to_lower(const std::string &str)
double power10(unsigned const x)
Definition: misc.h:284
#define LOG(tag)
Definition: Logger.h:285
std::string to_string(char const *&&v)
void apply_shim(std::string &result, const boost::regex &reg_expr, const std::function< void(std::string &, const boost::smatch &)> &shim_fn)
std::string pg_shim(std::string const &query)