OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionSignatureParser.java
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.mapd.parser.server;
18 
19 import org.slf4j.Logger;
20 import org.slf4j.LoggerFactory;
21 
22 import java.io.BufferedReader;
23 import java.io.File;
24 import java.io.FileReader;
25 import java.io.IOException;
26 import java.io.StringReader;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 
37  final static Logger HEAVYDBLOGGER =
38  LoggerFactory.getLogger(ExtensionFunctionSignatureParser.class);
39  // Windows DE supports slightly different types sizes.
40  private static String OS = System.getProperty("os.name").toLowerCase();
41  static private boolean isWindows() {
42  return (OS.indexOf("win") >= 0);
43  }
44 
45  static Map<String, ExtensionFunction> parse(final String file_path) throws IOException {
46  File file = new File(file_path);
47  FileReader fileReader = new FileReader(file);
48  BufferedReader bufferedReader = new BufferedReader(fileReader);
49  String line;
50  Pattern s = Pattern.compile("\\| ([\\` ]|used)+ ([\\w]+) '([\\w<>]+) \\((.*)\\)'");
51  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
52  while ((line = bufferedReader.readLine()) != null) {
53  Matcher m = s.matcher(line);
54  if (m.find()) {
55  final String name = m.group(2);
56  final String ret = m.group(3);
57  final String cs_param_list = m.group(4);
58  sigs.put(name, toSignature(ret, cs_param_list, false));
59  }
60  }
61  return sigs;
62  }
63 
64  static Map<String, ExtensionFunction> parseUdfAst(final String file_path)
65  throws IOException {
66  File file = new File(file_path);
67  FileReader fileReader = new FileReader(file);
68  BufferedReader bufferedReader = new BufferedReader(fileReader);
69  String line;
70  Pattern s = Pattern.compile("([<>:\\w]+) ([:\\w]+)(?:\\(\\))?\\((.*)\\)");
71  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
72  while ((line = bufferedReader.readLine()) != null) {
73  Matcher m = s.matcher(line);
74  if (m.find()) {
75  final String name = m.group(2);
76  final String ret = m.group(1);
77  final String cs_param_list = m.group(3);
78  if (cs_param_list.isEmpty()) {
79  continue;
80  }
81  sigs.put(name, toSignature(ret, cs_param_list, true));
82  }
83  }
84  return sigs;
85  }
86 
87  static Map<String, ExtensionFunction> parseFromString(final String udf_string)
88  throws IOException {
89  return parseFromString(udf_string, true);
90  }
91 
92  static Map<String, ExtensionFunction> parseFromString(
93  final String udf_string, final boolean is_row_func) throws IOException {
94  StringReader stringReader = new StringReader(udf_string);
95  BufferedReader bufferedReader = new BufferedReader(stringReader);
96  String line;
97  Pattern r = Pattern.compile("([\\w]+)\\s+'([\\w]+)\\s*\\((.*)\\)'");
98  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
99  while ((line = bufferedReader.readLine()) != null) {
100  Matcher m = r.matcher(line);
101  if (m.find()) {
102  final String name = m.group(1);
103  final String ret = m.group(2);
104  final String cs_param_list = m.group(3);
105  sigs.put(name, toSignature(ret, cs_param_list, is_row_func));
106  }
107  }
108  return sigs;
109  }
110  static String signaturesToJson(final Map<String, ExtensionFunction> sigs) {
111  List<String> json_sigs = new ArrayList<String>();
112  if (sigs != null) {
113  for (Map.Entry<String, ExtensionFunction> sig : sigs.entrySet()) {
114  if (sig.getValue().isRowUdf()) {
115  json_sigs.add(sig.getValue().toJson(sig.getKey()));
116  }
117  }
118  }
119  return "[" + join(json_sigs, ",") + "]";
120  }
121 
123  final String ret, final String cs_param_list, final boolean has_variable_name) {
124  return toSignature(ret, cs_param_list, has_variable_name, true);
125  }
126 
127  private static ExtensionFunction toSignature(final String ret,
128  final String cs_param_list,
129  final boolean has_variable_name,
130  final boolean is_row_func) {
131  String[] params = cs_param_list.split(",");
132  List<ExtensionFunction.ExtArgumentType> args =
133  new ArrayList<ExtensionFunction.ExtArgumentType>();
134  String uses_manager = "false";
135  List<Map<String, String>> annotations = new ArrayList<Map<String, String>>();
136  for (final String param : params) {
137  ExtensionFunction.ExtArgumentType arg_type;
138  if (param.contains("RowFunctionManager")) {
139  uses_manager = "true";
140  continue;
141  }
142  if (has_variable_name) {
143  String[] full_param = param.trim().split("\\s+");
144  if (full_param.length > 0) {
145  if (full_param[0].trim().compareTo("const") == 0) {
146  assert full_param.length > 1;
147  arg_type = deserializeType((full_param[1]).trim());
148  } else {
149  arg_type = deserializeType((full_param[0]).trim());
150  }
151  } else {
152  arg_type = deserializeType(full_param[0]);
153  }
154  } else {
155  arg_type = deserializeType(param.trim());
156  }
157  annotations.add(Collections.EMPTY_MAP);
158  if (arg_type != ExtensionFunction.ExtArgumentType.Void) {
159  args.add(arg_type);
160  }
161  }
162  assert is_row_func;
163  annotations.add(Collections.singletonMap("uses_manager", uses_manager));
164  return new ExtensionFunction(args, deserializeType(ret), annotations);
165  }
166  private static ExtensionFunction.ExtArgumentType deserializeType(
167  final String type_name) {
168  final String const_prefix = "const ";
169  final String std_namespace_prefix = "std::";
170 
171  if (type_name.startsWith(const_prefix)) {
172  return deserializeType(type_name.substring(const_prefix.length()));
173  }
174  if (type_name.startsWith(std_namespace_prefix)) {
175  return deserializeType(type_name.substring(std_namespace_prefix.length()));
176  }
177 
178  if (type_name.equals("bool") || type_name.equals("_Bool")) {
179  return ExtensionFunction.ExtArgumentType.Bool;
180  }
181  if (type_name.equals("int8_t") || type_name.equals("char")
182  || type_name.equals("int8")) {
183  return ExtensionFunction.ExtArgumentType.Int8;
184  }
185  if (type_name.equals("int16_t") || type_name.equals("short")
186  || type_name.equals("int16")) {
187  return ExtensionFunction.ExtArgumentType.Int16;
188  }
189  if (type_name.equals("int32_t") || type_name.equals("int")
190  || type_name.equals("int32") || (isWindows() && type_name.equals("long"))) {
191  return ExtensionFunction.ExtArgumentType.Int32;
192  }
193  if (type_name.equals("int64_t") || type_name.equals("size_t")
194  || type_name.equals("int64")) {
195  return ExtensionFunction.ExtArgumentType.Int64;
196  }
197  if ((!isWindows() && type_name.equals("long"))
198  || (isWindows() && type_name.equals("long long"))) {
199  return ExtensionFunction.ExtArgumentType.Int64;
200  }
201  if (type_name.equals("float") || type_name.equals("float32")) {
202  return ExtensionFunction.ExtArgumentType.Float;
203  }
204  if (type_name.equals("double") || type_name.equals("float64")) {
206  }
207  if (type_name.isEmpty() || type_name.equals("void")) {
208  return ExtensionFunction.ExtArgumentType.Void;
209  }
210  if (type_name.endsWith(" *")) {
211  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 2)));
212  }
213  if (type_name.endsWith("*")) {
214  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 1)));
215  }
216  if (type_name.endsWith("&")) {
217  return deserializeType(type_name.substring(0, type_name.length() - 1).trim());
218  }
219  if (type_name.equals("Array<bool>")) {
220  return ExtensionFunction.ExtArgumentType.ArrayBool;
221  }
222  if (type_name.equals("Array<int8_t>") || type_name.equals("Array<char>")) {
223  return ExtensionFunction.ExtArgumentType.ArrayInt8;
224  }
225  if (type_name.equals("Array<int16_t>") || type_name.equals("Array<short>")) {
226  return ExtensionFunction.ExtArgumentType.ArrayInt16;
227  }
228  if (type_name.equals("Array<int32_t>") || type_name.equals("Array<int>")) {
229  return ExtensionFunction.ExtArgumentType.ArrayInt32;
230  }
231  if (type_name.equals("Array<int64_t>") || type_name.equals("Array<size_t>")
232  || type_name.equals("Array<long>")) {
233  return ExtensionFunction.ExtArgumentType.ArrayInt64;
234  }
235  if (type_name.equals("Array<float>")) {
236  return ExtensionFunction.ExtArgumentType.ArrayFloat;
237  }
238  if (type_name.equals("Array<double>")) {
239  return ExtensionFunction.ExtArgumentType.ArrayDouble;
240  }
241  if (type_name.equals("Array<bool>")) {
242  return ExtensionFunction.ExtArgumentType.ArrayBool;
243  }
244  if (type_name.equals("Array<TextEncodingDict>")) {
245  return ExtensionFunction.ExtArgumentType.ArrayTextEncodingDict;
246  }
247  if (type_name.equals("Array<TextEncodingNone>")) {
248  return ExtensionFunction.ExtArgumentType.ArrayTextEncodingNone;
249  }
250  if (type_name.equals("TextEncodingDict")) {
251  return ExtensionFunction.ExtArgumentType.TextEncodingDict;
252  }
253  if (type_name.equals("RowFunctionManager")) {
254  // RowFunctionManager is not actually materialized in udfs
255  // return void as a convenience here to not generate a warning
256  // in the line below
257  return ExtensionFunction.ExtArgumentType.Void;
258  }
259  if (type_name.equals("Timestamp")) {
260  return ExtensionFunction.ExtArgumentType.Timestamp;
261  }
262  if (type_name.equals("TextEncodingNone")) {
263  return ExtensionFunction.ExtArgumentType.TextEncodingNone;
264  }
265  if (type_name.equals("Column<int8_t>") || type_name.equals("Column<char>")) {
266  return ExtensionFunction.ExtArgumentType.ColumnInt8;
267  }
268  if (type_name.equals("Column<int16_t>") || type_name.equals("Column<short>")) {
269  return ExtensionFunction.ExtArgumentType.ColumnInt16;
270  }
271  if (type_name.equals("Column<int32_t>") || type_name.equals("Column<int>")) {
272  return ExtensionFunction.ExtArgumentType.ColumnInt32;
273  }
274  if (type_name.equals("Column<int64_t>") || type_name.equals("Column<size_t>")
275  || type_name.equals("Column<long>")) {
276  return ExtensionFunction.ExtArgumentType.ColumnInt64;
277  }
278  if (type_name.equals("Column<float>")) {
279  return ExtensionFunction.ExtArgumentType.ColumnFloat;
280  }
281  if (type_name.equals("Column<double>")) {
282  return ExtensionFunction.ExtArgumentType.ColumnDouble;
283  }
284  if (type_name.equals("Column<TextEncodingDict>")) {
285  return ExtensionFunction.ExtArgumentType.ColumnTextEncodingDict;
286  }
287  if (type_name.equals("Column<Timestamp>")) {
288  return ExtensionFunction.ExtArgumentType.ColumnTimestamp;
289  }
290  if (type_name.equals("Cursor")) {
291  return ExtensionFunction.ExtArgumentType.Cursor;
292  }
293  if (type_name.equals("GeoPoint")) {
295  }
296  if (type_name.equals("GeoMultiPoint")) {
298  }
299  if (type_name.equals("GeoLineString")) {
301  }
302  if (type_name.equals("GeoMultiLineString")) {
304  }
305  if (type_name.equals("GeoPolygon")) {
307  }
308  if (type_name.equals("GeoMultiPolygon")) {
310  }
311  if (type_name.equals("ColumnList<int8_t>") || type_name.equals("ColumnList<char>")) {
312  return ExtensionFunction.ExtArgumentType.ColumnListInt8;
313  }
314  if (type_name.equals("ColumnList<int16_t>")
315  || type_name.equals("ColumnList<short>")) {
316  return ExtensionFunction.ExtArgumentType.ColumnListInt16;
317  }
318  if (type_name.equals("ColumnList<int32_t>") || type_name.equals("ColumnList<int>")) {
319  return ExtensionFunction.ExtArgumentType.ColumnListInt32;
320  }
321  if (type_name.equals("ColumnList<int64_t>") || type_name.equals("ColumnList<size_t>")
322  || type_name.equals("ColumnList<long>")) {
323  return ExtensionFunction.ExtArgumentType.ColumnListInt64;
324  }
325  if (type_name.equals("ColumnList<float>")) {
326  return ExtensionFunction.ExtArgumentType.ColumnListFloat;
327  }
328  if (type_name.equals("ColumnList<double>")) {
329  return ExtensionFunction.ExtArgumentType.ColumnListDouble;
330  }
331  if (type_name.equals("ColumnList<TextEncodingDict>")) {
332  return ExtensionFunction.ExtArgumentType.ColumnListTextEncodingDict;
333  }
334  HEAVYDBLOGGER.info(
335  "ExtensionfunctionSignatureParser::deserializeType: unknown type_name=`"
336  + type_name + "`");
337  // TODO: Return void for convenience. Consider sanitizing functions for supported
338  // types before they reach Calcite
339  return ExtensionFunction.ExtArgumentType.Void;
340  }
341 
342  private static ExtensionFunction.ExtArgumentType pointerType(
343  final ExtensionFunction.ExtArgumentType targetType) {
344  switch (targetType) {
345  case Bool:
346  return ExtensionFunction.ExtArgumentType.PBool;
347  case Int8:
348  return ExtensionFunction.ExtArgumentType.PInt8;
349  case Int16:
350  return ExtensionFunction.ExtArgumentType.PInt16;
351  case Int32:
352  return ExtensionFunction.ExtArgumentType.PInt32;
353  case Int64:
354  return ExtensionFunction.ExtArgumentType.PInt64;
355  case Float:
356  return ExtensionFunction.ExtArgumentType.PFloat;
357  case Double:
358  return ExtensionFunction.ExtArgumentType.PDouble;
359  default:
360  assert false;
361  return null;
362  }
363  }
364 
365  static String join(final List<String> strs, final String sep) {
366  StringBuilder sb = new StringBuilder();
367  if (strs.isEmpty()) {
368  return "";
369  }
370  sb.append(strs.get(0));
371  for (int i = 1; i < strs.size(); ++i) {
372  sb.append(sep).append(strs.get(i));
373  }
374  return sb.toString();
375  }
376 }
static Map< String, ExtensionFunction > parseFromString(final String udf_string)
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:158
struct GeoLineStringStruct GeoLineString
Definition: heavydbTypes.h:999
struct GeoPointStruct GeoPoint
Definition: heavydbTypes.h:963
static ExtensionFunction.ExtArgumentType pointerType(final ExtensionFunction.ExtArgumentType targetType)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
static Map< String, ExtensionFunction > parseFromString(final String udf_string, final boolean is_row_func)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name, final boolean is_row_func)
static Map< String, ExtensionFunction > parse(final String file_path)
struct GeoMultiPointStruct GeoMultiPoint
Definition: heavydbTypes.h:981
struct GeoMultiLineStringStruct GeoMultiLineString
tuple line
Definition: parse_ast.py:10
static String join(final List< String > strs, final String sep)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name)
dictionary params
Definition: report.py:27
static String signaturesToJson(final Map< String, ExtensionFunction > sigs)
static ExtensionFunction.ExtArgumentType deserializeType(final String type_name)
static Map< String, ExtensionFunction > parseUdfAst(final String file_path)
struct GeoMultiPolygonStruct GeoMultiPolygon
string name
Definition: setup.in.py:72
struct GeoPolygonStruct GeoPolygon
constexpr auto type_name() noexcept