OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
generate_TableFunctionsFactory_init.py
Go to the documentation of this file.
1 """Given a list of input files, scan for lines containing UDTF
2 specification statements in the following form:
3 
4  UDTF: function_name(<arguments>) -> <output column types> (, <template type specifications>)?
5 
6 where <arguments> is a comma-separated list of argument types. The
7 argument types specifications are:
8 
9 - scalar types:
10  Int8, Int16, Int32, Int64, Float, Double, Bool, TextEncodingDict, etc
11 - column types:
12  ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat, ColumnDouble, ColumnBool, etc
13 - column list types:
14  ColumnListInt8, ColumnListInt16, ColumnListInt32, ColumnListInt64, ColumnListFloat, ColumnListDouble, ColumnListBool, etc
15 - cursor type:
16  Cursor<t0, t1, ...>
17  where t0, t1 are column or column list types
18 - output buffer size parameter type:
19  RowMultiplier<i>, ConstantParameter<i>, Constant<i>, TableFunctionSpecifiedParameter<i>
20  where i is a literal integer.
21 
22 The output column types is a comma-separated list of column types, see above.
23 
24 In addition, the following equivalents are supported:
25 
26  Column<T> == ColumnT
27  ColumnList<T> == ColumnListT
28  Cursor<T, V, ...> == Cursor<ColumnT, ColumnV, ...>
29  int8 == int8_t == Int8, etc
30  float == Float, double == Double, bool == Bool
31  T == ColumnT for output column types
32  RowMultiplier == RowMultiplier<i> where i is the one-based position of the sizer argument
33  when no sizer argument is provided, Constant<1> is assumed
34 
35 Argument types can be annotated using `|' (bar) symbol after an
36 argument type specification. An annotation is specified by a label and
37 a value separated by `=' (equal) symbol. Multiple annotations can be
38 specified by using `|` (bar) symbol as the annotations separator.
39 Supported annotation labels are:
40 
41 - name: to specify argument name
42 - input_id: to specify the dict id mapping for output TextEncodingDict columns.
43 - default: to specify a default value for an argument (scalar only)
44 
45 If an argument type is followed by an identifier, the identifier is
46 mapped to the name annotation. For example, the following argument
47 type specifications are equivalent:
48 
49  Int8 a
50  Int8 | name=a
51 
52 Template type specifications is a comma separated list of template
53 type assignments where values are lists of argument type names. For
54 instance:
55 
56  T = [Int8, Int16, Int32, Float], V = [Float, Double]
57 
58 """
59 # Author: Pearu Peterson
60 # Created: January 2021
61 
62 
63 import os
64 import sys
65 import warnings
66 
67 import TableFunctionsFactory_transformers as transformers
68 import TableFunctionsFactory_parser as parser
69 import TableFunctionsFactory_declbracket as declbracket
70 import TableFunctionsFactory_util as util
71 import TableFunctionsFactory_linker as linker
72 
73 
74 # fmt: off
75 separator = '$=>$'
76 
78  # TODO: try to parse the line to be certain about completeness.
79  # `$=>$' is used to separate the UDTF signature and the expected result
80  return line.endswith(',') or line.endswith('->') or line.endswith(separator) or line.endswith('|')
81 
82 
83 # fmt: off
def find_signatures(input_file):
    """Return the list of UDTF signatures declared in `input_file`.

    The file is scanned line by line for statements that start with
    ``UDTF:``. A statement that `line_is_incomplete` reports as unfinished
    is joined (with a single space) with the following line before it is
    processed. Statements without both ``(`` and ``)`` are reported on
    stderr and skipped.

    A statement containing the module-level `separator` is a self-test:
    the text after the first separator is a separator-delimited list of
    expected results. For such statements the printed pipeline output (or
    the ``<ExceptionName>: <message>`` of a transformer error/warning) is
    compared against the expectation with `assert`, and nothing is added
    to the returned list.

    Parameters
    ----------
    input_file : str
        Path of the file to scan.

    Returns
    -------
    list
        The concatenated outputs of the transformer pipeline for all
        non-test UDTF statements, in file order.

    Raises
    ------
    AssertionError
        When a self-test statement does not produce the expected results.
    """
    # Stages shared by the self-test and the regular pipeline; only the
    # final stage differs between the two.
    common_stages = (
        transformers.TemplateTransformer,
        transformers.AmbiguousSignatureCheckTransformer,
        transformers.FieldAnnotationTransformer,
        transformers.TextEncodingDictTransformer,
        transformers.DefaultValueAnnotationTransformer,
        transformers.SupportedAnnotationsTransformer,
        transformers.RangeAnnotationTransformer,
        transformers.CursorAnnotationTransformer,
        transformers.FixRowMultiplierPosArgTransformer,
        transformers.RenameNodesTransformer,
    )

    signatures = []

    # Read the whole file inside a `with` block so that the handle is
    # closed before any parsing starts.
    with open(input_file) as stream:
        lines = stream.readlines()

    last_line = None
    for line in lines:
        line = line.strip()
        if last_line is not None:
            # Continuation of an incomplete UDTF statement.
            line = last_line + ' ' + line
            last_line = None
        if not line.startswith('UDTF:'):
            continue
        if line_is_incomplete(line):
            last_line = line
            continue
        line = line[5:].lstrip()  # drop the 'UDTF:' prefix
        if line.find('(') == -1 or line.find(')') == -1:
            sys.stderr.write('Invalid UDTF specification: `%s`. Skipping.\n' % (line))
            continue

        expected_result = None
        if separator in line:
            line, expected_result = line.split(separator, 1)
            expected_result = [s.strip() for s in expected_result.strip().split(separator)]

        ast = parser.Parser(line).parse()

        if expected_result is None:
            # Regular statement: the template transformer may expand one
            # statement into several signatures.
            pipeline = transformers.Pipeline(
                *(common_stages + (transformers.DeclBracketTransformer,)))
            signatures.extend(pipeline(ast))
            continue

        # Self-test statement.
        # Treat warnings as errors so that one can test TransformerWarning
        warnings.filterwarnings("error")

        try:
            result = transformers.Pipeline(
                *(common_stages + (transformers.AstPrinter,)))(ast)
        except (transformers.TransformerException, transformers.TransformerWarning) as msg:
            result = ['%s: %s' % (type(msg).__name__, msg)]
        assert len(result) == len(expected_result), "\n\tresult: %s \n!= \n\texpected: %s" % (
            '\n\t\t '.join(result),
            '\n\t\t '.join(expected_result)
        )
        assert set(result) == set(expected_result), "\n\tresult: %s != \n\texpected: %s" % (
            '\n\t\t '.join(result),
            '\n\t\t '.join(expected_result),
        )

    return signatures
161 
162 
def format_function_args(input_types, output_types, uses_manager, use_generic_arg_name, emit_output_args):
    """Format the C++ parameter list and the matching argument-name list.

    Each type object must provide ``format_cpp_type(idx,
    use_generic_arg_name=..., is_input=...)`` returning a
    ``(cpp_declaration, name)`` pair. `idx` is the position of the type
    within its own list (inputs and outputs are numbered separately).

    Parameters
    ----------
    input_types, output_types : sequence
        Type objects of the input and output arguments.
    uses_manager : bool
        When true, ``TableFunctionManager& mgr`` is emitted as the first
        argument.
    use_generic_arg_name : bool
        Forwarded to ``format_cpp_type``.
    emit_output_args : bool
        When false, `output_types` are ignored.

    Returns
    -------
    tuple of (str, str)
        Comma-separated C++ declarations and comma-separated names.
    """
    pairs = []

    if uses_manager:
        pairs.append(('TableFunctionManager& mgr', 'mgr'))

    pairs.extend(
        typ.format_cpp_type(position,
                            use_generic_arg_name=use_generic_arg_name,
                            is_input=True)
        for position, typ in enumerate(input_types))

    if emit_output_args:
        pairs.extend(
            typ.format_cpp_type(position,
                                use_generic_arg_name=use_generic_arg_name,
                                is_input=False)
            for position, typ in enumerate(output_types))

    declarations = ', '.join(declaration for declaration, _ in pairs)
    names = ', '.join(name for _, name in pairs)
    return declarations, names
189 
190 
def build_template_function_call(caller, called, input_types, output_types, uses_manager):
    """Return the C++ source of a wrapper `caller` that forwards to `called`.

    The wrapper takes every input and output argument (plus the manager
    when `uses_manager` is true), declared with generic argument names,
    and returns the result of calling `called` with those names.
    """
    declared_args, forwarded_names = format_function_args(
        input_types,
        output_types,
        uses_manager,
        use_generic_arg_name=True,
        emit_output_args=True)

    pieces = [
        "EXTENSION_NOINLINE int32_t\n",
        "%s(%s) {\n" % (caller, declared_args),
        " return %s(%s);\n" % (called, forwarded_names),
        "}\n",
    ]
    return ''.join(pieces)
203 
204 
def build_preflight_function(fn_name, sizer, input_types, output_types, uses_manager):
    """Return the C++ source of the ``<fn_name>__preflight`` function.

    The generated function (its name is `fn_name` lower-cased plus
    ``__preflight``) takes the input arguments only and

    - returns an error for every ``require`` annotation of a
      `declbracket.Declaration` input whose condition does not hold;
    - when ``sizer.is_arg_sizer()`` is true, evaluates the expression in
      ``sizer.args[0]``, returns an error if it is negative and returns
      its value otherwise;
    - returns 0 in all other cases.

    Errors are reported via ``mgr.error_message`` when `uses_manager` is
    true and via ``table_function_error`` otherwise.

    `output_types` is forwarded to `format_function_args`, which ignores
    it here because output arguments are not emitted.
    """

    def format_error_msg(err_msg, uses_manager):
        if uses_manager:
            return " return mgr.error_message(%s);\n" % (err_msg,)
        return " return table_function_error(%s);\n" % (err_msg,)

    cpp_args, _ = format_function_args(input_types,
                                       output_types,
                                       uses_manager,
                                       use_generic_arg_name=False,
                                       emit_output_args=False)

    # The function header does not depend on `uses_manager`; the manager
    # argument, if any, is already part of `cpp_args`.
    fn = "EXTENSION_NOINLINE int32_t\n"
    fn += "%s(%s) {\n" % (fn_name.lower() + "__preflight", cpp_args)

    for typ in input_types:
        if not isinstance(typ, declbracket.Declaration):
            continue
        for key, value in typ.annotations:
            if key != 'require':
                continue
            # The annotation value is quoted; strip the surrounding quotes.
            condition = value[1:-1]
            err_msg = '"Constraint `%s` is not satisfied."' % (condition)

            # Backslashes are dropped from the emitted condition only; the
            # error message keeps them.
            fn += " if (!(%s)) {\n" % (condition.replace('\\', ''),)
            fn += format_error_msg(err_msg, uses_manager)
            fn += " }\n"

    if sizer.is_arg_sizer():
        precomputed_nrows = str(sizer.args[0])
        if '"' in precomputed_nrows:
            precomputed_nrows = precomputed_nrows[1:-1]
        # Reject a negative precomputed number of rows.
        err_msg = '"Output size expression `%s` evaluated in a negative value."' % (precomputed_nrows)
        fn += " auto _output_size = %s;\n" % (precomputed_nrows)
        fn += " if (_output_size < 0) {\n"
        fn += format_error_msg(err_msg, uses_manager)
        fn += " }\n"
        fn += " return _output_size;\n"
    else:
        fn += " return 0;\n"
    fn += "}\n\n"

    return fn
253 
254 
256  if sizer.is_arg_sizer():
257  return True
258  for arg_annotations in sig.input_annotations:
259  d = dict(arg_annotations)
260  if 'require' in d.keys():
261  return True
262  return False
263 
264 
def format_annotations(annotations_):
    """Format annotations as a C++ ``std::vector<std::map<...>>`` initializer.

    Parameters
    ----------
    annotations_ : iterable
        One iterable of ``(key, value)`` pairs per argument.

    Returns
    -------
    str
        ``std::vector<std::map<std::string, std::string>>{...}`` with one
        ``{{"key", "value"}, ...}`` map per element of `annotations_`.

    The surrounding quotes of a ``require`` value are always stripped; the
    quotes of a ``default`` value are stripped only when the value is a
    string that starts with a double quote.
    """
    def fmt(k, v):
        if k == 'require':
            return v[1:-1]
        # type(v) is not always 'str': guard before looking at the first
        # character so that numeric or empty defaults do not raise.
        if k == 'default' and isinstance(v, str) and v.startswith('"'):
            return v[1:-1]
        return v

    maps = (
        '{' + ', '.join('{"%s", "%s"}' % (k, fmt(k, v)) for k, v in a) + '}'
        for a in annotations_
    )
    return "std::vector<std::map<std::string, std::string>>{" + ', '.join(maps) + "}"
276 
277 
279  i = sig.name.rfind('_template')
280  return i >= 0 and '__' in sig.name[:i + 1]
281 
282 
def uses_manager(sig):
    """Return True when the first input of `sig` is named 'TableFunctionManager'.

    A signature without inputs yields False (a real boolean rather than
    the empty inputs container).
    """
    inputs = sig.inputs
    return bool(inputs) and inputs[0].name == 'TableFunctionManager'
285 
286 
288  # Any function that does not have _gpu_ suffix is a cpu function.
289  i = sig.name.rfind('_gpu_')
290  if i >= 0 and '__' in sig.name[:i + 1]:
291  if uses_manager(sig):
292  raise ValueError('Table function {} with gpu execution target cannot have TableFunctionManager argument'.format(sig.name))
293  return False
294  return True
295 
296 
298  # A function with TableFunctionManager argument is a cpu-only function
299  if uses_manager(sig):
300  return False
301  # Any function that does not have _cpu_ suffix is a gpu function.
302  i = sig.name.rfind('_cpu_')
303  return not (i >= 0 and '__' in sig.name[:i + 1])
304 
305 
def parse_annotations(input_files):
    """Collect the code-generation artifacts for the UDTFs in `input_files`.

    Every signature returned by `find_signatures` contributes a
    ``TableFunctionsFactory::add(...)`` statement and an address
    expression. Template functions additionally contribute a numbered
    wrapper function, and signatures for which
    `must_emit_preflight_function` is true contribute a preflight function.

    Returns
    -------
    tuple
        ``(add_stmts, cpu_template_functions, gpu_template_functions,
        cpu_function_address_expressions,
        gpu_function_address_expressions, cond_fns)``, each a list of
        strings.

    Raises
    ------
    ValueError
        When a TableFunctionManager argument is not the first argument.
    """
    add_format = 'TableFunctionsFactory::add("%s", %s, %s, %s, %s, %s, /*is_runtime:*/false);'
    address_format = 'avoid_opt_address(reinterpret_cast<void*>(%s))'
    vector_format = 'std::vector<ExtArgumentType>{%s}'

    # Running index used to give template instantiations unique names.
    counter = 0

    add_stmts = []
    cpu_template_functions = []
    gpu_template_functions = []
    cpu_function_address_expressions = []
    gpu_function_address_expressions = []
    cond_fns = []

    for input_file in input_files:
        for sig in find_signatures(input_file):

            # Compute sql_types, input_types, and sizer
            sql_types_ = []
            input_types_ = []
            input_annotations = []

            sizer = None
            if sig.sizer is not None:
                sizer = declbracket.Bracket('kPreFlightParameter', (sig.sizer.value,))

            has_manager = False
            for position, (arg, annot) in enumerate(zip(sig.inputs, sig.input_annotations)):
                if arg.is_output_buffer_sizer():
                    if arg.is_user_specified():
                        int32_type = declbracket.Bracket.parse('int32').normalize(kind='input')
                        sql_types_.append(int32_type)
                        input_types_.append(int32_type)
                        input_annotations.append(annot)
                    # NOTE(review): nesting reconstructed from a listing with
                    # flattened indentation; the sizer is recorded whether or
                    # not it is user specified -- confirm against the original.
                    assert sizer is None  # exactly one sizer argument is allowed
                    assert len(arg.args) == 1, arg
                    sizer = arg
                elif arg.name == 'Cursor':
                    # NOTE(review): one annotation and one SQL type per cursor,
                    # one input type per cursor column -- confirm nesting.
                    input_types_.extend(arg.args)
                    input_annotations.append(annot)
                    sql_types_.append(declbracket.Bracket('Cursor', args=()))
                elif arg.name == 'TableFunctionManager':
                    if position != 0:
                        raise ValueError('{} must appear as a first argument of {}, but found it at position {}.'.format(arg, sig.name, position))
                    has_manager = True
                else:
                    input_types_.append(arg)
                    input_annotations.append(annot)
                    if arg.is_column_any():
                        # XXX: let Bracket handle mapping of column to cursor(column)
                        sql_types_.append(declbracket.Bracket('Cursor', args=()))
                    else:
                        sql_types_.append(arg)

            if sizer is None:
                # this sizer is not actually materialized in the UDTF
                sizer = declbracket.Bracket('kTableFunctionSpecifiedParameter', (1,))

            assert sizer is not None
            ns_output_types = tuple(a.apply_namespace(ns='ExtArgumentType') for a in sig.outputs)
            ns_input_types = tuple(t.apply_namespace(ns='ExtArgumentType') for t in input_types_)
            ns_sql_types = tuple(t.apply_namespace(ns='ExtArgumentType') for t in sql_types_)

            sig.function_annotations.append(('uses_manager', str(has_manager).lower()))

            input_types = vector_format % (', '.join(map(util.tostring, ns_input_types)))
            output_types = vector_format % (', '.join(map(util.tostring, ns_output_types)))
            sql_types = vector_format % (', '.join(map(util.tostring, ns_sql_types)))
            annotations = format_annotations(input_annotations + sig.output_annotations + [sig.function_annotations])

            # Notice that input_types and sig.input_types, (and
            # similarly, input_annotations and sig.input_annotations)
            # have different lengths when the sizer argument is
            # Constant or TableFunctionSpecifiedParameter. That is,
            # input_types contains all the user-specified arguments
            # while sig.input_types contains all arguments of the
            # implementation of an UDTF.

            if must_emit_preflight_function(sig, sizer):
                if is_template_function(sig):
                    fn_name = '%s_%s' % (sig.name, str(counter))
                else:
                    fn_name = sig.name
                cond_fns.append(
                    build_preflight_function(fn_name, sizer, input_types_, sig.outputs, has_manager))

            if is_template_function(sig):
                name = sig.name + '_' + str(counter)
                counter += 1
                wrapper = build_template_function_call(name, sig.name, input_types_, sig.outputs, has_manager)
                address_expression = address_format % name
                if is_cpu_function(sig):
                    cpu_template_functions.append(wrapper)
                    cpu_function_address_expressions.append(address_expression)
                if is_gpu_function(sig):
                    gpu_template_functions.append(wrapper)
                    gpu_function_address_expressions.append(address_expression)
                add_stmts.append(
                    add_format % (name, sizer.format_sizer(), input_types, output_types, sql_types, annotations))
            else:
                add_stmts.append(
                    add_format % (sig.name, sizer.format_sizer(), input_types, output_types, sql_types, annotations))
                address_expression = address_format % sig.name

                if is_cpu_function(sig):
                    cpu_function_address_expressions.append(address_expression)
                if is_gpu_function(sig):
                    gpu_function_address_expressions.append(address_expression)

    return (add_stmts,
            cpu_template_functions,
            gpu_template_functions,
            cpu_function_address_expressions,
            gpu_function_address_expressions,
            cond_fns)
415 
416 
417 
418 
# Fewer than one input file plus an output file: run the signature
# self-tests, print the usage and exit with a non-zero status.
if len(sys.argv) < 3:

    input_files = [os.path.join(os.path.dirname(__file__), 'test_udtf_signatures.hpp')]
    print('Running tests from %s' % (', '.join(input_files)))
    add_stmts, _, _, _, _, _ = parse_annotations(input_files)

    print('Usage:\n %s %s input1.hpp input2.hpp ... output.hpp' % (sys.executable, sys.argv[0], ))

    sys.exit(1)

# All arguments but the last are inputs; the last one is the output file.
input_files = sys.argv[1:-1]
output_filename = sys.argv[-1]
cpu_output_header = os.path.splitext(output_filename)[0] + '_cpu.hpp'
gpu_output_header = os.path.splitext(output_filename)[0] + '_gpu.hpp'
assert input_files, sys.argv

# Accumulators over all input files.
add_stmts = []
cpu_template_functions = []
gpu_template_functions = []
cpu_address_expressions = []
gpu_address_expressions = []
cond_fns = []

# Input paths cut just after the slash that precedes "QueryEngine/"; when
# the marker is absent, find() returns -1 and the whole path is kept.
canonical_input_files = [path[path.find("/QueryEngine/") + 1:] for path in input_files]
header_file = ['#include "' + canonical + '"' for canonical in canonical_input_files]

dirname = os.path.dirname(output_filename)

# Create the output directory, tolerating a concurrent creation.
if dirname and not os.path.exists(dirname):
    try:
        os.makedirs(dirname)
    except OSError as err:
        import errno
        if err.errno != errno.EEXIST:
            raise


for input_file in input_files:
    stmts, cpu_fns, gpu_fns, cpu_addr, gpu_addr, cond_funcs = parse_annotations([input_file])

    add_stmts += stmts
    cpu_template_functions += cpu_fns
    gpu_template_functions += gpu_fns
    cpu_address_expressions += cpu_addr
    gpu_address_expressions += gpu_addr
    cond_fns += cond_funcs

    header_file = input_file[input_file.find("/QueryEngine/") + 1:]

    add_tf_generated_files = linker.GenerateAddTableFunctionsFiles(dirname, stmts,
                                                                   header_file)
    if add_tf_generated_files.should_generate_files():
        add_tf_generated_files.generate_files()

    # Template wrappers are written only for targets that have any.
    if cpu_fns:
        cpu_generated_files = linker.GenerateTemplateFiles(dirname, cpu_fns,
                                                           header_file, 'cpu')
        cpu_generated_files.generate_files()

    if gpu_fns:
        gpu_generated_files = linker.GenerateTemplateFiles(dirname, gpu_fns,
                                                           header_file, 'gpu')
        gpu_generated_files.generate_files()
481 
482 
def call_methods(add_stmts):
    """Return the C++ calls to the generated ``add_table_functions_<i>`` functions.

    One call is produced for every index from 0 up to and including the
    number reported by
    ``linker.GenerateAddTableFunctionsFiles.get_num_generated_files()``.
    The `add_stmts` argument is not used.
    """
    n_add_funcs = linker.GenerateAddTableFunctionsFiles.get_num_generated_files()
    calls = []
    for index in range(n_add_funcs + 1):
        calls.append('table_functions::add_table_functions_%d();' % (index))
    return calls
486 
487 
# Template of the generated C++ initialization module. It has five `%s`
# placeholders that are filled, in order, with: the name of this script,
# the #include lines of the generated headers, the `&&`-joined CPU address
# expressions, the calls that register the table functions, and the
# preflight ("conditional check") functions.
content = '''
/*
 This file is generated by %s. Do no edit!
*/

#include "QueryEngine/TableFunctions/TableFunctionsFactory.h"


/*
 Include the UDTF template initiations:
*/
%s

// volatile+noinline prevents compiler optimization
#ifdef _WIN32
__declspec(noinline)
#else
 __attribute__((noinline))
#endif

#ifndef NO_OPT_ATTRIBUTE
#if defined(__clang__)
#define NO_OPT_ATTRIBUTE __attribute__((optnone))

#elif defined(__GNUC__) || defined(__GNUG__)
#define NO_OPT_ATTRIBUTE __attribute__((optimize("O0")))

#elif defined(_MSC_VER)
#define NO_OPT_ATTRIBUTE

#endif
#endif

#if defined(_MSC_VER)
#pragma optimize("", off)
#endif

volatile
NO_OPT_ATTRIBUTE bool avoid_opt_address(void *address) {
 return address != nullptr;
}

NO_OPT_ATTRIBUTE bool functions_exist() {
 bool ret = true;

 ret &= (%s);

 return ret;
}

extern bool g_enable_table_functions;

extern bool functions_exist_geo_column();

// Each table function initialization module needs its own AddTableFunctions struct definition,
// otherwise, when calling an initialization function at runtime, symbol name conflicts will
// cause the wrong struct to be instantiated.
namespace {
struct AddTableFunctions {
 NO_OPT_ATTRIBUTE void operator()() {
 %s
 }
};
} // anonymous namespace

namespace table_functions {

// Each table function initialization module should have its own init flag
static std::once_flag init_flag;

static const char filename[] = __FILE__;

template<const char *filename>
void TableFunctionsFactory::init() {
 if (!g_enable_table_functions) {
 return;
 }

 if (!functions_exist() && !functions_exist_geo_column()) {
 UNREACHABLE();
 return;
 }

 std::call_once(init_flag, AddTableFunctions{});
}

extern "C" void init_table_functions() {
 TableFunctionsFactory::init<filename>();
}
#if defined(_MSC_VER)
#pragma optimize("", on)
#endif

// conditional check functions
%s

} // namespace table_functions

'''
587 
# Fill in the template and write the output file.

content = content % (
    sys.argv[0],
    '\n'.join('#include "%s"' % header
              for header in linker.BaseGenerateFiles.generated_header_files()),
    ' &&\n'.join(cpu_address_expressions),
    '\n '.join(call_methods(add_stmts)),
    ''.join(cond_fns))


# Rewrite the output only when it is missing or its content has changed,
# so that an up-to-date file is left untouched.
if not os.path.exists(output_filename) or \
        content != linker.get_existing_file_content(output_filename):
    with open(output_filename, 'w') as f:
        f.write(content)
std::string join(T const &container, std::string const &delim)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int open(const char *path, int flags, int mode)
Definition: heavyai_fs.cpp:66