1 """Given a list of input files, scan for lines containing UDTF
2 specification statements in the following form:
4 UDTF: function_name(<arguments>) -> <output column types> (, <template type specifications>)?
6 where <arguments> is a comma-separated list of argument types. The
7 argument type specifications are:
10 Int8, Int16, Int32, Int64, Float, Double, Bool, TextEncodingDict, etc
12 ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat, ColumnDouble, ColumnBool, etc
14 ColumnListInt8, ColumnListInt16, ColumnListInt32, ColumnListInt64, ColumnListFloat, ColumnListDouble, ColumnListBool, etc
17 where t0, t1 are column or column list types
18 - output buffer size parameter type:
19 RowMultiplier<i>, ConstantParameter<i>, Constant<i>, TableFunctionSpecifiedParameter<i>
20 where i is a literal integer.
22 The output column types are a comma-separated list of column types, see above.
24 In addition, the following equivalents are supported:
27 ColumnList<T> == ColumnListT
28 Cursor<T, V, ...> == Cursor<ColumnT, ColumnV, ...>
29 int8 == int8_t == Int8, etc
30 float == Float, double == Double, bool == Bool
31 T == ColumnT for output column types
32 RowMultiplier == RowMultiplier<i> where i is the one-based position of the sizer argument
33 when no sizer argument is provided, Constant<1> is assumed
35 Argument types can be annotated using `|' (bar) symbol after an
36 argument type specification. An annotation is specified by a label and
37 a value separated by `=' (equal) symbol. Multiple annotations can be
38 specified by using `|` (bar) symbol as the annotations separator.
39 Supported annotation labels are:
41 - name: to specify argument name
42 - input_id: to specify the dict id mapping for output TextEncodingDict columns.
43 - default: to specify a default value for an argument (scalar only)
45 If argument type follows an identifier, it will be mapped to name
46 annotations. For example, the following argument type specifications
52 Template type specifications are a comma-separated list of template
53 type assignments where values are lists of argument type names. For
56 T = [Int8, Int16, Int32, Float], V = [Float, Double]
67 import TableFunctionsFactory_transformers
as transformers
68 import TableFunctionsFactory_parser
as parser
69 import TableFunctionsFactory_declbracket
as declbracket
70 import TableFunctionsFactory_util
as util
71 import TableFunctionsFactory_linker
as linker
80 return line.endswith(
',')
or line.endswith(
'->')
or line.endswith(separator)
or line.endswith(
'|')
85 """Returns a list of parsed UDTF signatures."""
89 for line
in open(input_file).readlines():
91 if last_line
is not None:
92 line = last_line +
' ' + line
94 if not line.startswith(
'UDTF:'):
100 line = line[5:].lstrip()
103 if i == -1
or j == -1:
104 sys.stderr.write(
'Invalid UDTF specification: `%s`. Skipping.\n' % (line))
107 expected_result =
None
108 if separator
in line:
109 line, expected_result = line.split(separator, 1)
110 expected_result = expected_result.strip().
split(separator)
111 expected_result = list(map(
lambda s: s.strip(), expected_result))
113 ast = parser.Parser(line).parse()
115 if expected_result
is not None:
117 warnings.filterwarnings(
"error")
121 result = transformers.Pipeline(
122 transformers.TemplateTransformer,
123 transformers.AmbiguousSignatureCheckTransformer,
124 transformers.FieldAnnotationTransformer,
125 transformers.TextEncodingDictTransformer,
126 transformers.DefaultValueAnnotationTransformer,
127 transformers.SupportedAnnotationsTransformer,
128 transformers.RangeAnnotationTransformer,
129 transformers.CursorAnnotationTransformer,
130 transformers.FixRowMultiplierPosArgTransformer,
131 transformers.RenameNodesTransformer,
132 transformers.AstPrinter)(ast)
133 except (transformers.TransformerException, transformers.TransformerWarning)
as msg:
134 result = [
'%s: %s' % (
type(msg).__name__, msg)]
135 assert len(result) == len(expected_result),
"\n\tresult: %s \n!= \n\texpected: %s" % (
136 '\n\t\t '.
join(result),
137 '\n\t\t '.
join(expected_result)
139 assert set(result) == set(expected_result),
"\n\tresult: %s != \n\texpected: %s" % (
140 '\n\t\t '.
join(result),
141 '\n\t\t '.
join(expected_result),
145 signature = transformers.Pipeline(
146 transformers.TemplateTransformer,
147 transformers.AmbiguousSignatureCheckTransformer,
148 transformers.FieldAnnotationTransformer,
149 transformers.TextEncodingDictTransformer,
150 transformers.DefaultValueAnnotationTransformer,
151 transformers.SupportedAnnotationsTransformer,
152 transformers.RangeAnnotationTransformer,
153 transformers.CursorAnnotationTransformer,
154 transformers.FixRowMultiplierPosArgTransformer,
155 transformers.RenameNodesTransformer,
156 transformers.DeclBracketTransformer)(ast)
158 signatures.extend(signature)
168 cpp_args.append(
'TableFunctionManager& mgr')
169 name_args.append(
'mgr')
171 for idx, typ
in enumerate(input_types):
172 cpp_arg, name = typ.format_cpp_type(idx,
173 use_generic_arg_name=use_generic_arg_name,
175 cpp_args.append(cpp_arg)
176 name_args.append(name)
179 for idx, typ
in enumerate(output_types):
180 cpp_arg, name = typ.format_cpp_type(idx,
181 use_generic_arg_name=use_generic_arg_name,
183 cpp_args.append(cpp_arg)
184 name_args.append(name)
186 cpp_args =
', '.
join(cpp_args)
187 name_args =
', '.
join(name_args)
188 return cpp_args, name_args
195 use_generic_arg_name=
True,
196 emit_output_args=
True)
198 template = (
"EXTENSION_NOINLINE int32_t\n"
201 "}\n") % (caller, cpp_args, called, name_args)
207 def format_error_msg(err_msg, uses_manager):
209 return " return mgr.error_message(%s);\n" % (err_msg,)
211 return " return table_function_error(%s);\n" % (err_msg,)
216 use_generic_arg_name=
False,
217 emit_output_args=
False)
220 fn =
"EXTENSION_NOINLINE int32_t\n"
221 fn +=
"%s(%s) {\n" % (fn_name.lower() +
"__preflight", cpp_args)
223 fn =
"EXTENSION_NOINLINE int32_t\n"
224 fn +=
"%s(%s) {\n" % (fn_name.lower() +
"__preflight", cpp_args)
226 for typ
in input_types:
227 if isinstance(typ, declbracket.Declaration):
228 ann = typ.annotations
229 for key, value
in ann:
231 err_msg =
'"Constraint `%s` is not satisfied."' % (value[1:-1])
233 fn +=
" if (!(%s)) {\n" % (value[1:-1].replace(
'\\',
''),)
234 fn += format_error_msg(err_msg, uses_manager)
237 if sizer.is_arg_sizer():
238 precomputed_nrows = str(sizer.args[0])
239 if '"' in precomputed_nrows:
240 precomputed_nrows = precomputed_nrows[1:-1]
242 err_msg =
'"Output size expression `%s` evaluated in a negative value."' % (precomputed_nrows)
243 fn +=
" auto _output_size = %s;\n" % (precomputed_nrows)
244 fn +=
" if (_output_size < 0) {\n"
245 fn += format_error_msg(err_msg, uses_manager)
247 fn +=
" return _output_size;\n"
256 if sizer.is_arg_sizer():
258 for arg_annotations
in sig.input_annotations:
259 d = dict(arg_annotations)
260 if 'require' in d.keys():
268 if k ==
'require' or k ==
'default' and v[0] ==
"\"":
272 s =
"std::vector<std::map<std::string, std::string>>{"
273 s +=
', '.
join((
'{' +
', '.
join(
'{"%s", "%s"}' % (k, fmt(k, v))
for k, v
in a) +
'}')
for a
in annotations_)
279 i = sig.name.rfind(
'_template')
280 return i >= 0
and '__' in sig.name[:i + 1]
284 return sig.inputs
and sig.inputs[0].name ==
'TableFunctionManager'
289 i = sig.name.rfind(
'_gpu_')
290 if i >= 0
and '__' in sig.name[:i + 1]:
292 raise ValueError(
'Table function {} with gpu execution target cannot have TableFunctionManager argument'.format(sig.name))
302 i = sig.name.rfind(
'_cpu_')
303 return not (i >= 0
and '__' in sig.name[:i + 1])
311 cpu_template_functions = []
312 gpu_template_functions = []
313 cpu_function_address_expressions = []
314 gpu_function_address_expressions = []
317 for input_file
in input_files:
323 input_annotations = []
326 if sig.sizer
is not None:
327 expr = sig.sizer.value
328 sizer = declbracket.Bracket(
'kPreFlightParameter', (expr,))
331 for i, (t, annot)
in enumerate(zip(sig.inputs, sig.input_annotations)):
332 if t.is_output_buffer_sizer():
333 if t.is_user_specified():
334 sql_types_.append(declbracket.Bracket.parse(
'int32').normalize(kind=
'input'))
335 input_types_.append(sql_types_[-1])
336 input_annotations.append(annot)
338 assert len(t.args) == 1, t
340 elif t.name ==
'Cursor':
342 input_types_.append(t_)
343 input_annotations.append(annot)
344 sql_types_.append(declbracket.Bracket(
'Cursor', args=()))
345 elif t.name ==
'TableFunctionManager':
347 raise ValueError(
'{} must appear as a first argument of {}, but found it at position {}.'.format(t, sig.name, i))
350 input_types_.append(t)
351 input_annotations.append(annot)
352 if t.is_column_any():
354 sql_types_.append(declbracket.Bracket(
'Cursor', args=()))
359 name =
'kTableFunctionSpecifiedParameter'
361 sizer = declbracket.Bracket(name, (idx,))
363 assert sizer
is not None
364 ns_output_types = tuple([a.apply_namespace(ns=
'ExtArgumentType')
for a
in sig.outputs])
365 ns_input_types = tuple([t.apply_namespace(ns=
'ExtArgumentType')
for t
in input_types_])
366 ns_sql_types = tuple([t.apply_namespace(ns=
'ExtArgumentType')
for t
in sql_types_])
368 sig.function_annotations.append((
'uses_manager', str(uses_manager).lower()))
370 input_types =
'std::vector<ExtArgumentType>{%s}' % (
', '.
join(map(util.tostring, ns_input_types)))
371 output_types =
'std::vector<ExtArgumentType>{%s}' % (
', '.
join(map(util.tostring, ns_output_types)))
372 sql_types =
'std::vector<ExtArgumentType>{%s}' % (
', '.
join(map(util.tostring, ns_sql_types)))
373 annotations =
format_annotations(input_annotations + sig.output_annotations + [sig.function_annotations])
386 cond_fns.append(check_fn)
389 name = sig.name +
'_' + str(counter)
392 address_expression = (
'avoid_opt_address(reinterpret_cast<void*>(%s))' % name)
394 cpu_template_functions.append(t)
395 cpu_function_address_expressions.append(address_expression)
397 gpu_template_functions.append(t)
398 gpu_function_address_expressions.append(address_expression)
399 add = (
'TableFunctionsFactory::add("%s", %s, %s, %s, %s, %s, /*is_runtime:*/false);'
400 % (name, sizer.format_sizer(), input_types, output_types, sql_types, annotations))
401 add_stmts.append(add)
404 add = (
'TableFunctionsFactory::add("%s", %s, %s, %s, %s, %s, /*is_runtime:*/false);'
405 % (sig.name, sizer.format_sizer(), input_types, output_types, sql_types, annotations))
406 add_stmts.append(add)
407 address_expression = (
'avoid_opt_address(reinterpret_cast<void*>(%s))' % sig.name)
410 cpu_function_address_expressions.append(address_expression)
412 gpu_function_address_expressions.append(address_expression)
414 return add_stmts, cpu_template_functions, gpu_template_functions, cpu_function_address_expressions, gpu_function_address_expressions, cond_fns
419 if len(sys.argv) < 3:
421 input_files = [os.path.join(os.path.dirname(__file__),
'test_udtf_signatures.hpp')]
422 print(
'Running tests from %s' % (
', '.
join(input_files)))
425 print(
'Usage:\n %s %s input1.hpp input2.hpp ... output.hpp' % (sys.executable, sys.argv[0], ))
429 input_files, output_filename = sys.argv[1:-1], sys.argv[-1]
430 cpu_output_header = os.path.splitext(output_filename)[0] +
'_cpu.hpp'
431 gpu_output_header = os.path.splitext(output_filename)[0] +
'_gpu.hpp'
432 assert input_files, sys.argv
435 cpu_template_functions = []
436 gpu_template_functions = []
437 cpu_address_expressions = []
438 gpu_address_expressions = []
441 canonical_input_files = [input_file[input_file.find(
"/QueryEngine/") + 1:]
for input_file
in input_files]
442 header_file = [
'#include "' + canonical_input_file +
'"' for canonical_input_file
in canonical_input_files]
444 dirname = os.path.dirname(output_filename)
446 if dirname
and not os.path.exists(dirname):
451 if e.errno != errno.EEXIST:
455 for input_file
in input_files:
456 stmts, cpu_fns, gpu_fns, cpu_addr, gpu_addr, cond_funcs =
parse_annotations([input_file])
458 add_stmts.extend(stmts)
459 cpu_template_functions.extend(cpu_fns)
460 gpu_template_functions.extend(gpu_fns)
461 cpu_address_expressions.extend(cpu_addr)
462 gpu_address_expressions.extend(gpu_addr)
463 cond_fns.extend(cond_funcs)
465 header_file = input_file[input_file.find(
"/QueryEngine/") + 1:]
467 add_tf_generated_files = linker.GenerateAddTableFunctionsFiles(dirname, stmts,
469 if add_tf_generated_files.should_generate_files():
470 add_tf_generated_files.generate_files()
473 cpu_generated_files = linker.GenerateTemplateFiles(dirname, cpu_fns,
475 cpu_generated_files.generate_files()
478 gpu_generated_files = linker.GenerateTemplateFiles(dirname, gpu_fns,
480 gpu_generated_files.generate_files()
484 n_add_funcs = linker.GenerateAddTableFunctionsFiles.get_num_generated_files()
485 return [
'table_functions::add_table_functions_%d();' % (i)
for i
in range(n_add_funcs+1) ]
490 This file is generated by %s. Do no edit!
493 #include "QueryEngine/TableFunctions/TableFunctionsFactory.h"
497 Include the UDTF template initiations:
501 // volatile+noinline prevents compiler optimization
505 __attribute__((noinline))
508 #ifndef NO_OPT_ATTRIBUTE
509 #if defined(__clang__)
510 #define NO_OPT_ATTRIBUTE __attribute__((optnone))
512 #elif defined(__GNUC__) || defined(__GNUG__)
513 #define NO_OPT_ATTRIBUTE __attribute__((optimize("O0")))
515 #elif defined(_MSC_VER)
516 #define NO_OPT_ATTRIBUTE
521 #if defined(_MSC_VER)
522 #pragma optimize("", off)
526 NO_OPT_ATTRIBUTE bool avoid_opt_address(void *address) {
527 return address != nullptr;
530 NO_OPT_ATTRIBUTE bool functions_exist() {
538 extern bool g_enable_table_functions;
540 extern bool functions_exist_geo_column();
542 // Each table function initialization module needs its own AddTableFunctions struct definition,
543 // otherwise, when calling an initialization function at runtime, symbol name conflicts will
544 // cause the wrong struct to be instantiated.
546 struct AddTableFunctions {
547 NO_OPT_ATTRIBUTE void operator()() {
551 } // anonymous namespace
553 namespace table_functions {
555 // Each table function initialization module should have its own init flag
556 static std::once_flag init_flag;
558 static const char filename[] = __FILE__;
560 template<const char *filename>
561 void TableFunctionsFactory::init() {
562 if (!g_enable_table_functions) {
566 if (!functions_exist() && !functions_exist_geo_column()) {
571 std::call_once(init_flag, AddTableFunctions{});
574 extern "C" void init_table_functions() {
575 TableFunctionsFactory::init<filename>();
577 #if defined(_MSC_VER)
578 #pragma optimize("", on)
581 // conditional check functions
584 } // namespace table_functions
590 content = content % (
592 '\n'.
join(map(
lambda x:
'#include "%s"' % x, linker.BaseGenerateFiles.generated_header_files())),
593 ' &&\n'.
join(cpu_address_expressions),
598 if not (os.path.exists(output_filename)
and \
599 content == linker.get_existing_file_content(output_filename)):
600 with
open(output_filename,
'w')
as f:
def build_template_function_call
int open(const char *path, int flags, int mode)
def build_preflight_function
def must_emit_preflight_function