OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsFactory.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <string>
20 #include <vector>
21 
24 #include "Shared/toString.h"
26 
27 #define DEFAULT_ROW_MULTIPLIER_SUFFIX "__default_RowMultiplier_"
28 #define DEFAULT_ROW_MULTIPLIER_VALUE 1
29 #define PREFLIGHT_SUFFIX "__preflight"
30 
31 /*
32 
33  TableFunction represents a User-Defined Table Function (UDTF) and it
34  holds the following information:
35 
36  - the name of a table function that corresponds to its
37  implementation. The name must match the following pattern:
38 
39  \w[\w\d_]*([_][_](gpu_|cpu_|)\d*|)
40 
41  where the first part, to the left of the double underscore, is the
42  so-called SQL name of the table function that is used in SQL query
43  context, and the right part determines a particular implementation
44  of the table function. One can define many implementations for the
45  same SQL table function with specializations to
46 
47  + different argument types (overloading support)
48 
49  + different execution context, CPU or GPU. When gpu or cpu is not
50  present, the implementation is assumed to be valid for both CPU
51  and GPU contexts.
52 
53  - the output sizer parameter <sizer> that determines the allocated
54  size of the output columns:
55 
56  + UserSpecifiedRowMultiplier - the allocated column size will be
57 
58  <sizer value> * <size of the input columns>
59 
60  where <sizer value> is the user-specified integer value given
61  in the <sizer> argument position of the table function call.
62 
63  + UserSpecifiedConstantParameter - the allocated column size will
64  be the user-specified integer value given in the <sizer>
65  argument position of the table function call.
66 
67  + Constant - the allocated output column size will be <sizer>.
68 
69  + TableFunctionSpecifiedParameter - The table function
70  implementation must call resize to allocate output column
71  buffers. The <sizer> value is not used.
72 
73  The actual size of the output column is returned by the table
74  function implementation and must be equal to or smaller than the
75  allocated output column size.
76 
77  - the list of input argument types. The input argument type can be a
78  scalar or a column type (that is `Column<scalar>`). Supported
79  scalar types are int8, ..., int64, double, float, bool.
80 
81  - the list of output argument types. The output type of a table
82  function is always some column type. Hence, the output argument
83  types are stored as scalar types that correspond to the data type
84  of the output columns.
85 
86  - a boolean flag specifying whether the table function is a load-time or
87  run-time function. Run-time functions can be overwritten or removed
88  by users. Load-time functions cannot be redefined at run-time.
89 
90  Future notes:
91 
92  - introduce a list of output column names. Currently, the names of
93  output columns match the pattern
94 
95  out\d+
96 
97  but for better UX it would be nice to enable user-defined names
98  for output columns.
99 
100  */
101 
102 namespace table_functions {
103 
106  const size_t val{0};
107 
108  public:
109  std::string toString() const {
110  switch (type) {
112  return "kUserSpecifiedConstantParameter[" + std::to_string(val) + "]";
114  return "kUserSpecifiedRowMultiplier[" + std::to_string(val) + "]";
116  return "kConstant[" + std::to_string(val) + "]";
118  return "kTableFunctionSpecifiedParameter[" + std::to_string(val) + "]";
120  return "kPreFlightParameter[" + std::to_string(val) + "]";
121  }
122  return "";
123  }
124 };
125 
127  public:
128  TableFunction(const std::string& name,
129  const TableFunctionOutputRowSizer output_sizer,
130  const std::vector<ExtArgumentType>& input_args,
131  const std::vector<ExtArgumentType>& output_args,
132  const std::vector<ExtArgumentType>& sql_args,
133  const std::vector<std::map<std::string, std::string>>& annotations,
134  bool is_runtime,
135  bool uses_manager)
136  : name_(name)
137  , output_sizer_(output_sizer)
138  , input_args_(input_args)
139  , output_args_(output_args)
140  , sql_args_(sql_args)
141  , annotations_(annotations)
142  , is_runtime_(is_runtime)
143  , uses_manager_(uses_manager) {}
144 
145  std::vector<ExtArgumentType> getArgs(const bool ensure_column = false) const {
146  std::vector<ExtArgumentType> args;
147  args.insert(args.end(), input_args_.begin(), input_args_.end());
148  if (ensure_column) {
149  // map row dtype to column type
150  std::for_each(output_args_.begin(), output_args_.end(), [&args](auto t) {
151  args.push_back(ext_arg_type_ensure_column(t));
152  });
153  } else {
154  args.insert(args.end(), output_args_.begin(), output_args_.end());
155  }
156  return args;
157  }
158  const std::vector<ExtArgumentType>& getInputArgs() const { return input_args_; }
159  const std::vector<ExtArgumentType>& getOutputArgs() const { return output_args_; }
160  const std::vector<ExtArgumentType>& getSqlArgs() const { return sql_args_; }
161  const std::vector<std::map<std::string, std::string>>& getAnnotations() const {
162  return annotations_;
163  }
165 
166  SQLTypeInfo getInputSQLType(const size_t idx) const;
167  SQLTypeInfo getOutputSQLType(const size_t idx) const;
168 
169  int32_t countScalarArgs() const;
170 
171  size_t getInputsSize() const { return input_args_.size(); }
172  size_t getOutputsSize() const { return output_args_.size(); }
173 
174  std::string getName(const bool drop_suffix = false, const bool lower = false) const;
175 
176  std::string getSignature(const bool include_name, const bool include_output) const;
177 
180  }
181 
182  bool hasPreFlightOutputSizer() const {
184  }
185 
188  }
189 
190  bool hasConstantOutputSize() const {
193  }
194 
197  }
198 
202  }
203 
208  }
209 
215  }
216 
221  }
222 
225  }
226 
228 
229  size_t getOutputRowSizeParameter() const { return output_sizer_.val; }
230 
231  bool containsPreFlightFn() const;
232  std::string getPreFlightFnName() const;
233 
234  const std::map<std::string, std::string> getAnnotations(const size_t idx) const;
235  const std::map<std::string, std::string> getInputAnnotations(
236  const size_t input_arg_idx) const;
237  const std::string getInputAnnotation(const size_t input_arg_idx,
238  const std::string& key,
239  const std::string& default_) const;
240  const std::map<std::string, std::string> getOutputAnnotations(
241  const size_t output_arg_idx) const;
242  const std::string getOutputAnnotation(const size_t output_arg_idx,
243  const std::string& key,
244  const std::string& default_) const;
245  const std::string getFunctionAnnotation(const std::string& key,
246  const std::string& default_) const;
247  const std::map<std::string, std::string> getFunctionAnnotations() const;
248 
249  const std::vector<std::string> getCursorFields(const size_t sql_idx) const;
250  const std::string getArgTypes(const bool use_input_args) const;
251  const std::string getArgNames(const bool use_input_args) const;
252  const std::string getInputArgsDefaultValues() const;
253  std::pair<int32_t, int32_t> getInputID(const size_t idx) const;
254 
255  size_t getSqlOutputRowSizeParameter() const;
256 
257  size_t getOutputRowSizeParameter(const std::vector<SQLTypeInfo>& variant) const {
258  auto val = output_sizer_.val;
260  size_t col_index = 0;
261  size_t func_arg_index = 0;
262  for (const auto& ti : variant) {
263  func_arg_index++;
264  if (ti.is_column_list()) {
265  col_index += ti.get_dimension();
266  } else {
267  col_index++;
268  }
269  if (func_arg_index == val) {
270  val = col_index;
271  break;
272  }
273  }
274  }
275  return val;
276  }
277 
278  inline bool isRuntime() const { return is_runtime_; }
279 
280  inline bool usesManager() const { return uses_manager_; }
281 
282  inline bool isGPU() const {
283  return !usesManager() && (name_.find("_cpu_", name_.find("__")) == std::string::npos);
284  }
285 
286  inline bool isCPU() const {
287  return usesManager() || (name_.find("_gpu_", name_.find("__")) == std::string::npos);
288  }
289 
290  inline bool useDefaultSizer() const {
291  // Functions that use a default sizer value have one less argument
292  return (name_.find("_default_", name_.find("__")) != std::string::npos);
293  }
294 
295  std::string toString() const {
296  auto result = "TableFunction(" + name_ + ", input_args=[";
298  result += "], output_args=[";
300  result += "], sql_args=[";
302  result += "], is_runtime=" + std::string((is_runtime_ ? "true" : "false"));
303  result += ", uses_manager=" + std::string((uses_manager_ ? "true" : "false"));
304  result += ", sizer=" + ::toString(output_sizer_);
305  result += ", annotations=[";
306  for (auto annotation : annotations_) {
307  if (annotation.empty()) {
308  result += "{}, ";
309  } else {
310  result += "{";
311  for (auto it : annotation) {
312  result += ::toString(it.first) + ": " + ::toString(it.second);
313  }
314  result += "}, ";
315  }
316  }
317  result += "])";
318  return result;
319  }
320 
321  std::string toStringSQL() const {
322  auto result = name_ + "(";
324  result += ") -> (";
326  result += ")";
327  return result;
328  }
329 
330  private:
331  const std::string name_;
333  const std::vector<ExtArgumentType> input_args_;
334  const std::vector<ExtArgumentType> output_args_;
335  const std::vector<ExtArgumentType> sql_args_;
336  const std::vector<std::map<std::string, std::string>> annotations_;
337  const bool is_runtime_;
338  const bool uses_manager_;
339 };
340 
342  public:
343  static void add(const std::string& name,
344  const TableFunctionOutputRowSizer sizer,
345  const std::vector<ExtArgumentType>& input_args,
346  const std::vector<ExtArgumentType>& output_args,
347  const std::vector<ExtArgumentType>& sql_args,
348  const std::vector<std::map<std::string, std::string>>& annotations,
349  bool is_runtime = false);
350 
351  static std::vector<TableFunction> get_table_funcs(const std::string& name,
352  const bool is_gpu);
353  static std::vector<TableFunction> get_table_funcs(const std::string& name);
354  static std::vector<TableFunction> get_table_funcs(const bool is_runtime);
355  static std::vector<TableFunction> get_table_funcs();
356  template <const char* filename>
357  static void init();
358  static void reset();
359 
360  private:
361  static std::unordered_map<std::string, TableFunction> functions_;
362 
363  friend class ::ExtensionFunctionsWhitelist;
364 };
365 
366 extern "C" void init_table_functions();
367 
368 } // namespace table_functions
SQLTypeInfo getOutputSQLType(const size_t idx) const
const std::string getOutputAnnotation(const size_t output_arg_idx, const std::string &key, const std::string &default_) const
static std::vector< TableFunction > get_table_funcs()
static void add(const std::string &name, const TableFunctionOutputRowSizer sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime=false)
const TableFunctionOutputRowSizer output_sizer_
const std::map< std::string, std::string > getFunctionAnnotations() const
TableFunction(const std::string &name, const TableFunctionOutputRowSizer output_sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime, bool uses_manager)
const std::vector< std::map< std::string, std::string > > annotations_
const std::vector< ExtArgumentType > output_args_
std::pair< int32_t, int32_t > getInputID(const size_t idx) const
const std::string getFunctionAnnotation(const std::string &key, const std::string &default_) const
std::string to_string(char const *&&v)
ExtArgumentType ext_arg_type_ensure_column(const ExtArgumentType ext_arg_type)
std::string getSignature(const bool include_name, const bool include_output) const
const std::vector< ExtArgumentType > sql_args_
Supported runtime functions management and retrieval.
SQLTypeInfo getInputSQLType(const size_t idx) const
const std::string getArgNames(const bool use_input_args) const
std::vector< ExtArgumentType > getArgs(const bool ensure_column=false) const
const std::map< std::string, std::string > getOutputAnnotations(const size_t output_arg_idx) const
const std::vector< ExtArgumentType > & getOutputArgs() const
const std::string getInputAnnotation(const size_t input_arg_idx, const std::string &key, const std::string &default_) const
std::string getName(const bool drop_suffix=false, const bool lower=false) const
const std::string getArgTypes(const bool use_input_args) const
const std::string getInputArgsDefaultValues() const
const std::vector< std::string > getCursorFields(const size_t sql_idx) const
const std::vector< ExtArgumentType > & getInputArgs() const
const std::map< std::string, std::string > getInputAnnotations(const size_t input_arg_idx) const
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
const std::vector< ExtArgumentType > & getSqlArgs() const
static std::unordered_map< std::string, TableFunction > functions_
const std::vector< ExtArgumentType > input_args_
string name
Definition: setup.in.py:72
void init_table_functions()
size_t getOutputRowSizeParameter(const std::vector< SQLTypeInfo > &variant) const
OutputBufferSizeType getOutputRowSizeType() const
const ExtArgumentType getRet() const
static std::string toStringSQL(const std::vector< ExtArgumentType > &sig_types)
const std::vector< std::map< std::string, std::string > > & getAnnotations() const