OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
OneAPIFunctions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 #ifdef HAVE_ONEDAL
21 
22 #include <cstring>
23 
24 #include "MLModel.h"
27 
28 #include "oneapi/dal/algo/dbscan.hpp"
29 #include "oneapi/dal/algo/decision_forest.hpp"
30 #include "oneapi/dal/algo/kmeans.hpp"
31 #include "oneapi/dal/algo/kmeans_init.hpp"
32 #include "oneapi/dal/algo/linear_regression.hpp"
33 #include "oneapi/dal/algo/pca.hpp"
34 #include "oneapi/dal/array.hpp"
35 #include "oneapi/dal/io/csv.hpp"
36 #include "oneapi/dal/table/common.hpp"
37 #include "oneapi/dal/table/homogen.hpp"
38 #include "oneapi/dal/table/row_accessor.hpp"
39 
40 #include <iostream>
41 
42 namespace dal = oneapi::dal;
43 
44 inline std::ostream& operator<<(std::ostream& stream, const dal::table& table) {
45  if (!table.has_data())
46  return stream;
47 
48  auto arr = dal::row_accessor<const float>(table).pull();
49  const auto x = arr.get_data();
50  const std::int32_t precision =
51  dal::detail::is_floating_point(table.get_metadata().get_data_type(0)) ? 3 : 0;
52 
53  if (table.get_row_count() <= 10) {
54  for (std::int64_t i = 0; i < table.get_row_count(); i++) {
55  for (std::int64_t j = 0; j < table.get_column_count(); j++) {
56  stream << std::setw(10) << std::setiosflags(std::ios::fixed)
57  << std::setprecision(precision) << x[i * table.get_column_count() + j];
58  }
59  stream << std::endl;
60  }
61  } else {
62  for (std::int64_t i = 0; i < 5; i++) {
63  for (std::int64_t j = 0; j < table.get_column_count(); j++) {
64  stream << std::setw(10) << std::setiosflags(std::ios::fixed)
65  << std::setprecision(precision) << x[i * table.get_column_count() + j];
66  }
67  stream << std::endl;
68  }
69  stream << "..." << (table.get_row_count() - 10) << " lines skipped..." << std::endl;
70  for (std::int64_t i = table.get_row_count() - 5; i < table.get_row_count(); i++) {
71  for (std::int64_t j = 0; j < table.get_column_count(); j++) {
72  stream << std::setw(10) << std::setiosflags(std::ios::fixed)
73  << std::setprecision(precision) << x[i * table.get_column_count() + j];
74  }
75  stream << std::endl;
76  }
77  }
78  return stream;
79 }
80 
81 template <typename T>
82 const dal::table prepare_oneapi_data_table(const T* data, const int64_t num_rows) {
83  auto data_arr = dal::array<T>::empty(num_rows);
84  std::copy(data, data + num_rows, data_arr.get_mutable_data());
85  const auto data_table =
86  dal::homogen_table::wrap(data_arr, num_rows, 1, dal::data_layout::column_major);
87  return data_table;
88 }
89 
90 template <typename T>
91 const dal::table prepare_oneapi_data_table(const std::vector<const T*>& data,
92  const int64_t num_rows) {
93  const size_t num_columns = data.size();
94  auto data_arr = dal::array<T>::empty(num_rows * num_columns);
95  T* raw_ptr = data_arr.get_mutable_data();
96  for (size_t i = 0; i < num_columns; ++i) {
97  const T* column_ptr = data[i];
98  for (int64_t j = 0; j < num_rows; ++j) {
99  raw_ptr[j * num_columns + i] = column_ptr[j];
100  }
101  }
102  return dal::homogen_table::wrap(data_arr, num_rows, num_columns);
103 }
104 
105 template <typename T>
106 const dal::table prepare_oneapi_pivoted_data_table(const T* data,
107  const int64_t num_elems) {
108  auto data_arr = dal::array<T>::empty(num_elems);
109  std::copy(data, data + num_elems, data_arr.get_mutable_data());
110  return dal::homogen_table::wrap(data_arr, 1, num_elems);
111 }
112 
113 template <typename T>
114 auto init_centroids_oneapi(const KMeansInitStrategy init_type,
115  const int num_clusters,
116  const dal::table features_table) {
117  switch (init_type) {
120  const auto kmeans_init_desc =
121  dal::kmeans_init::descriptor<T, dal::kmeans_init::method::dense>()
122  .set_cluster_count(num_clusters);
123  return dal::compute(kmeans_init_desc, features_table);
124  }
126  const auto kmeans_init_desc =
127  dal::kmeans_init::descriptor<T, dal::kmeans_init::method::random_dense>()
128  .set_cluster_count(num_clusters);
129  return dal::compute(kmeans_init_desc, features_table);
130  }
132  const auto kmeans_init_desc =
133  dal::kmeans_init::descriptor<T, dal::kmeans_init::method::parallel_plus_dense>()
134  .set_cluster_count(num_clusters);
135  return dal::compute(kmeans_init_desc, features_table);
136  }
137  default: {
138  throw std::runtime_error(
139  "Invalid Kmeans cluster centroid init type. Was expecting one of "
140  "DETERMINISTIC, RANDOM, PLUS_PLUS.");
141  }
142  }
143 }
144 
145 template <typename T>
146 NEVER_INLINE HOST int32_t
147 onedal_oneapi_kmeans_impl(const std::vector<const T*>& input_features,
148  int32_t* output_clusters,
149  const int64_t num_rows,
150  const int num_clusters,
151  const int num_iterations,
152  const KMeansInitStrategy kmeans_init_type) {
153  try {
154  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
155  const auto result_init =
156  init_centroids_oneapi<T>(kmeans_init_type, num_clusters, features_table);
157 
158  const auto kmeans_desc = dal::kmeans::descriptor<>()
159  .set_cluster_count(num_clusters)
160  .set_max_iteration_count(num_iterations)
161  .set_accuracy_threshold(0.001);
162  const dal::kmeans::train_result result_train =
163  dal::train(kmeans_desc, features_table, result_init.get_centroids());
164  auto arr = dal::row_accessor<const int32_t>(result_train.get_responses()).pull();
165  const auto x = arr.get_data();
166  std::memcpy(output_clusters, x, num_rows * sizeof(int32_t));
167  } catch (const std::exception& e) {
168  throw std::runtime_error(e.what());
169  }
170 
171  return num_rows;
172 }
173 
174 template <typename T>
175 NEVER_INLINE HOST int32_t
176 onedal_oneapi_dbscan_impl(const std::vector<const T*>& input_features,
177  int32_t* output_clusters,
178  const int64_t num_rows,
179  const double epsilon,
180  const int32_t min_observations) {
181  try {
182  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
183  auto dbscan_desc = dal::dbscan::descriptor<>(epsilon, min_observations);
184  dbscan_desc.set_result_options(dal::dbscan::result_options::responses);
185  const auto result_compute = dal::compute(dbscan_desc, features_table);
186 
187  auto arr = dal::row_accessor<const int32_t>(result_compute.get_responses()).pull();
188  const auto x = arr.get_data();
189  std::memcpy(output_clusters, x, num_rows * sizeof(int32_t));
190  } catch (const std::exception& e) {
191  throw std::runtime_error(e.what());
192  }
193  return num_rows;
194 }
195 
196 template <typename T>
197 NEVER_INLINE HOST std::pair<std::vector<std::vector<T>>, std::vector<T>>
198 onedal_oneapi_pca_impl(const std::vector<const T*>& input_features,
199  const int64_t num_rows) {
200  try {
201  // TODO: Do we want to parameterize PCA to allow using SVD other than default COV?
202  const auto pca_desc =
203  dal::pca::descriptor<T, dal::pca::method::cov>().set_deterministic(true);
204  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
205 
206  const auto result_train = dal::train(pca_desc, features_table);
207 
208  auto eigenvectors_table_asarray =
209  dal::row_accessor<const T>(result_train.get_eigenvectors()).pull();
210  const auto eigenvectors_data = eigenvectors_table_asarray.get_data();
211  const int64_t num_dims = result_train.get_eigenvectors().get_row_count();
212  std::vector<std::vector<T>> eigenvectors(num_dims, std::vector<T>(num_dims));
213  for (std::int64_t i = 0; i < num_dims; i++) {
214  for (std::int64_t j = 0; j < num_dims; j++) {
215  eigenvectors[i][j] = eigenvectors_data[i * num_dims + j];
216  }
217  }
218 
219  auto eigenvalues_table_asarray =
220  dal::row_accessor<const T>(result_train.get_eigenvalues()).pull();
221  const auto eigenvalues_data = eigenvalues_table_asarray.get_data();
222  std::vector<T> eigenvalues(eigenvalues_data, eigenvalues_data + num_dims);
223 
224  return std::make_pair(eigenvectors, eigenvalues);
225  } catch (std::exception& e) {
226  throw std::runtime_error(e.what());
227  }
228 }
229 
230 template <typename T>
231 int32_t extract_model_coefs(const dal::table& coefs_table,
232  int64_t* coef_idxs,
233  double* coefs) {
234  const int64_t num_coefs = coefs_table.get_column_count();
235 
236  auto coefs_table_data = dal::row_accessor<const float>(coefs_table).pull().get_data();
237  for (int64_t coef_idx = 0; coef_idx < num_coefs; ++coef_idx) {
238  coef_idxs[coef_idx] = coef_idx;
239  coefs[coef_idx] = coefs_table_data[coef_idx];
240  }
241 
242  return num_coefs;
243 }
244 
245 template <typename T>
246 NEVER_INLINE HOST int32_t
247 onedal_oneapi_linear_reg_fit_impl(const T* input_labels,
248  const std::vector<const T*>& input_features,
249  int64_t* output_coef_idxs,
250  double* output_coefs,
251  const int64_t num_rows) {
252  try {
253  const auto labels_table = prepare_oneapi_data_table(input_labels, num_rows);
254  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
255 
256  const auto lr_descriptor = dal::linear_regression::descriptor<>().set_result_options(
257  dal::linear_regression::result_options::coefficients |
258  dal::linear_regression::result_options::intercept);
259  const auto train_result = dal::train(lr_descriptor, features_table, labels_table);
260 
261  return extract_model_coefs<T>(train_result.get_model().get_packed_coefficients(),
262  output_coef_idxs,
263  output_coefs);
264  } catch (std::exception& e) {
265  throw std::runtime_error(e.what());
266  }
267 }
268 
269 template <typename T>
270 NEVER_INLINE HOST int32_t
271 onedal_oneapi_linear_reg_predict_impl(const std::shared_ptr<LinearRegressionModel>& model,
272  const std::vector<const T*>& input_features,
273  T* output_predictions,
274  const int64_t num_rows) {
275  CHECK(model->getModelType() == MLModelType::LINEAR_REG);
276  try {
277  if (model->getNumFeatures() != static_cast<int64_t>(input_features.size())) {
278  throw std::runtime_error(
279  "Number of model coefficients does not match number of input features.");
280  }
281 
282  const auto model_coefs = prepare_oneapi_pivoted_data_table(model->getCoefs().data(),
283  input_features.size() + 1);
284  auto lr_model = dal::linear_regression::model();
285  lr_model.set_packed_coefficients(model_coefs);
286 
287  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
288  const auto lr_descriptor = dal::linear_regression::descriptor<>().set_result_options(
289  dal::linear_regression::result_options::coefficients |
290  dal::linear_regression::result_options::intercept);
291  const auto test_result = dal::infer(lr_descriptor, features_table, lr_model);
292 
293  // For some reason if we construct the dal::row_accessor separately to then copy the
294  // memory later, the underlying array's destructor gets called and its memory is
295  // freed, so we construct it in-place instead.
296  std::memcpy(output_predictions,
297  dal::row_accessor<const T>(test_result.get_responses()).pull().get_data(),
298  num_rows * sizeof(T));
299  return num_rows;
300  } catch (std::exception& e) {
301  throw std::runtime_error(e.what());
302  }
303 }
304 
305 inline dal::decision_forest::variable_importance_mode
306 get_oneapi_var_importance_metric_type(const VarImportanceMetric var_importance_metric) {
307  switch (var_importance_metric) {
309  return dal::decision_forest::variable_importance_mode::none;
312  return dal::decision_forest::variable_importance_mode::mdi;
314  return dal::decision_forest::variable_importance_mode::mda_raw;
316  return dal::decision_forest::variable_importance_mode::mda_scaled;
317  default: {
318  std::ostringstream oss;
319  oss << "Invalid variable importance mode type. "
320  << "Was expecting one of DEFAULT, NONE, MDI, MDA, or MDA_SCALED.";
321  throw std::runtime_error(oss.str());
322  }
323  }
324 }
325 
326 template <typename T, typename Method>
327 NEVER_INLINE HOST void onedal_oneapi_random_forest_reg_fit_impl(
328  const std::string& model_name,
329  const T* input_labels,
330  const std::vector<const T*>& input_features,
331  const std::string& model_metadata,
332  const std::vector<std::vector<std::string>>& cat_feature_keys,
333  const int64_t num_rows,
334  const int64_t num_trees,
335  const double obs_per_tree_fraction,
336  const int64_t max_tree_depth,
337  const int64_t features_per_node,
338  const double impurity_threshold,
339  const bool bootstrap,
340  const int64_t min_obs_per_leaf_node,
341  const int64_t min_obs_per_split_node,
342  const double min_weight_fraction_in_leaf_node,
343  const double min_impurity_decrease_in_split_node,
344  const int64_t max_leaf_nodes,
345  const VarImportanceMetric var_importance_metric) {
346  constexpr bool compute_out_of_bag_error{false};
347  try {
348  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
349  const auto labels_table = prepare_oneapi_data_table(input_labels, num_rows);
350 
351  const auto error_metric =
352  compute_out_of_bag_error
353  ? dal::decision_forest::error_metric_mode::out_of_bag_error
354  : dal::decision_forest::error_metric_mode::none;
355 
356  const auto importance_metric =
357  get_oneapi_var_importance_metric_type(var_importance_metric);
358 
359  auto df_desc =
360  dal::decision_forest::descriptor<T,
361  Method,
362  dal::decision_forest::task::regression>{}
363  .set_tree_count(num_trees)
364  .set_observations_per_tree_fraction(obs_per_tree_fraction)
365  .set_max_tree_depth(max_tree_depth)
366  .set_features_per_node(features_per_node)
367  .set_impurity_threshold(impurity_threshold)
368  .set_bootstrap(bootstrap)
369  .set_min_observations_in_leaf_node(min_obs_per_leaf_node)
370  .set_min_observations_in_split_node(min_obs_per_split_node)
371  .set_min_weight_fraction_in_leaf_node(min_weight_fraction_in_leaf_node)
372  .set_min_impurity_decrease_in_split_node(min_impurity_decrease_in_split_node)
373  .set_max_leaf_nodes(max_leaf_nodes)
374  .set_error_metric_mode(error_metric)
375  .set_variable_importance_mode(importance_metric);
376 
377  const auto result_train = dal::train(df_desc, features_table, labels_table);
378 
379  const size_t num_features = input_features.size();
380  std::vector<double> variable_importance(
381  var_importance_metric != VarImportanceMetric::NONE ? num_features : 0);
382  if (var_importance_metric != VarImportanceMetric::NONE) {
383  auto var_importance_data =
384  dal::row_accessor<const T>(result_train.get_var_importance()).pull().get_data();
385  for (size_t feature_idx = 0; feature_idx < num_features; ++feature_idx) {
386  variable_importance[feature_idx] = var_importance_data[feature_idx];
387  }
388  }
389 
390  double out_of_bag_error{0};
391  if (compute_out_of_bag_error) {
392  auto oob_error_data =
393  dal::row_accessor<const T>(result_train.get_oob_err()).pull().get_data();
394  out_of_bag_error = oob_error_data[0];
395  }
396 
397  auto abstract_model = std::make_shared<OneAPIRandomForestRegressionModel>(
398  std::make_shared<df::model<df::task::regression>>(result_train.get_model()),
399  model_metadata,
400  cat_feature_keys,
401  variable_importance,
402  out_of_bag_error,
403  num_features);
404  g_ml_models.addModel(model_name, abstract_model);
405  } catch (std::exception& e) {
406  throw std::runtime_error(e.what());
407  }
408 }
409 
410 template <typename T>
411 NEVER_INLINE HOST int32_t onedal_oneapi_random_forest_reg_predict_impl(
412  const std::shared_ptr<OneAPIRandomForestRegressionModel>& model,
413  const std::vector<const T*>& input_features,
414  T* output_predictions,
415  const int64_t num_rows) {
416  CHECK(model->getModelType() == MLModelType::RANDOM_FOREST_REG);
417  try {
418  if (model->getNumFeatures() != static_cast<int64_t>(input_features.size())) {
419  throw std::runtime_error("Number of provided features does not match model.");
420  }
421  const auto features_table = prepare_oneapi_data_table(input_features, num_rows);
422 
423  // oneAPI's ::infer method expects a decision_forest::descriptor argument as input.
424  // The descriptor seems to have no effect on how the pre-trained model is executed
425  // though, so we pass a dummy descriptor rather than storing the descriptor originally
426  // used to train the model unnecessarily
427  auto dummy_desc =
428  dal::decision_forest::descriptor<T,
429  dal::decision_forest::method::hist,
430  dal::decision_forest::task::regression>{};
431 
432  const auto result_infer =
433  dal::infer(dummy_desc, *(model->getModel()), features_table);
434 
435  auto result_table_data =
436  dal::row_accessor<const T>(result_infer.get_responses()).pull().get_data();
437  std::memcpy(output_predictions, result_table_data, num_rows * sizeof(T));
438 
439  return num_rows;
440  } catch (std::exception& e) {
441  throw std::runtime_error(e.what());
442  }
443 }
444 
445 #endif // #ifdef HAVE_ONEDAL
446 #endif // #ifndef __CUDACC__
VarImportanceMetric
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
KMeansInitStrategy
#define HOST
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void addModel(const std::string &model_name, std::shared_ptr< AbstractMLModel > model)
Definition: MLModel.h:38
MLModelMap g_ml_models
Definition: MLModel.h:125
#define NEVER_INLINE
#define CHECK(condition)
Definition: Logger.h:291