OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TestTorchTableFunctions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <cstdio> // use fprintf because cout had weird concurrency issues
18 #include <cstdlib>
19 #include <ctime>
20 
22 
23 #undef LOG
24 #undef CHECK
25 #undef LOG_IF
26 #undef VLOG
27 #undef CHECK_OP
28 #undef CHECK_EQ
29 #undef CHECK_LT
30 #undef CHECK_LE
31 #undef CHECK_GT
32 #undef CHECK_GE
33 #undef CHECK_NE
34 #undef GLOBAL
35 #include "torch/script.h"
36 #include "torch/torch.h"
37 
38 #ifndef __CUDACC__
39 
40 torch::Device _test_torch_tfs_device = torch::kCPU;
41 
44  Column<int64_t>& input,
45  Column<int64_t>& output) {
46  return 0;
47 }
48 
49 template <typename T>
50 TEMPLATE_NOINLINE int32_t
52  const Column<T>& input,
53  Column<T>& output) {
54  return 0;
55 }
56 
57 template TEMPLATE_NOINLINE int32_t
59  const Column<int64_t>& input,
60  Column<int64_t>& output);
61 template TEMPLATE_NOINLINE int32_t
63  const Column<double>& input,
64  Column<double>& output);
65 
66 /* Generates a column of random values with @num_elements rows, using PyTorch's default
67  * randn generator, which samples a normal distribution with mean 0 and variance 1. Is
68  * used to generate data to be fed to the model implemented in tf_test_torch_regression.
69  */
71  int32_t num_elements,
72  Column<double>& output) {
73  mgr.set_output_row_size(num_elements);
74  torch::Tensor random = torch::randn({num_elements}, at::dtype(at::kDouble));
75  random = random.unsqueeze(1);
76  double* data_ptr = (double*)random.data_ptr();
77 
78  for (int32_t i = 0; i < num_elements; ++i) {
79  output[i] = *data_ptr++;
80  }
81 
82  return num_elements;
83 }
84 
86  int32_t batch_size) {
87  int32_t poly_degree = cols.numCols();
88  torch::Tensor output = torch::empty({batch_size, poly_degree}, {torch::kCPU});
89 
90  // build a tensor of (batch_size, poly_degree) dimensions, where each row is sampled
91  // randomly from the input columns formated as (x, x^2 ..., x^poly_degree)
92  for (int i = 0; i < batch_size; i++) {
93  int32_t idx = rand() % cols.size();
94  for (int j = 0; j < poly_degree; j++) {
95  output[i][j] = cols[j][idx];
96  }
97  }
98 
99  return output.to(_test_torch_tfs_device);
100 }
101 
102 // Approximated function.
103 torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target) {
104  return x.mm(W_target) + b_target.item();
105 }
106 
107 // Creates a string description of a polynomial.
108 std::string poly_desc(torch::Tensor W, torch::Tensor b) {
109  auto size = W.size(0);
110  std::ostringstream stream;
111 
112  if (W.scalar_type() != c10::ScalarType::Float ||
113  b.scalar_type() != c10::ScalarType::Float) {
114  throw std::runtime_error(
115  "Attempted to print polynomial with non-float coefficients!");
116  }
117 
118  stream << "y = ";
119  for (int64_t i = 0; i < size; ++i)
120  stream << W[i].item<float>() << " x^" << size - i << " ";
121  stream << "+ " << b[0].item<float>();
122  return stream.str();
123 }
124 
125 // Builds a batch i.e. (x, f(x)) pair.
126 std::pair<torch::Tensor, torch::Tensor> get_batch(const ColumnList<double>& cols,
127  torch::Tensor W_target,
128  torch::Tensor b_target,
129  int32_t batch_size) {
130  auto x = make_features_from_columns(cols, batch_size);
131  auto y = f(x, W_target, b_target);
132  return std::make_pair(x, y);
133 }
134 
135 /* This code is very heavily based on (in large part copy-pasted) from PyTorch's official
136  * C++ API examples:
137  * https://github.com/pytorch/examples/tree/main/cpp/regression. It trains
138  * a single-layer Neural Network to fit a @poly_degree degree polynomial, using
139  * @batch_size, and optionally using CUDA-powered libtorch, if available.
140  * It optionally saves the model as a torchscript file with name @model_filename.
141  * The code has been modified to generate feature data through LibTorch, store it in a
142  * heavydb table, then pull data from that table to feed the model. It is very simplistic
143  * and naive, particularly in how data is sampled from the generated data, but as a
144  * proof-of-concept/example of how LibTorch can be used from within heavydb, it works.*/
145 EXTENSION_NOINLINE int32_t
147  const ColumnList<double>& features,
148  int32_t batch_size,
149  bool use_gpu,
150  bool save_model,
151  const TextEncodingNone& model_filename,
152  Column<double>& output) {
153  int32_t poly_degree = features.numCols();
154  // we output target and trained coefficients + bias
155  int32_t output_size = (poly_degree + 1) * 2;
156  mgr.set_output_row_size(output_size);
157  std::srand(std::time(nullptr)); // not ideal RNG, but fine for test purpooses
158 #ifdef HAVE_CUDA_TORCH
159  if (torch::cuda::is_available() && use_gpu) {
160  _test_torch_tfs_device = torch::kCUDA;
161  }
162 #endif
163 
164  auto W_target = torch::randn({poly_degree, 1}, at::device(_test_torch_tfs_device)) * 5;
165  auto b_target = torch::randn({1}, at::device(_test_torch_tfs_device)) * 5;
166 
167  // Define the model and optimizer
168  auto fc = torch::nn::Linear(W_target.size(0), 1);
169  fc->to(_test_torch_tfs_device);
170  torch::optim::SGD optim(fc->parameters(), .1);
171 
172  float loss = 0;
173  int64_t batch_idx = 0;
174 
175  while (++batch_idx) {
176  // Get data
177  torch::Tensor batch_x, batch_y;
178  std::tie(batch_x, batch_y) = get_batch(features, W_target, b_target, batch_size);
179 
180  // Reset gradients
181  optim.zero_grad();
182 
183  // Forward pass
184  auto output = torch::smooth_l1_loss(fc(batch_x), batch_y);
185  loss = output.item<float>();
186 
187  // Backward pass
188  output.backward();
189 
190  // Apply gradients
191  optim.step();
192 
193  // Stop criterion
194  if (loss < 1e-3f)
195  break;
196  }
197 
198  if (save_model) {
199  torch::save(fc, model_filename.getString());
200  }
201 
202  // output column with target + trained coefficients ordered by degree, then bias
203  torch::Tensor output_coefficients = fc->weight.view({-1}).cpu();
204  torch::Tensor goal_coefficients = W_target.view({-1}).cpu();
205  int32_t out_column_idx, input_idx;
206  for (out_column_idx = 0, input_idx = 0; input_idx < output_coefficients.size(0);
207  ++input_idx) {
208  output[out_column_idx++] = output_coefficients[input_idx].item<float>();
209  output[out_column_idx++] = goal_coefficients[input_idx].item<float>();
210  }
211  output[out_column_idx++] = fc->bias[0].item<float>();
212  output[out_column_idx] = b_target[0].item<float>();
213 
214  std::fprintf(stdout, "Loss: %lf after %ld batches\n", loss, batch_idx);
215  std::fprintf(stdout,
216  "==> Learned function:\t%s\n",
217  poly_desc(output_coefficients, fc->bias).c_str());
218  std::fprintf(stdout,
219  "==> Actual function:\t%s\n",
220  poly_desc(W_target.view({-1}).cpu(), b_target).c_str());
221 
222  return output_size;
223 }
224 
225 EXTENSION_NOINLINE int32_t
227  const TextEncodingNone& model_filename,
228  Column<bool>& output) {
229  mgr.set_output_row_size(1);
230  torch::jit::script::Module module;
231  try {
232  module = torch::jit::load(model_filename.getString());
233  } catch (const std::exception& e) {
234  return mgr.ERROR_MESSAGE("Error loading torchscript model: " + e.what());
235  }
236 
237  output[0] = true;
238  return 1;
239 }
240 
241 #endif // #ifndef __CUDACC__
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
#define EXTENSION_NOINLINE
Definition: heavydbTypes.h:58
void load(Archive &ar, ExplainedQueryHint &query_hint, const unsigned int version)
std::string getString() const
Definition: heavydbTypes.h:641
DEVICE int64_t numCols() const
void save(Archive &ar, const ExplainedQueryHint &query_hint, const unsigned int version)
torch::Tensor make_features_from_columns(const ColumnList< double > &cols, int32_t batch_size)
EXTENSION_NOINLINE int32_t tf_test_torch_load_model(TableFunctionManager &mgr, const TextEncodingNone &model_filename, Column< bool > &output)
TEMPLATE_NOINLINE int32_t tf_test_runtime_torch_template__template(TableFunctionManager &mgr, const Column< T > &input, Column< T > &output)
std::pair< torch::Tensor, torch::Tensor > get_batch(const ColumnList< double > &cols, torch::Tensor W_target, torch::Tensor b_target, int32_t batch_size)
EXTENSION_NOINLINE int32_t tf_test_torch_regression(TableFunctionManager &mgr, const ColumnList< double > &features, int32_t batch_size, bool use_gpu, bool save_model, const TextEncodingNone &model_filename, Column< double > &output)
std::string poly_desc(torch::Tensor W, torch::Tensor b)
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
DEVICE int64_t size() const
EXTENSION_NOINLINE int32_t tf_test_torch_generate_random_column(TableFunctionManager &mgr, int32_t num_elements, Column< double > &output)
torch::Device _test_torch_tfs_device
EXTENSION_NOINLINE int32_t tf_test_runtime_torch(TableFunctionManager &mgr, Column< int64_t > &input, Column< int64_t > &output)
#define TEMPLATE_NOINLINE
Definition: heavydbTypes.h:60