OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CrossSectionTableFunctions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
23 #include "Shared/measure.h"
24 #include "ThirdParty/kdtree-cpp/kdtree.hpp"
25 
26 namespace CrossSectionTableFunctions {
27 
28 //
29 // 1D implementation
30 //
31 
32 template <typename TLL, typename TV>
34  const Column<TLL>& lon,
35  const Column<TLL>& lat,
36  const Column<TV>& values,
37  const TLL line_x1,
38  const TLL line_y1,
39  const TLL line_x2,
40  const TLL line_y2,
41  const int32_t num_points,
43  // supported types
44  static_assert(std::is_same_v<double, TLL>);
45  static_assert(std::is_same_v<float, TV> || std::is_same_v<double, TV>);
46 
47  // validate sizes
48  CHECK_EQ(lon.size(), lat.size()) << "input column size mismatch";
49  CHECK_EQ(lon.size(), values.size()) << "input column size mismatch";
50 
51  // enforce parser validation
52  CHECK_GT(num_points, 1) << "num_points must be > 1";
53 
54  // any raster data?
55  if (values.size() == 0) {
56  // return LINESTRING EMPTY
58  mgr.set_output_row_size(1);
59  return 1;
60  }
61 
62  // report values (verbose only)
63  VLOG(2) << "tf_cross_section_1d: Input raster data has " << values.size()
64  << " values, min "
65  << *std::min_element(values.getPtr(), values.getPtr() + values.size())
66  << ", max "
67  << *std::max_element(values.getPtr(), values.getPtr() + values.size());
68 
69  // buckets
70  std::vector<TV> bucket_values(num_points, static_cast<TV>(0));
71  std::vector<int32_t> bucket_counts(num_points, 0);
72 
73  // start timer
74  auto scatter_timer = timer_start();
75 
76  // scatter points into buckets
77  // help from http://www.sunshine2k.de/coding/java/PointOnLine/PointOnLine.html
78  const TLL e1x = line_x2 - line_x1;
79  const TLL e1y = line_y2 - line_y1;
80  const TLL len2_e1 = (e1x * e1x) + (e1y * e1y);
81  int32_t bucket_misses = 0;
82  for (int i = 0; i < lon.size(); i++) {
83  // project point to line
84  const TLL e2x = lon[i] - line_x1;
85  const TLL e2y = lat[i] - line_y1;
86  const TLL dp_e1e2 = (e1x * e2x) + (e1y * e2y);
87  const TLL normalized_distance = dp_e1e2 / len2_e1;
88  // find bucket
89  auto const bucket_index = static_cast<int32_t>(normalized_distance * num_points);
90  // add to bucket?
91  if (bucket_index >= 0 && bucket_index < num_points) {
92  bucket_values[bucket_index] += values[i];
93  bucket_counts[bucket_index]++;
94  } else {
95  bucket_misses++;
96  }
97  }
98 
99  // stop timer
100  LOG(INFO) << "tf_cross_section_1d: scatter took " << timer_stop(scatter_timer) << "ms";
101 
102  // warn if any misses
103  LOG_IF(WARNING, bucket_misses > 0)
104  << "tf_cross_section_1d: had " << bucket_misses << " bucket misses";
105 
106  // size outputs
107  mgr.set_output_array_values_total_number(0, num_points * 2);
108  mgr.set_output_row_size(1);
109 
110  // generate LINESTRING
111  std::vector<double> coords;
112  coords.reserve(num_points * 2);
113  for (int i = 0; i < num_points; i++) {
114  auto const x = static_cast<TV>(i) / static_cast<TV>(num_points - 1);
115  auto const y = bucket_counts[i]
116  ? (bucket_values[i] / static_cast<TV>(bucket_counts[i]))
117  : static_cast<TV>(0);
118  coords.push_back(x);
119  coords.push_back(y);
120  }
121 
122  // set output linestring
123  line[0].fromCoords(coords);
124 
125  // done
126  return 1;
127 }
128 
129 //
130 // 2D implementation
131 //
132 
133 template <typename TLL, typename TY, typename TV>
135  const Column<TLL>& lon,
136  const Column<TLL>& lat,
137  const Column<TY>& y_axis,
138  const Column<TV>& values,
139  const TLL line_x1,
140  const TLL line_y1,
141  const TLL line_x2,
142  const TLL line_y2,
143  const int32_t num_points_x,
144  const int32_t num_points_y,
145  const TLL dwithin_distance,
146  Column<TLL>& x,
147  Column<TLL>& y,
148  Column<TV>& color) {
149  // supported types
150  static_assert(std::is_same_v<double, TLL>);
151  static_assert(std::is_same_v<float, TY> || std::is_same_v<double, TY>);
152  static_assert(std::is_same_v<float, TV> || std::is_same_v<double, TV>);
153 
154  // validate sizes
155  CHECK_EQ(lon.size(), lat.size()) << "input column size mismatch";
156  CHECK_EQ(lon.size(), y_axis.size()) << "input column size mismatch";
157  CHECK_EQ(lon.size(), values.size()) << "input column size mismatch";
158 
159  // enforce parser validation
160  CHECK_GT(num_points_x, 1) << "num_points_x must be > 1";
161  CHECK_GT(num_points_y, 1) << "num_points_y must be > 1";
162 
163  static constexpr int kNumNearest = 3;
164 
165  // enough raster data?
166  if (values.size() < kNumNearest) {
170  mgr.set_output_row_size(0);
171  return 0;
172  }
173 
174  // report values (verbose only)
175  VLOG(2) << "tf_cross_section_2d: Input raster data has " << values.size()
176  << " values, min "
177  << *std::min_element(values.getPtr(), values.getPtr() + values.size())
178  << ", max "
179  << *std::max_element(values.getPtr(), values.getPtr() + values.size());
180 
181  //
182  // build points for tree and capture y-axis range
183  //
184 
185  auto build_nodes_timer = timer_start();
186 
187  auto y_min = std::numeric_limits<double>::max();
188  auto y_max = -std::numeric_limits<double>::max();
189 
190  Kdtree::KdNodeVector nodes(lon.size());
191 
192  for (int i = 0; i < lon.size(); i++) {
193  auto& n = nodes[i];
194  auto const dlon = static_cast<double>(lon[i]);
195  auto const dlat = static_cast<double>(lat[i]);
196  auto const dy = static_cast<double>(y_axis[i]);
197  n.point = {dlon, dlat, dy};
198  n.data = nullptr;
199  n.index = i;
200  y_min = std::min(dy, y_min);
201  y_max = std::max(dy, y_max);
202  }
203 
204  LOG(INFO) << "tf_cross_section_2d: build nodes took " << timer_stop(build_nodes_timer)
205  << "ms";
206 
207  //
208  // build tree
209  //
210 
211  auto build_tree_timer = timer_start();
212 
213  Kdtree::KdTree tree(&nodes);
214 
215  LOG(INFO) << "tf_cross_section_2d: build tree took " << timer_stop(build_tree_timer)
216  << "ms";
217 
218  //
219  // size outputs
220  //
221 
222  auto const num_output_rows = num_points_x * num_points_y;
223 
224  mgr.set_output_item_values_total_number(0, num_output_rows);
225  mgr.set_output_item_values_total_number(1, num_output_rows);
226  mgr.set_output_item_values_total_number(2, num_output_rows);
227  mgr.set_output_row_size(num_output_rows);
228 
229  //
230  // compute mesh
231  //
232 
233  auto compute_mesh_timer = timer_start();
234 
235  auto const max_dist2 = dwithin_distance * dwithin_distance;
236 
237  static constexpr double kMinDistance2 = 0.0001;
238 
240  tbb::blocked_range<int32_t>(0, num_output_rows),
241  [&](const tbb::blocked_range<int32_t>& r) {
242  auto const start_idx = r.begin();
243  auto const end_idx = r.end();
244  for (int32_t output_index = start_idx; output_index < end_idx; output_index++) {
245  // discrete mesh-space point
246  auto const xi = output_index % num_points_x;
247  auto const yi = output_index / num_points_x;
248 
249  // normalized mesh-space point
250  auto const tx = static_cast<double>(xi) / static_cast<double>(num_points_x - 1);
251  auto const ty = static_cast<double>(yi) / static_cast<double>(num_points_y - 1);
252 
253  // world-space point
254  std::vector<double> p{(tx * (line_x2 - line_x1)) + line_x1,
255  (tx * (line_y2 - line_y1)) + line_y1,
256  (ty * (y_max - y_min)) + y_min};
257 
258  // get nearest N points from tree
259  Kdtree::KdNodeVector result;
260  tree.k_nearest_neighbors(p, kNumNearest, &result);
261  CHECK_EQ(result.size(), kNumNearest);
262 
263  // are the points close enough in lon/lat space?
264  int valid_indices[kNumNearest];
265  double valid_weights[kNumNearest];
266  double total_weight = 0.0;
267  size_t num_valid = 0u;
268  for (size_t i = 0; i < kNumNearest; i++) {
269  auto const dx = result[i].point[0] - p[0];
270  auto const dy = result[i].point[1] - p[1];
271  auto const dist2 = (dx * dx) + (dy * dy);
272  // discard points that are more than one mesh grid cell away
273  if (dist2 < max_dist2) {
274  // weights are inverse squared distance
275  auto const dz = result[i].point[2] - p[2];
276  auto const len2 = dist2 + (dz * dz);
277  auto const weight = 1.0 / std::max(len2, kMinDistance2);
278  // use this point
279  valid_indices[num_valid] = i;
280  valid_weights[num_valid] = weight;
281  total_weight += weight;
282  num_valid++;
283  }
284  }
285 
286  // compute final weighted value
287  TV col;
288  if constexpr (std::is_same_v<double, TV>) {
289  col = static_cast<TV>(NULL_DOUBLE);
290  } else {
291  col = static_cast<TV>(NULL_FLOAT);
292  }
293  if (num_valid > 0u && total_weight > 0.0) {
294  col = static_cast<TV>(0.0);
295  const double weight_multiplier = 1.0 / total_weight;
296  for (size_t i = 0; i < num_valid; i++) {
297  auto const index = valid_indices[i];
298  auto const weight = valid_weights[i];
299  auto const value = values[result[index].index];
300  col += static_cast<TV>(value * weight * weight_multiplier);
301  }
302  }
303 
304  // output the screen-space point and color
305  x[output_index] = tx;
306  y[output_index] = p[2];
307  color[output_index] = col;
308  }
309  });
310 
311  LOG(INFO) << "tf_cross_section_2d: compute mesh took " << timer_stop(compute_mesh_timer)
312  << "ms";
313 
314  // done
315  return num_output_rows;
316 }
317 
318 } // namespace CrossSectionTableFunctions
319 
320 //
321 // public TFs
322 //
323 
324 // clang-format off
325 /*
326  UDTF: tf_cross_section_1d__cpu_template(TableFunctionManager mgr,
327  Cursor<Column<TLL> lon, Column<TLL> lat, Column<TV> values> raster,
328  TLL line_x1, TLL line_y1, TLL line_x2, TLL line_y2,
329  int32_t num_points | require="num_points > 1") -> Column<GeoLineString> line,
330  TLL=[double], TV=[float,double]
331  */
332 // clang-format on
333 
334 template <typename TLL, typename TV>
336  const Column<TLL>& lon,
337  const Column<TLL>& lat,
338  const Column<TV>& values,
339  const TLL line_x1,
340  const TLL line_y1,
341  const TLL line_x2,
342  const TLL line_y2,
343  const int32_t num_points,
345  return CrossSectionTableFunctions::tf_cross_section_1d_impl<TLL, TV>(
346  mgr, lon, lat, values, line_x1, line_y1, line_x2, line_y2, num_points, line);
347 }
348 
349 // clang-format off
350 /*
351  UDTF: tf_cross_section_2d__cpu_template(TableFunctionManager mgr,
352  Cursor<Column<TLL> lon, Column<TLL> lat, Column<TY> y_axis, Column<TV> values> raster,
353  TLL line_x1, TLL line_y1, TLL line_x2, TLL line_y2,
354  int32_t num_points_x | require="num_points_x > 1",
355  int32_t num_points_y | require="num_points_y > 1",
356  double dwithin_distance | require="dwithin_distance > 0.0") -> Column<TLL> x, Column<TLL> y, Column<TV> color,
357  TLL=[double], TY=[float,double], TV=[float,double]
358  */
359 // clang-format on
360 
361 template <typename TLL, typename TY, typename TV>
363  const Column<TLL>& lon,
364  const Column<TLL>& lat,
365  const Column<TY>& y_axis,
366  const Column<TV>& values,
367  const TLL line_x1,
368  const TLL line_y1,
369  const TLL line_x2,
370  const TLL line_y2,
371  const int32_t num_points_x,
372  const int32_t num_points_y,
373  const TLL dwithin_distance,
374  Column<TLL>& x,
375  Column<TLL>& y,
376  Column<TV>& color) {
377  return CrossSectionTableFunctions::tf_cross_section_2d_impl<TLL, TY, TV>(
378  mgr,
379  lon,
380  lat,
381  y_axis,
382  values,
383  line_x1,
384  line_y1,
385  line_x2,
386  line_y2,
387  num_points_x,
388  num_points_y,
389  dwithin_distance,
390  x,
391  y,
392  color);
393 }
394 
395 #endif // __CUDACC__
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
#define CHECK_EQ(x, y)
Definition: Logger.h:301
int32_t tf_cross_section_1d_impl(TableFunctionManager &mgr, const Column< TLL > &lon, const Column< TLL > &lat, const Column< TV > &values, const TLL line_x1, const TLL line_y1, const TLL line_x2, const TLL line_y2, const int32_t num_points, Column< GeoLineString > &line)
#define NULL_DOUBLE
void set_output_array_values_total_number(int32_t index, int64_t output_array_values_total_number)
Definition: heavydbTypes.h:361
TEMPLATE_NOINLINE int32_t tf_cross_section_1d__cpu_template(TableFunctionManager &mgr, const Column< TLL > &lon, const Column< TLL > &lat, const Column< TV > &values, const TLL line_x1, const TLL line_y1, const TLL line_x2, const TLL line_y2, const int32_t num_points, Column< GeoLineString > &line)
#define NULL_FLOAT
int32_t tf_cross_section_2d_impl(TableFunctionManager &mgr, const Column< TLL > &lon, const Column< TLL > &lat, const Column< TY > &y_axis, const Column< TV > &values, const TLL line_x1, const TLL line_y1, const TLL line_x2, const TLL line_y2, const int32_t num_points_x, const int32_t num_points_y, const TLL dwithin_distance, Column< TLL > &x, Column< TLL > &y, Column< TV > &color)
#define LOG(tag)
Definition: Logger.h:285
DEVICE int64_t size() const
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
DEVICE T * getPtr() const
#define CHECK_GT(x, y)
Definition: Logger.h:305
#define LOG_IF(severity, condition)
Definition: Logger.h:384
tuple line
Definition: parse_ast.py:10
void set_output_item_values_total_number(int32_t index, int64_t output_item_values_total_number)
Definition: heavydbTypes.h:367
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
constexpr double n
Definition: Utm.h:38
TEMPLATE_NOINLINE int32_t tf_cross_section_2d__cpu_template(TableFunctionManager &mgr, const Column< TLL > &lon, const Column< TLL > &lat, const Column< TY > &y_axis, const Column< TV > &values, const TLL line_x1, const TLL line_y1, const TLL line_x2, const TLL line_y2, const int32_t num_points_x, const int32_t num_points_y, const TLL dwithin_distance, Column< TLL > &x, Column< TLL > &y, Column< TV > &color)
#define VLOG(n)
Definition: Logger.h:388
Type timer_start()
Definition: measure.h:42
#define TEMPLATE_NOINLINE
Definition: heavydbTypes.h:60