22 #include <tbb/parallel_for.h>
24 #include <boost/config.hpp>
25 #include <boost/graph/adjacency_list.hpp>
26 #include <boost/graph/dijkstra_shortest_paths.hpp>
27 #include <boost/graph/graph_traits.hpp>
28 #include <boost/property_map/property_map.hpp>
30 #include "ThirdParty/robin_hood/robin_hood.h"
38 struct get_mapped<int32_t> {
43 struct get_mapped<int64_t> {
56 std::vector<T2> idx_to_attr_map;
57 using AttrMap = robin_hood::unordered_flat_map<T2, int32_t>;
59 AttrMap attr_to_idx_map;
60 int32_t unique_idx = 0;
62 inline int32_t size()
const {
return static_cast<int64_t
>(idx_to_attr_map.size()); }
64 inline int32_t get_idx_for_key(
const T2& key)
const {
66 return attr_to_idx_map.at(key);
69 inline int64_t get_idx_for_key_safe(
const T2& key)
const {
71 const auto key_itr = attr_to_idx_map.find(key);
72 if (key_itr == attr_to_idx_map.end()) {
75 return key_itr->second;
78 inline T get_key_for_idx(
const int32_t idx)
const {
80 return idx_to_attr_map[idx];
83 AttrIdxMap(
const bool must_be_unique) : must_be_unique(must_be_unique) {}
87 const int64_t num_rows = col.
size();
88 if (idx_to_attr_map.empty()) {
92 idx_to_attr_map.reserve(num_rows);
93 attr_to_idx_map.reserve(num_rows);
95 for (int64_t row_idx = 0; row_idx < num_rows; ++row_idx) {
96 if (!col.
isNull(row_idx)) {
97 const bool is_new_elem = attr_to_idx_map.emplace(col[row_idx], unique_idx).second;
99 idx_to_attr_map.emplace_back(col[row_idx]);
102 if (must_be_unique) {
103 throw std::runtime_error(
104 "Expected unique elements but found duplicates for column.");
109 CHECK_EQ(attr_to_idx_map.size(), idx_to_attr_map.size());
110 CHECK_EQ(idx_to_attr_map.size(),
static_cast<size_t>(unique_idx));
114 template <
typename N>
115 std::vector<std::pair<N, N>> construct_key_normalized_edge_list(
118 const AttrIdxMap<N>& attr_idx_map) {
120 const int32_t num_edges = node1.
size();
121 std::vector<std::pair<N, N>> key_normalized_edge_list(num_edges);
123 tbb::blocked_range<int32_t>(0, num_edges),
124 [&](
const tbb::blocked_range<int32_t>& r) {
125 const int32_t r_end = r.end();
126 for (int32_t e = r.begin(); e < r_end; ++e) {
127 key_normalized_edge_list[e].first = attr_idx_map.get_idx_for_key(node1[e]);
128 key_normalized_edge_list[e].second = attr_idx_map.get_idx_for_key(node2[e]);
131 return key_normalized_edge_list;
134 template <
typename S>
135 struct TerminalNodes {
138 bool end_node_is_valid;
142 TerminalNodes(
const S start_node)
143 : start_node(start_node), end_node(start_node), end_node_is_valid(
false) {}
144 TerminalNodes(
const S start_node,
const S end_node)
145 : start_node(start_node), end_node(end_node), end_node_is_valid(
true) {}
148 template <
typename N,
typename D>
149 struct GraphTraversalResults {
150 using graph_t = boost::adjacency_list<boost::listS,
154 boost::property<boost::edge_weight_t, int32_t>>;
155 using vertex_descriptor = boost::graph_traits<graph_t>::vertex_descriptor;
156 AttrIdxMap<N> attr_idx_map;
157 int32_t start_node_idx, end_node_idx;
159 std::vector<vertex_descriptor> parents;
160 std::vector<D> graph_distances;
161 int64_t num_vertices{0};
162 boost::graph_traits<graph_t>::vertex_iterator vi, vend;
164 GraphTraversalResults() : attr_idx_map(
false) {}
166 void buildEdgeGraph(
const std::vector<std::pair<N, N>>& edge_list,
169 edge_graph = graph_t(edge_list.data(),
170 edge_list.data() + edge_list.size(),
172 attr_idx_map.size());
173 num_vertices = boost::num_vertices(edge_graph);
174 parents.resize(num_vertices);
175 graph_distances.resize(num_vertices);
176 boost::tie(vi, vend) = boost::vertices(edge_graph);
180 template <
typename N,
typename D,
typename S>
181 GraphTraversalResults<N, D> graph_shortest_path_impl(
185 const TerminalNodes<S>& terminal_nodes) {
187 using graph_t = boost::adjacency_list<boost::listS,
191 boost::property<boost::edge_weight_t, int32_t>>;
192 using vertex_descriptor = boost::graph_traits<graph_t>::vertex_descriptor;
193 const int64_t num_edges = node1.
size();
195 std::vector<N> translated_node2_data;
196 N start_node_translated;
197 N end_node_translated;
198 if constexpr (std::is_same_v<N, TextEncodingDict>) {
199 const auto node1_sdp = node1.string_dict_proxy_;
200 const auto node2_sdp = node2.string_dict_proxy_;
201 if (node1_sdp->getDictKey() != node2_sdp->getDictKey()) {
202 auto translation_timer =
DEBUG_TIMER(
"Dictionary Translation");
203 const auto translation_map =
204 node2_sdp->buildUnionTranslationMapToOtherProxy(node1_sdp, {});
205 translated_node2_data.resize(num_edges);
206 for (int64_t edge_idx = 0; edge_idx < num_edges; ++edge_idx) {
207 const N original_val = node2[edge_idx];
208 translated_node2_data[edge_idx] =
209 original_val == inline_null_value<N>()
211 : static_cast<N>(translation_map[original_val]);
214 translated_node2_data.data(), node2.
num_rows_, node1.string_dict_proxy_);
216 if constexpr (!std::is_same_v<S, TextEncodingNone>) {
217 throw std::runtime_error(
218 "Starting node should be of text type to match graph nodes.");
220 start_node_translated =
221 node1.string_dict_proxy_->getIdOfString(terminal_nodes.start_node);
222 if (terminal_nodes.end_node_is_valid) {
223 end_node_translated =
224 node1.string_dict_proxy_->getIdOfString(terminal_nodes.end_node);
228 throw std::runtime_error(
"Starting node not found.");
231 if constexpr (std::is_same_v<S, TextEncodingNone>) {
232 throw std::runtime_error(
233 "Starting node should be of integer type to match graph nodes.");
235 start_node_translated = terminal_nodes.start_node;
236 end_node_translated = terminal_nodes.end_node;
240 GraphTraversalResults<N, D> graph_traversal_results;
241 graph_traversal_results.attr_idx_map.add_column(node1);
242 graph_traversal_results.attr_idx_map.add_column(new_node2);
244 const auto edge_list = construct_key_normalized_edge_list(
245 node1, new_node2, graph_traversal_results.attr_idx_map);
246 graph_traversal_results.buildEdgeGraph(edge_list, distance);
247 graph_traversal_results.start_node_idx =
248 graph_traversal_results.attr_idx_map.get_idx_for_key_safe(start_node_translated);
249 graph_traversal_results.end_node_idx =
250 terminal_nodes.end_node_is_valid
251 ? graph_traversal_results.attr_idx_map.get_idx_for_key_safe(end_node_translated)
254 if (graph_traversal_results.start_node_idx < 0) {
255 throw std::runtime_error(
"Starting node not found.");
257 if (terminal_nodes.end_node_is_valid && graph_traversal_results.end_node_idx < 0) {
258 throw std::runtime_error(
"End node not found.");
260 vertex_descriptor start_vertex = boost::vertex(graph_traversal_results.start_node_idx,
261 graph_traversal_results.edge_graph);
263 auto shortest_paths_timer =
DEBUG_TIMER(
"Djikstra Shortest Paths");
264 boost::dijkstra_shortest_paths(
265 graph_traversal_results.edge_graph,
267 boost::predecessor_map(
268 boost::make_iterator_property_map(
269 graph_traversal_results.parents.begin(),
270 boost::get(boost::vertex_index, graph_traversal_results.edge_graph)))
271 .distance_map(boost::make_iterator_property_map(
272 graph_traversal_results.graph_distances.begin(),
273 boost::get(boost::vertex_index, graph_traversal_results.edge_graph))));
275 return graph_traversal_results;
289 template <
typename N,
typename D,
typename S>
300 GraphTraversalResults<N, D> graph_traversal_results;
301 TerminalNodes<S> terminal_nodes(start_node, end_node);
303 graph_traversal_results =
304 graph_shortest_path_impl(node1, node2, distance, terminal_nodes);
305 }
catch (std::exception& e) {
306 return mgr.ERROR_MESSAGE(e.what());
308 auto output_timer =
DEBUG_TIMER(
"Output shortest path results");
309 int32_t current_node_idx = graph_traversal_results.end_node_idx;
311 int32_t num_hops = 0;
312 const D total_distance_to_origin_node =
313 graph_traversal_results.graph_distances[current_node_idx];
317 if (total_distance_to_origin_node != std::numeric_limits<D>::max()) {
318 while (current_node_idx != graph_traversal_results.start_node_idx) {
319 current_node_idx = graph_traversal_results.parents[current_node_idx];
323 current_node_idx = graph_traversal_results.end_node_idx;
324 int32_t path_step_idx = num_hops;
325 const auto last_node_key =
326 graph_traversal_results.attr_idx_map.get_key_for_idx(current_node_idx);
327 output_path_step[path_step_idx] = path_step_idx + 1;
328 output_node[path_step_idx] = last_node_key;
329 output_distance[path_step_idx] = total_distance_to_origin_node;
330 while (current_node_idx != graph_traversal_results.start_node_idx) {
331 current_node_idx = graph_traversal_results.parents[current_node_idx];
333 output_path_step[path_step_idx] = path_step_idx + 1;
334 output_node[path_step_idx] =
335 graph_traversal_results.attr_idx_map.get_key_for_idx(current_node_idx);
336 output_distance[path_step_idx] =
337 graph_traversal_results.graph_distances[current_node_idx];
357 template <
typename N,
typename D,
typename S>
368 GraphTraversalResults<N, D> graph_traversal_results;
369 TerminalNodes<S> terminal_nodes(start_node);
371 graph_traversal_results =
372 graph_shortest_path_impl(node1, node2, distance, terminal_nodes);
373 }
catch (std::exception& e) {
374 return mgr.ERROR_MESSAGE(e.what());
376 auto output_timer =
DEBUG_TIMER(
"Output shortest paths results");
378 const N node1_val = graph_traversal_results.attr_idx_map.get_key_for_idx(
379 graph_traversal_results.start_node_idx);
381 tbb::blocked_range<int32_t>(0, graph_traversal_results.num_vertices),
382 [&](
const tbb::blocked_range<int32_t>& r) {
383 const int32_t r_end = r.end();
384 for (int32_t vertex_idx = r.begin(); vertex_idx < r_end; ++vertex_idx) {
385 out_node1[vertex_idx] = node1_val;
386 out_node2[vertex_idx] =
387 graph_traversal_results.attr_idx_map.get_key_for_idx(vertex_idx);
388 out_distance[vertex_idx] = graph_traversal_results.graph_distances[vertex_idx];
389 if (out_distance[vertex_idx] == std::numeric_limits<D>::max()) {
390 out_distance.
setNull(vertex_idx);
391 out_num_hops.
setNull(vertex_idx);
393 int32_t num_hops = 0;
394 int32_t current_node_idx = vertex_idx;
395 while (current_node_idx != graph_traversal_results.start_node_idx) {
396 current_node_idx = graph_traversal_results.parents[current_node_idx];
399 out_num_hops[vertex_idx] = num_hops;
404 return graph_traversal_results.num_vertices;
407 template <
typename Z>
408 struct RasterGraphEdges {
409 std::vector<int32_t> bins_1;
410 std::vector<int32_t> bins_2;
411 std::vector<Z> distances;
414 template <
typename Z>
415 struct RasterGraphEdgeGenerator {
416 const Z slope_weight_exponent;
417 const Z slope_pct_max;
418 RasterGraphEdges<Z> raster_graph_edges;
419 std::atomic<int32_t> num_edges{0};
421 RasterGraphEdgeGenerator(
const int64_t max_size,
422 const Z slope_weight_exponent,
423 const Z slope_pct_max)
424 : slope_weight_exponent(slope_weight_exponent), slope_pct_max(slope_pct_max) {
425 raster_graph_edges.bins_1.resize(max_size);
426 raster_graph_edges.bins_2.resize(max_size);
427 raster_graph_edges.distances.resize(max_size);
430 int64_t size()
const {
return num_edges; }
432 inline void conditionally_write_edge_slope(
const std::pair<int64_t, Z>& bin_1_idx_and_z,
433 const std::pair<int64_t, Z>& bin_2_idx_and_z,
434 const Z x_distance) {
437 if (bin_2_idx_and_z.second == inline_null_value<Z>()) {
440 const Z slope = (bin_2_idx_and_z.second - bin_1_idx_and_z.second) / x_distance;
441 if (slope * 100.0 > slope_pct_max) {
444 const int32_t edge_idx = num_edges++;
445 raster_graph_edges.bins_1[edge_idx] =
static_cast<int32_t
>(bin_1_idx_and_z.first);
446 raster_graph_edges.bins_2[edge_idx] =
static_cast<int32_t
>(bin_2_idx_and_z.first);
447 const Z abs_slope = 1.0 + abs(slope);
448 raster_graph_edges.distances[edge_idx] =
449 x_distance * pow(abs_slope, slope_weight_exponent);
452 void trim_to_size() {
453 raster_graph_edges.bins_1.resize(num_edges);
454 raster_graph_edges.bins_2.resize(num_edges);
455 raster_graph_edges.distances.resize(num_edges);
459 template <
typename T,
typename Z>
460 RasterGraphEdges<Z> generate_raster_graph_edges(
const GeoRaster<T, Z>& geo_raster,
461 const Z slope_weight_exponent,
462 const Z slope_pct_max) {
463 RasterGraphEdgeGenerator<Z> raster_graph_edge_generator(
464 geo_raster.
num_bins_ * 8, slope_weight_exponent, slope_pct_max);
466 const Z x_distance_diag = sqrt(2.0 * x_distance * x_distance);
468 tbb::blocked_range<int64_t>(0, geo_raster.
num_y_bins_),
469 [&](
const tbb::blocked_range<int64_t>& r) {
470 const int64_t end_y_bin = r.end();
471 for (int64_t y_bin = r.begin(); y_bin != end_y_bin; ++y_bin) {
472 for (int64_t x_bin = 0; x_bin < geo_raster.
num_x_bins_; ++x_bin) {
473 const auto bin_1_idx_and_z =
475 if (bin_1_idx_and_z.second == inline_null_value<Z>()) {
478 raster_graph_edge_generator.conditionally_write_edge_slope(
482 raster_graph_edge_generator.conditionally_write_edge_slope(
486 raster_graph_edge_generator.conditionally_write_edge_slope(
490 raster_graph_edge_generator.conditionally_write_edge_slope(
494 raster_graph_edge_generator.conditionally_write_edge_slope(
498 raster_graph_edge_generator.conditionally_write_edge_slope(
502 raster_graph_edge_generator.conditionally_write_edge_slope(
506 raster_graph_edge_generator.conditionally_write_edge_slope(
513 raster_graph_edge_generator.trim_to_size();
514 return raster_graph_edge_generator.raster_graph_edges;
530 template <
typename T,
typename Z>
531 TEMPLATE_NOINLINE int32_t tf_raster_graph_shortest_slope_weighted_path__cpu_template(
537 const T bin_dim_meters,
538 const bool geographic_coords,
539 const int64_t neighborhood_fill_radius,
540 const bool fill_only_nulls,
545 const Z slope_weight_exponent,
546 const Z slope_pct_max,
552 const std::string error_msg =
553 "Invalid Raster Aggregate Type: " + agg_type_str.
getString();
554 return mgr.ERROR_MESSAGE(error_msg);
567 const auto origin_x_bin = geo_raster.
get_x_bin(origin_x);
568 const auto origin_y_bin = geo_raster.
get_y_bin(origin_y);
569 const auto dest_x_bin = geo_raster.
get_x_bin(dest_x);
570 const auto dest_y_bin = geo_raster.
get_y_bin(dest_y);
572 return mgr.ERROR_MESSAGE(
"Origin coordinates are out of bounds.");
575 return mgr.ERROR_MESSAGE(
"Destination coordinates are out of bounds.");
578 const auto origin_bin =
580 const auto dest_bin =
585 CHECK_NE(origin_bin, bin_out_of_bounds_idx);
586 CHECK_NE(dest_bin, bin_out_of_bounds_idx);
588 if (neighborhood_fill_radius > 0) {
592 auto raster_graph_edges =
593 generate_raster_graph_edges(geo_raster, slope_weight_exponent, slope_pct_max);
596 const Column<Z> distance(raster_graph_edges.distances);
597 GraphTraversalResults<int32_t, Z> graph_traversal_results;
598 TerminalNodes<int32_t> terminal_nodes(origin_bin, dest_bin);
600 graph_traversal_results =
601 graph_shortest_path_impl(node1, node2, distance, terminal_nodes);
602 }
catch (std::exception& e) {
603 return mgr.ERROR_MESSAGE(e.what());
605 auto output_timer =
DEBUG_TIMER(
"Output shortest path results");
608 const auto node2_val = graph_traversal_results.attr_idx_map.get_key_for_idx(
609 graph_traversal_results.end_node_idx);
610 const int64_t vertex_idx = graph_traversal_results.end_node_idx;
611 int32_t current_node_idx = vertex_idx;
613 int32_t num_hops = 0;
614 const auto distance_to_origin_node =
615 graph_traversal_results.graph_distances[vertex_idx];
616 if (distance_to_origin_node != std::numeric_limits<Z>::max()) {
617 while (current_node_idx != graph_traversal_results.start_node_idx) {
618 current_node_idx = graph_traversal_results.parents[current_node_idx];
622 current_node_idx = vertex_idx;
623 int32_t path_step_idx = num_hops;
624 const auto end_bin_idx =
625 graph_traversal_results.attr_idx_map.get_key_for_idx(current_node_idx);
627 output_path_step[path_step_idx] = path_step_idx + 1;
628 const auto [end_path_x, end_path_y] =
630 output_x[path_step_idx] = end_path_x;
631 output_y[path_step_idx] = end_path_y;
632 while (current_node_idx != graph_traversal_results.start_node_idx) {
633 current_node_idx = graph_traversal_results.parents[current_node_idx];
636 graph_traversal_results.attr_idx_map.get_key_for_idx(current_node_idx);
637 output_path_step[path_step_idx] = path_step_idx + 1;
639 output_x[path_step_idx] = path_x;
640 output_y[path_step_idx] = path_y;
649 #endif // #ifdef HAVE_TBB
650 #endif // #ifdef __CUDACC__
void set_output_row_size(int64_t num_rows)
int64_t get_bin_idx_for_xy_coords(const T x, const T y) const
std::pair< T, T > get_xy_coords_for_bin_idx(const int64_t bin_idx) const
std::string getString() const
DEVICE int64_t size() const
void fill_bins_from_neighbors(const int64_t neighborhood_fill_radius, const bool fill_only_nulls, const RasterAggType raster_agg_type=RasterAggType::GAUSS_AVG)
T get_y_bin(const T input) const
RasterAggType get_raster_agg_type(const std::string &agg_type_str, const bool is_fill_agg)
T get_x_bin(const T input) const
bool is_bin_out_of_bounds(const int64_t x_bin, const int64_t y_bin) const
static constexpr int32_t INVALID_STR_ID
bool g_enable_smem_group_by true
std::pair< int64_t, Z > get_bin_idx_and_z_val_for_xy_bin(const int64_t x_bin, const int64_t y_bin) const
DEVICE bool isNull(int64_t index) const
DEVICE void setNull(int64_t index)
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
bool g_enable_watchdog false
#define DEBUG_TIMER(name)
#define TEMPLATE_NOINLINE