OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsStats.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
21 #include <iostream>
22 #include <stdexcept>
23 #include <string>
24 #include <vector>
25 
26 #include <rapidjson/document.h>
27 
29 
// Comparison operator carried by a stats request filter. NONE means the
// request is unfiltered; LT and GT select a less-than / greater-than
// comparison against the request's filter value.
enum class StatsRequestPredicateOp {
  NONE,
  LT,
  GT,
};
31 
// Tail of a constructor's member-initializer list; the constructor's opening
// lines are not present in this listing (presumably the default constructor --
// confirm against the header). It zeroes the filter value, clears is_gt and
// marks the predicate as a no-op.
35  , filter_val(0.)
36  , is_gt(false)
37  , is_no_op(true) {}
// Remainder of the constructor taking an operator and a filter value (its
// first signature line is not present in this listing). Stores both arguments
// and caches two flags derived from the operator: is_gt is true only for GT,
// is_no_op is true only for NONE.
39  const double filter_val)
40  : predicate_op(predicate_op)
41  , filter_val(filter_val)
42  , is_gt(predicate_op == StatsRequestPredicateOp::GT)
43  , is_no_op(predicate_op == StatsRequestPredicateOp::NONE) {}
44 
// Member-initializer list of a constructor that copies every field from
// `other` (its signature line is not present in this listing; presumably the
// copy constructor -- confirm against the header).
46  : predicate_op(other.predicate_op)
47  , filter_val(other.filter_val)
48  , is_gt(other.is_gt)
49  , is_no_op(other.is_no_op) {}
50 
// Renders the predicate as "<OP>|<filter_val>", where <OP> is one of "NONE",
// "LT" or "GT" and the filter value is formatted with std::to_string.
// NOTE(review): the `case` labels of the switch are missing from this listing;
// presumably they are NONE, LT and GT in that order -- confirm against the
// real header.
51  std::string to_string() const {
52  std::string str;
53  switch (predicate_op) {
55  str += "NONE";
56  break;
57  }
59  str += "LT";
60  break;
61  }
63  str += "GT";
64  break;
65  }
66  }
// The filter value is appended for every operator, including NONE.
67  str += "|" + std::to_string(filter_val);
68  return str;
69  }
70 
71  template <typename T>
72  inline bool operator()(const T val) const {
73  return is_no_op || (is_gt && (val >= filter_val));
74  }
75 
76  bool operator==(StatsRequestPredicate const& rhs) const {
77  return predicate_op == rhs.predicate_op && filter_val == rhs.filter_val;
78  }
79 
// NOTE(review): the declaration of the predicate_op member, which the
// constructors initialize, is not present in this listing.
// Threshold the predicate compares values against.
81  double filter_val;
// Cached flag: set from `predicate_op == GT` by the two-argument constructor.
82  bool is_gt;
// Cached flag: set from `predicate_op == NONE` by the two-argument constructor.
83  bool is_no_op;
84 };
85 
// Summary statistics for a column whose elements have type T.
// NOTE(review): this listing is missing one member declaration and the
// constructor's signature line. Other code in this file reads a
// `non_null_or_filtered_count` field from ColumnStats, so that is presumably
// the missing member -- confirm against the real header.
86 template <typename T>
87 struct ColumnStats {
88  int64_t total_count;
90  T min;
91  T max;
92  T sum;
93  double mean;
94 
// Initial state: zero count, sum and mean. min starts at the largest
// representable T and max at the lowest, presumably so that the first value
// folded in replaces both.
96  : total_count(0)
98  , min(std::numeric_limits<T>::max())
99  , max(std::numeric_limits<T>::lowest())
100  , sum(0.0)
101  , mean(0.0) {}
102 };
103 
// Enumerators of the aggregate-type enum; its opening line is not present in
// this listing (presumably `enum class StatsRequestAggType {` -- confirm
// against the header). Each value names one aggregate a stats request can
// ask for.
105  COUNT,
106  MIN,
107  MAX,
108  SUM,
109  AVG,
110 };
111 
// One requested statistic together with the slot that receives its value.
// NOTE(review): two member declarations are missing from this listing; other
// code in this file reads `agg_type` and `filter_type` from StatsRequest, so
// those are presumably the missing members -- confirm against the header.
112 struct StatsRequest {
// Label emitted for this request in the output name column.
113  std::string name;
// Index of the attribute column the statistic is computed over.
114  int32_t attr_id;
// Threshold passed to the request's filter predicate.
117  double filter_val;
// Output slot: filled in with the computed statistic.
118  double result;
119 };
120 
// Declaration only; the definition is not in this header.
// Takes a JSON string and the number of attribute columns and returns a list
// of StatsRequest objects. Presumably num_attrs is used to validate each
// request's attr_id -- confirm against the definition.
121 std::vector<StatsRequest> parse_stats_requests_json(
122  const std::string& stats_requests_json_str,
123  const int64_t num_attrs);
124 
// Memoized column-stats lookup for a single stats request.
// NOTE(review): the line carrying the return type and function name is missing
// from this listing (presumably a get_column_stats overload returning
// ColumnStats<TA> -- confirm against the header).
//
// attrs         - attribute columns; indexed by stats_request.attr_id.
// stats_request - supplies the attribute index and the filter (type + value).
// stats_map     - cache keyed by "<attr_id>||<predicate string>"; updated on a
//                 miss.
125 template <typename TA>
127  const ColumnList<TA>& attrs,
128  StatsRequest& stats_request,
129  std::unordered_map<std::string, ColumnStats<TA>>& stats_map) {
130  StatsRequestPredicate predicate(stats_request.filter_type, stats_request.filter_val);
// Requests on the same attribute with the same filter share one cache entry.
131  const std::string request_str_key =
132  std::to_string(stats_request.attr_id) + "||" + predicate.to_string();
133  auto stats_map_itr = stats_map.find(request_str_key);
134  if (stats_map_itr != stats_map.end()) {
135  return stats_map_itr->second;
136  }
// Cache miss: compute the stats for the selected column and remember them.
137  const auto column_stats = get_column_stats(attrs[stats_request.attr_id], predicate);
138  stats_map[request_str_key] = column_stats;
139  return column_stats;
140 }
141 
// Fills in `result` for every request in stats_requests.
// NOTE(review): the line carrying the function name and first parameter is
// missing from this listing (the body reads a ColumnList-like `attrs`), and so
// are the `case` labels of the switch; presumably they are COUNT, MIN, MAX,
// SUM and AVG in that order -- confirm against the header.
142 template <typename TA>
144  std::vector<StatsRequest>& stats_requests) {
// Per-call cache so requests sharing an attribute and filter reuse one set of
// column stats.
145  std::unordered_map<std::string, ColumnStats<TA>> stats_map;
146 
147  for (auto& stats_request : stats_requests) {
148  const auto column_stats = get_column_stats(attrs, stats_request, stats_map);
// Copy the field selected by agg_type into the request; every field is stored
// into the double-typed `result`.
149  switch (stats_request.agg_type) {
151  stats_request.result = column_stats.non_null_or_filtered_count;
152  break;
153  }
155  stats_request.result = column_stats.min;
156  break;
157  }
159  stats_request.result = column_stats.max;
160  break;
161  }
163  stats_request.result = column_stats.sum;
164  break;
165  }
167  stats_request.result = column_stats.mean;
168  break;
169  }
170  }
171  }
172 }
173 
// Writes one output row per stats request: the request's name into stat_names
// and its computed result into stat_vals, both at the request's index.
// NOTE(review): the line carrying the function name and the `stat_names`
// parameter is missing from this listing.
// No bounds check is done here; assumes both output columns hold at least
// stats_requests.size() entries -- confirm against the caller.
174 template <typename TA>
176  Column<TA>& stat_vals,
177  const std::vector<StatsRequest>& stats_requests) {
178  const int64_t num_requests = static_cast<int64_t>(stats_requests.size());
179  for (int64_t request_id = 0; request_id < num_requests; ++request_id) {
// The name column stores the value returned by the string dictionary proxy's
// getOrAddTransient for the request's name.
180  stat_names[request_id] =
181  stat_names.string_dict_proxy_->getOrAddTransient(stats_requests[request_id].name);
// The double result is assigned into the TA-typed value column.
182  stat_vals[request_id] = stats_requests[request_id].result;
183  }
184 }
185 
// Forward declaration of a function template taking a raw data pointer, a row
// count and an optional predicate. The line with its return type and name is
// missing from this listing (presumably get_column_stats returning
// ColumnStats<T> -- confirm against the header).
// When `predicate` is omitted, a default-constructed StatsRequestPredicate is
// used.
186 template <typename T>
188  const T* data,
189  const int64_t num_rows,
190  const StatsRequestPredicate& predicate = StatsRequestPredicate());
191 
// Forward declaration of a function template taking a Column<T> and an
// optional predicate. The line with its return type and name is missing from
// this listing (presumably get_column_stats returning ColumnStats<T> --
// confirm against the header).
// When `predicate` is omitted, a default-constructed StatsRequestPredicate is
// used.
192 template <typename T>
194  const Column<T>& col,
195  const StatsRequestPredicate& predicate = StatsRequestPredicate());
196 
197 #endif // __CUDACC__
bool operator==(StatsRequestPredicate const &rhs) const
std::vector< StatsRequest > parse_stats_requests_json(const std::string &stats_requests_json_str, const int64_t num_attrs)
NEVER_INLINE HOST ColumnStats< T > get_column_stats(const T *data, const int64_t num_rows, const StatsRequestPredicate &predicate)
StatsRequestPredicateOp
std::string to_string(char const *&&v)
std::string to_string() const
#define HOST
StatsRequestPredicate(const StatsRequestPredicate &other)
void populate_output_stats_cols(Column< TextEncodingDict > &stat_names, Column< TA > &stat_vals, const std::vector< StatsRequest > &stats_requests)
StringDictionaryProxy * string_dict_proxy_
int64_t non_null_or_filtered_count
bool g_enable_smem_group_by true
int32_t getOrAddTransient(const std::string &)
void compute_stats_requests(const ColumnList< TA > &attrs, std::vector< StatsRequest > &stats_requests)
StatsRequestPredicateOp filter_type
StatsRequestPredicateOp predicate_op
StatsRequestPredicate(const StatsRequestPredicateOp predicate_op, const double filter_val)
bool operator()(const T val) const
RequestId request_id()
Definition: Logger.cpp:876
#define NEVER_INLINE
bool g_enable_watchdog false
Definition: Execute.cpp:80
StatsRequestAggType
StatsRequestAggType agg_type
string name
Definition: setup.in.py:72