OmniSciDB
a5dc49c757
|
#include "TableFunctionsCommon.hpp"
#include <filesystem>
#include <memory>
#include <regex>
#include <string>
#include <tbb/parallel_for.h>
#include <tbb/task_arena.h>
Go to the source code of this file.
Namespaces | |
FileUtilities | |
Macros | |
#define | NANOSECONDS_PER_SECOND 1000000000 |
Functions | |
template<typename T > | |
NEVER_INLINE HOST std::pair< T, T > | get_column_min_max (const Column< T > &col) |
template NEVER_INLINE HOST std::pair< int8_t, int8_t > | get_column_min_max (const Column< int8_t > &col) |
template NEVER_INLINE HOST std::pair< int16_t, int16_t > | get_column_min_max (const Column< int16_t > &col) |
template NEVER_INLINE HOST std::pair< int32_t, int32_t > | get_column_min_max (const Column< int32_t > &col) |
template NEVER_INLINE HOST std::pair< int64_t, int64_t > | get_column_min_max (const Column< int64_t > &col) |
template NEVER_INLINE HOST std::pair< float, float > | get_column_min_max (const Column< float > &col) |
template NEVER_INLINE HOST std::pair< double, double > | get_column_min_max (const Column< double > &col) |
std::pair< int32_t, int32_t > | get_column_min_max (const Column< TextEncodingDict > &col) |
template<typename T > | |
NEVER_INLINE HOST double | get_column_mean (const T *data, const int64_t num_rows) |
template NEVER_INLINE HOST double | get_column_mean (const int8_t *data, const int64_t num_rows) |
template NEVER_INLINE HOST double | get_column_mean (const int16_t *data, const int64_t num_rows) |
template NEVER_INLINE HOST double | get_column_mean (const int32_t *data, const int64_t num_rows) |
template NEVER_INLINE HOST double | get_column_mean (const int64_t *data, const int64_t num_rows) |
template NEVER_INLINE HOST double | get_column_mean (const float *data, const int64_t num_rows) |
template NEVER_INLINE HOST double | get_column_mean (const double *data, const int64_t num_rows) |
template<typename T > | |
NEVER_INLINE HOST double | get_column_mean (const Column< T > &col) |
template NEVER_INLINE HOST double | get_column_mean (const Column< int8_t > &col) |
template NEVER_INLINE HOST double | get_column_mean (const Column< int16_t > &col) |
template NEVER_INLINE HOST double | get_column_mean (const Column< int32_t > &col) |
template NEVER_INLINE HOST double | get_column_mean (const Column< int64_t > &col) |
template NEVER_INLINE HOST double | get_column_mean (const Column< float > &col) |
template NEVER_INLINE HOST double | get_column_mean (const Column< double > &col) |
template<typename T > | |
NEVER_INLINE HOST double | get_column_std_dev (const Column< T > &col, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const Column< int32_t > &col, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const Column< int64_t > &col, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const Column< float > &col, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const Column< double > &col, const double mean) |
template<typename T > | |
NEVER_INLINE HOST double | get_column_std_dev (const T *data, const int64_t num_rows, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const int32_t *data, const int64_t num_rows, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const int64_t *data, const int64_t num_rows, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const float *data, const int64_t num_rows, const double mean) |
template NEVER_INLINE HOST double | get_column_std_dev (const double *data, const int64_t num_rows, const double mean) |
template<typename T > | |
NEVER_INLINE HOST std::tuple < T, T, bool > | get_column_metadata (const Column< T > &col) |
template NEVER_INLINE HOST std::tuple< int8_t, int8_t, bool > | get_column_metadata (const Column< int8_t > &col) |
template NEVER_INLINE HOST std::tuple< int16_t, int16_t, bool > | get_column_metadata (const Column< int16_t > &col) |
template NEVER_INLINE HOST std::tuple< int32_t, int32_t, bool > | get_column_metadata (const Column< int32_t > &col) |
template NEVER_INLINE HOST std::tuple< int64_t, int64_t, bool > | get_column_metadata (const Column< int64_t > &col) |
template NEVER_INLINE HOST std::tuple< float, float, bool > | get_column_metadata (const Column< float > &col) |
template NEVER_INLINE HOST std::tuple< double, double, bool > | get_column_metadata (const Column< double > &col) |
std::tuple< int32_t, int32_t, bool > | get_column_metadata (const Column< TextEncodingDict > &col) |
template<typename T > | |
void | z_std_normalize_col (const T *input_data, T *output_data, const int64_t num_rows, const double mean, const double std_dev) |
template void | z_std_normalize_col (const float *input_data, float *output_data, const int64_t num_rows, const double mean, const double std_dev) |
template void | z_std_normalize_col (const double *input_data, double *output_data, const int64_t num_rows, const double mean, const double std_dev) |
template<typename T > | |
std::vector< std::vector< T > > | z_std_normalize_data (const std::vector< T * > &input_data, const int64_t num_rows) |
template std::vector < std::vector< float > > | z_std_normalize_data (const std::vector< float * > &input_data, const int64_t num_rows) |
template std::vector < std::vector< double > > | z_std_normalize_data (const std::vector< double * > &input_data, const int64_t num_rows) |
template<typename T > | |
ZStdNormalizationSummaryStats< T > | z_std_normalize_data_with_summary_stats (const std::vector< T * > &input_data, const int64_t num_rows) |
template ZStdNormalizationSummaryStats < float > | z_std_normalize_data_with_summary_stats (const std::vector< float * > &input_data, const int64_t num_rows) |
template ZStdNormalizationSummaryStats < double > | z_std_normalize_data_with_summary_stats (const std::vector< double * > &input_data, const int64_t num_rows) |
template<typename T1 , typename T2 > | |
NEVER_INLINE HOST T1 | distance_in_meters (const T1 fromlon, const T1 fromlat, const T2 tolon, const T2 tolat) |
template NEVER_INLINE HOST float | distance_in_meters (const float fromlon, const float fromlat, const float tolon, const float tolat) |
template NEVER_INLINE HOST float | distance_in_meters (const float fromlon, const float fromlat, const double tolon, const double tolat) |
template NEVER_INLINE HOST double | distance_in_meters (const double fromlon, const double fromlat, const float tolon, const float tolat) |
template NEVER_INLINE HOST double | distance_in_meters (const double fromlon, const double fromlat, const double tolon, const double tolat) |
Computes the distance, in meters, between two WGS-84 positions. More... | |
std::regex | FileUtilities::glob_to_regex (const std::string &glob, bool case_sensitive=false) |
std::vector < std::filesystem::path > | FileUtilities::get_fs_paths (const std::string &file_or_directory) |
template<typename T > | |
NEVER_INLINE HOST bool | is_valid_tf_input (const T input, const T bounds_val, const BoundsType bounds_type, const IntervalType interval_type) |
template NEVER_INLINE HOST bool | is_valid_tf_input (const int32_t input, const int32_t bounds_val, const BoundsType bounds_type, const IntervalType interval_type) |
template NEVER_INLINE HOST bool | is_valid_tf_input (const int64_t input, const int64_t bounds_val, const BoundsType bounds_type, const IntervalType interval_type) |
template NEVER_INLINE HOST bool | is_valid_tf_input (const float input, const float bounds_val, const BoundsType bounds_type, const IntervalType interval_type) |
template NEVER_INLINE HOST bool | is_valid_tf_input (const double input, const double bounds_val, const BoundsType bounds_type, const IntervalType interval_type) |
#define NANOSECONDS_PER_SECOND 1000000000 |
Definition at line 29 of file TableFunctionsCommon.cpp.
NEVER_INLINE HOST T1 distance_in_meters | ( | const T1 | fromlon, |
const T1 | fromlat, | ||
const T2 | tolon, | ||
const T2 | tolat | ||
) |
Definition at line 452 of file TableFunctionsCommon.cpp.
template NEVER_INLINE HOST float distance_in_meters | ( | const float | fromlon, |
const float | fromlat, | ||
const float | tolon, | ||
const float | tolat | ||
) |
template NEVER_INLINE HOST float distance_in_meters | ( | const float | fromlon, |
const float | fromlat, | ||
const double | tolon, | ||
const double | tolat | ||
) |
template NEVER_INLINE HOST double distance_in_meters | ( | const double | fromlon, |
const double | fromlat, | ||
const float | tolon, | ||
const float | tolat | ||
) |
template NEVER_INLINE HOST double distance_in_meters | ( | const double | fromlon, |
const double | fromlat, | ||
const double | tolon, | ||
const double | tolat | ||
) |
Computes the distance, in meters, between two WGS-84 positions.
The result is equal to EARTH_RADIUS_IN_METERS*ArcInRadians(from,to)
ArcInRadians is equal to Distance(from,to)/EARTH_RADIUS_IN_METERS
= 2*asin(sqrt(h(d/EARTH_RADIUS_IN_METERS )))
where:
h(x)=sin²(x/2)
code attribution: http://blog.julien.cayzac.name/2008/10/arc-and-distance-between-two-points-on.html
The haversine formula gives: h(d/R) = h(from.lat-to.lat)+h(from.lon-to.lon)*cos(from.lat)*cos(to.lat)
Definition at line 433 of file ExtensionFunctions.hpp.
Referenced by GeoRaster< T, Z >::calculate_bins_and_scales(), length_linestring(), ST_Distance_Point_LineString_Geodesic(), and ST_Distance_Point_Point_Geodesic().
NEVER_INLINE HOST double get_column_mean | ( | const T * | data, |
const int64_t | num_rows | ||
) |
Definition at line 116 of file TableFunctionsCommon.cpp.
References max_inputs_per_thread, threading_serial::parallel_for(), and heavydb.dtypes::T.
Referenced by get_column_mean(), r2_score_impl(), z_std_normalize_data(), and z_std_normalize_data_with_summary_stats().
template NEVER_INLINE HOST double get_column_mean | ( | const int8_t * | data, |
const int64_t | num_rows | ||
) |
template NEVER_INLINE HOST double get_column_mean | ( | const int16_t * | data, |
const int64_t | num_rows | ||
) |
template NEVER_INLINE HOST double get_column_mean | ( | const int32_t * | data, |
const int64_t | num_rows | ||
) |
template NEVER_INLINE HOST double get_column_mean | ( | const int64_t * | data, |
const int64_t | num_rows | ||
) |
template NEVER_INLINE HOST double get_column_mean | ( | const float * | data, |
const int64_t | num_rows | ||
) |
template NEVER_INLINE HOST double get_column_mean | ( | const double * | data, |
const int64_t | num_rows | ||
) |
NEVER_INLINE HOST double get_column_mean | ( | const Column< T > & | col | ) |
Definition at line 183 of file TableFunctionsCommon.cpp.
References get_column_mean(), Column< T >::getPtr(), and Column< T >::size().
template NEVER_INLINE HOST double get_column_mean | ( | const Column< int8_t > & | col | ) |
template NEVER_INLINE HOST double get_column_mean | ( | const Column< int16_t > & | col | ) |
template NEVER_INLINE HOST double get_column_mean | ( | const Column< int32_t > & | col | ) |
template NEVER_INLINE HOST double get_column_mean | ( | const Column< int64_t > & | col | ) |
template NEVER_INLINE HOST double get_column_mean | ( | const Column< float > & | col | ) |
template NEVER_INLINE HOST double get_column_mean | ( | const Column< double > & | col | ) |
NEVER_INLINE HOST std::tuple<T, T, bool> get_column_metadata | ( | const Column< T > & | col | ) |
Definition at line 276 of file TableFunctionsCommon.cpp.
References Column< T >::isNull(), max_inputs_per_thread, threading_serial::parallel_for(), Column< T >::size(), and heavydb.dtypes::T.
Referenced by get_column_metadata().
template NEVER_INLINE HOST std::tuple<int8_t, int8_t, bool> get_column_metadata | ( | const Column< int8_t > & | col | ) |
template NEVER_INLINE HOST std::tuple<int16_t, int16_t, bool> get_column_metadata | ( | const Column< int16_t > & | col | ) |
template NEVER_INLINE HOST std::tuple<int32_t, int32_t, bool> get_column_metadata | ( | const Column< int32_t > & | col | ) |
template NEVER_INLINE HOST std::tuple<int64_t, int64_t, bool> get_column_metadata | ( | const Column< int64_t > & | col | ) |
template NEVER_INLINE HOST std::tuple<float, float, bool> get_column_metadata | ( | const Column< float > & | col | ) |
template NEVER_INLINE HOST std::tuple<double, double, bool> get_column_metadata | ( | const Column< double > & | col | ) |
std::tuple<int32_t, int32_t, bool> get_column_metadata | ( | const Column< TextEncodingDict > & | col | ) |
Definition at line 358 of file TableFunctionsCommon.cpp.
References get_column_metadata(), Column< TextEncodingDict >::getPtr(), and Column< TextEncodingDict >::size().
NEVER_INLINE HOST std::pair<T, T> get_column_min_max | ( | const Column< T > & | col | ) |
Definition at line 32 of file TableFunctionsCommon.cpp.
References max_inputs_per_thread, threading_serial::parallel_for(), Column< T >::size(), and heavydb.dtypes::T.
Referenced by ct_union_pushdown_stats__cpu_template(), RasterFormat_Namespace::format_raster_data(), GeoRaster< T, Z >::GeoRaster(), get_column_min_max(), get_min_or_max(), get_min_or_max_union(), and TableFunctions_Namespace::OneHotEncoder_Namespace::get_top_k_keys().
template NEVER_INLINE HOST std::pair<int8_t, int8_t> get_column_min_max | ( | const Column< int8_t > & | col | ) |
template NEVER_INLINE HOST std::pair<int16_t, int16_t> get_column_min_max | ( | const Column< int16_t > & | col | ) |
template NEVER_INLINE HOST std::pair<int32_t, int32_t> get_column_min_max | ( | const Column< int32_t > & | col | ) |
template NEVER_INLINE HOST std::pair<int64_t, int64_t> get_column_min_max | ( | const Column< int64_t > & | col | ) |
template NEVER_INLINE HOST std::pair<float, float> get_column_min_max | ( | const Column< float > & | col | ) |
template NEVER_INLINE HOST std::pair<double, double> get_column_min_max | ( | const Column< double > & | col | ) |
std::pair<int32_t, int32_t> get_column_min_max | ( | const Column< TextEncodingDict > & | col | ) |
Definition at line 104 of file TableFunctionsCommon.cpp.
References get_column_min_max(), Column< TextEncodingDict >::getPtr(), and Column< TextEncodingDict >::size().
NEVER_INLINE HOST double get_column_std_dev | ( | const Column< T > & | col, |
const double | mean | ||
) |
Definition at line 195 of file TableFunctionsCommon.cpp.
References get_column_std_dev(), Column< T >::getPtr(), and Column< T >::size().
Referenced by get_column_std_dev(), z_std_normalize_data(), and z_std_normalize_data_with_summary_stats().
template NEVER_INLINE HOST double get_column_std_dev | ( | const Column< int32_t > & | col, |
const double | mean | ||
) |
template NEVER_INLINE HOST double get_column_std_dev | ( | const Column< int64_t > & | col, |
const double | mean | ||
) |
template NEVER_INLINE HOST double get_column_std_dev | ( | const Column< float > & | col, |
const double | mean | ||
) |
template NEVER_INLINE HOST double get_column_std_dev | ( | const Column< double > & | col, |
const double | mean | ||
) |
NEVER_INLINE HOST double get_column_std_dev | ( | const T * | data, |
const int64_t | num_rows, | ||
const double | mean | ||
) |
Definition at line 209 of file TableFunctionsCommon.cpp.
References max_inputs_per_thread, threading_serial::parallel_for(), and heavydb.dtypes::T.
template NEVER_INLINE HOST double get_column_std_dev | ( | const int32_t * | data, |
const int64_t | num_rows, | ||
const double | mean | ||
) |
template NEVER_INLINE HOST double get_column_std_dev | ( | const int64_t * | data, |
const int64_t | num_rows, | ||
const double | mean | ||
) |
template NEVER_INLINE HOST double get_column_std_dev | ( | const float * | data, |
const int64_t | num_rows, | ||
const double | mean | ||
) |
template NEVER_INLINE HOST double get_column_std_dev | ( | const double * | data, |
const int64_t | num_rows, | ||
const double | mean | ||
) |
NEVER_INLINE HOST bool is_valid_tf_input | ( | const T | input, |
const T | bounds_val, | ||
const BoundsType | bounds_type, | ||
const IntervalType | interval_type | ||
) |
Definition at line 556 of file TableFunctionsCommon.cpp.
References Exclusive, Inclusive, Max, Min, and UNREACHABLE.
template NEVER_INLINE HOST bool is_valid_tf_input | ( | const int32_t | input, |
const int32_t | bounds_val, | ||
const BoundsType | bounds_type, | ||
const IntervalType | interval_type | ||
) |
template NEVER_INLINE HOST bool is_valid_tf_input | ( | const int64_t | input, |
const int64_t | bounds_val, | ||
const BoundsType | bounds_type, | ||
const IntervalType | interval_type | ||
) |
template NEVER_INLINE HOST bool is_valid_tf_input | ( | const float | input, |
const float | bounds_val, | ||
const BoundsType | bounds_type, | ||
const IntervalType | interval_type | ||
) |
template NEVER_INLINE HOST bool is_valid_tf_input | ( | const double | input, |
const double | bounds_val, | ||
const BoundsType | bounds_type, | ||
const IntervalType | interval_type | ||
) |
void z_std_normalize_col | ( | const T * | input_data, |
T * | output_data, | ||
const int64_t | num_rows, | ||
const double | mean, | ||
const double | std_dev | ||
) |
Definition at line 365 of file TableFunctionsCommon.cpp.
References threading_serial::parallel_for().
Referenced by z_std_normalize_data(), and z_std_normalize_data_with_summary_stats().
template void z_std_normalize_col | ( | const float * | input_data, |
float * | output_data, | ||
const int64_t | num_rows, | ||
const double | mean, | ||
const double | std_dev | ||
) |
template void z_std_normalize_col | ( | const double * | input_data, |
double * | output_data, | ||
const int64_t | num_rows, | ||
const double | mean, | ||
const double | std_dev | ||
) |
std::vector<std::vector<T> > z_std_normalize_data | ( | const std::vector< T * > & | input_data, |
const int64_t | num_rows | ||
) |
Definition at line 397 of file TableFunctionsCommon.cpp.
References get_column_mean(), get_column_std_dev(), and z_std_normalize_col().
Referenced by dbscan__cpu_template(), and kmeans__cpu_template().
template std::vector<std::vector<float> > z_std_normalize_data | ( | const std::vector< float * > & | input_data, |
const int64_t | num_rows | ||
) |
template std::vector<std::vector<double> > z_std_normalize_data | ( | const std::vector< double * > & | input_data, |
const int64_t | num_rows | ||
) |
ZStdNormalizationSummaryStats<T> z_std_normalize_data_with_summary_stats | ( | const std::vector< T * > & | input_data, |
const int64_t | num_rows | ||
) |
Definition at line 422 of file TableFunctionsCommon.cpp.
References get_column_mean(), get_column_std_dev(), and z_std_normalize_col().
Referenced by pca_fit_impl().
template ZStdNormalizationSummaryStats<float> z_std_normalize_data_with_summary_stats | ( | const std::vector< float * > & | input_data, |
const int64_t | num_rows | ||
) |
template ZStdNormalizationSummaryStats<double> z_std_normalize_data_with_summary_stats | ( | const std::vector< double * > & | input_data, |
const int64_t | num_rows | ||
) |