OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CategoricalFeaturesBuilder< T > Struct Template Reference

#include <MLTableFunctions.hpp>

Public Member Functions

 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const ColumnList< T > &numeric_features, const int32_t cat_top_k, const float cat_min_fraction, const bool cat_include_others)
 
 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const int32_t cat_top_k, const float cat_min_fraction, const bool cat_include_others)
 
 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const ColumnList< T > &numeric_features, const std::vector< std::vector< std::string >> &cat_feature_keys)
 
 CategoricalFeaturesBuilder (const ColumnList< TextEncodingDict > &cat_features, const std::vector< std::vector< std::string >> &cat_feature_keys)
 
ColumnList< T > getFeatures ()
 
const std::vector< std::vector< std::string > > & getCatFeatureKeys () const
 

Private Attributes

int64_t num_rows_
 
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
 
std::vector< std::vector< std::string > > cat_feature_keys_
 
std::vector< int8_t * > col_ptrs_
 

Detailed Description

template<typename T>
struct CategoricalFeaturesBuilder< T >

Definition at line 385 of file MLTableFunctions.hpp.

Constructor & Destructor Documentation

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const ColumnList< T > &  numeric_features,
const int32_t  cat_top_k,
const float  cat_min_fraction,
const bool  cat_include_others 
)
inline

Definition at line 387 of file MLTableFunctions.hpp.

References ColumnList< T >::numCols(), ColumnList< TextEncodingDict >::numCols(), and ColumnList< T >::ptrs_.

392  : num_rows_(numeric_features.size()) {
393  TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo
394  one_hot_encoding_info(cat_top_k, cat_min_fraction, cat_include_others);
395  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
396  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
397  one_hot_encoding_infos;
398  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
399  one_hot_encoding_infos.emplace_back(one_hot_encoding_info);
400  }
401  one_hot_encoded_cols_ =
402  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
403  cat_features, one_hot_encoding_infos);
404  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
405  cat_feature_keys_.emplace_back(one_hot_encoded_col.cat_features);
406  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
407  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
408  }
409  }
410  const int64_t num_numeric_features = numeric_features.numCols();
411  for (int64_t numeric_feature_idx = 0; numeric_feature_idx < num_numeric_features;
412  ++numeric_feature_idx) {
413  col_ptrs_.emplace_back(numeric_features.ptrs_[numeric_feature_idx]);
414  }
415  }
DEVICE int64_t numCols() const
DEVICE int64_t numCols() const
int8_t ** ptrs_
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_
DEVICE int64_t size() const

+ Here is the call graph for this function:

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const int32_t  cat_top_k,
const float  cat_min_fraction,
const bool  cat_include_others 
)
inline

Definition at line 417 of file MLTableFunctions.hpp.

References ColumnList< TextEncodingDict >::numCols().

421  : num_rows_(cat_features.size()) {
422  TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo
423  one_hot_encoding_info(cat_top_k, cat_min_fraction, cat_include_others);
424  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
425  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
426  one_hot_encoding_infos;
427  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
428  one_hot_encoding_infos.emplace_back(one_hot_encoding_info);
429  }
430  one_hot_encoded_cols_ =
431  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
432  cat_features, one_hot_encoding_infos);
433  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
434  cat_feature_keys_.emplace_back(one_hot_encoded_col.cat_features);
435  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
436  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
437  }
438  }
439  }
DEVICE int64_t size() const
DEVICE int64_t numCols() const
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_

+ Here is the call graph for this function:

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const ColumnList< T > &  numeric_features,
const std::vector< std::vector< std::string >> &  cat_feature_keys 
)
inline

Definition at line 441 of file MLTableFunctions.hpp.

References ColumnList< T >::numCols(), ColumnList< TextEncodingDict >::numCols(), and ColumnList< T >::ptrs_.

445  : num_rows_(numeric_features.size()), cat_feature_keys_(cat_feature_keys) {
446  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
447  if (num_cat_features != cat_feature_keys_.size()) {
448  throw std::runtime_error(
449  "Number of provided categorical features does not match number of categorical "
450  "features in the model.");
451  }
452  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
453  one_hot_encoding_infos;
454  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
455  one_hot_encoding_infos.emplace_back(cat_feature_keys_[cat_idx]);
456  }
457  one_hot_encoded_cols_ =
458  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
459  cat_features, one_hot_encoding_infos);
460  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
461  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
462  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
463  }
464  }
465  const int64_t num_numeric_features = numeric_features.numCols();
466  for (int64_t numeric_feature_idx = 0; numeric_feature_idx < num_numeric_features;
467  ++numeric_feature_idx) {
468  col_ptrs_.emplace_back(numeric_features.ptrs_[numeric_feature_idx]);
469  }
470  }
DEVICE int64_t numCols() const
DEVICE int64_t numCols() const
int8_t ** ptrs_
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_
DEVICE int64_t size() const

+ Here is the call graph for this function:

template<typename T>
CategoricalFeaturesBuilder< T >::CategoricalFeaturesBuilder ( const ColumnList< TextEncodingDict > &  cat_features,
const std::vector< std::vector< std::string >> &  cat_feature_keys 
)
inline

Definition at line 472 of file MLTableFunctions.hpp.

References ColumnList< TextEncodingDict >::numCols().

475  : num_rows_(cat_features.size()), cat_feature_keys_(cat_feature_keys) {
476  const size_t num_cat_features = static_cast<size_t>(cat_features.numCols());
477  if (num_cat_features != cat_feature_keys_.size()) {
478  throw std::runtime_error(
479  "Number of provided categorical features does not match number of categorical "
480  "features in the model.");
481  }
482  std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo>
483  one_hot_encoding_infos;
484  for (size_t cat_idx = 0; cat_idx < num_cat_features; ++cat_idx) {
485  one_hot_encoding_infos.emplace_back(cat_feature_keys_[cat_idx]);
486  }
487  one_hot_encoded_cols_ =
488  TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode<T>(
489  cat_features, one_hot_encoding_infos);
490  for (auto& one_hot_encoded_col : one_hot_encoded_cols_) {
491  for (auto& one_hot_encoded_vec : one_hot_encoded_col.encoded_buffers) {
492  col_ptrs_.emplace_back(reinterpret_cast<int8_t*>(one_hot_encoded_vec.data()));
493  }
494  }
495  }
DEVICE int64_t size() const
DEVICE int64_t numCols() const
std::vector< int8_t * > col_ptrs_
std::vector< TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol< T > > one_hot_encoded_cols_
std::vector< std::vector< std::string > > cat_feature_keys_

+ Here is the call graph for this function:

Member Function Documentation

template<typename T>
const std::vector<std::vector<std::string> >& CategoricalFeaturesBuilder< T >::getCatFeatureKeys ( ) const
inline

Definition at line 502 of file MLTableFunctions.hpp.

Referenced by decision_tree_reg_fit__cpu_template(), gbt_reg_fit__cpu_template(), linear_reg_fit__cpu_template(), pca_fit__cpu_1(), pca_fit__cpu_template(), and random_forest_reg_fit__cpu_template().

502  {
503  return cat_feature_keys_;
504  }
std::vector< std::vector< std::string > > cat_feature_keys_

+ Here is the caller graph for this function:

template<typename T>
ColumnList<T> CategoricalFeaturesBuilder< T >::getFeatures ( )
inline

Definition at line 497 of file MLTableFunctions.hpp.

Referenced by decision_tree_reg_fit__cpu_template(), gbt_reg_fit__cpu_template(), linear_reg_fit__cpu_template(), pca_fit__cpu_1(), pca_fit__cpu_template(), and random_forest_reg_fit__cpu_template().

497  {
498  return ColumnList<T>(
499  col_ptrs_.data(), static_cast<int64_t>(col_ptrs_.size()), num_rows_);
500  }
std::vector< int8_t * > col_ptrs_

+ Here is the caller graph for this function:

Member Data Documentation

template<typename T>
std::vector<std::vector<std::string> > CategoricalFeaturesBuilder< T >::cat_feature_keys_
private

Definition at line 510 of file MLTableFunctions.hpp.

template<typename T>
std::vector<int8_t*> CategoricalFeaturesBuilder< T >::col_ptrs_
private

Definition at line 511 of file MLTableFunctions.hpp.

template<typename T>
int64_t CategoricalFeaturesBuilder< T >::num_rows_
private

Definition at line 507 of file MLTableFunctions.hpp.

template<typename T>
std::vector<TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodedCol<T> > CategoricalFeaturesBuilder< T >::one_hot_encoded_cols_
private

Definition at line 509 of file MLTableFunctions.hpp.


The documentation for this struct was generated from the following file: