OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
OneHotEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2023 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifndef __CUDACC__
20 
22 #include "Shared/funcannotations.h"
23 
24 #include <vector>
25 
26 namespace TableFunctions_Namespace {
27 
28 namespace OneHotEncoder_Namespace {
29 
32  int32_t top_k_attrs;
35  std::vector<std::string> cat_features;
36 
38 
40  const float min_attr_proportion,
41  const bool include_others_attr)
43  , top_k_attrs(top_k_attrs)
44  , min_attr_proportion(min_attr_proportion)
45  , include_others_attr(include_others_attr) {}
46 
47  OneHotEncodingInfo(const std::vector<std::string>& cat_features)
48  : is_one_hot_encoded(true), cat_features(cat_features) {}
49 };
50 
51 template <typename F>
53  std::vector<std::vector<F>> encoded_buffers;
54  std::vector<std::string> cat_features;
55 };
56 
69 template <typename F>
71  const Column<TextEncodingDict>& text_col,
73  one_hot_encoding_info);
74 
87 template <typename F>
88 NEVER_INLINE HOST std::vector<OneHotEncodedCol<F>> one_hot_encode(
89  const ColumnList<TextEncodingDict>& text_cols,
90  const std::vector<
92  one_hot_encoding_infos);
93 
94 } // namespace OneHotEncoder_Namespace
95 
96 } // namespace TableFunctions_Namespace
97 
98 #endif // #ifndef __CUDACC__
OneHotEncodingInfo(const std::vector< std::string > &cat_features)
Definition: OneHotEncoder.h:47
NEVER_INLINE HOST OneHotEncodedCol< F > one_hot_encode(const Column< TextEncodingDict > &text_col, const TableFunctions_Namespace::OneHotEncoder_Namespace::OneHotEncodingInfo &one_hot_encoding_info)
Takes a column of text-encoded data and one-hot encoding information as input. It performs the one-hot encoding and returns the result as a OneHotEncodedCol< F >.
#define HOST
bool g_enable_smem_group_by true
#define NEVER_INLINE
bool g_enable_watchdog false
Definition: Execute.cpp:80
OneHotEncodingInfo(const int32_t top_k_attrs, const float min_attr_proportion, const bool include_others_attr)
Definition: OneHotEncoder.h:39