17 #ifndef DATE_DAYS_ENCODER_H
18 #define DATE_DAYS_ENCODER_H
30 template <
typename T,
typename V>
38 const std::vector<size_t>& selected_idx,
39 const size_t byte_limit)
override {
41 <<
"getNumElemsForBytesEncodedDataAtIndices unexpectedly called for non varlen"
49 const std::vector<size_t>& selected_idx)
override {
50 std::shared_ptr<ChunkMetadata> chunk_metadata;
55 selected_idx, [&](
const size_t start_pos,
const size_t end_pos) {
56 size_t elem_count = end_pos - start_pos;
60 return chunk_metadata;
65 const size_t start_idx,
66 const size_t num_elements)
override {
67 auto current_data = data +
sizeof(V) * start_idx;
69 current_data, num_elements,
SQLTypeInfo{},
false, -1,
true);
72 std::shared_ptr<ChunkMetadata>
appendData(int8_t*& src_data,
73 const size_t num_elems_to_append,
75 const bool replicating =
false,
76 const int64_t offset = -1)
override {
78 src_data, num_elems_to_append, ti, replicating, offset,
false);
81 void getMetadata(
const std::shared_ptr<ChunkMetadata>& chunkMetadata)
override {
88 auto chunk_metadata = std::make_shared<ChunkMetadata>(ti, 0, 0,
ChunkStats{});
90 return chunk_metadata;
98 const auto data =
static_cast<T>(val);
109 const auto data =
static_cast<T>(val);
115 void updateStats(
const int8_t*
const src_data,
const size_t num_elements)
override {
116 const T* unencoded_data =
reinterpret_cast<const T*
>(src_data);
117 for (
size_t i = 0; i < num_elements; ++i) {
123 const size_t start_idx,
124 const size_t num_elements)
override {
129 const size_t start_idx,
130 const size_t num_elements)
override {
137 if (that_typed.has_nulls) {
147 dataMin = castedEncoder->dataMin;
148 dataMax = castedEncoder->dataMax;
// Serializes the encoder statistics to the stream `f` as raw bytes, in this
// order: num_elems_ (sizeof(size_t) bytes), dataMin (sizeof(T)), dataMax
// (sizeof(T)), has_nulls (sizeof(bool)). Each call writes one item of the
// given size.
// NOTE(review): presumably the body of writeMetadata(FILE* f); the enclosing
// signature is not visible in this excerpt -- confirm.
// NOTE(review): the fwrite return values are discarded in the lines shown, so
// a short write would not be detected here.
154 fwrite((int8_t*)&
num_elems_,
sizeof(
size_t), 1, f);
155 fwrite((int8_t*)&
dataMin,
sizeof(
T), 1, f);
156 fwrite((int8_t*)&
dataMax,
sizeof(
T), 1, f);
157 fwrite((int8_t*)&
has_nulls,
sizeof(
bool), 1, f);
// Reads the encoder statistics back from the stream `f` as raw bytes, in this
// order: num_elems_, dataMin, dataMax, has_nulls.
// NOTE(review): presumably the body of readMetadata(FILE* f); the enclosing
// signature is not visible in this excerpt -- confirm.
// NOTE(review): the first call passes (size, count) = (sizeof(size_t), 1),
// while the other three pass (1, sizeof(...)), i.e. size and count are
// swapped compared with the fwrite calls that store these fields. The number
// of bytes requested is the same either way; only the returned item count
// would differ, and the return values are discarded in the lines shown.
162 fread((int8_t*)&
num_elems_,
sizeof(
size_t), 1, f);
// The remaining fields are read as sizeof(field) items of 1 byte each.
163 fread((int8_t*)&
dataMin, 1,
sizeof(
T), f);
164 fread((int8_t*)&
dataMax, 1,
sizeof(
T), f);
165 fread((int8_t*)&
has_nulls, 1,
sizeof(
bool), f);
169 const auto new_min = DatumFetcher::getDatumVal<T>(stats.
min);
170 const auto new_max = DatumFetcher::getDatumVal<T>(stats.
max);
183 dataMin = std::numeric_limits<T>::max();
184 dataMax = std::numeric_limits<T>::lowest();
195 const size_t num_elems_to_append,
197 const bool replicating,
198 const int64_t offset,
199 const bool is_encoded) {
200 if (offset == 0 && num_elems_to_append >=
num_elems_) {
204 CHECK(!is_encoded || !replicating);
206 T* unencoded_data =
reinterpret_cast<T*
>(src_data);
207 std::vector<V> encoded_data;
208 V* data_to_write =
nullptr;
210 encoded_data.resize(num_elems_to_append);
211 data_to_write = encoded_data.data();
212 for (
size_t i = 0; i < num_elems_to_append; ++i) {
213 size_t ri = replicating ? 0 : i;
217 data_to_write =
reinterpret_cast<V*
>(src_data);
218 for (
size_t i = 0; i < num_elems_to_append; ++i) {
224 auto append_data_size = num_elems_to_append *
sizeof(V);
227 buffer_->
append(reinterpret_cast<int8_t*>(data_to_write), append_data_size);
229 src_data += num_elems_to_append *
sizeof(
T);
235 buffer_->
write(reinterpret_cast<int8_t*>(data_to_write),
236 num_elems_to_append *
sizeof(V),
237 static_cast<size_t>(offset));
240 auto chunk_metadata = std::make_shared<ChunkMetadata>();
242 return chunk_metadata;
246 if (encoded_data == std::numeric_limits<V>::min()) {
257 if (unencoded_data == std::numeric_limits<V>::min()) {
259 encoded_data =
static_cast<V
>(unencoded_data);
271 #endif // DATE_DAYS_ENCODER_H
void updateStats(const int8_t *const src_data, const size_t num_elements) override
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
void updateStats(const int64_t val, const bool is_null) override
void resetChunkStats() override
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
void execute_over_contiguous_indices(const std::vector< size_t > &indices, std::function< void(const size_t, const size_t)> to_execute)
void readMetadata(FILE *f) override
int64_t get_epoch_seconds_from_days(const int64_t days)
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
V encodeDataAndUpdateStats(const T &unencoded_data)
void writeMetadata(FILE *f) override
size_t getNumElems() const
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
void updateStats(const double val, const bool is_null) override
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
DateDaysOverflowValidator date_days_overflow_validator_
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *, int8_t *data, const size_t start_idx, const size_t num_elements) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
void reduceStats(const Encoder &that) override
void copyMetadata(const Encoder *copyFromEncoder) override
bool resetChunkStats(const ChunkStats &stats) override
Reset chunk level stats (min, max, nulls) using new values from the argument.
void updateStatsWithAlreadyEncoded(const V &encoded_data)
int64_t get_epoch_days_from_seconds(const int64_t seconds)
std::shared_ptr< ChunkMetadata > appendEncodedOrUnencodedData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating, const int64_t offset, const bool is_encoded)
virtual void reserve(size_t num_bytes)=0
DateDaysEncoder(Data_Namespace::AbstractBuffer *buffer)
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *, int8_t *data, const std::vector< size_t > &selected_idx) override