OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Encoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ENCODER_H
18 #define ENCODER_H
19 
20 #include <cmath>
21 #include <iostream>
22 #include <limits>
23 #include <stdexcept>
24 #include <vector>
25 
26 #include "ChunkMetadata.h"
27 #include "Shared/DateConverters.h"
28 #include "Shared/sqltypes.h"
29 #include "Shared/types.h"
30 
31 namespace Data_Namespace {
32 class AbstractBuffer;
33 }
34 
35 // Default maximum input buffer size: 1 MiB (1048576 bytes).
36 #define MAX_INPUT_BUF_SIZE 1048576
37 
39  public:
41  if (type.is_array()) {
42  type = type.get_elem_type();
43  }
44 
45  do_check_ = type.is_decimal();
46  int precision = type.get_precision();
47  int scale = type.get_scale();
48  max_ = (int64_t)std::pow((double)10.0, precision);
49  min_ = -max_;
50  pow10_ = precision - scale;
51  }
52 
53  template <typename T>
54  void validate(T value) const {
55  if (std::is_integral<T>::value) {
56  do_validate(static_cast<int64_t>(value));
57  }
58  }
59 
60  void do_validate(int64_t value) const {
61  if (!do_check_) {
62  return;
63  }
64 
65  if (value >= max_) {
66  throw std::runtime_error("Decimal overflow: value is greater than 10^" +
68  " value " + std::to_string(value));
69  }
70 
71  if (value <= min_) {
72  throw std::runtime_error("Decimal overflow: value is less than -10^" +
74  " value " + std::to_string(value));
75  }
76  }
77 
78  private:
79  bool do_check_;
80  int64_t max_;
81  int64_t min_;
82  int pow10_;
83 };
84 
85 template <typename INNER_VALIDATOR>
87  public:
88  NullAwareValidator(SQLTypeInfo type, INNER_VALIDATOR* inner_validator) {
89  if (type.is_array()) {
90  type = type.get_elem_type();
91  }
92 
94  inner_validator_ = inner_validator;
95  }
96 
97  template <typename T>
98  void validate(T value) {
99  if (skip_null_check_ || value != inline_int_null_value<T>()) {
100  inner_validator_->template validate<T>(value);
101  }
102  }
103 
104  private:
106  INNER_VALIDATOR* inner_validator_;
107 };
108 
110  public:
113  type.is_array() ? type.get_elem_type().is_date_in_days() : type.is_date_in_days();
114  const bool is_date_16_ = is_date_in_days_ ? type.get_comp_param() == 16 : false;
115  max_ = is_date_16_ ? static_cast<int64_t>(std::numeric_limits<int16_t>::max())
116  : static_cast<int64_t>(std::numeric_limits<int32_t>::max());
117  min_ = is_date_16_ ? static_cast<int64_t>(std::numeric_limits<int16_t>::min())
118  : static_cast<int64_t>(std::numeric_limits<int32_t>::min());
119  }
120 
121  template <typename T>
122  void validate(T value) {
123  if (!is_date_in_days_ || !std::is_integral<T>::value) {
124  return;
125  }
126  const int64_t days =
127  DateConverters::get_epoch_days_from_seconds(static_cast<int64_t>(value));
128  if (days > max_) {
129  throw std::runtime_error("Date encoding overflow: Epoch days " +
130  std::to_string(days) + " greater than maximum capacity " +
132  }
133  if (days < min_) {
134  throw std::runtime_error("Date encoding underflow: Epoch days " +
135  std::to_string(days) + " less than minimum capacity " +
137  }
138  }
139 
140  private:
142  int64_t max_;
143  int64_t min_;
144 };
145 
146 class Encoder {
147  public:
149  const SQLTypeInfo sqlType);
151  virtual ~Encoder() {}
152 
168  const int8_t* index_data,
169  const std::vector<size_t>& selected_idx,
170  const size_t byte_limit) = 0;
171 
183  virtual std::shared_ptr<ChunkMetadata> appendEncodedDataAtIndices(
184  const int8_t* index_data,
185  int8_t* data,
186  const std::vector<size_t>& selected_idx) = 0;
187 
199  virtual std::shared_ptr<ChunkMetadata> appendEncodedData(const int8_t* index_data,
200  int8_t* data,
201  const size_t start_idx,
202  const size_t num_elements) = 0;
203 
211  virtual std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
212  const size_t num_elems_to_append,
213  const SQLTypeInfo& ti,
214  const bool replicating = false,
215  const int64_t offset = -1) = 0;
216  virtual void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata);
217  // Only called from the executor for synthesized meta-information.
218  virtual std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) = 0;
219  virtual void updateStats(const int64_t val, const bool is_null) = 0;
220  virtual void updateStats(const double val, const bool is_null) = 0;
221 
228  virtual void updateStats(const int8_t* const src_data, const size_t num_elements) = 0;
229 
236  virtual void updateStatsEncoded(const int8_t* const dst_data,
237  const size_t num_elements) {
238  UNREACHABLE();
239  }
240 
248  virtual void updateStats(const std::vector<std::string>* const src_data,
249  const size_t start_idx,
250  const size_t num_elements) = 0;
251 
259  virtual void updateStats(const std::vector<ArrayDatum>* const src_data,
260  const size_t start_idx,
261  const size_t num_elements) = 0;
262 
263  virtual void reduceStats(const Encoder&) = 0;
264  virtual void copyMetadata(const Encoder* copyFromEncoder) = 0;
265  virtual void writeMetadata(FILE* f /*, const size_t offset*/) = 0;
266  virtual void readMetadata(FILE* f /*, const size_t offset*/) = 0;
267 
274  virtual bool resetChunkStats(const ChunkStats&) {
275  UNREACHABLE() << "Attempting to reset stats for unsupported type.";
276  return false;
277  }
278 
282  virtual void resetChunkStats() = 0;
283 
284  size_t getNumElems() const { return num_elems_; }
285  void setNumElems(const size_t num_elems) { num_elems_ = num_elems; }
286 
287  protected:
288  size_t num_elems_;
289 
291 
294 };
295 
296 #endif // Encoder_h
virtual void resetChunkStats()=0
size_t num_elems_
Definition: Encoder.h:288
DateDaysOverflowValidator(SQLTypeInfo type)
Definition: Encoder.h:111
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:292
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
static Encoder * Create(Data_Namespace::AbstractBuffer *buffer, const SQLTypeInfo sqlType)
Definition: Encoder.cpp:26
INNER_VALIDATOR * inner_validator_
Definition: Encoder.h:106
#define UNREACHABLE()
Definition: Logger.h:338
Constants for Builtin SQL Types supported by HEAVY.AI.
void setNumElems(const size_t num_elems)
Definition: Encoder.h:285
std::string to_string(char const *&&v)
virtual bool resetChunkStats(const ChunkStats &)
: Reset chunk level stats (min, max, nulls) using new values from the argument.
Definition: Encoder.h:274
virtual void reduceStats(const Encoder &)=0
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:231
bool skip_null_check_
Definition: Encoder.h:105
void do_validate(int64_t value) const
Definition: Encoder.h:60
virtual size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit)=0
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:290
size_t getNumElems() const
Definition: Encoder.h:284
void validate(T value)
Definition: Encoder.h:122
An AbstractBuffer is a unit of data management for a data manager.
virtual ~Encoder()
Definition: Encoder.h:151
void validate(T value)
Definition: Encoder.h:98
Encoder(Data_Namespace::AbstractBuffer *buffer)
Definition: Encoder.cpp:225
DateDaysOverflowValidator date_days_overflow_validator_
Definition: Encoder.h:293
int get_precision() const
Definition: sqltypes.h:394
bool is_date_in_days() const
Definition: sqltypes.h:1018
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
NullAwareValidator(SQLTypeInfo type, INNER_VALIDATOR *inner_validator)
Definition: Encoder.h:88
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:402
virtual void updateStatsEncoded(const int8_t *const dst_data, const size_t num_elements)
Definition: Encoder.h:236
int64_t get_epoch_days_from_seconds(const int64_t seconds)
virtual void writeMetadata(FILE *f)=0
virtual std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx)=0
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
void validate(T value) const
Definition: Encoder.h:54
virtual void updateStats(const int64_t val, const bool is_null)=0
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
bool is_decimal() const
Definition: sqltypes.h:570
virtual void copyMetadata(const Encoder *copyFromEncoder)=0
virtual void readMetadata(FILE *f)=0
DecimalOverflowValidator(SQLTypeInfo type)
Definition: Encoder.h:40
bool is_array() const
Definition: sqltypes.h:585
virtual std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements)=0
virtual std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1)=0