OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringViewToScalarEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "BaseConvertEncoder.h"
21 
22 namespace data_conversion {
23 
// Encoder that takes `std::string_view` inputs, converts each one to a scalar
// `DataType`, and appends the raw bytes of the converted value to `buffer_`.
//
// Template parameters:
//   DataType     - type of each converted element appended to the buffer.
//   MetadataType - forwarded to TypedBaseConvertEncoder; defaults to DataType.
//
// NOTE(review): this listing comes from a generated documentation view and its
// embedded line numbers skip in several places, so some statements (parts of the
// constructor signature, bodies of a few methods, the member declarations) are
// not visible here. Comments below describe only the code that is visible.
24 template <typename DataType, typename MetadataType = DataType>
25 class StringViewToScalarEncoder : public TypedBaseConvertEncoder<DataType, MetadataType> {
26  public:
   // Constructor. `error_tracking_enabled` is forwarded to the base encoder and
   // `dst_chunk` is kept in `dst_chunk_`. Whether the destination column stores
   // dates in days is read once from the chunk's column descriptor and cached.
   // Both overflow validators start out empty (std::nullopt); initialize() runs last.
   // NOTE(review): the first line of the signature is not visible in this listing.
28  const bool error_tracking_enabled)
29  : TypedBaseConvertEncoder<DataType, MetadataType>(error_tracking_enabled)
30  , dst_chunk_(dst_chunk)
31  , is_date_in_days_(dst_chunk_.getColumnDesc()->columnType.is_date_in_days())
32  , date_days_overflow_validator_(std::nullopt)
33  , decimal_overflow_validator_(std::nullopt) {
34  initialize();
35  }
36 
   // Converts and appends `num_elements` values.
   // `data` is reinterpreted as an array of `std::string_view`; each element is
   // passed through convertAndUpdateMetadata() and the result is appended to
   // `buffer_` as `sizeof(DataType)` raw bytes.
37  void encodeAndAppendData(const int8_t* data, const size_t num_elements) override {
38  auto typed_data = reinterpret_cast<const std::string_view*>(data);
39  for (size_t i = 0; i < num_elements; ++i) {
40  auto converted_value = convertAndUpdateMetadata(typed_data[i]);
41  buffer_->append(reinterpret_cast<int8_t*>(&converted_value), sizeof(DataType));
42  }
43  }
44 
   // Override of the base `clear()`.
   // NOTE(review): the body is not visible in this listing — confirm against the
   // real header what state is reset.
45  void clear() override {
48  }
49 
   // Returns a const reference to the destination chunk held by this encoder.
50  const Chunk_NS::Chunk& getDstChunk() const { return dst_chunk_; }
51 
   // Returns a mutable reference to an optional vector of per-row flags.
   // NOTE(review): the return statement is not visible here; presumably this
   // exposes BaseConvertEncoder::delete_buffer_ — confirm.
52  std::optional<std::vector<bool>>& getDeleteBuffer() {
54  }
55 
56  private:
   // One-time setup called from the constructor: copies the destination column's
   // type info into `dst_type_info_`.
   // NOTE(review): the statement between the copy and the first `if`, and the
   // bodies of both `if` blocks, are not visible. Presumably they set `buffer_`
   // and construct the date-in-days / decimal overflow validators — confirm.
57  void initialize() {
58  auto type_info = dst_chunk_.getColumnDesc()->columnType;
59  dst_type_info_ = type_info;
61  if (is_date_in_days_) {
63  }
64  if (dst_type_info_.is_decimal()) {
66  }
67  }
68 
   // Converts one input value and records bookkeeping for it.
   //
   // Visible behaviour:
   //  - For inputs that BaseConvertEncoder::isNull() reports as null, one branch
   //    throws std::runtime_error("NULL value not allowed in NOT NULL column");
   //    other branches push `true` or `false` onto `delete_buffer_`.
   //  - Otherwise the value is converted with convert(); on success `false` is
   //    pushed onto `delete_buffer_` (under a condition that is not visible).
   //  - If conversion throws a std::exception, either `true` is pushed onto
   //    `delete_buffer_` and the exception is swallowed, or it is rethrown.
   //
   // NOTE(review): the conditions selecting between these branches, and the
   // null-path return, are not visible in this listing. Presumably they test the
   // column's NOT NULL flag and whether error tracking is enabled — confirm.
69  DataType convertAndUpdateMetadata(const std::string_view& typed_value) {
70  if (BaseConvertEncoder::isNull(typed_value)) {
73  throw std::runtime_error("NULL value not allowed in NOT NULL column");
74  }
76  } else {
78  BaseConvertEncoder::delete_buffer_->push_back(true);
79  } else {
80  BaseConvertEncoder::delete_buffer_->push_back(false);
81  }
82  }
84  }
85 
   // Value-initialized up front so that a swallowed conversion failure still
   // returns a well-defined value.
86  DataType converted_value{};
87  try {
88  converted_value = convert(typed_value);
   // NOTE(review): the start of this call is not visible; it takes the converted
   // value and `is_date_in_days_` (presumably the metadata-stats update — confirm).
90  converted_value, is_date_in_days_);
92  BaseConvertEncoder::delete_buffer_->push_back(false);
93  }
94  } catch (std::exception& except) {
97  BaseConvertEncoder::delete_buffer_->push_back(true);
98  } else {
99  throw;
100  }
101  }
102  return converted_value;
103  }
104 
   // Parses `typed_value` into a `DataType`.
   //
   // When `DataType` is `std::string_view`, the visible branch returns the input
   // unchanged. Otherwise the text is parsed with StringToDatum() using
   // `dst_type_info_`, and the Datum member matching the destination SQL type is
   // assigned to the result.
   // NOTE(review): a few statements in both branches are not visible in this
   // listing.
105  DataType convert(const std::string_view& typed_value) {
106  if constexpr (std::is_same<DataType, std::string_view>::value) {
108  return typed_value;
109  }
110  } else {
112 
113  auto& type_info = dst_type_info_;
114 
115  // TODO: remove this CHECK if it shows up in profiling
116  CHECK(type_info.is_integer() || type_info.is_boolean() || type_info.is_fp() ||
117  type_info.is_decimal() || type_info.is_time_or_date());
118 
119  // TODO: the call to `StringToDatum` and the switch statement below can be
120  // merged into one switch calling the appropriate parsing subroutine and may
121  // improve performance. Profile if improvement is observed.
   // The const_cast presumably exists because StringToDatum takes a non-const
   // SQLTypeInfo& — confirm against its declaration.
122  Datum d = StringToDatum(typed_value, const_cast<SQLTypeInfo&>(type_info));
123  DataType result{};
124  switch (type_info.get_type()) {
125  case kBOOLEAN:
126  result = d.boolval;
127  break;
128  case kBIGINT:
129  case kTIME:
130  case kTIMESTAMP:
131  result = d.bigintval;
132  break;
133  case kNUMERIC:
134  case kDECIMAL:
   // The overflow validator is only consulted for fixed-encoded decimals; the
   // parsed value is stored from `bigintval` either way.
135  if (type_info.get_compression() == kENCODING_FIXED) {
136  decimal_overflow_validator_->validate(d.bigintval);
137  }
138  result = d.bigintval;
139  break;
140  case kDATE:
141  if (is_date_in_days_) {
142  date_days_overflow_validator_->validate(d.bigintval);
   // NOTE(review): the assignment to `result` for the date-in-days case is not
   // visible here; presumably it converts epoch seconds to days — confirm.
144  } else {
145  result = d.bigintval;
146  }
147  break;
148  case kINT:
149  result = d.intval;
150  break;
151  case kSMALLINT:
152  result = d.smallintval;
153  break;
154  case kTINYINT:
155  result = d.tinyintval;
156  break;
157  case kFLOAT:
158  result = d.floatval;
159  break;
160  case kDOUBLE:
161  result = d.doubleval;
162  break;
163  default:
   // Every type admitted by the CHECK above is expected to be handled by a case.
164  UNREACHABLE();
165  }
166 
167  return result;
168  }
169  }
170 
   // NOTE(review): further member declarations (e.g. dst_chunk_, buffer_,
   // dst_type_info_, which the methods above use) are not visible in this listing.
174 
175  const bool is_date_in_days_; // stored as a const bool to aid compiler in optimizing
176  // code paths related to this flag
177 
   // Both are empty (std::nullopt) after the constructor's initializer list and
   // are dereferenced in convert() on the decimal / date-in-days paths.
178  std::optional<DateDaysOverflowValidator> date_days_overflow_validator_;
179  std::optional<DecimalOverflowValidator> decimal_overflow_validator_;
180 };
181 
182 } // namespace data_conversion
void updateMetadataStats(const DataType &typed_value, const bool is_date_in_days=false)
Definition: sqltypes.h:76
std::optional< std::vector< bool > > & getDeleteBuffer()
DataType convertAndUpdateMetadata(const std::string_view &typed_value)
std::optional< DecimalOverflowValidator > decimal_overflow_validator_
#define UNREACHABLE()
Definition: Logger.h:338
DataType convert(const std::string_view &typed_value)
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
An AbstractBuffer is a unit of data management for a data manager.
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:339
bool isNull(const DataType &typed_value)
std::optional< std::vector< bool > > delete_buffer_
Definition: sqltypes.h:80
StringViewToScalarEncoder(const Chunk_NS::Chunk &dst_chunk, const bool error_tracking_enabled)
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
bool is_none_encoded_string() const
Definition: sqltypes.h:647
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:291
int64_t get_epoch_days_from_seconds(const int64_t seconds)
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::optional< DateDaysOverflowValidator > date_days_overflow_validator_
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
Definition: Datum.h:71
bool is_decimal() const
Definition: sqltypes.h:570
void encodeAndAppendData(const int8_t *data, const size_t num_elements) override