OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringViewToStringNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "BaseConvertEncoder.h"
20 
21 namespace data_conversion {
22 
24  public:
26  const bool error_tracking_enabled)
27  : BaseConvertEncoder(error_tracking_enabled), dst_chunk_(dst_chunk) {
28  initialize();
29  }
30 
31  void encodeAndAppendData(const int8_t* data, const size_t num_elements) override {
32  auto typed_data = reinterpret_cast<const std::string_view*>(data);
34  // scan for any strings that may be an error
35  bool tracking_individual_strings_required = false;
36  size_t first_error_index = 0;
37  for (size_t i = 0; i < num_elements; ++i) {
38  if (typed_data[i].size() > StringDictionary::MAX_STRLEN) {
39  tracking_individual_strings_required = true;
40  first_error_index = i;
41  break;
42  }
43  }
44 
45  if (!tracking_individual_strings_required) {
46  for (size_t i = 0; i < num_elements; ++i) {
47  delete_buffer_->push_back(false);
48  }
49  StringNoneEncoder* encoder = getEncoder();
50  auto metadata = encoder->appendData(typed_data, 0, num_elements, false);
51  has_nulls_ |= metadata->chunkStats.has_nulls;
52  } else {
53  std::vector<std::string_view> tracked_strings(num_elements);
54  for (size_t i = first_error_index; i < num_elements; ++i) {
55  if (typed_data[i].size() > StringDictionary::MAX_STRLEN) {
56  tracked_strings[i] = {};
57  delete_buffer_->push_back(true);
58  } else {
59  tracked_strings[i] = typed_data[i];
60  delete_buffer_->push_back(false);
61  }
62  auto metadata =
63  getEncoder()->appendData(tracked_strings.data(), 0, num_elements, false);
64  has_nulls_ |= metadata->chunkStats.has_nulls;
65  }
66  }
67  } else {
68  for (size_t i = 0; i < num_elements; ++i) {
69  if (typed_data[i].size() > StringDictionary::MAX_STRLEN) {
70  throw std::runtime_error("String length of " +
71  std::to_string(typed_data[i].size()) +
72  " exceeds allowed maximum string length of " +
74  }
75  }
76  auto metadata = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder())
77  ->appendData(typed_data, 0, num_elements, false);
78  has_nulls_ |= metadata->chunkStats.has_nulls;
79  }
80  }
82  auto encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
83  CHECK(encoder);
84  return encoder;
85  }
86 
87  private:
// Reads the column type from the destination chunk's column descriptor and
// stores it in dst_type_info_. Called from the constructor.
88  void initialize() {
89  auto type_info = dst_chunk_.getColumnDesc()->columnType;
90  dst_type_info_ = type_info;
// NOTE(review): listing line 91 is absent from this extracted page, so any
// further setup performed here cannot be confirmed from this view.
92  }
93 
97 };
98 
99 } // namespace data_conversion
StringViewToStringNoneEncoder(const Chunk_NS::Chunk &dst_chunk, const bool error_tracking_enabled)
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
std::string to_string(char const *&&v)
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
void encodeAndAppendData(const int8_t *data, const size_t num_elements) override
An AbstractBuffer is a unit of data management for a data manager.
std::optional< std::vector< bool > > delete_buffer_
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
#define CHECK(condition)
Definition: Logger.h:291
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType