// Fragment: parameter list and loop head of what appears to be
// getNumElemsForBytesInsertData (the parameters match the signature summary
// near the end of this listing). The function header, the start_idx
// parameter, the initialisation of n/dataSize and the loop tail are not
// visible here.
32 const std::vector<std::string>* srcData,
34 const size_t numAppendElems,
35 const size_t byteLimit,
36 const bool replicating) {
// Advance n until it reaches start_idx + numAppendElems; n is initialised
// somewhere above this line.
39 for (; n < start_idx + numAppendElems; n++) {
// When replicating is true, element 0 is measured on every iteration instead
// of element n.
40 size_t len = (*srcData)[replicating ? 0 :
n].length();
// Test whether adding this string's length would push the running byte total
// past byteLimit (the branch body is not visible in this listing).
41 if (dataSize + len > byteLimit) {
// Fragment: appears to be getNumElemsForBytesEncodedDataAtIndices (the
// parameters match the signature summary near the end of this listing).
// Several body lines, including how element_size and data_size are obtained
// and how num_elements is updated, are not visible.
50 const int8_t* index_data,
51 const std::vector<size_t>& selected_idx,
52 const size_t byte_limit) {
53 size_t num_elements = 0;
// Visit the selected indices in the order given by selected_idx.
55 for (
const auto& offset_index : selected_idx) {
// Test whether this element would take the running byte total past
// byte_limit (branch body not visible).
57 if (data_size + element_size > byte_limit) {
// Add this element's size to the running byte total.
60 data_size += element_size;
// Fragment: appears to be appendEncodedDataAtIndices (the parameters match the
// signature summary near the end of this listing; the `data` parameter line
// and the loop body are not visible).
67 const int8_t* index_data,
69 const std::vector<size_t>& selected_idx) {
// Staging vector of string views, with capacity reserved for one entry per
// selected index.
70 std::vector<std::string_view> data_subset;
71 data_subset.reserve(selected_idx.size());
// Loop over the selected indices; presumably each iteration pushes a view
// into data_subset (body not visible - confirm against the full source).
72 for (
const auto& offset_index : selected_idx) {
// Forward to appendData over the staging vector: start index 0,
// selected_idx.size() elements, replicating disabled.
75 return appendData(&data_subset, 0, selected_idx.size(),
false);
// Fragment: appears to be appendEncodedData (the parameters match the
// signature summary near the end of this listing; the `data` parameter line
// and part of the loop body are not visible).
79 const int8_t* index_data,
81 const size_t start_idx,
82 const size_t num_elements) {
// Staging vector of string views, with capacity reserved for num_elements
// entries.
83 std::vector<std::string_view> data_subset;
84 data_subset.reserve(num_elements);
// Walk num_elements consecutive positions beginning at start_idx.
85 for (
size_t count = 0; count < num_elements; ++count) {
86 auto current_index = start_idx + count;
// Forward to appendData over the staging vector: start index 0,
// num_elements elements, replicating disabled.
89 return appendData(&data_subset, 0, num_elements,
false);
// Fragment: appendData overload taking a pointer to a std::vector<StringType>.
// The return type, function name and start_idx parameter lines are not
// visible in this listing.
92 template <
typename StringType>
94 const std::vector<StringType>* srcData,
96 const size_t numAppendElems,
97 const bool replicating) {
// Delegate to the appendData overload that accepts the vector's underlying
// element array (srcData->data()), passing the remaining arguments unchanged.
98 return appendData(srcData->data(), start_idx, numAppendElems, replicating);
// Fragment: appendData overload that indexes srcData directly (the explicit
// instantiations later in this listing show a `const StringType*` first
// parameter). Many original lines are missing here, including the function
// header, the computation of inbuf_size, and the writes to the index buffer.
101 template <
typename StringType>
104 const size_t numAppendElems,
105 const bool replicating) {
// Byte size of the offset entries for the appended elements: one
// StringOffsetT per element.
107 size_t append_index_size = numAppendElems *
sizeof(
StringOffsetT);
// Sum the lengths of the strings to be appended. When replicating is true,
// element 0 is counted numAppendElems times.
127 size_t append_data_size = 0;
128 for (
size_t n = start_idx;
n < start_idx + numAppendElems;
n++) {
129 size_t len = (srcData)[replicating ? 0 :
n].length();
130 append_data_size += len;
// Scratch buffer of inbuf_size bytes, released automatically by unique_ptr.
136 auto inbuf = std::make_unique<int8_t[]>(inbuf_size);
// First pass: produce offset entries in batches. The outer loop has no
// increment expression; num_appended is advanced by the inner loop.
137 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
// Each batch holds at most inbuf_size / sizeof(StringOffsetT) entries.
140 for (i = 0; num_appended < numAppendElems && i < inbuf_size /
sizeof(
StringOffsetT);
141 i++, num_appended++) {
// Entry i is last_offset plus the current string's length. Where last_offset
// is updated is not visible in this listing.
142 p[i] =
last_offset + (srcData)[replicating ? 0 : num_appended + start_idx].length();
// Second pass: append the string bytes, again in batches driven by the
// inner loop.
148 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
// NOTE(review): `i` is declared int but initialised from start_idx +
// num_appended, where num_appended is size_t - a narrowing conversion worth
// confirming against the full source.
150 for (
int i = start_idx + num_appended;
151 num_appended < numAppendElems && size < inbuf_size;
152 i++, num_appended++) {
153 size_t len = (srcData)[replicating ? 0 : i].length();
// A string longer than the whole scratch buffer is handed to buffer_->append
// directly from its own storage.
154 if (len > inbuf_size) {
// NOTE(review): C-style cast to int8_t* on the pointer returned by data().
160 buffer_->
append((int8_t*)(srcData)[replicating ? 0 : i].data(), len);
163 }
// The string does not fit in the space remaining in the scratch buffer
// (branch body not visible).
else if (size + len > inbuf_size) {
// Copy len characters of the string into the scratch buffer at byte offset
// `size`.
166 char*
dest =
reinterpret_cast<char*
>(inbuf.get()) + size;
168 (srcData)[replicating ? 0 : i].
copy(dest, len);
// Create the ChunkMetadata object that is returned; how it is populated is
// not visible in this listing.
184 auto chunk_metadata = std::make_shared<ChunkMetadata>();
186 return chunk_metadata;
190 const size_t start_idx,
191 const size_t num_elements) {
192 for (
size_t n = start_idx;
n < start_idx + num_elements;
n++) {
200 template <
typename StringType>
// Fragment: appears to be getStringOffsets (the parameters and the pair
// return match the signature summary near the end of this listing). The
// function header, the `index` parameter line and original line 213 are not
// visible.
208 const int8_t* index_data,
// Reinterpret the raw index bytes as an array of StringOffsetT entries.
210 auto string_offsets =
reinterpret_cast<const StringOffsetT*
>(index_data);
// `offset` is read from entry index + 1 and `last_offset` from entry index.
211 auto current_index = index + 1;
212 auto offset = string_offsets[current_index];
214 int64_t
last_offset = string_offsets[current_index - 1];
// Require last_offset to be non-negative and no greater than offset.
215 CHECK(last_offset >= 0 && last_offset <= offset);
// The returned pair is ordered {offset, last_offset}.
216 return {offset, last_offset};
222 return string_byte_size;
// Fragment: tail of what appears to be getStringAtIndex (it returns a
// std::string_view, matching the signature summary near the end of this
// listing). How last_offset and string_byte_size are obtained is not visible.
// Point at the byte last_offset bytes into `data`.
230 auto current_data =
reinterpret_cast<const char*
>(data +
last_offset);
// Return a view of string_byte_size characters starting at that position;
// the view refers to the bytes in `data` rather than copying them.
231 return std::string_view{current_data, string_byte_size};
// Explicit instantiations of StringNoneEncoder::appendData for std::string
// and std::string_view, in both the vector-pointer and element-pointer forms.
// One parameter line of each declaration is missing from this listing.

// Vector-pointer form, std::string.
234 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string>(
235 const std::vector<std::string>* srcData,
237 const size_t numAppendElems,
238 const bool replicating);
// Vector-pointer form, std::string_view.
240 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string_view>(
241 const std::vector<std::string_view>* srcData,
243 const size_t numAppendElems,
244 const bool replicating);
// Element-pointer form, std::string.
246 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string>(
247 const std::string* srcData,
249 const size_t numAppendElems,
250 const bool replicating);
// Element-pointer form, std::string_view.
252 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string_view>(
253 const std::string_view* srcData,
255 const size_t numAppendElems,
256 const bool replicating);
// Explicit instantiations of StringNoneEncoder::update_elem_stats for the two
// string types also used by the appendData instantiations: std::string and
// std::string_view.
258 template void StringNoneEncoder::update_elem_stats<std::string>(
const std::string& elem);
259 template void StringNoneEncoder::update_elem_stats<std::string_view>(
260 const std::string_view& elem);
// Fragment: appears to be the body of getMetadata(chunkMetadata) (the
// parameter name matches the signature summary near the end of this listing;
// the function header and any preceding statements are not visible).
// The string min/max statistics are set to nullptr.
264 chunkMetadata->chunkStats.min.stringval =
nullptr;
265 chunkMetadata->chunkStats.max.stringval =
nullptr;
// Copy the has_nulls value into the chunk statistics.
266 chunkMetadata->chunkStats.has_nulls =
has_nulls;
273 chunk_stats.max.stringval =
nullptr;
275 return std::make_shared<ChunkMetadata>(ti, 0, 0, chunk_stats);
static std::string_view getStringAtIndex(const int8_t *index_data, const int8_t *data, size_t index)
void updateStats(const int64_t, const bool) override
#define MAX_INPUT_BUF_SIZE
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
AbstractBuffer * index_buf
Data_Namespace::AbstractBuffer * buffer_
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
DEVICE auto copy(ARGS &&...args)
An AbstractBuffer is a unit of data management for a data manager.
static size_t getStringSizeAtIndex(const int8_t *index_data, size_t index)
void update_elem_stats(const StringType &elem)
StringOffsetT last_offset
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
virtual void reserve(size_t num_bytes)=0
static std::pair< StringOffsetT, StringOffsetT > getStringOffsets(const int8_t *index_data, size_t index)