23 #ifndef ARRAY_NONE_ENCODER_H
24 #define ARRAY_NONE_ENCODER_H
52 const size_t numAppendElems,
53 const size_t byteLimit,
54 const bool replicating =
false) {
58 for (; n < start_idx + numAppendElems; n++) {
59 size_t len = (*srcData)[replicating ? 0 :
n].length;
60 if (dataSize + len > byteLimit) {
69 const std::vector<size_t>& selected_idx,
70 const size_t byte_limit)
override {
71 size_t num_elements = 0;
73 for (
const auto& offset_index : selected_idx) {
75 if (data_size + element_size > byte_limit) {
78 data_size += element_size;
84 std::shared_ptr<ChunkMetadata>
appendData(int8_t*& src_data,
85 const size_t num_elems_to_append,
87 const bool replicating =
false,
88 const int64_t offset = -1)
override {
94 const int8_t* index_data,
96 const std::vector<size_t>& selected_idx)
override {
97 std::vector<ArrayDatum> data_subset;
98 data_subset.reserve(selected_idx.size());
99 for (
const auto& offset_index : selected_idx) {
102 return appendData(&data_subset, 0, selected_idx.size(),
false);
107 const size_t start_idx,
108 const size_t num_elements)
override {
109 std::vector<ArrayDatum> data_subset;
110 data_subset.reserve(num_elements);
111 for (
size_t count = 0; count < num_elements; ++count) {
112 auto current_index = start_idx + count;
115 return appendData(&data_subset, 0, num_elements,
false);
118 std::shared_ptr<ChunkMetadata>
appendData(
const std::vector<ArrayDatum>* srcData,
120 const size_t numAppendElems,
121 const bool replicating) {
123 size_t append_index_size = numAppendElems *
sizeof(
ArrayOffsetT);
129 bool first_elem_padded =
false;
132 if ((*srcData)[0].is_null || (*srcData)[0].length <= 1) {
139 first_elem_padded =
true;
162 for (
size_t n = start_idx;
n < start_idx + numAppendElems;
n++) {
164 if ((*srcData)[replicating ? 0 :
n].is_null) {
167 append_data_size += (*srcData)[replicating ? 0 :
n].length;
171 size_t inbuf_size = std::min(std::max(append_index_size, append_data_size),
173 auto gc_inbuf = std::make_unique<int8_t[]>(inbuf_size);
174 auto inbuf = gc_inbuf.get();
175 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
178 for (i = 0; num_appended < numAppendElems && i < inbuf_size /
sizeof(
ArrayOffsetT);
179 i++, num_appended++) {
181 last_offset + (*srcData)[replicating ? 0 : num_appended + start_idx].length;
183 if ((*srcData)[replicating ? 0 : num_appended + start_idx].is_null) {
192 if (first_elem_padded) {
196 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
198 for (
int i = start_idx + num_appended;
199 num_appended < numAppendElems && size < inbuf_size;
200 i++, num_appended++) {
201 if ((*srcData)[replicating ? 0 : i].
is_null) {
204 size_t len = (*srcData)[replicating ? 0 : i].length;
205 if (len > inbuf_size) {
211 buffer_->
append((*srcData)[replicating ? 0 : i].pointer, len);
214 }
else if (size + len > inbuf_size) {
217 char*
dest = (
char*)inbuf + size;
219 std::memcpy((
void*)dest, (
void*)(*srcData)[replicating ? 0 : i].pointer, len);
234 for (
size_t n = start_idx;
n < start_idx + numAppendElems;
n++) {
238 auto chunk_metadata = std::make_shared<ChunkMetadata>();
240 return chunk_metadata;
243 void getMetadata(
const std::shared_ptr<ChunkMetadata>& chunkMetadata)
override {
250 auto chunk_metadata = std::make_shared<ChunkMetadata>(
252 return chunk_metadata;
259 void updateStats(
const int8_t*
const src_data,
const size_t num_elements)
override {
264 const size_t start_idx,
265 const size_t num_elements)
override {
270 const size_t start_idx,
271 const size_t num_elements)
override {
272 for (
size_t n = start_idx;
n < start_idx + num_elements;
n++) {
281 fwrite((int8_t*)&
num_elems_,
sizeof(
size_t), 1, f);
284 fwrite((int8_t*)&
has_nulls,
sizeof(
bool), 1, f);
290 fread((int8_t*)&
num_elems_,
sizeof(
size_t), 1, f);
293 fread((int8_t*)&
has_nulls,
sizeof(
bool), 1, f);
299 auto array_encoder =
dynamic_cast<const ArrayNoneEncoder*
>(copyFromEncoder);
351 if (array.is_null || array.length == 0) {
354 const int8_t* bool_array = array.pointer;
355 for (
size_t i = 0; i < array.length /
sizeof(bool); i++) {
374 if (array.is_null || array.length == 0) {
377 const int32_t* int_array = (int32_t*)array.pointer;
378 for (
size_t i = 0; i < array.length /
sizeof(int32_t); i++) {
397 if (array.is_null || array.length == 0) {
400 const int16_t* int_array = (int16_t*)array.pointer;
401 for (
size_t i = 0; i < array.length /
sizeof(int16_t); i++) {
420 if (array.is_null || array.length == 0) {
423 const int8_t* int_array = (int8_t*)array.pointer;
424 for (
size_t i = 0; i < array.length /
sizeof(int8_t); i++) {
445 if (array.is_null || array.length == 0) {
448 const int64_t* int_array = (int64_t*)array.pointer;
449 for (
size_t i = 0; i < array.length /
sizeof(int64_t); i++) {
468 if (array.is_null || array.length == 0) {
471 const float* flt_array = (
float*)array.pointer;
472 for (
size_t i = 0; i < array.length /
sizeof(float); i++) {
491 if (array.is_null || array.length == 0) {
494 const double* dbl_array = (
double*)array.pointer;
495 for (
size_t i = 0; i < array.length /
sizeof(double); i++) {
516 if (array.is_null || array.length == 0) {
519 const auto tm_array =
reinterpret_cast<int64_t*
>(array.pointer);
520 for (
size_t i = 0; i < array.length /
sizeof(int64_t); i++) {
542 if (array.is_null || array.length == 0) {
545 const int32_t* int_array = (int32_t*)array.pointer;
546 for (
size_t i = 0; i < array.length /
sizeof(int32_t); i++) {
568 auto array_offsets =
reinterpret_cast<const ArrayOffsetT*
>(index_data);
569 auto current_index = index + 1;
570 auto offset = array_offsets[current_index];
571 int64_t
last_offset = array_offsets[current_index - 1];
572 return {offset, last_offset};
577 size_t array_byte_size = std::abs(offset) - std::abs(
last_offset);
578 return array_byte_size;
583 size_t array_byte_size = std::abs(offset) - std::abs(
last_offset);
592 #endif // ARRAY_NONE_ENCODER_H
void update_elem_stats(const ArrayDatum &array)
HOST DEVICE SQLTypes get_subtype() const
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
std::shared_ptr< ChunkMetadata > appendData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating)
#define MAX_INPUT_BUF_SIZE
void updateStats(const double, const bool) override
void updateStats(const int8_t *const src_data, const size_t num_elements) override
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
void resetChunkStats() override
size_t getNumElems() const
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.
An AbstractBuffer is a unit of data management for a data manager.
ArrayNoneEncoder(AbstractBuffer *buffer)
void setIndexBuffer(AbstractBuffer *buf)
AbstractBuffer * index_buf
void readMetadata(FILE *f) override
HOST DEVICE EncodingType get_compression() const
static constexpr size_t DEFAULT_NULL_PADDING_SIZE
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
ArrayDatum getArrayDatumAtIndex(const int8_t *index_data, int8_t *data, size_t index)
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
SQLTypeInfo getSqlType() const
void reduceStats(const Encoder &) override
std::pair< ArrayOffsetT, ArrayOffsetT > getArrayOffsetsAtIndex(const int8_t *index_data, size_t index)
bool g_enable_watchdog false
void writeMetadata(FILE *f) override
SQLTypeInfo get_elem_type() const
virtual void reserve(size_t num_bytes)=0
AbstractBuffer * getIndexBuf() const
void updateStats(const int64_t, const bool) override
void copyMetadata(const Encoder *copyFromEncoder) override
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
size_t getArrayDatumSizeAtIndex(const int8_t *index_data, size_t index)