23 #include <parquet/schema.h>
24 #include <parquet/types.h>
26 namespace foreign_storage {
38 ,
min_(std::numeric_limits<V>::max())
39 ,
max_(std::numeric_limits<V>::lowest())
51 const int16_t* rep_levels,
52 const int64_t values_read,
53 const int64_t levels_read,
57 auto parquet_data_ptr =
reinterpret_cast<const parquet::ByteArray*
>(values);
58 for (int64_t i = 0, j = 0; i < levels_read; ++i) {
61 auto& byte_array = parquet_data_ptr[j++];
68 appendData(def_levels, rep_levels, values_read, levels_read, values);
72 const int16_t* rep_levels,
73 const int64_t values_read,
74 const int64_t levels_read,
75 int8_t* values)
override {
77 auto parquet_data_ptr =
reinterpret_cast<const parquet::ByteArray*
>(values);
78 for (int64_t i = 0, j = 0; i < levels_read; ++i) {
81 auto& byte_array = parquet_data_ptr[j++];
93 appendData(def_levels, rep_levels, values_read, levels_read, values);
97 const int16_t* rep_levels,
98 const int64_t values_read,
99 const int64_t levels_read,
100 int8_t* values)
override {
103 def_levels, rep_levels, values_read, levels_read,
encode_buffer_.data());
109 int8_t* omnisci_data_bytes,
110 const size_t num_elements)
override {
112 auto parquet_data_ptr =
113 reinterpret_cast<const parquet::ByteArray*
>(parquet_data_bytes);
114 auto omnisci_data_ptr =
reinterpret_cast<V*
>(omnisci_data_bytes);
115 std::vector<std::string_view> string_views;
116 string_views.reserve(num_elements);
117 for (
size_t i = 0; i < num_elements; ++i) {
118 auto& byte_array = parquet_data_ptr[i];
120 string_views.emplace_back(reinterpret_cast<const char*>(byte_array.ptr),
123 string_views.emplace_back(
nullptr, 0);
131 int8_t* omnisci_data_bytes)
override {
136 const parquet::RowGroupMetaData* group_metadata,
137 const int parquet_column_index,
140 group_metadata, parquet_column_index, column_type);
141 auto column_metadata = group_metadata->ColumnChunk(parquet_column_index);
143 column_metadata->num_values();
146 metadata->chunkStats.has_nulls =
false;
158 V* data_ptr =
reinterpret_cast<V*
>(values);
159 for (int64_t i = 0; i < values_read; ++i) {
160 min_ = std::min<V>(data_ptr[i],
min_);
161 max_ = std::max<V>(data_ptr[i],
max_);
bool encodingIsIdentityForSameTypes() const override
void updateMetadataStats(int64_t values_read, int8_t *values)
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
bool is_error_tracking_enabled_
RejectedRowIndices invalid_indices_
void validateAndAppendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
std::vector< int8_t > encode_buffer_
void copy(const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination) override
InvalidRowGroupIndices * invalid_indices_
StringDictionary * string_dictionary_
int64_t current_batch_offset_
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
std::set< int64_t > InvalidRowGroupIndices
An AbstractBuffer is a unit of data management for a data manager.
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
ChunkMetadata * chunk_metadata_
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
ParquetStringEncoder(Data_Namespace::AbstractBuffer *buffer, StringDictionary *string_dictionary, ChunkMetadata *chunk_metadata)
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
void encodeAndCopyContiguous(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override
void appendDataTrackErrors(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
const size_t omnisci_data_type_byte_size_
static constexpr size_t MAX_STRLEN
size_t current_chunk_offset_