OmniSciDB
a5dc49c757
|
#include <ParquetStringEncoder.h>
Public Member Functions | |
ParquetStringEncoder (Data_Namespace::AbstractBuffer *buffer, StringDictionary *string_dictionary, ChunkMetadata *chunk_metadata) | |
void | validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override |
void | appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override |
void | appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override |
void | encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override |
void | encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override |
std::shared_ptr< ChunkMetadata > | getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override |
Public Member Functions inherited from foreign_storage::TypedParquetInPlaceEncoder< V, V > | |
TypedParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor) | |
TypedParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size) | |
void | validate (const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override |
std::string | integralTypeToString (const V &element) const |
bool | isIntegralType (const SQLTypeInfo &type) const |
std::string | elementToString (const V &element) const |
std::string | encodedDataToString (const int8_t *bytes) const override |
void | setDetectBufferConverterType () |
void | validateUsingEncodersColumnType (const int8_t *parquet_data, const int64_t j) const override |
void | reserve (const size_t num_append_elements) override |
void | appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override |
void | validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override |
void | eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override |
void | appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override |
void | encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override |
void | setNull (int8_t *omnisci_data_bytes) override |
void | copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination) override |
std::shared_ptr< ChunkMetadata > | getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override |
Public Member Functions inherited from foreign_storage::ParquetInPlaceEncoder | |
ParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size) | |
Public Member Functions inherited from foreign_storage::ParquetScalarEncoder | |
ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer) | |
Public Member Functions inherited from foreign_storage::ParquetEncoder | |
ParquetEncoder (Data_Namespace::AbstractBuffer *buffer) | |
virtual | ~ParquetEncoder ()=default |
RejectedRowIndices | getRejectedRowIndices () const |
virtual void | disableMetadataStatsValidation () |
virtual void | initializeErrorTracking () |
virtual void | initializeColumnType (const SQLTypeInfo &column_type) |
Protected Member Functions | |
bool | encodingIsIdentityForSameTypes () const override |
Protected Member Functions inherited from foreign_storage::TypedParquetInPlaceEncoder< V, V > | |
std::pair< V, V > | getUnencodedStats (std::shared_ptr< parquet::Statistics > stats) const |
Private Member Functions | |
void | updateMetadataStats (int64_t values_read, int8_t *values) |
Private Attributes | |
StringDictionary * | string_dictionary_ |
ChunkMetadata * | chunk_metadata_ |
std::vector< int8_t > | encode_buffer_ |
V | min_ |
V | max_ |
int64_t | current_batch_offset_ |
InvalidRowGroupIndices * | invalid_indices_ |
Additional Inherited Members | |
Static Protected Member Functions inherited from foreign_storage::ParquetEncoder | |
static std::shared_ptr < ChunkMetadata > | createMetadata (const SQLTypeInfo &column_type) |
static void | throwNotNullViolation (const std::string &parquet_column_name) |
static void | validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type) |
Protected Attributes inherited from foreign_storage::ParquetInPlaceEncoder | |
const size_t | omnisci_data_type_byte_size_ |
const size_t | parquet_data_type_byte_size_ |
Protected Attributes inherited from foreign_storage::ParquetEncoder | |
Data_Namespace::AbstractBuffer * | buffer_ |
bool | is_error_tracking_enabled_ |
RejectedRowIndices | invalid_indices_ |
size_t | current_chunk_offset_ |
SQLTypeInfo | column_type_ |
bool | validate_metadata_stats_ |
Definition at line 29 of file ParquetStringEncoder.h.
|
inline |
Definition at line 31 of file ParquetStringEncoder.h.
References foreign_storage::ParquetStringEncoder< V >::chunk_metadata_, ChunkMetadata::chunkStats, and ChunkStats::has_nulls.
|
inline override virtual |
Appends Parquet data to the buffer using an in-place algorithm. Any necessary transformation or validation of the data and decoding of nulls is part of appending the data. Each class inheriting from this abstract class must implement the functionality to copy, nullify and encode the data.
def_levels | - an array containing the Dremel encoding definition levels |
rep_levels | - an array containing the Dremel encoding repetition levels |
values_read | - the number of non-null values read |
levels_read | - the total number of values (non-null & null) that are read |
values | - values that are read |
Note that the Parquet format encodes nulls using Dremel encoding.
Reimplemented from foreign_storage::ParquetInPlaceEncoder.
Definition at line 96 of file ParquetStringEncoder.h.
References foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::appendData(), foreign_storage::ParquetStringEncoder< V >::chunk_metadata_, ChunkMetadata::chunkStats, foreign_storage::ParquetStringEncoder< V >::encode_buffer_, foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), and ChunkStats::has_nulls.
Referenced by foreign_storage::ParquetStringEncoder< V >::appendDataTrackErrors(), and foreign_storage::ParquetStringEncoder< V >::validateAndAppendData().
|
inline override virtual |
Implements foreign_storage::ParquetEncoder.
Definition at line 71 of file ParquetStringEncoder.h.
References foreign_storage::ParquetStringEncoder< V >::appendData(), CHECK, CHECK_LT, foreign_storage::ParquetEncoder::column_type_, foreign_storage::ParquetEncoder::current_chunk_offset_, foreign_storage::ParquetEncoder::invalid_indices_, foreign_storage::ParquetEncoder::is_error_tracking_enabled_, and StringDictionary::MAX_STRLEN.
|
inline override virtual |
Implements foreign_storage::ParquetScalarEncoder.
Definition at line 130 of file ParquetStringEncoder.h.
References foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::copy().
|
inline override virtual |
Implements foreign_storage::ParquetScalarEncoder.
Definition at line 108 of file ParquetStringEncoder.h.
References CHECK, StringDictionary::getOrAddBulk(), StringDictionary::MAX_STRLEN, foreign_storage::ParquetStringEncoder< V >::string_dictionary_, and foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
Referenced by foreign_storage::ParquetStringEncoder< V >::appendData().
|
inline override protected virtual |
Reimplemented from foreign_storage::TypedParquetInPlaceEncoder< V, V >.
Definition at line 151 of file ParquetStringEncoder.h.
|
inline override virtual |
Reimplemented from foreign_storage::ParquetEncoder.
Definition at line 135 of file ParquetStringEncoder.h.
References foreign_storage::ParquetEncoder::getRowGroupMetadata(), and foreign_storage::ParquetInPlaceEncoder::omnisci_data_type_byte_size_.
|
inline private |
Definition at line 154 of file ParquetStringEncoder.h.
References foreign_storage::ParquetStringEncoder< V >::chunk_metadata_, ChunkMetadata::chunkStats, ChunkMetadata::fillChunkStats(), ChunkStats::has_nulls, foreign_storage::ParquetStringEncoder< V >::max_, and foreign_storage::ParquetStringEncoder< V >::min_.
Referenced by foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous().
|
inline override virtual |
Implements foreign_storage::ParquetImportEncoder.
Definition at line 50 of file ParquetStringEncoder.h.
References foreign_storage::ParquetStringEncoder< V >::appendData(), CHECK_LT, foreign_storage::ParquetStringEncoder< V >::current_batch_offset_, and StringDictionary::MAX_STRLEN.
|
private |
Definition at line 167 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::appendData(), foreign_storage::ParquetStringEncoder< V >::ParquetStringEncoder(), and foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
|
private |
Definition at line 172 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::validateAndAppendData().
|
private |
Definition at line 168 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::appendData().
|
private |
Definition at line 173 of file ParquetStringEncoder.h.
|
private |
Definition at line 170 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
|
private |
Definition at line 170 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
|
private |
Definition at line 166 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous().