OmniSciDB
a5dc49c757
|
#include <ParquetInPlaceEncoder.h>
Public Member Functions | |
ParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size) | |
virtual void | reserve (const size_t num_elements)=0 |
void | appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override |
Public Member Functions inherited from foreign_storage::ParquetScalarEncoder | |
ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer) | |
virtual void | setNull (int8_t *omnisci_data_bytes)=0 |
virtual void | copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination)=0 |
virtual void | encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0 |
virtual void | encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements)=0 |
virtual void | validate (const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const =0 |
virtual void | validateUsingEncodersColumnType (const int8_t *parquet_data, const int64_t j) const =0 |
virtual std::string | encodedDataToString (const int8_t *bytes) const =0 |
Public Member Functions inherited from foreign_storage::ParquetEncoder | |
ParquetEncoder (Data_Namespace::AbstractBuffer *buffer) | |
virtual | ~ParquetEncoder ()=default |
virtual void | appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values)=0 |
virtual std::shared_ptr < ChunkMetadata > | getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) |
RejectedRowIndices | getRejectedRowIndices () const |
virtual void | disableMetadataStatsValidation () |
virtual void | initializeErrorTracking () |
virtual void | initializeColumnType (const SQLTypeInfo &column_type) |
Public Member Functions inherited from foreign_storage::ParquetImportEncoder | |
virtual void | eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices)=0 |
virtual void | validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices)=0 |
Protected Attributes | |
const size_t | omnisci_data_type_byte_size_ |
const size_t | parquet_data_type_byte_size_ |
Protected Attributes inherited from foreign_storage::ParquetEncoder | |
Data_Namespace::AbstractBuffer * | buffer_ |
bool | is_error_tracking_enabled_ |
RejectedRowIndices | invalid_indices_ |
size_t | current_chunk_offset_ |
SQLTypeInfo | column_type_ |
bool | validate_metadata_stats_ |
Private Member Functions | |
void | decodeNullsAndEncodeData (int8_t *data_ptr, const int16_t *def_levels, const int64_t values_read, const int64_t levels_read, const bool do_encoding) |
Additional Inherited Members | |
Static Protected Member Functions inherited from foreign_storage::ParquetEncoder | |
static std::shared_ptr < ChunkMetadata > | createMetadata (const SQLTypeInfo &column_type) |
static void | throwNotNullViolation (const std::string &parquet_column_name) |
static void | validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type) |
Definition at line 32 of file ParquetInPlaceEncoder.h.
|
inline |
Definition at line 34 of file ParquetInPlaceEncoder.h.
|
inline override virtual |
Appends Parquet data to the buffer using an in-place algorithm. Any necessary transformation or validation of the data and decoding of nulls is part of appending the data. Each class inheriting from this abstract class must implement the functionality to copy, nullify and encode the data.
def_levels | - an array containing the Dremel encoding definition levels |
rep_levels | - an array containing the Dremel encoding repetition levels |
values_read | - the number of non-null values read |
levels_read | - the total number of values (non-null & null) that are read |
values | - values that are read |
Note that the Parquet format encodes nulls using Dremel encoding.
Implements foreign_storage::ParquetEncoder.
Reimplemented in foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >, foreign_storage::TypedParquetInPlaceEncoder< int64_t, int32_t, NullType >, foreign_storage::TypedParquetInPlaceEncoder< V, V >, and foreign_storage::ParquetStringEncoder< V >.
Definition at line 57 of file ParquetInPlaceEncoder.h.
References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetEncoder::buffer_, decodeNullsAndEncodeData(), foreign_storage::ParquetScalarEncoder::encodeAndCopy(), omnisci_data_type_byte_size_, and parquet_data_type_byte_size_.
Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::appendData().
|
inline private |
Definition at line 91 of file ParquetInPlaceEncoder.h.
References CHECK, foreign_storage::ParquetScalarEncoder::copy(), foreign_storage::ParquetScalarEncoder::encodeAndCopy(), omnisci_data_type_byte_size_, parquet_data_type_byte_size_, and foreign_storage::ParquetScalarEncoder::setNull().
Referenced by appendData().
|
pure virtual |
|
protected |
Definition at line 87 of file ParquetInPlaceEncoder.h.
Referenced by appendData(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::appendData(), decodeNullsAndEncodeData(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::eraseInvalidIndicesInBuffer(), and foreign_storage::ParquetStringEncoder< V >::getRowGroupMetadata().
|
protected |
Definition at line 88 of file ParquetInPlaceEncoder.h.
Referenced by appendData(), and decodeNullsAndEncodeData().