OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetInPlaceEncoder Class Reference (abstract)

#include <ParquetInPlaceEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetInPlaceEncoder:
+ Collaboration diagram for foreign_storage::ParquetInPlaceEncoder:

Public Member Functions

 ParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
 
virtual void reserve (const size_t num_elements)=0
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetScalarEncoder
 ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual void setNull (int8_t *omnisci_data_bytes)=0
 
virtual void copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination)=0
 
virtual void encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
 
virtual void encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements)=0
 
virtual void validate (const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const =0
 
virtual void validateUsingEncodersColumnType (const int8_t *parquet_data, const int64_t j) const =0
 
virtual std::string encodedDataToString (const int8_t *bytes) const =0
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values)=0
 
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking ()
 
virtual void initializeColumnType (const SQLTypeInfo &column_type)
 
- Public Member Functions inherited from foreign_storage::ParquetImportEncoder
virtual void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices)=0
 
virtual void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices)=0
 

Protected Attributes

const size_t omnisci_data_type_byte_size_
 
const size_t parquet_data_type_byte_size_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 

Private Member Functions

void decodeNullsAndEncodeData (int8_t *data_ptr, const int16_t *def_levels, const int64_t values_read, const int64_t levels_read, const bool do_encoding)
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 

Detailed Description

Definition at line 32 of file ParquetInPlaceEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetInPlaceEncoder::ParquetInPlaceEncoder ( Data_Namespace::AbstractBuffer *  buffer,
const size_t  omnisci_data_type_byte_size,
const size_t  parquet_data_type_byte_size 
)
inline

Definition at line 34 of file ParquetInPlaceEncoder.h.

37  : ParquetScalarEncoder(buffer)
38  , omnisci_data_type_byte_size_(omnisci_data_type_byte_size)
39  , parquet_data_type_byte_size_(parquet_data_type_byte_size) {}
ParquetScalarEncoder(Data_Namespace::AbstractBuffer *buffer)

Member Function Documentation

void foreign_storage::ParquetInPlaceEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Appends Parquet data to the buffer using an in-place algorithm. Any necessary transformation or validation of the data and decoding of nulls is part of appending the data. Each class inheriting from this abstract class must implement the functionality to copy, nullify and encode the data.

Parameters
def_levels - an array containing the Dremel encoding definition levels
rep_levels - an array containing the Dremel encoding repetition levels
values_read - the number of non-null values read
levels_read - the total number of values (non-null & null) that are read
values - values that are read

Note that the Parquet format encodes nulls using Dremel encoding.

Implements foreign_storage::ParquetEncoder.

Reimplemented in foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >, foreign_storage::TypedParquetInPlaceEncoder< int64_t, int32_t, NullType >, foreign_storage::TypedParquetInPlaceEncoder< V, V >, and foreign_storage::ParquetStringEncoder< V >.

Definition at line 57 of file ParquetInPlaceEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetEncoder::buffer_, decodeNullsAndEncodeData(), foreign_storage::ParquetScalarEncoder::encodeAndCopy(), omnisci_data_type_byte_size_, and parquet_data_type_byte_size_.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::appendData().

61  {
63  for (int64_t i = 0; i < values_read; ++i) {
65  values + i * omnisci_data_type_byte_size_);
66  }
67  }
68 
69  if (values_read < levels_read) { // nulls exist
71  values,
72  def_levels,
73  values_read,
74  levels_read,
77  for (int64_t i = levels_read - 1; i >= 0; --i) {
79  values + i * omnisci_data_type_byte_size_);
80  }
81  }
82 
83  buffer_->append(values, levels_read * omnisci_data_type_byte_size_);
84  }
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
void decodeNullsAndEncodeData(int8_t *data_ptr, const int16_t *def_levels, const int64_t values_read, const int64_t levels_read, const bool do_encoding)
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
Data_Namespace::AbstractBuffer * buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetInPlaceEncoder::decodeNullsAndEncodeData ( int8_t *  data_ptr,
const int16_t *  def_levels,
const int64_t  values_read,
const int64_t  levels_read,
const bool  do_encoding 
)
inline private

Definition at line 91 of file ParquetInPlaceEncoder.h.

References CHECK, foreign_storage::ParquetScalarEncoder::copy(), foreign_storage::ParquetScalarEncoder::encodeAndCopy(), omnisci_data_type_byte_size_, parquet_data_type_byte_size_, and foreign_storage::ParquetScalarEncoder::setNull().

Referenced by appendData().

95  {
96  for (int64_t i = levels_read - 1, j = values_read - 1; i >= 0; --i) {
97  if (def_levels[i]) { // not null
98  CHECK(j >= 0);
99  if (do_encoding) {
100  encodeAndCopy(data_ptr + (j--) * parquet_data_type_byte_size_,
101  data_ptr + i * omnisci_data_type_byte_size_);
102  } else {
103  copy(data_ptr + (j--) * omnisci_data_type_byte_size_,
104  data_ptr + i * omnisci_data_type_byte_size_);
105  }
106  } else { // null
107  setNull(data_ptr + i * omnisci_data_type_byte_size_);
108  }
109  }
110  }
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
virtual void setNull(int8_t *omnisci_data_bytes)=0
#define CHECK(condition)
Definition: Logger.h:291
virtual void copy(const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetInPlaceEncoder::reserve ( const size_t  num_elements)
pure virtual

Member Data Documentation

const size_t foreign_storage::ParquetInPlaceEncoder::parquet_data_type_byte_size_
protected

Definition at line 88 of file ParquetInPlaceEncoder.h.

Referenced by appendData(), and decodeNullsAndEncodeData().


The documentation for this class was generated from the following file: