OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetDetectStringEncoder Class Reference

#include <ParquetDetectStringEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetDetectStringEncoder:
+ Collaboration diagram for foreign_storage::ParquetDetectStringEncoder:

Public Member Functions

 ParquetDetectStringEncoder (Data_Namespace::AbstractBuffer *buffer)
 
void setNull (int8_t *omnisci_data_bytes) override
 
void copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination) override
 
void encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
 
void encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override
 
void validate (const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override
 
void validateUsingEncodersColumnType (const int8_t *parquet_data, const int64_t j) const override
 
std::string encodedDataToString (const int8_t *bytes) const override
 
void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override
 
void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetScalarEncoder
 ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer)
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual std::shared_ptr
< ChunkMetadata >
getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking ()
 
virtual void initializeColumnType (const SQLTypeInfo &column_type)
 

Public Attributes

TypedParquetDetectBuffer * detect_buffer_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr
< ChunkMetadata >
createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 

Detailed Description

Definition at line 29 of file ParquetDetectStringEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetDetectStringEncoder::ParquetDetectStringEncoder ( Data_Namespace::AbstractBuffer * buffer)
inline

Definition at line 31 of file ParquetDetectStringEncoder.h.

References CHECK, and detect_buffer_.

32  : ParquetScalarEncoder(buffer)
33  , detect_buffer_(dynamic_cast<TypedParquetDetectBuffer*>(buffer_)) {
35  }
#define CHECK(condition)
Definition: Logger.h:291
ParquetScalarEncoder(Data_Namespace::AbstractBuffer *buffer)
Data_Namespace::AbstractBuffer * buffer_

Member Function Documentation

void foreign_storage::ParquetDetectStringEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Definition at line 89 of file ParquetDetectStringEncoder.h.

References foreign_storage::TypedParquetDetectBuffer::appendValue(), CHECK, foreign_storage::ParquetEncoder::current_chunk_offset_, detect_buffer_, foreign_storage::ParquetEncoder::invalid_indices_, foreign_storage::ParquetEncoder::is_error_tracking_enabled_, and StringDictionary::MAX_STRLEN.

Referenced by appendDataTrackErrors().

93  {
94  CHECK(levels_read > 0);
95 
96  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
97 
98  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
99  if (def_levels[i]) {
100  CHECK(j < values_read);
101  auto& byte_array = parquet_data_ptr[j++];
104  i);
105  detect_buffer_->appendValue({}); // add empty string
106  } else {
107  auto string_value =
108  std::string{reinterpret_cast<const char*>(byte_array.ptr), byte_array.len};
109  detect_buffer_->appendValue(string_value);
110  }
111  } else {
112  detect_buffer_->appendValue("NULL");
113  }
114  }
117  }
118  }
RejectedRowIndices invalid_indices_
#define CHECK(condition)
Definition: Logger.h:291
static constexpr size_t MAX_STRLEN

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetDetectStringEncoder::appendDataTrackErrors ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Definition at line 120 of file ParquetDetectStringEncoder.h.

References appendData(), CHECK, and foreign_storage::ParquetEncoder::is_error_tracking_enabled_.

124  {
126  appendData(def_levels, rep_levels, values_read, levels_read, values);
127  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::ParquetDetectStringEncoder::copy ( const int8_t *  omnisci_data_bytes_source,
int8_t *  omnisci_data_bytes_destination 
)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 38 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

39  {
40  UNREACHABLE();
41  }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetDetectStringEncoder::encodeAndCopy ( const int8_t *  parquet_data_bytes,
int8_t *  omnisci_data_bytes 
)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 43 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

44  {
45  UNREACHABLE();
46  }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetDetectStringEncoder::encodeAndCopyContiguous ( const int8_t *  parquet_data_bytes,
int8_t *  omnisci_data_bytes,
const size_t  num_elements 
)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 48 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

50  {
51  UNREACHABLE();
52  }
#define UNREACHABLE()
Definition: Logger.h:338
std::string foreign_storage::ParquetDetectStringEncoder::encodedDataToString ( const int8_t *  bytes) const
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 69 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

69  {
70  UNREACHABLE();
71  return {};
72  }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetDetectStringEncoder::eraseInvalidIndicesInBuffer ( const InvalidRowGroupIndices & invalid_indices)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 74 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

75  {
76  UNREACHABLE();
77  }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetDetectStringEncoder::setNull ( int8_t *  omnisci_data_bytes)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 37 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

37 { UNREACHABLE(); }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetDetectStringEncoder::validate ( const int8_t *  parquet_data,
const int64_t  j,
const SQLTypeInfo &  column_type 
) const
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 54 of file ParquetDetectStringEncoder.h.

References StringDictionary::MAX_STRLEN.

Referenced by validateUsingEncodersColumnType().

56  {
57  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(parquet_data);
58  auto& byte_array = parquet_data_ptr[j];
59  if (byte_array.len > StringDictionary::MAX_STRLEN) {
60  throw ForeignStorageException("String exceeeds max length allowed in dictionary");
61  }
62  }
static constexpr size_t MAX_STRLEN

+ Here is the caller graph for this function:

void foreign_storage::ParquetDetectStringEncoder::validateAndAppendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values,
const SQLTypeInfo &  column_type,
InvalidRowGroupIndices &  invalid_indices 
)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 79 of file ParquetDetectStringEncoder.h.

References UNREACHABLE.

85  {
86  UNREACHABLE();
87  }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetDetectStringEncoder::validateUsingEncodersColumnType ( const int8_t *  parquet_data,
const int64_t  j 
) const
inlineoverridevirtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 64 of file ParquetDetectStringEncoder.h.

References foreign_storage::ParquetEncoder::column_type_, and validate().

65  {
66  validate(parquet_data, j, column_type_);
67  }
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override

+ Here is the call graph for this function:

Member Data Documentation

TypedParquetDetectBuffer* foreign_storage::ParquetDetectStringEncoder::detect_buffer_

Definition at line 129 of file ParquetDetectStringEncoder.h.

Referenced by appendData(), and ParquetDetectStringEncoder().


The documentation for this class was generated from the following file: