OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetEncoder Class Reference (abstract)

#include <ParquetEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetEncoder:
+ Collaboration diagram for foreign_storage::ParquetEncoder:

Public Member Functions

 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values)=0
 
virtual void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values)=0
 
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking ()
 
virtual void initializeColumnType (const SQLTypeInfo &column_type)
 

Static Protected Member Functions

static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 

Protected Attributes

Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 

Detailed Description

Definition at line 30 of file ParquetEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetEncoder::ParquetEncoder ( Data_Namespace::AbstractBuffer * buffer)
inline

Definition at line 32 of file ParquetEncoder.h.

virtual foreign_storage::ParquetEncoder::~ParquetEncoder ( )
virtual default

Member Function Documentation

static std::shared_ptr<ChunkMetadata> foreign_storage::ParquetEncoder::createMetadata ( const SQLTypeInfo & column_type)
inline static protected

Definition at line 96 of file ParquetEncoder.h.

References SQLTypeInfo::get_elem_type(), Data_Namespace::AbstractBuffer::initEncoder(), and SQLTypeInfo::is_array().

Referenced by getRowGroupMetadata(), and foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata().

96  {
97  auto metadata = std::make_shared<ChunkMetadata>();
98  ForeignStorageBuffer buffer;
99  buffer.initEncoder(column_type.is_array() ? column_type.get_elem_type()
100  : column_type);
101  auto encoder = buffer.getEncoder();
102  encoder->getMetadata(metadata);
103  metadata->sqlType = column_type;
104  return metadata;
105  }
void initEncoder(const SQLTypeInfo &tmp_sql_type)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
bool is_array() const
Definition: sqltypes.h:585

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetEncoder::disableMetadataStatsValidation ( )
inline virtual

Reimplemented in foreign_storage::ParquetArrayEncoder.

Definition at line 76 of file ParquetEncoder.h.

References validate_metadata_stats_.

Referenced by foreign_storage::ParquetArrayEncoder::disableMetadataStatsValidation().

+ Here is the caller graph for this function:

RejectedRowIndices foreign_storage::ParquetEncoder::getRejectedRowIndices ( ) const
inline

Definition at line 74 of file ParquetEncoder.h.

References invalid_indices_.

74 { return invalid_indices_; }
RejectedRowIndices invalid_indices_
virtual std::shared_ptr<ChunkMetadata> foreign_storage::ParquetEncoder::getRowGroupMetadata ( const parquet::RowGroupMetaData *  group_metadata,
const int  parquet_column_index,
const SQLTypeInfo &  column_type 
)
inline virtual

Reimplemented in foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >, foreign_storage::TypedParquetInPlaceEncoder< int64_t, int32_t, NullType >, foreign_storage::TypedParquetInPlaceEncoder< V, V >, foreign_storage::ParquetStringEncoder< V >, foreign_storage::ParquetArrayEncoder, and foreign_storage::ParquetFixedLengthArrayEncoder.

Definition at line 51 of file ParquetEncoder.h.

References createMetadata(), report::stats, foreign_storage::validate_and_get_column_metadata_statistics(), validate_metadata_stats_, and validateNullCount().

Referenced by foreign_storage::ParquetStringEncoder< V >::getRowGroupMetadata().

54  {
55  int64_t null_count{0};
56  auto metadata = createMetadata(column_type);
57 
58  if (validate_metadata_stats_ && group_metadata->num_rows() > 0) {
59  // update statistics
60  auto column_metadata = group_metadata->ColumnChunk(parquet_column_index);
61  auto stats = validate_and_get_column_metadata_statistics(column_metadata.get());
62  null_count = stats->null_count();
63  validateNullCount(group_metadata->schema()->Column(parquet_column_index)->name(),
64  null_count,
65  column_type);
66  }
67  metadata->chunkStats.has_nulls = null_count > 0;
68 
69  // update sizing
70  metadata->numElements = group_metadata->num_rows();
71  return metadata;
72  }
std::shared_ptr< parquet::Statistics > validate_and_get_column_metadata_statistics(const parquet::ColumnChunkMetaData *column_metadata)
dictionary stats
Definition: report.py:116
static void validateNullCount(const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
static std::shared_ptr< ChunkMetadata > createMetadata(const SQLTypeInfo &column_type)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetEncoder::initializeColumnType ( const SQLTypeInfo & column_type)
inline virtual

Reimplemented in foreign_storage::ParquetArrayEncoder.

Definition at line 80 of file ParquetEncoder.h.

References column_type_.

Referenced by foreign_storage::ParquetArrayEncoder::initializeColumnType().

80  {
81  column_type_ = column_type;
82  }

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetEncoder::initializeErrorTracking ( )
inline virtual

Reimplemented in foreign_storage::ParquetArrayEncoder.

Definition at line 78 of file ParquetEncoder.h.

References is_error_tracking_enabled_.

Referenced by foreign_storage::ParquetArrayEncoder::initializeErrorTracking().

+ Here is the caller graph for this function:

static void foreign_storage::ParquetEncoder::throwNotNullViolation ( const std::string &  parquet_column_name)
inline static protected

Definition at line 107 of file ParquetEncoder.h.

Referenced by validateNullCount().

107  {
108  std::stringstream error_message;
109  error_message << "A null value was detected in Parquet column '"
110  << parquet_column_name << "' but HeavyDB column is set to not null";
111  throw std::runtime_error(error_message.str());
112  }

+ Here is the caller graph for this function:

static void foreign_storage::ParquetEncoder::validateNullCount ( const std::string &  parquet_column_name,
int64_t  null_count,
const SQLTypeInfo &  column_type 
)
inline static protected

Definition at line 114 of file ParquetEncoder.h.

References SQLTypeInfo::get_notnull(), and throwNotNullViolation().

Referenced by getRowGroupMetadata(), and foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata().

116  {
117  bool has_nulls = null_count > 0;
118  if (has_nulls && column_type.get_notnull()) {
119  throwNotNullViolation(parquet_column_name);
120  }
121  }
static void throwNotNullViolation(const std::string &parquet_column_name)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

bool foreign_storage::ParquetEncoder::validate_metadata_stats_
protected

The documentation for this class was generated from the following file: