OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetTimestampEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "ParquetInPlaceEncoder.h"
20 
21 namespace foreign_storage {
22 
23 // The following semantics apply to the templated types below.
24 //
25 // V - type of omnisci data
26 // T - physical type of parquet data
27 // conversion_denominator - the denominator constant used in converting parquet to omnisci
28 // data
29 // NullType - the type of null encoding
30 //
31 // `conversion_denominator` is a template parameter rather than a class member so
32 // that its value is fixed at compile time instead of run time. In testing this
33 // has a major impact on the runtime of the conversion performed by this encoder,
34 // since the compiler can optimize significantly when the denominator is a known constant.
35 template <typename V, typename T, T conversion_denominator, typename NullType = V>
36 class ParquetTimestampEncoder : public TypedParquetInPlaceEncoder<V, T, NullType>,
38  public:
40  const ColumnDescriptor* column_desciptor,
41  const parquet::ColumnDescriptor* parquet_column_descriptor)
42  : TypedParquetInPlaceEncoder<V, T, NullType>(buffer,
43  column_desciptor,
44  parquet_column_descriptor) {
45  CHECK(parquet_column_descriptor->logical_type()->is_timestamp());
46  }
47 
48  void encodeAndCopy(const int8_t* parquet_data_bytes,
49  int8_t* omnisci_data_bytes) override {
50  const auto& parquet_data_value = reinterpret_cast<const T*>(parquet_data_bytes)[0];
51  auto& omnisci_data_value = reinterpret_cast<V*>(omnisci_data_bytes)[0];
52  omnisci_data_value = convert(parquet_data_value);
53  }
54 
55  void validate(const int8_t* parquet_data,
56  const int64_t j,
57  const SQLTypeInfo& column_type) const override {
58  const auto& parquet_data_value = reinterpret_cast<const T*>(parquet_data)[j];
59  validate(parquet_data_value, column_type);
60  }
61 
62  void validate(std::shared_ptr<parquet::Statistics> stats,
63  const SQLTypeInfo& column_type) const override {
64  auto [unencoded_stats_min, unencoded_stats_max] =
66  validate(unencoded_stats_min, column_type);
67  validate(unencoded_stats_max, column_type);
68  }
69 
70  private:
71  void validate(const T& value, const SQLTypeInfo& column_type) const {
72  CHECK(column_type.is_timestamp() || column_type.is_date());
73  if (column_type.is_timestamp()) {
75  } else if (column_type.is_date()) {
77  }
78  }
79 
80  protected:
81  T convert(const T& value) const {
82  T quotient = value / conversion_denominator;
83  return value < 0 && (value % conversion_denominator != 0) ? quotient - 1 : quotient;
84  }
85 };
86 
87 template <typename V, typename T, T conversion_denominator, typename NullType = V>
90 
91 } // namespace foreign_storage
void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
void validate(const T &value, const SQLTypeInfo &column_type) const
bool is_timestamp() const
Definition: sqltypes.h:1046
static void validateValue(const D &data_value, const SQLTypeInfo &column_type)
ParquetTimestampEncoder< V, T, conversion_denominator, NullType > ParquetDateInSecondsFromTimestampEncoder
dictionary stats
Definition: report.py:116
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
ParquetTimestampEncoder(Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
std::pair< T, T > getUnencodedStats(std::shared_ptr< parquet::Statistics > stats) const
#define CHECK(condition)
Definition: Logger.h:291
bool is_date() const
Definition: sqltypes.h:1028
void validate(std::shared_ptr< parquet::Statistics > stats, const SQLTypeInfo &column_type) const override
static void validateValue(const D &data_value, const SQLTypeInfo &column_type)