OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetDateInSecondsEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "ParquetInPlaceEncoder.h"
20 
21 namespace foreign_storage {
// Class template header and first constructor of the encoder; the class derives from
// TypedParquetInPlaceEncoder<int64_t, int32_t, NullType>.
// NOTE(review): listing lines 23, 25 and 27 are absent from this extract (presumably the
// `class ParquetDateInSecondsEncoder` line, the rest of the base-class list after the
// trailing comma, and the first line of the constructor signature) -- restore them from
// the original header before treating this text as compilable code.
22 template <typename NullType>
24  : public TypedParquetInPlaceEncoder<int64_t, int32_t, NullType>,
26  public:
// Constructor: forwards the buffer, the column descriptor and the Parquet column
// descriptor to the TypedParquetInPlaceEncoder base.
28  const ColumnDescriptor* column_desciptor,
29  const parquet::ColumnDescriptor* parquet_column_descriptor)
30  : TypedParquetInPlaceEncoder<int64_t, int32_t, NullType>(
31  buffer,
32  column_desciptor,
33  parquet_column_descriptor) {
// Asserts via CHECK that the Parquet column's logical type is a date.
34  CHECK(parquet_column_descriptor->logical_type()->is_date());
35  }
36 
// Constructor overload that passes `buffer` together with sizeof(int64_t) and
// sizeof(int32_t) to the TypedParquetInPlaceEncoder base and has an empty body.
// NOTE(review): listing line 37 (the signature line) is absent from this extract; the
// page's member index lists it as
// `ParquetDateInSecondsEncoder(Data_Namespace::AbstractBuffer *buffer)` -- confirm
// against the original header.
38  : TypedParquetInPlaceEncoder<int64_t, int32_t, NullType>(buffer,
39  sizeof(int64_t),
40  sizeof(int32_t)) {}
41 
42  void encodeAndCopy(const int8_t* parquet_data_bytes,
43  int8_t* omnisci_data_bytes) override {
44  const auto& parquet_data_value =
45  reinterpret_cast<const int32_t*>(parquet_data_bytes)[0];
46  auto& omnisci_data_value = reinterpret_cast<int64_t*>(omnisci_data_bytes)[0];
47  omnisci_data_value = parquet_data_value * kSecsPerDay;
48  }
49 
// Statistics-level validation: passes the min and the max statistic values, one after the
// other, to the single-value validate() overload together with `column_type`.
50  void validate(std::shared_ptr<parquet::Statistics> stats,
51  const SQLTypeInfo& column_type) const override {
52  auto [unencoded_stats_min, unencoded_stats_max] =
// NOTE(review): listing line 53 (the initializer of this structured binding) is absent
// from this extract; presumably a getUnencodedStats(stats) call, whose signature appears
// in the page's member index -- confirm against the original header.
54  validate(unencoded_stats_min, column_type);
55  validate(unencoded_stats_max, column_type);
56  }
57 
58  void validate(const int8_t* parquet_data,
59  const int64_t j,
60  const SQLTypeInfo& column_type) const override {
61  const auto& parquet_data_value = reinterpret_cast<const int32_t*>(parquet_data)[j];
62  validate(parquet_data_value, column_type);
63  }
64 
65  private:
// Single-value validation helper used by the public validate() overloads.
// Asserts via CHECK that `column_type` is a date type, returns early for
// kENCODING_NONE columns, and otherwise falls through to the call at the end.
66  void validate(const int32_t& value, const SQLTypeInfo& column_type) const {
67  CHECK(column_type.is_date());
68  if (column_type.get_compression() ==
69  kENCODING_NONE) { // do not validate NONE ENCODED dates as it is impossible for
70  // bounds to be exceeded (the conversion done for this case is
71  // from a date in days as a 32-bit integer to a date in seconds
72  // as a 64-bit integer)
73  return;
74  }
// NOTE(review): listing line 75 (the start of the call that ends with `column_type);`
// below) is absent from this extract; presumably a validateValue(...) call, whose
// signature appears in the page's member index -- confirm against the original header.
76  column_type);
77  }
78 };
79 
80 } // namespace foreign_storage
static constexpr int64_t kSecsPerDay
static void validateValue(const D &data_value, const SQLTypeInfo &column_type)
void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
dictionary stats
Definition: report.py:116
void validate(std::shared_ptr< parquet::Statistics > stats, const SQLTypeInfo &column_type) const override
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
std::pair< T, T > getUnencodedStats(std::shared_ptr< parquet::Statistics > stats) const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
ParquetDateInSecondsEncoder(Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
void validate(const int32_t &value, const SQLTypeInfo &column_type) const
#define CHECK(condition)
Definition: Logger.h:291
ParquetDateInSecondsEncoder(Data_Namespace::AbstractBuffer *buffer)
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override
bool is_date() const
Definition: sqltypes.h:1028