OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ChunkMetadata.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cstddef>
20 #include <iostream>
21 
22 #include "Logger/Logger.h"
23 #include "Shared/StringTransform.h"
24 #include "Shared/sqltypes.h"
25 #include "Shared/types.h"
26 
27 struct ChunkStats {
30  bool has_nulls;
31 };
32 
33 struct ChunkMetadata {
35  size_t numBytes;
36  size_t numElements;
38 
39  ChunkMetadata(const SQLTypeInfo& sql_type,
40  const size_t num_bytes,
41  const size_t num_elements,
42  const ChunkStats& chunk_stats)
43  : sqlType(sql_type)
44  , numBytes(num_bytes)
45  , numElements(num_elements)
46  , chunkStats(chunk_stats) {}
47 
49 
50  template <typename T>
51  void fillChunkStats(const T min, const T max, const bool has_nulls) {
52  chunkStats.has_nulls = has_nulls;
53  switch (sqlType.get_type()) {
54  case kBOOLEAN: {
57  break;
58  }
59  case kTINYINT: {
62  break;
63  }
64  case kSMALLINT: {
67  break;
68  }
69  case kINT: {
70  chunkStats.min.intval = min;
71  chunkStats.max.intval = max;
72  break;
73  }
74  case kBIGINT:
75  case kNUMERIC:
76  case kDECIMAL: {
77  chunkStats.min.bigintval = min;
78  chunkStats.max.bigintval = max;
79  break;
80  }
81  case kTIME:
82  case kTIMESTAMP:
83  case kDATE: {
84  chunkStats.min.bigintval = min;
85  chunkStats.max.bigintval = max;
86  break;
87  }
88  case kFLOAT: {
89  chunkStats.min.floatval = min;
90  chunkStats.max.floatval = max;
91  break;
92  }
93  case kDOUBLE: {
94  chunkStats.min.doubleval = min;
95  chunkStats.max.doubleval = max;
96  break;
97  }
98  case kVARCHAR:
99  case kCHAR:
100  case kTEXT:
102  chunkStats.min.intval = min;
103  chunkStats.max.intval = max;
104  }
105  break;
106  default: {
107  break;
108  }
109  }
110  }
111 
112  void fillChunkStats(const Datum min, const Datum max, const bool has_nulls) {
113  chunkStats.has_nulls = has_nulls;
114  chunkStats.min = min;
115  chunkStats.max = max;
116  }
117 
118  bool operator==(const ChunkMetadata& that) const {
119  return sqlType == that.sqlType && numBytes == that.numBytes &&
120  numElements == that.numElements &&
122  that.chunkStats.min,
125  that.chunkStats.max,
128  }
129 
130  bool isPlaceholder() const {
131  // Currently needed because a lot of our Datum operations (in this case
132  // extract_int_type_from_datum()) are not safe for all types.
133  const auto type =
135  switch (type) {
136  case kCHAR:
137  case kVARCHAR:
138  case kTEXT:
140  return false;
141  }
142  case kBOOLEAN:
143  case kTINYINT:
144  case kSMALLINT:
145  case kINT:
146  case kBIGINT:
147  case kTIME:
148  case kTIMESTAMP:
149  case kDATE: {
152  return (numElements > 0 && !chunkStats.has_nulls && (min > max));
153  }
154  default:
155  return false;
156  }
157  return false;
158  }
159 };
160 
161 inline std::ostream& operator<<(std::ostream& out, const ChunkMetadata& chunk_metadata) {
162  auto type = chunk_metadata.sqlType.is_array() ? chunk_metadata.sqlType.get_elem_type()
163  : chunk_metadata.sqlType;
164  // Unencoded strings have no min/max.
165  std::string min, max;
166  if (type.is_string() && type.get_compression() == kENCODING_NONE) {
167  min = "<invalid>";
168  max = "<invalid>";
169  } else if (type.is_string()) {
170  min = to_string(chunk_metadata.chunkStats.min.intval);
171  max = to_string(chunk_metadata.chunkStats.max.intval);
172  } else {
173  min = DatumToString(chunk_metadata.chunkStats.min, type);
174  max = DatumToString(chunk_metadata.chunkStats.max, type);
175  }
176  out << "type: " << chunk_metadata.sqlType.get_type_name()
177  << " numBytes: " << chunk_metadata.numBytes << " numElements "
178  << chunk_metadata.numElements << " min: " << min << " max: " << max
179  << " has_nulls: " << std::to_string(chunk_metadata.chunkStats.has_nulls);
180  return out;
181 }
182 
183 inline int64_t extract_min_stat_int_type(const ChunkStats& stats, const SQLTypeInfo& ti) {
184  return extract_int_type_from_datum(stats.min, ti);
185 }
186 
187 inline int64_t extract_max_stat_int_type(const ChunkStats& stats, const SQLTypeInfo& ti) {
188  return extract_int_type_from_datum(stats.max, ti);
189 }
190 
191 inline double extract_min_stat_fp_type(const ChunkStats& stats, const SQLTypeInfo& ti) {
192  return extract_fp_type_from_datum(stats.min, ti);
193 }
194 
195 inline double extract_max_stat_fp_type(const ChunkStats& stats, const SQLTypeInfo& ti) {
196  return extract_fp_type_from_datum(stats.max, ti);
197 }
198 
199 using ChunkMetadataMap = std::map<int, std::shared_ptr<ChunkMetadata>>;
200 using ChunkMetadataVector =
201  std::vector<std::pair<ChunkKey, std::shared_ptr<ChunkMetadata>>>;
int8_t tinyintval
Definition: Datum.h:73
bool isPlaceholder() const
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:460
Definition: sqltypes.h:76
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
ChunkMetadata(const SQLTypeInfo &sql_type, const size_t num_bytes, const size_t num_elements, const ChunkStats &chunk_stats)
Definition: ChunkMetadata.h:39
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:51
bool has_nulls
Definition: ChunkMetadata.h:30
Constants for Builtin SQL Types supported by HEAVY.AI.
dictionary stats
Definition: report.py:116
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
int32_t intval
Definition: Datum.h:75
int64_t extract_max_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
std::string to_string(char const *&&v)
ChunkStats chunkStats
Definition: ChunkMetadata.h:37
void fillChunkStats(const Datum min, const Datum max, const bool has_nulls)
std::map< int, std::shared_ptr< ChunkMetadata >> ChunkMetadataMap
float floatval
Definition: Datum.h:77
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:408
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
int64_t extract_int_type_from_datum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:523
int64_t bigintval
Definition: Datum.h:76
int64_t extract_min_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int16_t smallintval
Definition: Datum.h:74
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:561
Definition: sqltypes.h:79
Definition: sqltypes.h:80
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::string get_type_name() const
Definition: sqltypes.h:484
Definition: sqltypes.h:68
double extract_fp_type_from_datum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:549
Definition: sqltypes.h:72
Definition: Datum.h:71
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
bool is_decimal() const
Definition: sqltypes.h:570
SQLTypeInfo sqlType
Definition: ChunkMetadata.h:34
bool operator==(const ChunkMetadata &that) const
bool is_array() const
Definition: sqltypes.h:585
double doubleval
Definition: Datum.h:78
size_t numElements
Definition: ChunkMetadata.h:36