OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
misc.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Credits: Howard Hinnant for open source date calculations.
18 
19 #include "misc.h"
20 #include "sqltypes.h"
21 
22 #include <cctype>
23 #include <cstdio>
24 #include <fstream>
25 #include <iomanip>
26 
27 namespace shared {
28 
29 size_t formatDate(char* buf, size_t const max, int64_t const unixtime) {
30  DivUMod const div_day = divUMod(unixtime, 24 * 60 * 60);
31  DivUMod const div_era = divUMod(div_day.quot - 11017, 146097);
32  unsigned const doe = static_cast<unsigned>(div_era.rem);
33  unsigned const yoe = (doe - doe / 1460 + doe / 36524 - (doe == 146096)) / 365;
34  unsigned const doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
35  unsigned const moy = (5 * doy + 2) / 153;
36  static_assert(8 <= sizeof(long long)); // long long needed for snprintf()
37  long long const y = 2000 + div_era.quot * 400 + yoe + (9 < moy);
38  unsigned const m = moy + (9 < moy ? -9 : 3);
39  unsigned const d = doy - (153 * moy + 2) / 5 + 1;
40  int const len = snprintf(buf, max, "%04lld-%02u-%02u", y, m, d);
41  if (0 <= len && static_cast<size_t>(len) < max) {
42  return static_cast<size_t>(len);
43  }
44  return 0;
45 }
46 
47 size_t formatDateTime(char* buf,
48  size_t const max,
49  int64_t const timestamp,
50  int const dimension,
51  bool use_iso_format) {
52  constexpr int pow10[10]{
53  1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
54  DivUMod const div_hip = divUMod(timestamp, pow10[dimension]);
55  DivUMod const div_day = divUMod(div_hip.quot, 24 * 60 * 60);
56  DivUMod const div_era = divUMod(div_day.quot - 11017, 146097);
57  unsigned const doe = static_cast<unsigned>(div_era.rem);
58  unsigned const yoe = (doe - doe / 1460 + doe / 36524 - (doe == 146096)) / 365;
59  unsigned const doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
60  unsigned const moy = (5 * doy + 2) / 153;
61  static_assert(8 <= sizeof(long long)); // long long needed for snprintf()
62  long long const y = 2000 + div_era.quot * 400 + yoe + (9 < moy);
63  unsigned const m = moy + (9 < moy ? -9 : 3);
64  unsigned const d = doy - (153 * moy + 2) / 5 + 1;
65  unsigned const minutes = static_cast<unsigned>(div_day.rem) / 60;
66  unsigned const ss = div_day.rem % 60;
67  unsigned const hh = minutes / 60;
68  unsigned const mm = minutes % 60;
69  const char* date_time_format;
70  if (use_iso_format) {
71  if (dimension) {
72  date_time_format = "%04lld-%02u-%02uT%02u:%02u:%02u";
73  } else {
74  date_time_format = "%04lld-%02u-%02uT%02u:%02u:%02uZ";
75  }
76  } else {
77  date_time_format = "%04lld-%02u-%02u %02u:%02u:%02u";
78  }
79  int const len = snprintf(buf, max, date_time_format, y, m, d, hh, mm, ss);
80  if (0 <= len && static_cast<size_t>(len) < max) {
81  if (dimension) {
82  auto precision_format = use_iso_format ? ".%0*dZ" : ".%0*d";
83  int const len_frac = snprintf(buf + len,
84  max - len,
85  precision_format,
86  dimension,
87  static_cast<int>(div_hip.rem));
88  if (0 <= len_frac && static_cast<size_t>(len + len_frac) < max) {
89  return static_cast<size_t>(len + len_frac);
90  }
91  } else {
92  return static_cast<size_t>(len);
93  }
94  }
95  return 0;
96 }
97 
98 size_t formatHMS(char* buf, size_t const max, int64_t const unixtime) {
99  unsigned const seconds = static_cast<unsigned>(unsignedMod(unixtime, 24 * 60 * 60));
100  unsigned const minutes = seconds / 60;
101  unsigned const ss = seconds % 60;
102  unsigned const hh = minutes / 60;
103  unsigned const mm = minutes % 60;
104  int const len = snprintf(buf, max, "%02u:%02u:%02u", hh, mm, ss);
105  if (0 <= len && static_cast<size_t>(len) < max) {
106  return static_cast<size_t>(len);
107  }
108  return 0;
109 }
110 
111 std::string convert_temporal_to_iso_format(const SQLTypeInfo& type_info,
112  int64_t unix_time) {
113  std::string iso_str;
114  if (type_info.get_type() == kTIME) {
115  // Set a buffer size that can contain HH:MM:SS
116  iso_str.resize(8);
117  const auto len = shared::formatHMS(iso_str.data(), iso_str.length() + 1, unix_time);
118  CHECK_EQ(len, iso_str.length());
119  } else if (type_info.get_type() == kDATE) {
120  // Set a buffer size that can contain YYYYYYYYYYYY-mm-dd (int64_t can represent up to
121  // 12 digit years)
122  iso_str.resize(18);
123  const size_t len =
124  shared::formatDate(iso_str.data(), iso_str.length() + 1, unix_time);
125  CHECK_GT(len, static_cast<size_t>(0));
126  iso_str.resize(len);
127  } else if (type_info.get_type() == kTIMESTAMP) {
128  auto precision = type_info.get_precision();
129  // Set a buffer size that can contain the specified timestamp precision
130  // YYYYYYYYYYYY-mm-dd(18) T(1) HH:MM:SS(8) .(precision?) nnnnnnnnn(precision) Z(1)
131  // (int64_t can represent up to 12 digit years with seconds precision)
132  iso_str.resize(18 + 1 + 8 + bool(precision) + precision + 1);
133  const size_t len = shared::formatDateTime(
134  iso_str.data(), iso_str.length() + 1, unix_time, precision, true);
135  CHECK_GT(len, static_cast<size_t>(0));
136  iso_str.resize(len);
137  } else {
138  UNREACHABLE() << "Unexpected column type: " << type_info.toString();
139  }
140  return iso_str;
141 }
142 
// Pack two 32-bit values into a single size_t: item_1 occupies the upper bits and
// item_2 the lower 8 * sizeof(item_2) bits.
//
// Each value is converted through uint32_t first. Casting a negative int32_t
// straight to size_t sign-extends it, which would set every upper bit and erase
// item_1 from the result whenever item_2 is negative.
size_t compute_hash(int32_t item_1, int32_t item_2) {
  static_assert(sizeof(item_1) + sizeof(item_2) <= sizeof(size_t));
  size_t const high_bits = static_cast<size_t>(static_cast<uint32_t>(item_1))
                           << (8 * sizeof(item_2));
  size_t const low_bits = static_cast<size_t>(static_cast<uint32_t>(item_2));
  return high_bits | low_bits;
}
148 
149 // Escape and quote contents of filename as a json string and output to os.
150 // Q: Why not just return the file contents as a string?
151 // A: Constructing a string may unnecessarily contribute to memory fragmentation,
152 // and is probably less performant due to the extra heap allocations.
153 void FileContentsEscaper::quoteAndPrint(std::ostream& os) const {
154  std::ifstream file(filename);
155  if (!file.is_open()) {
156  os << "\"Unable to open " << filename << '"';
157  return;
158  }
159  char ch;
160  std::ios orig_os_state(nullptr);
161  orig_os_state.copyfmt(os);
162  os << '"';
163  while (file.get(ch)) {
164  if (ch == '"') {
165  os << "\\\"";
166  } else if (ch == '\\') {
167  os << "\\\\";
168  } else if (std::isprint(ch) || ch == ' ') {
169  os << ch;
170  } else {
171  switch (ch) {
172  // clang-format off
173  case '\b': os << "\\b"; break;
174  case '\f': os << "\\f"; break;
175  case '\n': os << "\\n"; break;
176  case '\r': os << "\\r"; break;
177  case '\t': os << "\\t"; break;
178  // clang-format on
179  default:
180  os << "\\u" << std::hex << std::setw(4) << std::setfill('0')
181  << static_cast<unsigned>(static_cast<unsigned char>(ch));
182  break;
183  }
184  }
185  }
186  os << '"';
187  os.copyfmt(orig_os_state);
188 }
189 
190 std::ostream& operator<<(std::ostream& os, FileContentsEscaper const& fce) {
191  fce.quoteAndPrint(os);
192  return os;
193 }
194 
195 } // namespace shared
#define CHECK_EQ(x, y)
Definition: Logger.h:301
int64_t quot
Definition: misc.h:180
Definition: sqltypes.h:76
std::ostream & operator<<(std::ostream &os, const StringDictKey &dict_key)
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:111
#define UNREACHABLE()
Definition: Logger.h:338
Constants for Builtin SQL Types supported by HEAVY.AI.
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
#define CHECK_GT(x, y)
Definition: Logger.h:305
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:98
char const *const filename
Definition: misc.h:150
std::string toString() const
Definition: sqltypes.h:525
int get_precision() const
Definition: sqltypes.h:394
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:29
Definition: sqltypes.h:80
uint64_t unsignedMod(int64_t num, int64_t den)
Definition: misc.h:195
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension, bool use_iso_format)
Definition: misc.cpp:47
int64_t rem
Definition: misc.h:181
size_t compute_hash(int32_t item_1, int32_t item_2)
Definition: misc.cpp:143
void quoteAndPrint(std::ostream &) const
Definition: misc.cpp:153
DivUMod divUMod(int64_t num, int64_t den)
Definition: misc.h:185