OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryExporterCSV.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <boost/variant/get.hpp>
20 
21 #include "QueryEngine/ResultSet.h"
22 #include "Shared/misc.h"
23 
24 namespace import_export {
25 
27 
29 
// Starts a CSV/TSV export: validates the target file name, opens the output
// stream, writes the column-name header line and remembers the copy parameters
// for the later row export.
//
// file_path            destination file; its extension must be ".csv" or ".tsv"
// layer_name           not referenced in the visible body
// copy_params          supplies the field delimiter and line terminator used for
//                      the header, and is stored in copy_params_
// column_infos         result columns; their result names become header fields
// file_compression     anything other than FileCompression::kNone is rejected
// array_null_handling  not referenced in the visible body
//
// Throws std::runtime_error when compression is requested or when the output
// file cannot be opened.
30 void QueryExporterCSV::beginExport(const std::string& file_path,
31  const std::string& layer_name,
32  const CopyParams& copy_params,
33  const std::vector<TargetMetaInfo>& column_infos,
34  const FileCompression file_compression,
35  const ArrayNullHandling array_null_handling) {
36  validateFileExtensions(file_path, "CSV", {".csv", ".tsv"});
37 
38  // compression?
39  auto actual_file_path{file_path};
40  if (file_compression != FileCompression::kNone) {
41  // @TODO(se) implement post-export compression
42  throw std::runtime_error("Compression not yet supported for this file type");
43  }
44 
45  // open file
46  outfile_.open(actual_file_path);
47  if (!outfile_) {
48  throw std::runtime_error("Failed to create file '" + actual_file_path + "'");
49  }
50 
51  // write header?
// NOTE(review): listing line 52 is absent from this view. The closing brace on
// listing line 68 has no visible opener, so the missing line presumably starts a
// conditional on the copy_params header setting - confirm against the real file.
53  bool not_first{false};
54  int column_index = 0;
55  for (auto const& column_info : column_infos) {
56  // get name or default
// safeColumnName receives a 1-based column position alongside the result name
57  auto column_name = safeColumnName(column_info.get_resname(), column_index + 1);
58  // output to header line
// the delimiter goes before every field except the first
59  if (not_first) {
60  outfile_ << copy_params.delimiter;
61  } else {
62  not_first = true;
63  }
64  outfile_ << column_name;
65  column_index++;
66  }
67  outfile_ << copy_params.line_delim;
68  }
69 
70  // keep these
71  copy_params_ = copy_params;
72 }
73 
74 namespace {
75 
76 std::string nullable_str_to_string(const NullableString& str) {
77  auto nptr = boost::get<void*>(&str);
78  if (nptr) {
79  CHECK(!*nptr);
80  return "NULL";
81  }
82  auto sptr = boost::get<std::string>(&str);
83  CHECK(sptr);
84  return *sptr;
85 }
86 
87 std::string target_value_to_string(const TargetValue& tv,
88  const SQLTypeInfo& ti,
89  const std::string& delim) {
90  if (ti.is_array()) {
91  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
92  CHECK(array_tv);
93  if (array_tv->is_initialized()) {
94  const auto& vec = array_tv->get();
95  std::vector<std::string> elem_strs;
96  elem_strs.reserve(vec.size());
97  const auto& elem_ti = ti.get_elem_type();
98  for (const auto& elem_tv : vec) {
99  elem_strs.push_back(target_value_to_string(elem_tv, elem_ti, delim));
100  }
101  return "{" + boost::algorithm::join(elem_strs, delim) + "}";
102  }
103  return "NULL";
104  }
105  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
106  if (ti.is_time()) {
107  return shared::convert_temporal_to_iso_format(ti, *boost::get<int64_t>(scalar_tv));
108  }
109  if (ti.is_decimal()) {
110  Datum datum;
111  datum.bigintval = *boost::get<int64_t>(scalar_tv);
112  if (datum.bigintval == NULL_BIGINT) {
113  return "NULL";
114  }
115  return DatumToString(datum, ti);
116  }
117  if (ti.is_boolean()) {
118  const auto bool_val = *boost::get<int64_t>(scalar_tv);
119  return bool_val == NULL_BOOLEAN ? "NULL" : (bool_val ? "true" : "false");
120  }
121  auto iptr = boost::get<int64_t>(scalar_tv);
122  if (iptr) {
123  return *iptr == inline_int_null_val(ti) ? "NULL" : std::to_string(*iptr);
124  }
125  auto fptr = boost::get<float>(scalar_tv);
126  if (fptr) {
127  return *fptr == inline_fp_null_val(ti) ? "NULL" : std::to_string(*fptr);
128  }
129  auto dptr = boost::get<double>(scalar_tv);
130  if (dptr) {
131  return *dptr == inline_fp_null_val(ti.is_decimal() ? SQLTypeInfo(kDOUBLE, false) : ti)
132  ? "NULL"
133  : std::to_string(*dptr);
134  }
135  auto sptr = boost::get<NullableString>(scalar_tv);
136  CHECK(sptr);
137  return nullable_str_to_string(*sptr);
138 }
139 
140 } // namespace
141 
// Writes the rows of every aggregated result to outfile_ as delimited text.
//
// For each result set, rows are pulled with getNextRow() until an empty row is
// returned. Each column value is then written according to the alternative it
// holds:
//   - non-scalar values go through target_value_to_string() with " | " as the
//     element separator;
//   - int64 values are checked against the null sentinel of the column type,
//     booleans are written as "true"/"false", other integers as numbers;
//   - double and float values are checked against NULL_FLOAT / NULL_DOUBLE and
//     written with an explicit stream precision;
//   - strings are written as-is, or with every quote character prefixed by the
//     escape character when copy_params_.quoted is set.
//
// NOTE(review): listing lines 158, 163, 169, 200, 202, 216, 227, 236, 258 and
// 261 are absent from this view, which is why several branches below look
// empty. Presumably they emit the field delimiter, the quote character, the
// null text, the formatted temporal value and the line terminator - confirm
// against the real file before relying on this listing.
142 void QueryExporterCSV::exportResults(const std::vector<AggregatedResult>& query_results) {
143  for (auto& agg_result : query_results) {
144  auto results = agg_result.rs;
145  auto const& targets = agg_result.targets_meta;
146 
147  while (true) {
148  auto const crt_row = results->getNextRow(true, true);
// an empty row ends this result set
149  if (crt_row.empty()) {
150  break;
151  }
152  bool not_first = false;
153  for (size_t i = 0; i < results->colCount(); ++i) {
154  bool is_null{false};
155  auto const tv = crt_row[i];
// null when the value is not a scalar
156  auto const scalar_tv = boost::get<ScalarTargetValue>(&tv);
157  if (not_first) {
159  } else {
160  not_first = true;
161  }
162  if (copy_params_.quoted) {
164  }
165  auto const& ti = targets[i].get_type_info();
166  if (!scalar_tv) {
// non-scalar path: delegate to the helper, then move on to the next column
167  outfile_ << target_value_to_string(crt_row[i], ti, " | ");
168  if (copy_params_.quoted) {
170  }
171  continue;
172  }
173  if (boost::get<int64_t>(scalar_tv)) {
174  auto int_val = *(boost::get<int64_t>(scalar_tv));
// pick the null sentinel that matches the column type; temporal types share
// NULL_BIGINT, and any other type is treated as never null here
175  switch (ti.get_type()) {
176  case kBOOLEAN:
177  is_null = (int_val == NULL_BOOLEAN);
178  break;
179  case kTINYINT:
180  is_null = (int_val == NULL_TINYINT);
181  break;
182  case kSMALLINT:
183  is_null = (int_val == NULL_SMALLINT);
184  break;
185  case kINT:
186  is_null = (int_val == NULL_INT);
187  break;
188  case kBIGINT:
189  is_null = (int_val == NULL_BIGINT);
190  break;
191  case kTIME:
192  case kTIMESTAMP:
193  case kDATE:
194  is_null = (int_val == NULL_BIGINT);
195  break;
196  default:
197  is_null = false;
198  }
199  if (is_null) {
201  } else if (ti.is_time()) {
203  } else if (ti.is_boolean()) {
204  outfile_ << (int_val ? "true" : "false");
205  } else {
206  outfile_ << int_val;
207  }
208  } else if (boost::get<double>(scalar_tv)) {
209  auto real_val = *(boost::get<double>(scalar_tv));
// a kFLOAT column delivered as double is compared against the float sentinel
210  if (ti.get_type() == kFLOAT) {
211  is_null = (real_val == NULL_FLOAT);
212  } else {
213  is_null = (real_val == NULL_DOUBLE);
214  }
215  if (is_null) {
217  } else if (ti.get_type() == kNUMERIC) {
// kNUMERIC columns use the column's declared precision as the stream precision
218  outfile_ << std::setprecision(ti.get_precision()) << real_val;
219  } else {
220  outfile_ << std::setprecision(std::numeric_limits<double>::digits10 + 1)
221  << real_val;
222  }
223  } else if (boost::get<float>(scalar_tv)) {
224  CHECK_EQ(kFLOAT, ti.get_type());
225  auto real_val = *(boost::get<float>(scalar_tv));
226  if (real_val == NULL_FLOAT) {
228  } else {
229  outfile_ << std::setprecision(std::numeric_limits<float>::digits10 + 1)
230  << real_val;
231  }
232  } else {
// remaining alternative: a NullableString, null when it holds the void* member
233  auto s = boost::get<NullableString>(scalar_tv);
234  is_null = !s || boost::get<void*>(s);
235  if (is_null) {
237  } else {
238  auto s_notnull = boost::get<std::string>(s);
239  CHECK(s_notnull);
240  if (!copy_params_.quoted) {
241  outfile_ << *s_notnull;
242  } else {
243  size_t q = s_notnull->find(copy_params_.quote);
244  if (q == std::string::npos) {
245  outfile_ << *s_notnull;
246  } else {
// work on a copy: insert the escape character before each quote, then resume
// the search two positions on (past the inserted escape and the quote itself)
247  std::string str(*s_notnull);
248  while (q != std::string::npos) {
249  str.insert(q, 1, copy_params_.escape);
250  q = str.find(copy_params_.quote, q + 2);
251  }
252  outfile_ << str;
253  }
254  }
255  }
256  }
257  if (copy_params_.quoted) {
259  }
260  }
262  }
263  }
264 }
265 
// NOTE(review): the signature of this function (listing line 266) is absent
// from this view; presumably this is the body of QueryExporterCSV::endExport() -
// confirm against the real file. The visible body only closes outfile_.
267  // just close the file
268  outfile_.close();
269 }
270 
271 } // namespace import_export
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define NULL_DOUBLE
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:460
Definition: sqltypes.h:76
#define NULL_FLOAT
#define NULL_BIGINT
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:111
std::string safeColumnName(const std::string &resname, const int column_index)
std::string join(T const &container, std::string const &delim)
std::string target_value_to_string(const TargetValue &tv, const SQLTypeInfo &ti, const std::string &delim)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:579
void exportResults(const std::vector< AggregatedResult > &query_results) final
std::string to_string(char const *&&v)
#define NULL_INT
std::string nullable_str_to_string(const NullableString &str)
ImportHeaderRow has_header
Definition: CopyParams.h:46
CONSTEXPR DEVICE bool is_null(const T &value)
int64_t bigintval
Definition: Datum.h:76
bool is_boolean() const
Definition: sqltypes.h:582
#define NULL_BOOLEAN
Definition: sqltypes.h:80
boost::variant< std::string, void * > NullableString
Definition: TargetValue.h:179
void beginExport(const std::string &file_path, const std::string &layer_name, const CopyParams &copy_params, const std::vector< TargetMetaInfo > &column_infos, const FileCompression file_compression, const ArrayNullHandling array_null_handling) final
#define NULL_TINYINT
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
#define NULL_SMALLINT
Basic constructors and methods of the row set interface.
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
Definition: sqltypes.h:72
Definition: Datum.h:71
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
bool is_decimal() const
Definition: sqltypes.h:570
void validateFileExtensions(const std::string &file_path, const std::string &file_type, const std::unordered_set< std::string > &valid_extensions) const
bool is_array() const
Definition: sqltypes.h:585