16 #include "CsvShared.h"
19 #include "FsiJsonUtils.h"
23 namespace foreign_storage {
28 rapidjson::Document::AllocatorType& allocator) {
35 json_val, file_region.
region_size,
"region_size", allocator);
37 json_val, file_region.
row_count,
"row_count", allocator);
38 if (file_region.filename.size()) {
40 json_val, file_region.filename,
"filename", allocator);
45 CHECK(json_val.IsObject());
52 if (json_val.HasMember(
"filename")) {
60 const std::string& option_name) {
61 if (
auto it = foreign_table->
options.find(option_name);
62 it != foreign_table->
options.end()) {
63 if (it->second.length() == 1) {
66 if (it->second == std::string(
"\\n")) {
68 }
else if (it->second == std::string(
"\\t")) {
71 throw std::runtime_error{
"Invalid value specified for option \"" + option_name +
72 "\". Expected a single character, \"\\n\" or \"\\t\"."};
80 const std::string& option_name,
81 const size_t expected_num_chars) {
82 if (
auto it = foreign_table->
options.find(option_name);
83 it != foreign_table->
options.end()) {
84 if (it->second.length() != expected_num_chars) {
85 throw std::runtime_error{
"Value of \"" + option_name +
86 "\" foreign table option has the wrong number of "
87 "characters. Expected " +
96 const std::string& option_name) {
97 if (
auto it = foreign_table->
options.find(option_name);
98 it != foreign_table->
options.end()) {
99 if (boost::iequals(it->second,
"TRUE")) {
101 }
else if (boost::iequals(it->second,
"FALSE")) {
104 throw std::runtime_error{
"Invalid boolean value specified for \"" + option_name +
105 "\" foreign table option. "
106 "Value must be either 'true' or 'false'."};
114 static constexpr
const char* S3_DIRECT =
"S3_DIRECT";
115 static constexpr
const char* S3_SELECT =
"S3_SELECT";
116 static constexpr
const char* S3_ACCESS_TYPE =
"S3_ACCESS_TYPE";
117 auto access_type = foreign_table->
options.find(S3_ACCESS_TYPE);
119 if (access_type != foreign_table->
options.end()) {
123 throw std::runtime_error{
124 "The \"" + std::string{S3_ACCESS_TYPE} +
125 "\" option is only valid for foreign tables using servers with \"" +
129 if (access_type->second != S3_DIRECT && access_type->second != S3_SELECT) {
130 throw std::runtime_error{
131 "Invalid value provided for the \"" + std::string{S3_ACCESS_TYPE} +
132 "\" option. Value must be one of the following: " + S3_DIRECT +
", " +
135 return (access_type->second == S3_SELECT);
150 if (
const auto& value =
153 copy_params.array_delim = value[0];
155 if (
const auto& value =
158 copy_params.array_begin = value[0];
159 copy_params.array_end = value[1];
161 if (
auto it = foreign_table->
options.find(
"BUFFER_SIZE");
162 it != foreign_table->
options.end()) {
163 copy_params.buffer_size = std::stoi(it->second);
167 copy_params.delimiter = value[0];
171 copy_params.escape = value[0];
174 if (has_header.has_value()) {
175 if (has_header.value()) {
176 copy_params.has_header = import_export::ImportHeaderRow::HAS_HEADER;
178 copy_params.has_header = import_export::ImportHeaderRow::NO_HEADER;
183 copy_params.line_delim = value[0];
188 if (
auto it = foreign_table->
options.find(
"NULLS");
189 it != foreign_table->
options.end()) {
190 copy_params.null_str = it->second;
194 copy_params.quote = value[0];
203 const std::map<
ChunkKey, std::shared_ptr<ChunkMetadata>>& chunk_metadata_map,
204 const std::map<ChunkKey, AbstractBuffer*>& buffers,
210 ChunkKey data_chunk_key = chunk_key;
213 const auto column = catalog->getMetadataForColumnUnlocked(
216 if (column->columnType.is_varlen_indeed()) {
217 data_chunk_key.push_back(1);
218 ChunkKey index_chunk_key = chunk_key;
219 index_chunk_key.push_back(2);
221 CHECK(buffers.find(data_chunk_key) != buffers.end());
222 CHECK(buffers.find(index_chunk_key) != buffers.end());
224 data_buffer = buffers.find(data_chunk_key)->second;
225 index_buffer = buffers.find(index_chunk_key)->second;
226 CHECK_EQ(data_buffer->size(),
static_cast<size_t>(0));
229 size_t index_offset_size{0};
230 if (column->columnType.is_string() || column->columnType.is_geometry()) {
232 }
else if (column->columnType.is_array()) {
237 CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
238 index_buffer->
reserve(index_offset_size *
239 (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
241 data_chunk_key = chunk_key;
242 CHECK(buffers.find(data_chunk_key) != buffers.end());
243 data_buffer = buffers.find(data_chunk_key)->second;
245 CHECK(chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end());
246 data_buffer->
reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
255 size_t num_elements) {
260 chunk_metadata->numElements = num_elements;
269 auto scalar_metadata =
271 chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
272 chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
274 chunk_metadata->chunkStats.has_nulls =
true;
275 return chunk_metadata;
std::vector< int > ChunkKey
HOST DEVICE int get_size() const
import_export::CopyParams validate_and_get_copy_params(const ForeignTable *foreign_table)
void setIndexBuffer(AbstractBuffer *ib)
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
size_t first_row_file_offset
void get_value(const rapidjson::Value &json_val, FileRegion &file_region)
void initEncoder(const SQLTypeInfo &tmp_sql_type)
void setBuffer(AbstractBuffer *b)
void validate_options(const ForeignTable *foreign_table)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
std::shared_ptr< ChunkMetadata > get_placeholder_metadata(const ColumnDescriptor *column, size_t num_elements)
std::string validate_and_get_string_with_length(const ForeignTable *foreign_table, const std::string &option_name, const size_t expected_num_chars)
static SysCatalog & instance()
static const std::string STORAGE_TYPE_KEY
std::optional< bool > validate_and_get_bool_value(const ForeignTable *foreign_table, const std::string &option_name)
#define CHUNK_KEY_TABLE_IDX
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
std::string validate_and_get_delimiter(const ForeignTable *foreign_table, const std::string &option_name)
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
void init_chunk_for_column(const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
const ForeignServer * foreign_server
Encoder * getEncoder() const
void setColumnDesc(const ColumnDescriptor *cd)
bool is_varlen_indeed() const
#define CHUNK_KEY_COLUMN_IDX
SQLTypeInfo get_elem_type() const
virtual void reserve(size_t num_bytes)=0
bool validate_and_get_is_s3_select(const ForeignTable *foreign_table)
void set_value(rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator)
static const std::string S3_STORAGE_TYPE