27 #ifdef ENABLE_IMPORT_PARQUET
41 std::string data_wrapper_type;
46 #ifdef ENABLE_IMPORT_PARQUET
53 return data_wrapper_type;
57 namespace foreign_storage {
58 std::tuple<std::unique_ptr<foreign_storage::ForeignServer>,
59 std::unique_ptr<foreign_storage::UserMapping>,
60 std::unique_ptr<foreign_storage::ForeignTable>>
65 const int32_t user_id) {
67 db_id, user_id, copy_from_source, copy_params);
74 db_id, user_id, copy_from_source, copy_params, server.get());
77 user_mapping->validate(server.get());
82 db_id, table, copy_from_source, copy_params, server.get());
85 foreign_table->validateOptionValues();
87 return {std::move(server), std::move(user_mapping), std::move(foreign_table)};
90 std::tuple<std::unique_ptr<foreign_storage::ForeignServer>,
91 std::unique_ptr<foreign_storage::UserMapping>,
92 std::unique_ptr<foreign_storage::ForeignTable>>
105 #ifdef ENABLE_IMPORT_PARQUET
114 namespace foreign_storage {
118 throw std::runtime_error{
"Regex parser options must contain a line regex."};
124 #ifdef ENABLE_IMPORT_PARQUET
132 return value ?
"TRUE" :
"FALSE";
144 return std::make_unique<CsvDataWrapper>(
145 db_id, foreign_table, user_mapping,
true);
147 return std::make_unique<RegexParserDataWrapper>(
148 db_id, foreign_table, user_mapping,
true);
150 #ifdef ENABLE_IMPORT_PARQUET
152 return std::make_unique<ParquetDataWrapper>(
153 db_id, foreign_table,
false);
161 const std::string& data_wrapper_type,
165 #ifdef ENABLE_IMPORT_PARQUET
168 return std::make_unique<ParquetImporter>(db_id, foreign_table, user_mapping);
174 std::unique_ptr<UserMapping>
178 const std::string& file_path,
187 const std::string& file_path,
191 auto foreign_server = std::make_unique<foreign_storage::ForeignServer>();
193 foreign_server->id = -1;
194 foreign_server->user_id = user_id;
199 #ifdef ENABLE_IMPORT_PARQUET
206 foreign_server->name =
"import_proxy_server";
209 throw std::runtime_error(
"ODBC storage not supported");
211 throw std::runtime_error(
"AWS storage not supported");
217 return foreign_server;
223 switch (has_header) {
240 const std::string& copy_from_source,
246 auto foreign_table = std::make_unique<ForeignTable>();
252 foreign_table->foreign_server = server;
298 throw std::runtime_error(
"ODBC storage not supported");
300 throw std::runtime_error(
"AWS storage not supported");
302 foreign_table->options[
"FILE_PATH"] = copy_from_source;
318 const std::array<char, 3> array_marker{
324 throw std::runtime_error(
325 "geo_explode_collections is not yet supported for FSI CSV import");
339 foreign_table->initializeOptions();
340 return foreign_table;
344 const std::string& data_wrapper_type,
347 std::unique_ptr<ForeignDataWrapper> data_wrapper;
352 data_wrapper = std::make_unique<CsvDataWrapper>(db_id, foreign_table);
354 #ifdef ENABLE_IMPORT_PARQUET
356 data_wrapper = std::make_unique<ParquetDataWrapper>(db_id, foreign_table);
359 data_wrapper = std::make_unique<RegexParserDataWrapper>(db_id, foreign_table);
361 data_wrapper = std::make_unique<InternalCatalogDataWrapper>(db_id, foreign_table);
364 std::make_unique<InternalExecutorStatsDataWrapper>(db_id, foreign_table);
367 std::make_unique<InternalMLModelMetadataDataWrapper>(db_id, foreign_table);
369 data_wrapper = std::make_unique<InternalMemoryStatsDataWrapper>(db_id, foreign_table);
372 std::make_unique<InternalStorageStatsDataWrapper>(db_id, foreign_table);
374 data_wrapper = std::make_unique<InternalLogsDataWrapper>(db_id, foreign_table);
376 throw std::runtime_error(
"Unsupported data wrapper");
382 const std::string& data_wrapper_type,
384 bool is_s3_select_wrapper{
false};
385 std::string data_wrapper_type_key{data_wrapper_type};
386 constexpr
const char* S3_SELECT_WRAPPER_KEY =
"CSV_S3_SELECT";
389 is_s3_select_wrapper =
true;
390 data_wrapper_type_key = S3_SELECT_WRAPPER_KEY;
396 if (is_s3_select_wrapper) {
399 itr->second = std::make_unique<CsvDataWrapper>();
401 #ifdef ENABLE_IMPORT_PARQUET
403 itr->second = std::make_unique<ParquetDataWrapper>();
406 itr->second = std::make_unique<RegexParserDataWrapper>();
408 itr->second = std::make_unique<InternalCatalogDataWrapper>();
410 itr->second = std::make_unique<InternalExecutorStatsDataWrapper>();
412 itr->second = std::make_unique<InternalMLModelMetadataDataWrapper>();
414 itr->second = std::make_unique<InternalMemoryStatsDataWrapper>();
416 itr->second = std::make_unique<InternalStorageStatsDataWrapper>();
418 itr->second = std::make_unique<InternalLogsDataWrapper>();
423 return itr->second.get();
427 const std::string& data_wrapper_type) {
429 if (std::find(supported_wrapper_types.begin(),
430 supported_wrapper_types.end(),
431 data_wrapper_type) == supported_wrapper_types.end()) {
432 std::vector<std::string_view> user_facing_wrapper_types;
433 for (
const auto&
type : supported_wrapper_types) {
435 user_facing_wrapper_types.emplace_back(
type);
438 throw std::runtime_error{
"Invalid data wrapper type \"" + data_wrapper_type +
439 "\". Data wrapper type must be one of the following: " +
440 join(user_facing_wrapper_types,
", ") +
"."};
444 std::map<std::string, std::unique_ptr<ForeignDataWrapper>>
bool contains(const T &container, const U &element)
static const std::string GEO_EXPLODE_COLLECTIONS_KEY
bool is_valid_data_wrapper(const std::string &data_wrapper_type)
static std::unique_ptr< ForeignDataWrapper > createForImport(const std::string &data_wrapper_type, const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping)
static const std::string BUFFER_SIZE_KEY
static constexpr char const * REGEX_PARSER
static const std::string REGEX_PATH_FILTER_KEY
static const ForeignDataWrapper * createForValidation(const std::string &data_wrapper_type, const ForeignTable *foreign_table=nullptr)
static std::unique_ptr< ForeignDataWrapper > create(const std::string &data_wrapper_type, const int db_id, const ForeignTable *foreign_table)
Shared utility for globbing files; paths can be specified as a single file, a directory, or a wildcard pattern.
static const std::string HEADER_KEY
static const std::string TRIM_SPACES_KEY
static const std::string ARRAY_MARKER_KEY
bool is_s3_uri(const std::string &file_path)
static const std::string LOCAL_FILE_STORAGE_TYPE
static constexpr char const * INTERNAL_STORAGE_STATS
bool geo_explode_collections
static const std::string LINE_REGEX_KEY
static const std::string NULLS_KEY
static bool validateAndGetIsS3Select(const ForeignTable *foreign_table)
static constexpr std::array< char const *, 6 > INTERNAL_DATA_WRAPPERS
void set_header_option(OptionsMap &options, const import_export::ImportHeaderRow &has_header)
static constexpr char const * INTERNAL_CATALOG
std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > create_proxy_fsi_objects(const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id)
Create proxy fsi objects for use outside FSI.
static std::map< std::string, std::unique_ptr< ForeignDataWrapper > > validation_data_wrappers_
static const std::string FILE_SORT_ORDER_BY_KEY
ImportHeaderRow has_header
static const std::string SOURCE_SRID_KEY
std::optional< std::string > regex_path_filter
static const std::string LONLAT_KEY
static std::unique_ptr< ForeignDataWrapper > createForGeneralImport(const import_export::CopyParams &copy_params, const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping)
static SysCatalog & instance()
static const std::string STORAGE_TYPE_KEY
bool geo_validate_geometry
static constexpr char const * INTERNAL_ML_MODEL_METADATA
std::string bool_to_option_value(const bool value)
static void validateDataWrapperType(const std::string &data_wrapper_type)
std::string get_data_wrapper_type(const import_export::CopyParams &copy_params)
static const std::string LINE_DELIMITER_KEY
import_export::SourceType source_type
bool is_valid_source_type(const import_export::CopyParams &copy_params)
static constexpr std::array< std::string_view, 9 > supported_data_wrapper_types
size_t num_import_threads(const int32_t copy_params_threads)
static constexpr char const * INTERNAL_EXECUTOR_STATS
static const std::string DELIMITER_KEY
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
static constexpr const char * GEO_VALIDATE_GEOMETRY_KEY
void validate_regex_parser_options(const import_export::CopyParams &copy_params)
static const std::string ARRAY_DELIMITER_KEY
std::string line_start_regex
static constexpr char const * INTERNAL_MEMORY_STATS
static std::unique_ptr< ForeignServer > createForeignServerProxy(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params)
static std::unique_ptr< ForeignTable > createForeignTableProxy(const int db_id, const TableDescriptor *table, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)
static const std::string THREADS_KEY
static constexpr char const * CSV
std::map< std::string, std::string, std::less<>> OptionsMap
static const std::string LINE_START_REGEX_KEY
static const std::string FILE_SORT_REGEX_KEY
static const std::string QUOTED_KEY
static const std::string QUOTE_KEY
static constexpr char const * INTERNAL_LOGS
std::optional< std::string > file_sort_order_by
static const std::string ESCAPE_KEY
static constexpr char const * PARQUET
std::optional< std::string > file_sort_regex
static std::unique_ptr< UserMapping > createUserMappingProxyIfApplicable(const int db_id, const int user_id, const std::string &file_path, const import_export::CopyParams &copy_params, const ForeignServer *server)