OmniSciDB
a5dc49c757
|
#include <Importer.h>
Classes | |
struct | GeoFileLayerInfo |
Public Types | |
enum | GeoFileLayerContents { GeoFileLayerContents::EMPTY, GeoFileLayerContents::GEO, GeoFileLayerContents::NON_GEO, GeoFileLayerContents::UNSUPPORTED_GEO } |
Public Member Functions | |
Importer (Catalog_Namespace::Catalog &c, const TableDescriptor *t, const std::string &f, const CopyParams &p) | |
Importer (Loader *providedLoader, const std::string &f, const CopyParams &p) | |
~Importer () override | |
ImportStatus | import (const Catalog_Namespace::SessionInfo *session_info) override |
ImportStatus | importDelimited (const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info) override |
ImportStatus | importGDAL (const std::map< std::string, std::string > &colname_to_src, const Catalog_Namespace::SessionInfo *session_info, const bool is_raster) |
const CopyParams & | get_copy_params () const |
const std::list< const ColumnDescriptor * > & | get_column_descs () const |
void | load (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t row_count, const Catalog_Namespace::SessionInfo *session_info) |
std::vector< std::vector < std::unique_ptr < TypedImportBuffer > > > & | get_import_buffers_vec () |
std::vector< std::unique_ptr < TypedImportBuffer > > & | get_import_buffers (int i) |
const bool * | get_is_array () const |
Catalog_Namespace::Catalog & | getCatalog () |
void | checkpoint (const std::vector< Catalog_Namespace::TableEpochInfo > &table_epochs) |
auto | getLoader () const |
Public Member Functions inherited from import_export::DataStreamSink | |
DataStreamSink () | |
DataStreamSink (const CopyParams &copy_params, const std::string file_path) | |
virtual | ~DataStreamSink () |
const CopyParams & | get_copy_params () const |
void | import_compressed (std::vector< std::string > &file_paths, const Catalog_Namespace::SessionInfo *session_info) |
Public Member Functions inherited from import_export::AbstractImporter | |
virtual | ~AbstractImporter ()=default |
Static Public Member Functions | |
static ImportStatus | get_import_status (const std::string &id) |
static void | set_import_status (const std::string &id, const ImportStatus is) |
static const std::list < ColumnDescriptor > | gdalToColumnDescriptors (const std::string &fileName, const bool is_raster, const std::string &geoColumnName, const CopyParams &copy_params) |
static void | readMetadataSampleGDAL (const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params) |
static bool | gdalFileExists (const std::string &path, const CopyParams &copy_params) |
static bool | gdalFileOrDirectoryExists (const std::string &path, const CopyParams &copy_params) |
static std::vector< std::string > | gdalGetAllFilesInArchive (const std::string &archive_path, const CopyParams &copy_params) |
static std::vector < GeoFileLayerInfo > | gdalGetLayersInGeoFile (const std::string &file_name, const CopyParams &copy_params) |
static void | set_geo_physical_import_buffer (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool force_null=false) |
static void | set_geo_physical_import_buffer_columnar (const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column) |
Private Member Functions | |
ImportStatus | importGDALGeo (const std::map< std::string, std::string > &colname_to_src, const Catalog_Namespace::SessionInfo *session_info) |
ImportStatus | importGDALRaster (const Catalog_Namespace::SessionInfo *session_info) |
Static Private Member Functions | |
static bool | gdalStatInternal (const std::string &path, const CopyParams &copy_params, bool also_dir) |
static Geospatial::GDAL::DataSourceUqPtr | openGDALDataSource (const std::string &fileName, const CopyParams &copy_params) |
static const std::list < ColumnDescriptor > | gdalToColumnDescriptorsGeo (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params) |
static const std::list < ColumnDescriptor > | gdalToColumnDescriptorsRaster (const std::string &fileName, const std::string &geoColumnName, const CopyParams &copy_params) |
Private Attributes | |
std::string | import_id |
size_t | file_size |
size_t | max_threads |
char * | buffer [2] |
std::vector< std::vector < std::unique_ptr < TypedImportBuffer > > > | import_buffers_vec |
std::unique_ptr< Loader > | loader |
std::unique_ptr< bool[]> | is_array_a |
Static Private Attributes | |
static std::mutex | init_gdal_mutex |
Additional Inherited Members | |
Protected Member Functions inherited from import_export::DataStreamSink | |
ImportStatus | archivePlumber (const Catalog_Namespace::SessionInfo *session_info) |
Protected Attributes inherited from import_export::DataStreamSink | |
CopyParams | copy_params |
const std::string | file_path |
FILE * | p_file = nullptr |
ImportStatus | import_status_ |
heavyai::shared_mutex | import_mutex_ |
size_t | total_file_size {0} |
std::vector< size_t > | file_offsets |
std::mutex | file_offsets_mutex |
Definition at line 784 of file Importer.h.
|
strong |
Enumerator | |
---|---|
EMPTY | |
GEO | |
NON_GEO | |
UNSUPPORTED_GEO |
Definition at line 837 of file Importer.h.
import_export::Importer::Importer | ( | Catalog_Namespace::Catalog & | c, |
const TableDescriptor * | t, | ||
const std::string & | f, | ||
const CopyParams & | p | ||
) |
Definition at line 172 of file Importer.cpp.
import_export::Importer::Importer | ( | Loader * | providedLoader, |
const std::string & | f, | ||
const CopyParams & | p | ||
) |
Definition at line 178 of file Importer.cpp.
References buffer, import_export::DataStreamSink::file_path, file_size, import_id, is_array_a, kARRAY, loader, max_threads, and import_export::DataStreamSink::p_file.
|
override |
Definition at line 219 of file Importer.cpp.
References buffer, and import_export::DataStreamSink::p_file.
void import_export::Importer::checkpoint | ( | const std::vector< Catalog_Namespace::TableEpochInfo > & | table_epochs | ) |
Definition at line 3524 of file Importer.cpp.
References DEBUG_TIMING, Data_Namespace::DISK_LEVEL, logger::ERROR, measure< TimeT >::execution(), StorageType::FOREIGN_TABLE, import_buffers_vec, import_export::DataStreamSink::import_mutex_, import_export::DataStreamSink::import_status_, logger::INFO, import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, loader, and LOG.
Referenced by importDelimited(), importGDALGeo(), and importGDALRaster().
|
static |
Definition at line 5053 of file Importer.cpp.
References gdalStatInternal().
Referenced by DBHandler::check_geospatial_files(), DBHandler::detect_column_types(), DBHandler::get_all_files_in_archive(), DBHandler::get_first_geo_file_in_archive(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().
|
static |
Definition at line 5058 of file Importer.cpp.
References gdalStatInternal().
Referenced by DBHandler::detect_column_types(), DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().
|
static |
Definition at line 5130 of file Importer.cpp.
References import_export::gdalGatherFilesInArchiveRecursive(), Geospatial::GDAL::init(), import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, import_export::CopyParams::s3_secret_key, import_export::CopyParams::s3_session_token, and Geospatial::GDAL::setAuthorizationTokens().
Referenced by anonymous_namespace{DBHandler.cpp}::find_first_geo_file_in_archive(), and DBHandler::get_all_files_in_archive().
|
static |
Definition at line 5157 of file Importer.cpp.
References CHECK, EMPTY, GEO, Geospatial::GDAL::init(), NON_GEO, openGDALDataSource(), import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, import_export::CopyParams::s3_secret_key, import_export::CopyParams::s3_session_token, Geospatial::GDAL::setAuthorizationTokens(), and UNSUPPORTED_GEO.
Referenced by DBHandler::get_layers_in_geo_file(), and DBHandler::importGeoTableSingle().
|
staticprivate |
Definition at line 5018 of file Importer.cpp.
References Geospatial::GDAL::init(), run_benchmark_import::result, import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, import_export::CopyParams::s3_secret_key, import_export::CopyParams::s3_session_token, and Geospatial::GDAL::setAuthorizationTokens().
Referenced by gdalFileExists(), and gdalFileOrDirectoryExists().
|
static |
Definition at line 4820 of file Importer.cpp.
References gdalToColumnDescriptorsGeo(), and gdalToColumnDescriptorsRaster().
Referenced by DBHandler::detect_column_types().
|
staticprivate |
Definition at line 4904 of file Importer.cpp.
References import_export::CopyParams::add_metadata_columns, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::CopyParams::geo_coords_comp_param, import_export::CopyParams::geo_coords_encoding, import_export::CopyParams::geo_coords_srid, import_export::CopyParams::geo_coords_type, import_export::CopyParams::geo_explode_collections, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), kARRAY, kENCODING_DICT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kTEXT, import_export::anonymous_namespace{Importer.cpp}::ogr_to_type(), openGDALDataSource(), import_export::parse_add_metadata_columns(), import_export::PROMOTE_LINESTRING_TO_MULTILINESTRING, import_export::PROMOTE_POINT_TO_MULTIPOINT, import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_subtype(), SQLTypeInfo::set_type(), and ColumnDescriptor::sourceName.
Referenced by gdalToColumnDescriptors().
|
staticprivate |
Definition at line 4832 of file Importer.cpp.
References import_export::CopyParams::add_metadata_columns, ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::anonymous_namespace{Importer.cpp}::convert_raster_point_transform(), import_export::anonymous_namespace{Importer.cpp}::convert_raster_point_type(), import_export::RasterImporter::detect(), import_export::RasterImporter::getBandNamesAndSQLTypes(), import_export::RasterImporter::getPointNamesAndSQLTypes(), Geospatial::GDAL::init(), kENCODING_GEOINT, kGEOMETRY, kPOINT, import_export::parse_add_metadata_columns(), import_export::CopyParams::raster_import_bands, import_export::CopyParams::raster_import_dimensions, import_export::CopyParams::raster_point_compute_angle, import_export::CopyParams::raster_point_transform, import_export::CopyParams::raster_point_type, import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, import_export::CopyParams::s3_secret_key, import_export::CopyParams::s3_session_token, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_subtype(), SQLTypeInfo::set_type(), Geospatial::GDAL::setAuthorizationTokens(), and ColumnDescriptor::sourceName.
Referenced by gdalToColumnDescriptors().
|
inline |
Definition at line 801 of file Importer.h.
References loader.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
inline |
Definition at line 800 of file Importer.h.
References import_export::DataStreamSink::copy_params.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
inline |
Definition at line 810 of file Importer.h.
References import_buffers_vec.
Referenced by import_export::import_thread_delimited(), and import_export::import_thread_shapefile().
|
inline |
Definition at line 807 of file Importer.h.
References import_buffers_vec.
|
static |
Definition at line 231 of file Importer.cpp.
References import_export::import_status_map, and import_export::status_mutex.
Referenced by DBHandler::import_table_status().
|
inline |
Definition at line 813 of file Importer.h.
References is_array_a.
Referenced by import_export::import_thread_delimited().
|
inline |
Definition at line 847 of file Importer.h.
References loader.
Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), and import_export::import_thread_delimited().
|
inline |
|
overridevirtual |
Implements import_export::AbstractImporter.
Definition at line 4361 of file Importer.cpp.
References import_export::DataStreamSink::archivePlumber().
|
overridevirtual |
Implements import_export::DataStreamSink.
Definition at line 4365 of file Importer.cpp.
References threading_serial::async(), CHECK, checkpoint(), logger::ERROR, import_export::DataStreamSink::file_offsets, import_export::DataStreamSink::file_offsets_mutex, file_size, import_export::delimited_parser::find_row_end_pos(), heavyai::fopen(), Catalog_Namespace::SessionInfo::get_session_id(), Executor::getExecutor(), import_buffers_vec, import_id, import_export::DataStreamSink::import_mutex_, import_export::DataStreamSink::import_status_, import_export::import_thread_delimited(), logger::INFO, import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, loader, LOG, max_threads, import_export::num_import_threads(), import_export::DataStreamSink::p_file, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), logger::thread_id(), import_export::DataStreamSink::total_file_size, Executor::UNITARY_EXECUTOR_ID, and VLOG.
ImportStatus import_export::Importer::importGDAL | ( | const std::map< std::string, std::string > & | colname_to_src, |
const Catalog_Namespace::SessionInfo * | session_info, | ||
const bool | is_raster | ||
) |
Definition at line 5226 of file Importer.cpp.
References importGDALGeo(), and importGDALRaster().
Referenced by QueryRunner::ImportDriver::importGeoTable().
|
private |
Definition at line 5236 of file Importer.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, checkpoint(), logger::ERROR, g_enable_non_kernel_time_query_interrupt, Catalog_Namespace::SessionInfo::get_session_id(), Executor::getExecutor(), import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), import_buffers_vec, import_id, import_export::DataStreamSink::import_mutex_, import_export::DataStreamSink::import_status_, import_export::import_thread_shapefile(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, loader, LOG, max_threads, import_export::num_import_threads(), openGDALDataSource(), import_export::parse_add_metadata_columns(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), logger::thread_id(), toString(), Executor::UNITARY_EXECUTOR_ID, and VLOG.
Referenced by importGDAL().
|
private |
Definition at line 5524 of file Importer.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, CHECK_LE, anonymous_namespace{Importer.cpp}::check_session_interrupted(), checkpoint(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, Geospatial::compress_coords(), import_export::anonymous_namespace{Importer.cpp}::convert_raster_point_transform(), import_export::anonymous_namespace{Importer.cpp}::convert_raster_point_type(), import_export::RasterImporter::detect(), logger::ERROR, f(), g_enable_non_kernel_time_query_interrupt, SQLTypeInfo::get_output_srid(), Catalog_Namespace::SessionInfo::get_session_id(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), import_export::RasterImporter::getBandNamesAndSQLTypes(), import_export::RasterImporter::getBandNullValue(), import_export::RasterImporter::getBandsHeight(), import_export::RasterImporter::getBandsWidth(), Executor::getExecutor(), import_export::RasterImporter::getNumBands(), import_export::RasterImporter::getPointNamesAndSQLTypes(), import_export::RasterImporter::getProjectedPixelCoords(), import_export::RasterImporter::getRawPixels(), import_export::RasterImporter::import(), import_buffers_vec, import_id, import_export::DataStreamSink::import_status_, logger::INFO, ColumnDescriptor::isGeoPhyCol, kARRAY, kBIGINT, kDOUBLE, kFLOAT, kINT, kMaxRasterScanlinesPerThread, kNULLT, kPOINT, kSMALLINT, kTINYINT, load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, loader, LOG, max_threads, NULL_DOUBLE, NULL_FLOAT, NULL_INT, NULL_SMALLINT, import_export::num_import_threads(), import_export::parse_add_metadata_columns(), import_export::BadRowsTracker::rows, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_estimated, import_export::ImportStatus::rows_rejected, set_import_status(), logger::thread_id(), timer_start(), TIMER_STOP, to_string(), toString(), Executor::UNITARY_EXECUTOR_ID, UNLIKELY, and VLOG.
Referenced by importGDAL().
void import_export::Importer::load | ( | const std::vector< std::unique_ptr< TypedImportBuffer >> & | import_buffers, |
size_t | row_count, | ||
const Catalog_Namespace::SessionInfo * | session_info | ||
) |
Definition at line 3514 of file Importer.cpp.
References import_export::DataStreamSink::import_mutex_, import_export::DataStreamSink::import_status_, import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, and loader.
Referenced by import_export::import_thread_delimited(), import_export::import_thread_shapefile(), and importGDALRaster().
|
staticprivate |
Definition at line 4583 of file Importer.cpp.
References Geospatial::GDAL::init(), import_export::kGeoFile, Geospatial::GDAL::openDataSource(), import_export::CopyParams::s3_access_key, import_export::CopyParams::s3_endpoint, import_export::CopyParams::s3_region, import_export::CopyParams::s3_secret_key, import_export::CopyParams::s3_session_token, Geospatial::GDAL::setAuthorizationTokens(), import_export::CopyParams::source_type, and to_string().
Referenced by gdalGetLayersInGeoFile(), gdalToColumnDescriptorsGeo(), importGDALGeo(), and readMetadataSampleGDAL().
|
static |
Definition at line 4625 of file Importer.cpp.
References import_export::CopyParams::add_metadata_columns, CHECK, import_export::CopyParams::geo_layer_name, import_export::anonymous_namespace{Importer.cpp}::getLayerWithSpecifiedName(), openGDALDataSource(), and import_export::parse_add_metadata_columns().
Referenced by DBHandler::detect_column_types().
|
static |
Definition at line 1636 of file Importer.cpp.
References ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.
Referenced by import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer(), Parser::AddColumnStmt::execute(), import_export::fill_missing_columns(), import_export::import_thread_delimited(), foreign_storage::TextFileBufferParser::processGeoColumn(), and foreign_storage::TextFileBufferParser::processInvalidGeoColumn().
|
static |
Definition at line 1731 of file Importer.cpp.
References CHECK, ColumnDescriptor::columnId, ColumnDescriptor::columnType, Geospatial::compress_coords(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getMetadataForColumn(), Geospatial::is_null_point(), kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, NULL_ARRAY_DOUBLE, NULL_DOUBLE, and ColumnDescriptor::tableId.
Referenced by DBHandler::fillGeoColumns().
|
static |
Definition at line 240 of file Importer.cpp.
References import_export::ImportStatus::elapsed, import_export::ImportStatus::end, import_id, import_export::import_status_map, import_export::ImportStatus::start, and import_export::status_mutex.
Referenced by importDelimited(), importGDALGeo(), importGDALRaster(), import_export::ForeignDataImporter::importGeneralS3(), and anonymous_namespace{ForeignDataImporter.cpp}::load_foreign_data_buffers().
|
private |
Definition at line 898 of file Importer.h.
Referenced by Importer(), and ~Importer().
|
private |
Definition at line 896 of file Importer.h.
Referenced by importDelimited(), and Importer().
|
private |
Definition at line 899 of file Importer.h.
Referenced by checkpoint(), get_import_buffers(), get_import_buffers_vec(), importDelimited(), importGDALGeo(), and importGDALRaster().
|
private |
Definition at line 895 of file Importer.h.
Referenced by importDelimited(), Importer(), importGDALGeo(), importGDALRaster(), heavydb.thrift.Heavy.import_table_status_args::read(), set_import_status(), and heavydb.thrift.Heavy.import_table_status_args::write().
|
staticprivate |
Definition at line 902 of file Importer.h.
|
private |
Definition at line 901 of file Importer.h.
Referenced by get_is_array(), and Importer().
|
private |
Definition at line 900 of file Importer.h.
Referenced by checkpoint(), get_column_descs(), getCatalog(), getLoader(), importDelimited(), Importer(), importGDALGeo(), importGDALRaster(), and load().
|
private |
Definition at line 897 of file Importer.h.
Referenced by importDelimited(), Importer(), importGDALGeo(), and importGDALRaster().