25 #include <boost/algorithm/string.hpp>
26 #include <boost/filesystem.hpp>
27 #include <boost/tokenizer.hpp>
29 #include <xercesc/dom/DOMDocument.hpp>
30 #include <xercesc/dom/DOMElement.hpp>
31 #include <xercesc/dom/DOMNodeList.hpp>
32 #include <xercesc/framework/MemBufInputSource.hpp>
33 #include <xercesc/parsers/XercesDOMParser.hpp>
37 #include <ogrsf_frmts.h>
43 #define DEBUG_RASTER_IMPORT 0
45 namespace import_export {
48 : transform_arg_{
nullptr}, mode_{mode} {
50 auto const gcp_count = datasource->GetGCPCount();
51 auto const* gcp_list = datasource->GetGCPs();
53 case Mode::kPolynomial: {
54 static constexpr
int kPolynomialOrder = 2;
56 GDALCreateGCPTransformer(gcp_count, gcp_list, kPolynomialOrder,
false);
59 case Mode::kThinPlateSpline: {
60 transform_arg_ = GDALCreateTPSTransformer(gcp_count, gcp_list,
false);
64 CHECK(transform_arg_);
82 GDALGCPTransform(
transform_arg_,
false, 1, &x, &y,
nullptr, &success);
85 GDALTPSTransform(
transform_arg_,
false, 1, &x, &y,
nullptr, &success);
89 throw std::runtime_error(
"Failed GCP/TPS Transform");
99 switch (gdal_data_type) {
119 throw std::runtime_error(
"Unknown/unsupported GDAL data type: " +
139 throw std::runtime_error(
"Unknown/unsupported SQL type: " +
to_string(sql_type));
143 const std::string& tifftag_imagedescription) {
157 using Document = xercesc_3_2::DOMDocument;
158 using Element = xercesc_3_2::DOMElement;
159 using String = xercesc_3_2::XMLString;
160 using Utils = xercesc_3_2::XMLPlatformUtils;
161 using Parser = xercesc_3_2::XercesDOMParser;
162 using Source = xercesc_3_2::MemBufInputSource;
166 std::unordered_map<std::string, XMLCh*> tags;
169 for (
auto& tag : tags) {
170 String::release(&tag.second);
174 tags.emplace(
"ID", String::transcode(
"ID"));
175 tags.emplace(
"Name", String::transcode(
"Name"));
176 tags.emplace(
"Buffer", String::transcode(
"Buffer"));
177 tags.emplace(
"Image", String::transcode(
"Image"));
178 tags.emplace(
"Pixels", String::transcode(
"Pixels"));
179 tags.emplace(
"Channel", String::transcode(
"Channel"));
181 auto get_tag = [&](
const std::string&
name) ->
const XMLCh* {
182 return tags.find(
name)->second;
187 parser.setValidationScheme(Parser::Val_Never);
188 parser.setDoNamespaces(
false);
189 parser.setDoSchema(
false);
190 parser.setLoadExternalDTD(
false);
192 Source source(reinterpret_cast<const XMLByte*>(tifftag_imagedescription.c_str()),
193 tifftag_imagedescription.length(),
196 parser.parse(source);
198 std::vector<std::string> band_names;
200 auto const* document = parser.getDocument();
202 auto const* root_element = document->getDocumentElement();
204 auto const* image_list = root_element->getElementsByTagName(get_tag(
"Image"));
205 if (image_list && image_list->getLength() > 0) {
206 auto const* image_element =
dynamic_cast<const Element*
>(image_list->item(0));
207 auto const* pixels_list = image_element->getElementsByTagName(get_tag(
"Pixels"));
208 if (pixels_list && pixels_list->getLength() > 0) {
209 auto const* pixels_element =
dynamic_cast<const Element*
>(pixels_list->item(0));
210 auto const* channel_list =
211 pixels_element->getElementsByTagName(get_tag(
"Channel"));
212 for (XMLSize_t i = 0; i < channel_list->getLength(); i++) {
213 auto const* channel_element =
214 dynamic_cast<const Element*
>(channel_list->item(i));
215 auto const* name_attribute = channel_element->getAttribute(get_tag(
"Name"));
216 if (name_attribute) {
217 auto const*
name = String::transcode(name_attribute);
219 band_names.emplace_back(
name);
232 switch (point_type) {
250 return x * 111319.490778;
254 return 6378136.99911 * log(tan(.00872664626 * y + .785398163397));
259 auto driver = source->GetDriver();
261 std::string
name = driver->GetDescription();
267 return name ==
"HDF5Image" || name ==
"HDF5" || name ==
"BAG" || name ==
"KEA";
277 const std::string& specified_band_names,
278 const std::string& specified_band_dimensions,
281 const bool point_compute_angle,
282 const bool throw_on_error,
285 bool has_spatial_reference{
false};
291 if (datasource ==
nullptr) {
292 throw std::runtime_error(
"Raster Importer: Unable to open raster file " +
296 #if DEBUG_RASTER_IMPORT
302 auto const subdatasources =
304 for (
auto const& subdatasource : subdatasources) {
305 auto const name_equals = subdatasource.find(
"_NAME=");
306 if (name_equals != std::string::npos) {
307 auto subdatasource_name =
308 subdatasource.substr(name_equals + 6, std::string::npos);
310 <<
"DEBUG: Found subdatasource '" << subdatasource_name <<
"'";
316 if (datasource->GetSpatialRef()) {
317 has_spatial_reference =
true;
319 }
else if (datasource->GetGCPSpatialRef()) {
320 auto const num_points = datasource->GetGCPCount();
322 <<
"DEBUG: Found GCP Spatial Reference with " << num_points <<
" GCPs";
323 has_spatial_reference = (num_points > 0);
333 <<
"DEBUG: No sub-datasources found, just using '" << file_name <<
"'";
346 bool optimize_to_smallint =
false;
350 optimize_to_smallint =
true;
358 std::set<std::pair<int, int>> found_dimensions;
362 const uint32_t datasource_idx) {
363 auto raster_count = datasource->GetRasterCount();
364 if (raster_count == 0) {
365 throw std::runtime_error(
"Raster Importer: Raster file " + file_name +
370 for (
int i = 1; i <= raster_count; i++) {
380 auto band = datasource->GetRasterBand(i);
384 auto const band_width = band->GetXSize();
385 auto const band_height = band->GetYSize();
386 int block_size_x, block_size_y;
387 band->GetBlockSize(&block_size_x, &block_size_y);
390 LOG(
INFO) <<
"Raster Importer: Found Band '" << band_name <<
"', with dimensions "
391 << band_width <<
"x" << band_height;
399 found_dimensions.insert({band_width, band_height});
405 int null_value_valid{0};
406 auto null_value = band->GetNoDataValue(&null_value_valid);
416 itr->datasource_idx = datasource_idx;
418 itr->sql_type = sql_type;
419 itr->null_value = null_value;
420 itr->null_value_valid = (null_value_valid != 0);
429 null_value_valid != 0});
436 specified_band_names, specified_band_dimensions, metadata_column_infos);
439 uint32_t datasource_idx{0u};
440 std::vector<std::string> valid_datasource_names;
448 if (datasource_handle ==
nullptr) {
451 valid_datasource_names.push_back(datasource_name);
455 process_datasource(datasource_handle, datasource_idx++);
459 if (found_dimensions.size() > 1u) {
461 LOG(
WARNING) <<
"Raster Importer: Dimensions found as follows:";
462 for (
auto const& dimension : found_dimensions) {
463 LOG(
WARNING) <<
"Raster Importer: " << dimension.first <<
"x" << dimension.second;
465 if (throw_on_error) {
466 throw std::runtime_error(
"Raster Importer: Raster file '" + file_name +
467 "' datasource/band dimensions are inconsistent. This file "
468 "cannot be imported into a single table.");
470 LOG(
WARNING) <<
"Raster Importer: Raster file '" << file_name
471 <<
"' datasource/band dimensions are inconsistent. This file "
472 "cannot be imported into a single table.";
474 }
else if (found_dimensions.size() == 1u) {
483 if (throw_on_error) {
484 throw std::runtime_error(
"Raster Importer: Raster file " + file_name +
485 " has no importable bands");
487 LOG(
ERROR) <<
"Raster Importer: Raster file " << file_name
488 <<
" has no importable bands";
494 auto const failed_datasource_count =
495 datasource_names_.size() - valid_datasource_names.size();
496 if (failed_datasource_count) {
497 LOG(
WARNING) <<
"Raster Importer: Failed to open " << failed_datasource_count
501 datasource_names_ = valid_datasource_names;
507 if (optimize_to_smallint && (
bands_width_ <= std::numeric_limits<int16_t>::max() &&
517 throw std::runtime_error(
518 "Raster Importer: Raster file has no geo-spatial reference metadata, unable to "
519 "transform points to World Space. Use raster_point_transform='none|file' "
524 throw std::runtime_error(
525 "Raster Importer: Cannot do World Transform with SMALLINT/INT Point type");
529 throw std::runtime_error(
530 "Raster Importer: Must do World Transform with POINT Point type");
536 throw std::runtime_error(
537 "Raster Importer: Raster file '" + file_name +
538 "' has band dimensions too large for 'SMALLINT' raster_point_type (" +
542 throw std::runtime_error(
543 "Raster Importer: Must do World Transform with raster_point_compute_angle");
550 throw std::runtime_error(
"Raster Import aborted. No bands to import.");
557 std::map<std::string, std::vector<Geospatial::GDAL::DataSourceUqPtr>>
558 datasource_thread_handles_map;
559 for (uint32_t i = 0; i < max_threads; i++) {
561 auto& datasource_thread_handles = datasource_thread_handles_map[datasource_name];
564 if (datasource_handle ==
nullptr) {
565 throw std::runtime_error(
"Raster Importer: Unable to open raster file " +
570 if (!max_threads_using_default) {
571 throw std::runtime_error(
"Raster Importer: Unable to import raster file " +
572 datasource_name +
": GDAL driver " +
574 " requires use of HDF5 library which is "
575 "incompatible with multithreading.");
581 datasource_thread_handles.emplace_back(std::move(datasource_handle));
628 bool need_gcp_transformers{
false};
629 auto const* spatial_reference = global_datasource_handle->GetSpatialRef();
630 if (!spatial_reference) {
631 spatial_reference = global_datasource_handle->GetGCPSpatialRef();
632 if (spatial_reference) {
633 need_gcp_transformers =
true;
636 if (spatial_reference) {
639 OGRSpatialReference sr_geometry;
640 auto import_err = sr_geometry.importFromEPSG(srid);
641 if (import_err != OGRERR_NONE) {
642 throw std::runtime_error(
"Failed to create spatial reference for EPSG " +
645 #if GDAL_VERSION_MAJOR >= 3
646 sr_geometry.SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
649 for (uint32_t i = 0; i < max_threads; i++) {
651 OGRCreateCoordinateTransformation(spatial_reference, &sr_geometry));
653 throw std::runtime_error(
654 "Failed to create coordinate system transformation to EPSG " +
657 if (need_gcp_transformers) {
676 names_and_sql_types.emplace_back(
"raster_point", sql_type);
678 names_and_sql_types.emplace_back(
"raster_lon", sql_type);
679 names_and_sql_types.emplace_back(
"raster_lat", sql_type);
682 names_and_sql_types.emplace_back(
"raster_angle",
kFLOAT);
685 names_and_sql_types.emplace_back(
"raster_x", sql_type);
686 names_and_sql_types.emplace_back(
"raster_y", sql_type);
689 return names_and_sql_types;
700 names_and_sql_types.emplace_back(info.alt_name, info.sql_type);
702 return names_and_sql_types;
706 const int band_idx)
const {
709 return {info.null_value, info.null_value_valid};
713 const uint32_t thread_idx,
717 double prev_mx{0.0}, prev_my{0.0};
721 double dx = double(x);
722 double dy = double(y);
729 double fdy = affine_transform_matrix_[3] + (dx * affine_transform_matrix_[4]) +
730 (dy * affine_transform_matrix_[5]);
744 1, &dx, &dy,
nullptr, &success);
746 throw std::runtime_error(
"Failed OGRCoordinateTransform");
753 coords[x] = {dx, dy, 0.0f};
766 auto const angle = atan2f(my - prev_my, mx - prev_mx) * (-180.0f /
M_PI);
771 std::get<2>(coords[x]) = angle;
773 std::get<2>(coords[0]) = angle;
784 const uint32_t band_idx,
795 auto const& datasource_handles_per_thread =
797 CHECK_LT(thread_idx, datasource_handles_per_thread.size());
798 auto const& datasource_handle = datasource_handles_per_thread[thread_idx];
799 CHECK(datasource_handle);
802 auto* band = datasource_handle->GetRasterBand(band_info.band_idx);
809 auto result = band->RasterIO(GF_Read,
814 raw_pixel_bytes.data(),
822 throw std::runtime_error(
"Failed to read raster pixels, rows " +
835 std::string driver_name = datasource->GetDriverName();
837 LOG(
INFO) <<
"Raster Importer: Using Raster Driver '" << driver_name <<
"'";
840 if (driver_name ==
"GTiff") {
846 datasource->GetMetadata(),
"TIFFTAG_IMAGEDESCRIPTION");
847 if (tifftag_imagedescription.length()) {
854 for (
auto const&
name : names) {
863 std::vector<std::string> names;
864 auto const raster_count = datasource->GetRasterCount();
865 for (
int i = 1; i <= raster_count; i++) {
866 auto* band = datasource->GetRasterBand(i);
868 auto const* description = band->GetDescription();
869 if (!description || strlen(description) == 0) {
873 names.emplace_back(description);
877 }
else if (driver_name ==
"netCDF" || driver_name ==
"Zarr") {
885 std::vector<std::string> tokens;
886 boost::split(tokens, datasource_name, boost::is_any_of(
":"));
887 if (tokens.size() < 3 || tokens[2].length() == 0u) {
888 LOG(
WARNING) <<
"Raster Importer: Failed to parse band name from datasource name";
894 }
else if (driver_name ==
"GRIB") {
900 std::vector<std::string> names;
901 auto const raster_count = datasource->GetRasterCount();
902 for (
int i = 1; i <= raster_count; i++) {
903 auto* band = datasource->GetRasterBand(i);
905 auto const grib_comment =
907 if (grib_comment.length() == 0) {
908 LOG(
WARNING) <<
"Raster Importer: Failed to parse band name from GRIB_COMMENT";
912 names.push_back(grib_comment);
919 const std::string& specified_band_names,
920 const std::string& specified_band_dimensions,
923 if (specified_band_names.length()) {
925 std::vector<std::string> names_and_alt_names;
926 boost::split(names_and_alt_names, specified_band_names, boost::is_any_of(
","));
929 for (
auto const& name_and_alt_name : names_and_alt_names) {
931 std::vector<std::string> tokens;
932 boost::split(tokens, name_and_alt_name, boost::is_any_of(
"="));
933 if (tokens.size() < 1 || tokens.size() > 2) {
934 throw std::runtime_error(
"Failed to parse specified name '" + name_and_alt_name +
938 auto const& alt_name =
strip(tokens[tokens.size() == 2 ? 1 : 0]);
949 for (
auto const& name_and_sql_type : names_and_sql_types) {
951 boost::algorithm::to_lower_copy(name_and_sql_type.first), 1);
955 if (specified_band_dimensions.length()) {
957 std::vector<std::string> values;
958 boost::split(values, specified_band_dimensions, boost::is_any_of(
",x "));
959 if (values.size() != 2u) {
960 throw std::invalid_argument(
"failed to parse width/height values from '" +
961 specified_band_dimensions +
"'");
964 size_t num_chars_w{0u}, num_chars_h{0u};
967 if (num_chars_w == 0u || num_chars_h == 0u) {
968 throw std::invalid_argument(
"empty width/height value");
971 throw std::invalid_argument(
"negative width/height value");
973 }
catch (std::invalid_argument& e) {
974 throw std::runtime_error(
"Raster Importer: Invalid specified dimensions (" +
975 std::string(e.what()) +
")");
976 }
catch (std::out_of_range& e) {
977 throw std::runtime_error(
"Raster Importer: Out-of-range specified dimensions (" +
978 std::string(e.what()) +
")");
983 for (
auto const& mci : metadata_column_infos) {
984 auto column_name_lower =
985 boost::algorithm::to_lower_copy(mci.column_descriptor.columnName);
988 throw std::runtime_error(
"Invalid metadata column name '" +
989 mci.column_descriptor.columnName +
990 "' (clashes with existing column name)");
1020 const int band_idx) {
1021 std::string band_name;
1026 band_idx <= static_cast<int>(
raw_band_names_[datasource_idx].size())) {
1032 if (band_name.length() == 0) {
1038 auto band_name_lower = boost::algorithm::to_lower_copy(band_name);
1041 auto const suffix = ++(itr->second);
1054 throw std::runtime_error(
"Specified import band name '" + itr.first +
1055 "' was not found in the input raster file");
PointTransform point_transform_
int specified_band_height_
const NamesAndSQLTypes getBandNamesAndSQLTypes() const
const NullValue getBandNullValue(const int band_idx) const
#define DEBUG_RASTER_IMPORT
static std::vector< std::string > unpackMetadata(char **metadata)
void getRawBandNamesForFormat(const Geospatial::GDAL::DataSourceUqPtr &datasource)
static void logMetadata(GDALMajorObject *object)
std::vector< std::tuple< double, double, float >> Coords
GDALDataType sql_type_to_gdal_data_type(const SQLTypes sql_type)
const uint32_t getNumBands() const
SQLTypes point_type_to_sql_type(const RasterImporter::PointType point_type)
const PointTransform getPointTransform() const
std::string get_datasource_driver_name(OGRDataSource *source)
std::vector< std::byte > RawPixels
bool shouldImportBandWithName(const std::string &name)
std::string suffix(SQLTypes type)
std::vector< std::vector< std::string > > raw_band_names_
#define LOG_IF(severity, condition)
std::vector< Geospatial::GDAL::CoordinateTransformationUqPtr > coordinate_transformations_
bool shouldImportBandWithDimensions(const int width, const int height)
void checkSpecifiedBandNamesFound() const
const NamesAndSQLTypes getPointNamesAndSQLTypes() const
std::vector< ImportBandInfo > import_band_infos_
std::vector< std::string > datasource_names_
std::vector< std::string > get_ome_tiff_band_names(const std::string &tifftag_imagedescription)
V & get_from_map(std::map< K, V, comp > &map, const K &key)
void getRawPixels(const uint32_t thread_idx, const uint32_t band_idx, const int y_start, const int num_rows, const SQLTypes column_sql_type, RawPixels &raw_pixel_bytes)
int specified_band_width_
std::string sanitize_name(const std::string &name, const bool underscore=false)
std::unique_ptr< OGRDataSource, DataSourceDeleter > DataSourceUqPtr
std::map< std::string, bool > specified_band_names_map_
std::vector< std::vector< Geospatial::GDAL::DataSourceUqPtr > > datasource_handles_
bool point_compute_angle_
std::pair< double, bool > NullValue
const Coords getProjectedPixelCoords(const uint32_t thread_idx, const int y) const
std::vector< std::pair< std::string, SQLTypes >> NamesAndSQLTypes
std::map< std::string, int > column_name_repeats_map_
std::array< double, 6 > affine_transform_matrix_
void initializeFiltering(const std::string &specified_band_names, const std::string &specified_band_dimensions, const MetadataColumnInfos &metadata_column_infos)
bool datasource_requires_libhdf(OGRDataSource *source)
std::string getBandName(const uint32_t datasource_idx, const int band_idx)
static DataSourceUqPtr openDataSource(const std::string &name, const import_export::SourceType source_type)
EXTENSION_NOINLINE double conv_4326_900913_y(const double y)
void detect(const std::string &file_name, const std::string &specified_band_names, const std::string &specified_band_dimensions, const PointType point_type, const PointTransform point_transform, const bool point_compute_angle, const bool throw_on_error, const MetadataColumnInfos &metadata_column_infos)
EXTENSION_NOINLINE double conv_4326_900913_x(const double x)
void import(size_t &max_threads, const bool max_threads_using_default)
static std::string getMetadataString(char **metadata, const std::string &key)
SQLTypes gdal_data_type_to_sql_type(const GDALDataType gdal_data_type)
std::vector< std::unique_ptr< GCPTransformer > > gcp_transformers_
std::vector< MetadataColumnInfo > MetadataColumnInfos