OmniSciDB  a5dc49c757
import_export Namespace Reference

Namespaces

 anonymous_namespace{ExpressionParser.cpp}
 
 anonymous_namespace{ForeignDataImporter.cpp}
 
 anonymous_namespace{Importer.cpp}
 
 anonymous_namespace{QueryExporterCSV.cpp}
 
 anonymous_namespace{QueryExporterGDAL.cpp}
 
 anonymous_namespace{RasterImporter.cpp}
 
 delimited_parser
 

Classes

class  AbstractImporter
 
struct  CopyParams
 
class  ExpressionParser
 
class  ForeignDataImporter
 
class  ImportBatchResult
 
struct  GeoImportException
 
class  ColumnNotGeoError
 
struct  BadRowsTracker
 
class  ImporterUtils
 
class  TypedImportBuffer
 
class  Loader
 
struct  ImportStatus
 
class  DataStreamSink
 
class  Detector
 
class  Importer
 
struct  MetadataColumnInfo
 
class  QueryExporter
 
class  QueryExporterCSV
 
class  QueryExporterGDAL
 
class  GCPTransformer
 
class  RasterImporter
 

Typedefs

using FieldNameToIndexMapType = std::map< std::string, size_t >
 
using ColumnNameToSourceNameMapType = std::map< std::string, std::string >
 
using FeaturePtrVector = std::vector< Geospatial::GDAL::FeatureUqPtr >
 
using ArraySliceRange = std::pair< size_t, size_t >
 
using MetadataColumnInfos = std::vector< MetadataColumnInfo >
 

Enumerations

enum  ImportHeaderRow { ImportHeaderRow::kAutoDetect, ImportHeaderRow::kNoHeader, ImportHeaderRow::kHasHeader }
 
enum  RasterPointType {
  RasterPointType::kNone, RasterPointType::kAuto, RasterPointType::kSmallInt, RasterPointType::kInt,
  RasterPointType::kFloat, RasterPointType::kDouble, RasterPointType::kPoint
}
 
enum  RasterPointTransform { RasterPointTransform::kNone, RasterPointTransform::kAuto, RasterPointTransform::kFile, RasterPointTransform::kWorld }
 
enum  SourceType {
  SourceType::kUnknown, SourceType::kUnsupported, SourceType::kDelimitedFile, SourceType::kGeoFile,
  SourceType::kRasterFile, SourceType::kParquetFile, SourceType::kOdbc, SourceType::kRegexParsedFile
}
 

Functions

static const std::string trim_space (const char *field, const size_t len)
 
Datum NullArrayDatum (SQLTypeInfo &ti)
 
ArrayDatum StringToArray (const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
 
ArrayDatum NullArray (const SQLTypeInfo &ti)
 
void addBinaryStringArray (const TDatum &datum, std::vector< std::string > &string_vec)
 
Datum TDatumToDatum (const TDatum &datum, SQLTypeInfo &ti)
 
ArrayDatum TDatumToArrayDatum (const TDatum &datum, const SQLTypeInfo &ti)
 
bool importGeoFromLonLat (double lon, double lat, std::vector< double > &coords, std::vector< double > &bounds, SQLTypeInfo &ti)
 
static ImportStatus import_thread_delimited (int thread_id, Importer *importer, std::unique_ptr< char[]> scratch_buffer, size_t begin_pos, size_t end_pos, size_t total_size, size_t first_row_index_this_buffer, const Catalog_Namespace::SessionInfo *session_info, Executor *executor)
 
static ImportStatus import_thread_shapefile (int thread_id, Importer *importer, OGRCoordinateTransformation *coordinate_transformation, const FeaturePtrVector &features, size_t firstFeature, size_t numFeatures, const FieldNameToIndexMapType &fieldNameToIndexMap, const ColumnNameToSourceNameMapType &columnNameToSourceNameMap, const Catalog_Namespace::SessionInfo *session_info, Executor *executor, const MetadataColumnInfos &metadata_column_infos)
 
template<class T >
bool try_cast (const std::string &str)
 
void gdalGatherFilesInArchiveRecursive (const std::string &archive_path, std::vector< std::string > &files)
 
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders (const TableDescriptor *td, Loader *loader)
 
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns (const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
 
std::unique_ptr< AbstractImporter > create_importer (Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
 
MetadataColumnInfos parse_add_metadata_columns (const std::string &add_metadata_columns, const std::string &file_path)
 
size_t num_import_threads (const int32_t copy_params_threads)
 

Variables

static constexpr size_t kImportFileBufferSize = (1 << 23)
 
static constexpr size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
 
static constexpr bool PROMOTE_POINT_TO_MULTIPOINT = false
 
static constexpr bool PROMOTE_LINESTRING_TO_MULTILINESTRING = false
 
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 
static heavyai::shared_mutex status_mutex
 
static std::map< std::string, ImportStatus > import_status_map
 
static const size_t kImportRowLimit = 10000
 

Typedef Documentation

using import_export::ArraySliceRange = typedef std::pair<size_t, size_t>

Definition at line 74 of file Importer.h.

using import_export::ColumnNameToSourceNameMapType = typedef std::map<std::string, std::string>

Definition at line 151 of file Importer.cpp.

using import_export::FeaturePtrVector = typedef std::vector<Geospatial::GDAL::FeatureUqPtr>

Definition at line 152 of file Importer.cpp.

using import_export::FieldNameToIndexMapType = typedef std::map<std::string, size_t>

Definition at line 150 of file Importer.cpp.

using import_export::MetadataColumnInfos = typedef std::vector<MetadataColumnInfo>

Definition at line 36 of file MetadataColumn.h.

Function Documentation

void import_export::addBinaryStringArray ( const TDatum &  datum,
std::vector< std::string > &  string_vec 
)

Definition at line 415 of file Importer.cpp.

Referenced by import_export::TypedImportBuffer::add_value().

415  {
416  const auto& arr = datum.val.arr_val;
417  for (const auto& elem_datum : arr) {
418  string_vec.push_back(elem_datum.val.str_val);
419  }
420 }

+ Here is the caller graph for this function:

std::unique_ptr< AbstractImporter > import_export::create_importer ( Catalog_Namespace::Catalog &  catalog,
const TableDescriptor *  td,
const std::string &  copy_from_source,
const import_export::CopyParams &  copy_params 
)

Definition at line 6287 of file Importer.cpp.

References g_enable_fsi_regex_import, g_enable_legacy_delimited_import, kDelimitedFile, kParquetFile, kRegexParsedFile, and import_export::CopyParams::source_type.

Referenced by Parser::CopyTableStmt::execute(), and DBHandler::import_table().

6291  {
6292  if (copy_params.source_type == import_export::SourceType::kParquetFile) {
6293 #ifdef ENABLE_IMPORT_PARQUET
6294  if (!g_enable_legacy_parquet_import) {
6295  return std::make_unique<import_export::ForeignDataImporter>(
6296  copy_from_source, copy_params, td);
6297  }
6298 #else
6299  throw std::runtime_error("Parquet not supported!");
6300 #endif
6301  }
6302 
6303  if (copy_params.source_type == import_export::SourceType::kDelimitedFile &&
6304  !g_enable_legacy_delimited_import) {
6305  return std::make_unique<import_export::ForeignDataImporter>(
6306  copy_from_source, copy_params, td);
6307  }
6308 
6309  if (copy_params.source_type == import_export::SourceType::kRegexParsedFile) {
6310  if (g_enable_fsi_regex_import) {
6311  return std::make_unique<import_export::ForeignDataImporter>(
6312  copy_from_source, copy_params, td);
6313  } else {
6314  throw std::runtime_error(
6315  "Regex parsed import only supported using 'fsi-regex-import' flag");
6316  }
6317  }
6318 
6319  return std::make_unique<import_export::Importer>(
6320  catalog, td, copy_from_source, copy_params);
6321 }
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:84
import_export::SourceType source_type
Definition: CopyParams.h:57
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:88

+ Here is the caller graph for this function:
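A usage sketch follows (not taken from the OmniSciDB sources): it assumes a catalog, table descriptor, and session obtained through the usual catalog/session machinery, and it assumes AbstractImporter exposes an import(session_info) entry point as Importer does; the CSV path is illustrative.

// Hedged sketch: route a delimited-file load through the factory above.
void copy_from_csv_sketch(Catalog_Namespace::Catalog& catalog,
                          const TableDescriptor* td,
                          const Catalog_Namespace::SessionInfo* session_info) {
  import_export::CopyParams copy_params;
  copy_params.source_type = import_export::SourceType::kDelimitedFile;
  // create_importer() returns ForeignDataImporter or the legacy Importer
  // depending on source_type and the g_enable_* flags referenced above.
  auto importer = import_export::create_importer(
      catalog, td, "/data/users.csv", copy_params);
  // Assumed polymorphic entry point on AbstractImporter.
  auto import_status = importer->import(session_info);
}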

std::vector< std::unique_ptr< TypedImportBuffer > > import_export::fill_missing_columns ( const Catalog_Namespace::Catalog *  cat,
Fragmenter_Namespace::InsertData &  insert_data 
)

Definition at line 6217 of file Importer.cpp.

References anonymous_namespace{Utm.h}::a, CHECK, ColumnDescriptor::columnId, Fragmenter_Namespace::InsertData::columnIds, ColumnDescriptor::columnName, ColumnDescriptor::columnType, Fragmenter_Namespace::InsertData::data, ColumnDescriptor::default_value, SQLTypeInfo::get_comp_param(), SQLTypeInfo::get_compression(), import_export::TypedImportBuffer::get_data_block_pointers(), SQLTypeInfo::get_physical_cols(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Geospatial::GeoTypesFactory::getGeoColumns(), Catalog_Namespace::Catalog::getMetadataForDict(), Fragmenter_Namespace::InsertData::is_default, SQLTypeInfo::is_geometry(), IS_STRING, kARRAY, kENCODING_DICT, import_export::Importer::set_geo_physical_import_buffer(), gpu_enabled::sort(), and Fragmenter_Namespace::InsertData::tableId.

Referenced by RelAlgExecutor::executeSimpleInsert(), DBHandler::insert_data(), and Parser::InsertIntoTableAsSelectStmt::populateData().

6219  {
6220  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> defaults_buffers;
6221  if (insert_data.is_default.size() == 0) {
6222  insert_data.is_default.resize(insert_data.columnIds.size(), false);
6223  }
6224  CHECK(insert_data.is_default.size() == insert_data.is_default.size());
6225  auto cds = cat->getAllColumnMetadataForTable(insert_data.tableId, false, false, true);
6226  if (cds.size() == insert_data.columnIds.size()) {
6227  // all columns specified
6228  return defaults_buffers;
6229  }
6230  for (auto cd : cds) {
6231  if (std::find(insert_data.columnIds.begin(),
6232  insert_data.columnIds.end(),
6233  cd->columnId) == insert_data.columnIds.end()) {
6234  StringDictionary* dict = nullptr;
6235  if (cd->columnType.get_type() == kARRAY &&
6236  IS_STRING(cd->columnType.get_subtype()) && !cd->default_value.has_value()) {
6237  throw std::runtime_error("Cannot omit column \"" + cd->columnName +
6238  "\": omitting TEXT arrays is not supported yet");
6239  }
6240  if (cd->columnType.get_compression() == kENCODING_DICT) {
6241  dict = cat->getMetadataForDict(cd->columnType.get_comp_param())->stringDict.get();
6242  }
6243  defaults_buffers.emplace_back(std::make_unique<TypedImportBuffer>(cd, dict));
6244  }
6245  }
6246  // put buffers in order to fill geo sub-columns properly
6247  std::sort(defaults_buffers.begin(),
6248  defaults_buffers.end(),
6249  [](decltype(defaults_buffers[0])& a, decltype(defaults_buffers[0])& b) {
6250  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
6251  });
6252  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6253  auto cd = defaults_buffers[i]->getColumnDesc();
6254  std::string default_value = cd->default_value.value_or("NULL");
6255  defaults_buffers[i]->add_value(
6256  cd, default_value, !cd->default_value.has_value(), import_export::CopyParams());
6257  if (cd->columnType.is_geometry()) {
6258  std::vector<double> coords, bounds;
6259  std::vector<int> ring_sizes, poly_rings;
6260  SQLTypeInfo tinfo{cd->columnType};
6261  const bool validate_with_geos_if_available = false;
6262  CHECK(Geospatial::GeoTypesFactory::getGeoColumns(default_value,
6263  tinfo,
6264  coords,
6265  bounds,
6266  ring_sizes,
6267  poly_rings,
6268  validate_with_geos_if_available));
6269  // set physical columns starting with the following ID
6270  auto next_col = i + 1;
6271  import_export::Importer::set_geo_physical_import_buffer(
6272  *cat, cd, defaults_buffers, next_col, coords, bounds, ring_sizes, poly_rings);
6273  // skip physical columns filled with the call above
6274  i += cd->columnType.get_physical_cols();
6275  }
6276  }
6277  auto data = import_export::TypedImportBuffer::get_data_block_pointers(defaults_buffers);
6278  CHECK(data.size() == defaults_buffers.size());
6279  for (size_t i = 0; i < defaults_buffers.size(); ++i) {
6280  insert_data.data.push_back(data[i]);
6281  insert_data.columnIds.push_back(defaults_buffers[i]->getColumnDesc()->columnId);
6282  insert_data.is_default.push_back(true);
6283  }
6284  return defaults_buffers;
6285 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
std::vector< bool > is_default
Definition: Fragmenter.h:75
constexpr double a
Definition: Utm.h:32
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:70
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool force_null=false)
Definition: Importer.cpp:1636
static std::vector< DataBlockPtr > get_data_block_pointers(const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
Definition: Importer.cpp:2937
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1907
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:73
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2175
#define IS_STRING(T)
Definition: sqltypes.h:309
#define CHECK(condition)
Definition: Logger.h:291
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:71

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
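A minimal calling sketch, assuming insert_data already carries tableId, columnIds, and data blocks for the explicitly supplied columns (as in DBHandler::insert_data()):

// Hedged sketch. The returned TypedImportBuffers own the storage behind the
// DataBlockPtrs appended to insert_data.data, so they must outlive the insert.
void append_default_columns_sketch(const Catalog_Namespace::Catalog* cat,
                                   Fragmenter_Namespace::InsertData& insert_data) {
  auto default_buffers = import_export::fill_missing_columns(cat, insert_data);
  // insert_data.columnIds / data / is_default now also cover the omitted
  // columns; hand insert_data to the fragmenter while default_buffers lives.
}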

void import_export::gdalGatherFilesInArchiveRecursive ( const std::string &  archive_path,
std::vector< std::string > &  files 
)

Definition at line 5063 of file Importer.cpp.

References LOG, run_benchmark_import::result, and logger::WARNING.

Referenced by import_export::Importer::gdalGetAllFilesInArchive().

5064  {
5065  // prepare to gather subdirectories
5066  std::vector<std::string> subdirectories;
5067 
5068  // get entries
5069  char** entries = VSIReadDir(archive_path.c_str());
5070  if (!entries) {
5071  LOG(WARNING) << "Failed to get file listing at archive: " << archive_path;
5072  return;
5073  }
5074 
5075  // force scope
5076  {
5077  // request clean-up
5078  ScopeGuard entries_guard = [&] { CSLDestroy(entries); };
5079 
5080  // check all the entries
5081  int index = 0;
5082  while (true) {
5083  // get next entry, or drop out if there isn't one
5084  char* entry_c = entries[index++];
5085  if (!entry_c) {
5086  break;
5087  }
5088  std::string entry(entry_c);
5089 
5090  // ignore '.' and '..'
5091  if (entry == "." || entry == "..") {
5092  continue;
5093  }
5094 
5095  // build the full path
5096  std::string entry_path = archive_path + std::string("/") + entry;
5097 
5098  // is it a file or a sub-folder
5099  VSIStatBufL sb;
5100  int result = VSIStatExL(entry_path.c_str(), &sb, VSI_STAT_NATURE_FLAG);
5101  if (result < 0) {
5102  break;
5103  }
5104 
5105  if (VSI_ISDIR(sb.st_mode)) {
5106  // a directory that ends with .gdb could be a Geodatabase bundle
5107  // arguably dangerous to decide this purely by name, but any further
5108  // validation would be very complex especially at this scope
5109  if (boost::iends_with(entry_path, ".gdb")) {
5110  // add the directory as if it was a file and don't recurse into it
5111  files.push_back(entry_path);
5112  } else {
5113  // add subdirectory to be recursed into
5114  subdirectories.push_back(entry_path);
5115  }
5116  } else {
5117  // add this file
5118  files.push_back(entry_path);
5119  }
5120  }
5121  }
5122 
5123  // recurse into each subdirectories we found
5124  for (const auto& subdirectory : subdirectories) {
5125  gdalGatherFilesInArchiveRecursive(subdirectory, files);
5126  }
5127 }
#define LOG(tag)
Definition: Logger.h:285
void gdalGatherFilesInArchiveRecursive(const std::string &archive_path, std::vector< std::string > &files)
Definition: Importer.cpp:5063

+ Here is the caller graph for this function:
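A short driving sketch, assuming GDAL is already initialized and the archive is addressed through a GDAL VSI path (the "/vsizip/..." path is illustrative):

// Hedged sketch: list every file in an archive, recursing into plain
// subdirectories but treating *.gdb bundles as single entries, as above.
std::vector<std::string> files;
import_export::gdalGatherFilesInArchiveRecursive("/vsizip//tmp/shapes.zip", files);
for (const auto& file_path : files) {
  LOG(INFO) << "archive member: " << file_path;
}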

static ImportStatus import_export::import_thread_delimited ( int  thread_id,
Importer *  importer,
std::unique_ptr< char[]>  scratch_buffer,
size_t  begin_pos,
size_t  end_pos,
size_t  total_size,
size_t  first_row_index_this_buffer,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor 
)
static

Definition at line 1986 of file Importer.cpp.

References CHECK, CHECK_LT, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::GeoTypesFactory::createOGRGeometry(), DEBUG_TIMING, logger::ERROR, measure< TimeT >::execution(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::delimited_parser::find_beginning(), import_export::CopyParams::geo_explode_collections, geo_promoted_type_match(), import_export::CopyParams::geo_validate_geometry, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), import_export::Importer::get_is_array(), import_export::delimited_parser::get_row(), Catalog_Namespace::SessionInfo::get_session_id(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), importGeoFromLonLat(), logger::INFO, IS_GEO, is_null(), ImportHelpers::is_null_datum(), kDelimitedFile, kMULTIPOINT, kPOINT, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::lonlat, import_export::CopyParams::max_reject, import_export::CopyParams::null_str, shared::printContainer(), import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, import_export::CopyParams::source_type, sv_strip(), gpu_enabled::swap(), import_export::ImportStatus::thread_id, logger::thread_id(), to_string(), import_export::CopyParams::trim_spaces, and UNLIKELY.

Referenced by import_export::Importer::importDelimited().

1995  {
1996  ImportStatus thread_import_status;
1997  int64_t total_get_row_time_us = 0;
1998  int64_t total_str_to_val_time_us = 0;
1999  auto query_session = session_info ? session_info->get_session_id() : "";
2000  CHECK(scratch_buffer);
2001  auto buffer = scratch_buffer.get();
2002  auto load_ms = measure<>::execution([]() {});
2003 
2004  thread_import_status.thread_id = thread_id;
2005 
2006  auto ms = measure<>::execution([&]() {
2007  const CopyParams& copy_params = importer->get_copy_params();
2008  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2009  size_t begin =
2010  delimited_parser::find_beginning(buffer, begin_pos, end_pos, copy_params);
2011  const char* thread_buf = buffer + begin_pos + begin;
2012  const char* thread_buf_end = buffer + end_pos;
2013  const char* buf_end = buffer + total_size;
2014  bool try_single_thread = false;
2015  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2016  importer->get_import_buffers(thread_id);
2018  int phys_cols = 0;
2019  int point_cols = 0;
2020  for (const auto cd : col_descs) {
2021  const auto& col_ti = cd->columnType;
2022  phys_cols += col_ti.get_physical_cols();
2023  if (cd->columnType.get_type() == kPOINT ||
2024  cd->columnType.get_type() == kMULTIPOINT) {
2025  point_cols++;
2026  }
2027  }
2028  auto num_cols = col_descs.size() - phys_cols;
2029  for (const auto& p : import_buffers) {
2030  p->clear();
2031  }
2032  std::vector<std::string_view> row;
2033  size_t row_index_plus_one = 0;
2034  for (const char* p = thread_buf; p < thread_buf_end; p++) {
2035  row.clear();
2036  std::vector<std::unique_ptr<char[]>>
2037  tmp_buffers; // holds string w/ removed escape chars, etc
2038  row_index_plus_one++;
2039  if (DEBUG_TIMING) {
2040  us = measure<std::chrono::microseconds>::execution([&]() {
2041  p = import_export::delimited_parser::get_row(p,
2042  thread_buf_end,
2043  buf_end,
2044  copy_params,
2045  importer->get_is_array(),
2046  row,
2047  tmp_buffers,
2048  try_single_thread,
2049  true);
2050  });
2051  total_get_row_time_us += us;
2052  } else {
2053  p = import_export::delimited_parser::get_row(p,
2054  thread_buf_end,
2055  buf_end,
2056  copy_params,
2057  importer->get_is_array(),
2058  row,
2059  tmp_buffers,
2060  try_single_thread,
2061  true);
2062  }
2063  // Each POINT could consume two separate coords instead of a single WKT
2064  if (row.size() < num_cols || (num_cols + point_cols) < row.size()) {
2065  thread_import_status.rows_rejected++;
2066  LOG(ERROR) << "Incorrect Row (expected " << num_cols << " columns, has "
2067  << row.size() << "): " << shared::printContainer(row);
2068  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2069  break;
2070  }
2071  continue;
2072  }
2073 
2074  //
2075  // lambda for importing a row (perhaps multiple times if exploding a collection)
2076  //
2077 
2078  auto execute_import_row = [&](OGRGeometry* import_geometry) {
2079  size_t import_idx = 0;
2080  size_t col_idx = 0;
2081  try {
2082  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2083  auto cd = *cd_it;
2084  const auto& col_ti = cd->columnType;
2085 
2086  bool is_null =
2087  ImportHelpers::is_null_datum(row[import_idx], copy_params.null_str);
2088  // Note: default copy_params.null_str is "\N", but everyone uses "NULL".
2089  // So initially nullness may be missed and not passed to add_value,
2090  // which then might also check and still decide it's actually a NULL, e.g.
2091  // if kINT doesn't start with a digit or a '-' then it's considered NULL.
2092  // So "NULL" is not recognized as NULL but then it's not recognized as
2093  // a valid kINT, so it's a NULL after all.
2094  // Checking for "NULL" here too, as a widely accepted notation for NULL.
2095 
2096  // Treating empty as NULL
2097  if (!cd->columnType.is_string() && row[import_idx].empty()) {
2098  is_null = true;
2099  }
2100  if (!cd->columnType.is_string() && !copy_params.trim_spaces) {
2101  // everything but strings should be always trimmed
2102  row[import_idx] = sv_strip(row[import_idx]);
2103  }
2104 
2105  if (col_ti.get_physical_cols() == 0) {
2106  // not geo
2107 
2108  import_buffers[col_idx]->add_value(
2109  cd, row[import_idx], is_null, copy_params);
2110 
2111  // next
2112  ++import_idx;
2113  ++col_idx;
2114  } else {
2115  // geo
2116 
2117  // store null string in the base column
2118  import_buffers[col_idx]->add_value(
2119  cd, copy_params.null_str, true, copy_params);
2120 
2121  // WKT from string we're not storing
2122  auto const& geo_string = row[import_idx];
2123 
2124  // next
2125  ++import_idx;
2126  ++col_idx;
2127 
2128  SQLTypes col_type = col_ti.get_type();
2129  CHECK(IS_GEO(col_type));
2130 
2131  std::vector<double> coords;
2132  std::vector<double> bounds;
2133  std::vector<int> ring_sizes;
2134  std::vector<int> poly_rings;
2135 
2136  // if this is a POINT or MULTIPOINT column, and the field is not null, and
2137  // looks like a scalar numeric value (and not a hex blob) attempt to import
2138  // two columns as lon/lat (or lat/lon)
2139  if ((col_type == kPOINT || col_type == kMULTIPOINT) && !is_null &&
2140  geo_string.size() > 0 &&
2141  (geo_string[0] == '.' || isdigit(geo_string[0]) ||
2142  geo_string[0] == '-') &&
2143  geo_string.find_first_of("ABCDEFabcdef") == std::string::npos) {
2144  double lon = std::atof(std::string(geo_string).c_str());
2145  double lat = NAN;
2146  auto lat_str = row[import_idx];
2147  ++import_idx;
2148  if (lat_str.size() > 0 &&
2149  (lat_str[0] == '.' || isdigit(lat_str[0]) || lat_str[0] == '-')) {
2150  lat = std::atof(std::string(lat_str).c_str());
2151  }
2152  // Swap coordinates if this table uses a reverse order: lat/lon
2153  if (!copy_params.lonlat) {
2154  std::swap(lat, lon);
2155  }
2156  // TODO: should check if POINT/MULTIPOINT column should have been declared
2157  // with SRID WGS 84, EPSG 4326 ? if (col_ti.get_dimension() != 4326) {
2158  // throw std::runtime_error("POINT column " + cd->columnName + " is
2159  // not WGS84, cannot insert lon/lat");
2160  // }
2161  SQLTypeInfo import_ti{col_ti};
2162  if (copy_params.source_type ==
2163  import_export::SourceType::kDelimitedFile &&
2164  import_ti.get_output_srid() == 4326) {
2165  auto srid0 = copy_params.source_srid;
2166  if (srid0 > 0) {
2167  // srid0 -> 4326 transform is requested on import
2168  import_ti.set_input_srid(srid0);
2169  }
2170  }
2171  if (!importGeoFromLonLat(lon, lat, coords, bounds, import_ti)) {
2172  throw std::runtime_error(
2173  "Cannot read lon/lat to insert into POINT/MULTIPOINT column " +
2174  cd->columnName);
2175  }
2176  } else {
2177  // import it
2178  SQLTypeInfo import_ti{col_ti};
2179  if (copy_params.source_type ==
2180  import_export::SourceType::kDelimitedFile &&
2181  import_ti.get_output_srid() == 4326) {
2182  auto srid0 = copy_params.source_srid;
2183  if (srid0 > 0) {
2184  // srid0 -> 4326 transform is requested on import
2185  import_ti.set_input_srid(srid0);
2186  }
2187  }
2188  if (is_null) {
2189  if (col_ti.get_notnull()) {
2190  throw std::runtime_error("NULL geo for column " + cd->columnName);
2191  }
2192  Geospatial::GeoTypesFactory::getNullGeoColumns(
2193  import_ti, coords, bounds, ring_sizes, poly_rings);
2194  } else {
2195  if (import_geometry) {
2196  // geometry already exploded
2197  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2198  import_geometry,
2199  import_ti,
2200  coords,
2201  bounds,
2202  ring_sizes,
2203  poly_rings,
2204  copy_params.geo_validate_geometry)) {
2205  std::string msg =
2206  "Failed to extract valid geometry from exploded row " +
2207  std::to_string(first_row_index_this_buffer +
2208  row_index_plus_one) +
2209  " for column " + cd->columnName;
2210  throw std::runtime_error(msg);
2211  }
2212  } else {
2213  // extract geometry directly from WKT
2214  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2215  std::string(geo_string),
2216  import_ti,
2217  coords,
2218  bounds,
2219  ring_sizes,
2220  poly_rings,
2221  copy_params.geo_validate_geometry)) {
2222  std::string msg = "Failed to extract valid geometry from row " +
2223  std::to_string(first_row_index_this_buffer +
2224  row_index_plus_one) +
2225  " for column " + cd->columnName;
2226  throw std::runtime_error(msg);
2227  }
2228  }
2229 
2230  // validate types
2231  if (!geo_promoted_type_match(import_ti.get_type(), col_type)) {
2232  throw std::runtime_error(
2233  "Imported geometry doesn't match the type of column " +
2234  cd->columnName);
2235  }
2236  }
2237  }
2238 
2239  // import extracted geo
2240  Importer::set_geo_physical_import_buffer(importer->getCatalog(),
2241  cd,
2242  import_buffers,
2243  col_idx,
2244  coords,
2245  bounds,
2246  ring_sizes,
2247  poly_rings);
2248 
2249  // skip remaining physical columns
2250  for (int i = 0; i < cd->columnType.get_physical_cols(); ++i) {
2251  ++cd_it;
2252  }
2253  }
2254  }
2255  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2256  check_session_interrupted(query_session, executor))) {
2257  thread_import_status.load_failed = true;
2258  thread_import_status.load_msg =
2259  "Table load was cancelled via Query Interrupt";
2260  return;
2261  }
2262  thread_import_status.rows_completed++;
2263  } catch (const std::exception& e) {
2264  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2265  import_buffers[col_idx_to_pop]->pop_value();
2266  }
2267  thread_import_status.rows_rejected++;
2268  LOG(ERROR) << "Input exception thrown: " << e.what()
2269  << ". Row discarded. Data: " << shared::printContainer(row);
2270  if (thread_import_status.rows_rejected > copy_params.max_reject) {
2271  LOG(ERROR) << "Load was cancelled due to max reject rows being reached";
2272  thread_import_status.load_failed = true;
2273  thread_import_status.load_msg =
2274  "Load was cancelled due to max reject rows being reached";
2275  }
2276  }
2277  }; // End of lambda
2278 
2279  if (copy_params.geo_explode_collections) {
2280  // explode and import
2281  auto const [collection_col_idx, collection_child_type, collection_col_name] =
2282  explode_collections_step1(col_descs);
2283  // pull out the collection WKT or WKB hex
2284  CHECK_LT(collection_col_idx, (int)row.size()) << "column index out of range";
2285  auto const& collection_geo_string = row[collection_col_idx];
2286  // convert to OGR
2287  OGRGeometry* ogr_geometry = nullptr;
2288  ScopeGuard destroy_ogr_geometry = [&] {
2289  if (ogr_geometry) {
2290  OGRGeometryFactory::destroyGeometry(ogr_geometry);
2291  }
2292  };
2293  ogr_geometry = Geospatial::GeoTypesFactory::createOGRGeometry(
2294  std::string(collection_geo_string), copy_params.geo_validate_geometry);
2295  // do the explode and import
2296  us = explode_collections_step2(ogr_geometry,
2297  collection_child_type,
2298  collection_col_name,
2299  first_row_index_this_buffer + row_index_plus_one,
2300  execute_import_row);
2301  } else {
2302  // import non-collection row just once
2303  us = measure<std::chrono::microseconds>::execution(
2304  [&] { execute_import_row(nullptr); });
2305  }
2306 
2307  if (thread_import_status.load_failed) {
2308  break;
2309  }
2310  } // end thread
2311  total_str_to_val_time_us += us;
2312  if (!thread_import_status.load_failed && thread_import_status.rows_completed > 0) {
2313  load_ms = measure<>::execution([&]() {
2314  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2315  });
2316  }
2317  }); // end execution
2318 
2319  if (DEBUG_TIMING && !thread_import_status.load_failed &&
2320  thread_import_status.rows_completed > 0) {
2321  LOG(INFO) << "Thread" << std::this_thread::get_id() << ":"
2322  << thread_import_status.rows_completed << " rows inserted in "
2323  << (double)ms / 1000.0 << "sec, Insert Time: " << (double)load_ms / 1000.0
2324  << "sec, get_row: " << (double)total_get_row_time_us / 1000000.0
2325  << "sec, str_to_val: " << (double)total_str_to_val_time_us / 1000000.0
2326  << "sec" << std::endl;
2327  }
2328 
2329  return thread_import_status;
2330 }
bool geo_promoted_type_match(const SQLTypes a, const SQLTypes b)
Definition: sqltypes.h:2031
bool is_null_datum(const DatumStringType &datum, const std::string &null_indicator)
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:124
SQLTypes
Definition: sqltypes.h:65
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
std::string_view sv_strip(std::string_view str)
return trimmed string_view
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1896
#define LOG(tag)
Definition: Logger.h:285
bool importGeoFromLonLat(double lon, double lat, std::vector< double > &coords, std::vector< double > &bounds, SQLTypeInfo &ti)
Definition: Importer.cpp:1608
size_t find_beginning(const char *buffer, size_t begin, size_t end, const import_export::CopyParams &copy_params)
Finds the closest possible row beginning in the given buffer.
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings)
Definition: Types.cpp:1342
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:154
void set_input_srid(int d)
Definition: sqltypes.h:474
CONSTEXPR DEVICE bool is_null(const T &value)
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
const char * get_row(const char *buf, const char *buf_end, const char *entire_buf_end, const import_export::CopyParams &copy_params, const bool *is_array, std::vector< T > &row, std::vector< std::unique_ptr< char[]>> &tmp_buffers, bool &try_single_thread, bool filter_empty_lines)
Parses the first row in the given buffer and inserts fields into given vector.
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
#define CHECK_LT(x, y)
Definition: Logger.h:303
static OGRGeometry * createOGRGeometry(const std::string &wkt_or_wkb_hex, const bool validate_with_geos_if_available)
Definition: Types.cpp:1063
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1862
ThreadId thread_id()
Definition: Logger.cpp:879
#define CHECK(condition)
Definition: Logger.h:291
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:108
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114
#define IS_GEO(T)
Definition: sqltypes.h:310

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
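The lon/lat fast path above (around source lines 2139-2151) only triggers when a POINT/MULTIPOINT field looks like a bare number rather than WKT or a WKB hex blob. A standalone restatement of that check, with a name of our own choosing:

#include <cctype>
#include <string_view>

// True if the field could be a plain numeric longitude: it starts with a
// digit, '.', or '-', and contains no hex letters (which would instead
// suggest a WKB hex string). Mirrors the condition in import_thread_delimited;
// when it holds, the next field is consumed as the latitude.
bool looks_like_scalar_coordinate(std::string_view field) {
  if (field.empty()) {
    return false;
  }
  const unsigned char first = static_cast<unsigned char>(field.front());
  const bool numeric_start = (first == '.' || first == '-' || std::isdigit(first) != 0);
  const bool has_hex_letters =
      field.find_first_of("ABCDEFabcdef") != std::string_view::npos;
  return numeric_start && !has_hex_letters;
}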

static ImportStatus import_export::import_thread_shapefile ( int  thread_id,
Importer *  importer,
OGRCoordinateTransformation *  coordinate_transformation,
const FeaturePtrVector &  features,
size_t  firstFeature,
size_t  numFeatures,
const FieldNameToIndexMapType &  fieldNameToIndexMap,
const ColumnNameToSourceNameMapType &  columnNameToSourceNameMap,
const Catalog_Namespace::SessionInfo *  session_info,
Executor *  executor,
const MetadataColumnInfos &  metadata_column_infos 
)
static

Definition at line 2338 of file Importer.cpp.

References CHECK, anonymous_namespace{Importer.cpp}::check_session_interrupted(), Geospatial::compress_coords(), DEBUG_TIMING, logger::ERROR, import_export::anonymous_namespace{Importer.cpp}::explode_collections_step1(), import_export::anonymous_namespace{Importer.cpp}::explode_collections_step2(), import_export::CopyParams::geo_explode_collections, geo_promoted_type_match(), import_export::CopyParams::geo_validate_geometry, import_export::Importer::get_column_descs(), import_export::Importer::get_copy_params(), import_export::Importer::get_import_buffers(), Catalog_Namespace::SessionInfo::get_session_id(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), QueryExecutionError::hasErrorCode(), logger::INFO, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOLYGON, import_export::Importer::load(), import_export::ImportStatus::load_failed, import_export::ImportStatus::load_msg, LOG, import_export::CopyParams::null_str, import_export::ImportStatus::rows_completed, import_export::ImportStatus::rows_rejected, generate_TableFunctionsFactory_init::separator, import_export::ImportStatus::thread_id, logger::thread_id(), timer_start(), TIMER_STOP, to_string(), and UNLIKELY.

Referenced by import_export::Importer::importGDALGeo().

2349  {
2350  ImportStatus thread_import_status;
2351  const CopyParams& copy_params = importer->get_copy_params();
2352  const std::list<const ColumnDescriptor*>& col_descs = importer->get_column_descs();
2353  std::vector<std::unique_ptr<TypedImportBuffer>>& import_buffers =
2354  importer->get_import_buffers(thread_id);
2355  auto query_session = session_info ? session_info->get_session_id() : "";
2356  for (const auto& p : import_buffers) {
2357  p->clear();
2358  }
2359 
2360  auto convert_timer = timer_start();
2361 
2362  // for all the features in this chunk...
2363  for (size_t iFeature = 0; iFeature < numFeatures; iFeature++) {
2364  // ignore null features
2365  if (!features[iFeature]) {
2366  continue;
2367  }
2368 
2369  // get this feature's geometry
2370  // for geodatabase, we need to consider features with no geometry
2371  // as we still want to create a table, even if it has no geo column
2372  OGRGeometry* pGeometry = features[iFeature]->GetGeometryRef();
2373  if (pGeometry && coordinate_transformation) {
2374  pGeometry->transform(coordinate_transformation);
2375  }
2376 
2377  //
2378  // lambda for importing a feature (perhaps multiple times if exploding a collection)
2379  //
2380 
2381  auto execute_import_feature = [&](OGRGeometry* import_geometry) {
2382  size_t col_idx = 0;
2383  try {
2384  if (UNLIKELY((thread_import_status.rows_completed & 0xFFFF) == 0 &&
2385  check_session_interrupted(query_session, executor))) {
2386  thread_import_status.load_failed = true;
2387  thread_import_status.load_msg = "Table load was cancelled via Query Interrupt";
2388  throw QueryExecutionError(ErrorCode::INTERRUPTED);
2389  }
2390 
2391  uint32_t field_column_count{0u};
2392  uint32_t metadata_column_count{0u};
2393 
2394  for (auto cd_it = col_descs.begin(); cd_it != col_descs.end(); cd_it++) {
2395  auto cd = *cd_it;
2396 
2397  // is this a geo column?
2398  const auto& col_ti = cd->columnType;
2399  if (col_ti.is_geometry()) {
2400  // Note that this assumes there is one and only one geo column in the
2401  // table. Currently, the importer only supports reading a single
2402  // geospatial feature from an input shapefile / geojson file, but this
2403  // code will need to be modified if that changes
2404  SQLTypes col_type = col_ti.get_type();
2405 
2406  // store null string in the base column
2407  import_buffers[col_idx]->add_value(
2408  cd, copy_params.null_str, true, copy_params);
2409  ++col_idx;
2410 
2411  // the data we now need to extract for the other columns
2412  std::vector<double> coords;
2413  std::vector<double> bounds;
2414  std::vector<int> ring_sizes;
2415  std::vector<int> poly_rings;
2416 
2417  // extract it
2418  SQLTypeInfo import_ti{col_ti};
2419  bool is_null_geo = !import_geometry;
2420  if (is_null_geo) {
2421  if (col_ti.get_notnull()) {
2422  throw std::runtime_error("NULL geo for column " + cd->columnName);
2423  }
2424  Geospatial::GeoTypesFactory::getNullGeoColumns(
2425  import_ti, coords, bounds, ring_sizes, poly_rings);
2426  } else {
2427  if (!Geospatial::GeoTypesFactory::getGeoColumns(
2428  import_geometry,
2429  import_ti,
2430  coords,
2431  bounds,
2432  ring_sizes,
2433  poly_rings,
2434  copy_params.geo_validate_geometry)) {
2435  std::string msg = "Failed to extract valid geometry from feature " +
2436  std::to_string(firstFeature + iFeature + 1) +
2437  " for column " + cd->columnName;
2438  throw std::runtime_error(msg);
2439  }
2440 
2441  // validate types
2442  if (!geo_promoted_type_match(import_ti.get_type(), col_type)) {
2443  throw std::runtime_error(
2444  "Imported geometry doesn't match the type of column " +
2445  cd->columnName);
2446  }
2447  }
2448 
2449  // create coords array value and add it to the physical column
2450  ++cd_it;
2451  auto cd_coords = *cd_it;
2452  std::vector<TDatum> td_coord_data;
2453  if (!is_null_geo) {
2454  std::vector<uint8_t> compressed_coords =
2455  Geospatial::compress_coords(coords, col_ti);
2456  for (auto cc : compressed_coords) {
2457  TDatum td_byte;
2458  td_byte.val.int_val = cc;
2459  td_coord_data.push_back(td_byte);
2460  }
2461  }
2462  TDatum tdd_coords;
2463  tdd_coords.val.arr_val = td_coord_data;
2464  tdd_coords.is_null = is_null_geo;
2465  import_buffers[col_idx]->add_value(cd_coords, tdd_coords, false);
2466  ++col_idx;
2467 
2468  if (col_type == kMULTILINESTRING || col_type == kPOLYGON ||
2469  col_type == kMULTIPOLYGON) {
2470  // Create [linest]ring_sizes array value and add it to the physical column
2471  ++cd_it;
2472  auto cd_ring_sizes = *cd_it;
2473  std::vector<TDatum> td_ring_sizes;
2474  if (!is_null_geo) {
2475  for (auto ring_size : ring_sizes) {
2476  TDatum td_ring_size;
2477  td_ring_size.val.int_val = ring_size;
2478  td_ring_sizes.push_back(td_ring_size);
2479  }
2480  }
2481  TDatum tdd_ring_sizes;
2482  tdd_ring_sizes.val.arr_val = td_ring_sizes;
2483  tdd_ring_sizes.is_null = is_null_geo;
2484  import_buffers[col_idx]->add_value(cd_ring_sizes, tdd_ring_sizes, false);
2485  ++col_idx;
2486  }
2487 
2488  if (col_type == kMULTIPOLYGON) {
2489  // Create poly_rings array value and add it to the physical column
2490  ++cd_it;
2491  auto cd_poly_rings = *cd_it;
2492  std::vector<TDatum> td_poly_rings;
2493  if (!is_null_geo) {
2494  for (auto num_rings : poly_rings) {
2495  TDatum td_num_rings;
2496  td_num_rings.val.int_val = num_rings;
2497  td_poly_rings.push_back(td_num_rings);
2498  }
2499  }
2500  TDatum tdd_poly_rings;
2501  tdd_poly_rings.val.arr_val = td_poly_rings;
2502  tdd_poly_rings.is_null = is_null_geo;
2503  import_buffers[col_idx]->add_value(cd_poly_rings, tdd_poly_rings, false);
2504  ++col_idx;
2505  }
2506 
2507  if (col_type == kLINESTRING || col_type == kMULTILINESTRING ||
2508  col_type == kPOLYGON || col_type == kMULTIPOLYGON ||
2509  col_type == kMULTIPOINT) {
2510  // Create bounds array value and add it to the physical column
2511  ++cd_it;
2512  auto cd_bounds = *cd_it;
2513  std::vector<TDatum> td_bounds_data;
2514  if (!is_null_geo) {
2515  for (auto b : bounds) {
2516  TDatum td_double;
2517  td_double.val.real_val = b;
2518  td_bounds_data.push_back(td_double);
2519  }
2520  }
2521  TDatum tdd_bounds;
2522  tdd_bounds.val.arr_val = td_bounds_data;
2523  tdd_bounds.is_null = is_null_geo;
2524  import_buffers[col_idx]->add_value(cd_bounds, tdd_bounds, false);
2525  ++col_idx;
2526  }
2527  } else if (field_column_count < fieldNameToIndexMap.size()) {
2528  //
2529  // field column
2530  //
2531  auto const cit = columnNameToSourceNameMap.find(cd->columnName);
2532  CHECK(cit != columnNameToSourceNameMap.end());
2533  auto const& field_name = cit->second;
2534 
2535  auto const fit = fieldNameToIndexMap.find(field_name);
2536  if (fit == fieldNameToIndexMap.end()) {
2537  throw ColumnNotGeoError(cd->columnName);
2538  }
2539 
2540  auto const& field_index = fit->second;
2541  CHECK(field_index < fieldNameToIndexMap.size());
2542 
2543  auto const& feature = features[iFeature];
2544 
2545  auto field_defn = feature->GetFieldDefnRef(field_index);
2546  CHECK(field_defn);
2547 
2548  // OGRFeature::GetFieldAsString() can only return 80 characters
2549  // so for array columns, we are obliged to fetch the actual values
2550  // and construct the concatenated string ourselves
2551 
2552  std::string value_string;
2553  int array_index = 0, array_size = 0;
2554 
2555  auto stringify_numeric_list = [&](auto* values) {
2556  value_string = "{";
2557  while (array_index < array_size) {
2558  auto separator = (array_index > 0) ? "," : "";
2559  value_string += separator + std::to_string(values[array_index]);
2560  array_index++;
2561  }
2562  value_string += "}";
2563  };
2564 
2565  auto field_type = field_defn->GetType();
2566  switch (field_type) {
2567  case OFTInteger:
2568  case OFTInteger64:
2569  case OFTReal:
2570  case OFTString:
2571  case OFTBinary:
2572  case OFTDate:
2573  case OFTTime:
2574  case OFTDateTime: {
2575  value_string = feature->GetFieldAsString(field_index);
2576  } break;
2577  case OFTIntegerList: {
2578  auto* values = feature->GetFieldAsIntegerList(field_index, &array_size);
2579  stringify_numeric_list(values);
2580  } break;
2581  case OFTInteger64List: {
2582  auto* values = feature->GetFieldAsInteger64List(field_index, &array_size);
2583  stringify_numeric_list(values);
2584  } break;
2585  case OFTRealList: {
2586  auto* values = feature->GetFieldAsDoubleList(field_index, &array_size);
2587  stringify_numeric_list(values);
2588  } break;
2589  case OFTStringList: {
2590  auto** array_of_strings = feature->GetFieldAsStringList(field_index);
2591  value_string = "{";
2592  if (array_of_strings) {
2593  while (auto* this_string = array_of_strings[array_index]) {
2594  auto separator = (array_index > 0) ? "," : "";
2595  value_string += separator + std::string(this_string);
2596  array_index++;
2597  }
2598  }
2599  value_string += "}";
2600  } break;
2601  default:
2602  throw std::runtime_error("Unsupported geo file field type (" +
2603  std::to_string(static_cast<int>(field_type)) +
2604  ")");
2605  }
2606 
2607  import_buffers[col_idx]->add_value(cd, value_string, false, copy_params);
2608  ++col_idx;
2609  field_column_count++;
2610  } else if (metadata_column_count < metadata_column_infos.size()) {
2611  //
2612  // metadata column
2613  //
2614  auto const& mci = metadata_column_infos[metadata_column_count];
2615  if (mci.column_descriptor.columnName != cd->columnName) {
2616  throw std::runtime_error("Metadata column name mismatch");
2617  }
2618  import_buffers[col_idx]->add_value(cd, mci.value, false, copy_params);
2619  ++col_idx;
2620  metadata_column_count++;
2621  } else {
2622  throw std::runtime_error("Column count mismatch");
2623  }
2624  }
2625  thread_import_status.rows_completed++;
2626  } catch (QueryExecutionError& e) {
2627  if (e.hasErrorCode(ErrorCode::INTERRUPTED)) {
2628  throw e;
2629  }
2630  } catch (ColumnNotGeoError& e) {
2631  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Aborting import.";
2632  throw std::runtime_error(e.what());
2633  } catch (const std::exception& e) {
2634  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
2635  import_buffers[col_idx_to_pop]->pop_value();
2636  }
2637  thread_import_status.rows_rejected++;
2638  LOG(ERROR) << "Input exception thrown: " << e.what() << ". Row discarded.";
2639  }
2640  };
2641 
2642  if (pGeometry && copy_params.geo_explode_collections) {
2643  // explode and import
2644  auto const [collection_idx_type_name, collection_child_type, collection_col_name] =
2645  explode_collections_step1(col_descs);
2646  explode_collections_step2(pGeometry,
2647  collection_child_type,
2648  collection_col_name,
2649  firstFeature + iFeature + 1,
2650  execute_import_feature);
2651  } else {
2652  // import non-collection or null feature just once
2653  execute_import_feature(pGeometry);
2654  }
2655  } // end features
2656 
2657  float convert_s = TIMER_STOP(convert_timer);
2658 
2659  float load_s = 0.0f;
2660  if (thread_import_status.rows_completed > 0) {
2661  auto load_timer = timer_start();
2662  importer->load(import_buffers, thread_import_status.rows_completed, session_info);
2663  load_s = TIMER_STOP(load_timer);
2664  }
2665 
2666  if (DEBUG_TIMING && thread_import_status.rows_completed > 0) {
2667  LOG(INFO) << "DEBUG: Process " << convert_s << "s";
2668  LOG(INFO) << "DEBUG: Load " << load_s << "s";
2669  LOG(INFO) << "DEBUG: Total " << (convert_s + load_s) << "s";
2670  }
2671 
2672  thread_import_status.thread_id = thread_id;
2673 
2674  return thread_import_status;
2675 }
bool geo_promoted_type_match(const SQLTypes a, const SQLTypes b)
Definition: sqltypes.h:2031
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:124
SQLTypes
Definition: sqltypes.h:65
int64_t explode_collections_step2(OGRGeometry *ogr_geometry, SQLTypes collection_child_type, const std::string &collection_col_name, size_t row_or_feature_idx, std::function< void(OGRGeometry *)> execute_import_lambda)
Definition: Importer.cpp:1896
#define LOG(tag)
Definition: Logger.h:285
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings)
Definition: Types.cpp:1342
std::string to_string(char const *&&v)
#define DEBUG_TIMING
Definition: Importer.cpp:154
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
#define TIMER_STOP(t)
Definition: Importer.cpp:100
bool hasErrorCode(ErrorCode const ec) const
Definition: ErrorHandling.h:65
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
#define UNLIKELY(x)
Definition: likely.h:25
std::string get_session_id() const
Definition: SessionInfo.h:93
std::tuple< int, SQLTypes, std::string > explode_collections_step1(const std::list< const ColumnDescriptor * > &col_descs)
Definition: Importer.cpp:1862
ThreadId thread_id()
Definition: Logger.cpp:879
#define CHECK(condition)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
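The stringify_numeric_list lambda above exists because OGRFeature::GetFieldAsString() truncates long values, so OGR list fields are re-joined by hand into the braced array-literal form that TypedImportBuffer::add_value() parses for array columns. A standalone restatement of that formatting step (the function name is ours):

#include <string>
#include <vector>

// Join a numeric list into "{v0,v1,...}".
template <typename T>
std::string to_array_literal(const std::vector<T>& values) {
  std::string result = "{";
  for (size_t i = 0; i < values.size(); ++i) {
    result += (i > 0 ? "," : "") + std::to_string(values[i]);
  }
  result += "}";
  return result;
}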

bool import_export::importGeoFromLonLat ( double  lon,
double  lat,
std::vector< double > &  coords,
std::vector< double > &  bounds,
SQLTypeInfo &  ti 
)

Definition at line 1608 of file Importer.cpp.

References CHECK_EQ, Geospatial::GeoPoint::getColumns(), and SQLTypeInfo::transforms().

Referenced by import_thread_delimited().

1612  {
1613  if (std::isinf(lat) || std::isnan(lat) || std::isinf(lon) || std::isnan(lon)) {
1614  return false;
1615  }
1616  if (ti.transforms()) {
1617  Geospatial::GeoPoint pt{std::vector<double>{lon, lat}};
1618  if (!pt.transform(ti)) {
1619  return false;
1620  }
1621  pt.getColumns(coords);
1622  } else {
1623  coords.push_back(lon);
1624  coords.push_back(lat);
1625  }
1626  // in case of promotion to MULTIPOINT
1627  CHECK_EQ(coords.size(), 2u);
1628  bounds.reserve(4);
1629  bounds.push_back(coords[0]);
1630  bounds.push_back(coords[1]);
1631  bounds.push_back(coords[0]);
1632  bounds.push_back(coords[1]);
1633  return true;
1634 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void getColumns(std::vector< double > &coords) const
Definition: Types.cpp:568
bool transforms() const
Definition: sqltypes.h:626

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
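A short calling sketch, assuming ti describes a geo POINT column (for example GEOMETRY(POINT, 4326)) taken from a ColumnDescriptor:

// Hedged sketch: turn a lon/lat pair into the coords/bounds buffers later fed
// to set_geo_physical_import_buffer(). Returns false on NaN/inf input or a
// failed SRID transform.
bool make_point_buffers_sketch(SQLTypeInfo& ti, double lon, double lat) {
  std::vector<double> coords;
  std::vector<double> bounds;
  if (!import_export::importGeoFromLonLat(lon, lat, coords, bounds, ti)) {
    return false;
  }
  // coords == {x, y} after any transform; bounds == {x, y, x, y}.
  return true;
}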

ArrayDatum import_export::NullArray ( const SQLTypeInfo &  ti )

Definition at line 371 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), NullArrayDatum(), and NullDatum().

Referenced by import_export::TypedImportBuffer::add_value(), import_export::TypedImportBuffer::add_values(), import_export::TypedImportBuffer::addDefaultValues(), import_export::ImporterUtils::composeNullArray(), and TDatumToArrayDatum().

371  {
372  SQLTypeInfo elem_ti = ti.get_elem_type();
373  auto len = ti.get_size();
374 
375  if (len > 0) {
376  // Compose a NULL fixlen array
377  int8_t* buf = (int8_t*)checked_malloc(len);
378  // First scalar is a NULL_ARRAY sentinel
379  Datum d = NullArrayDatum(elem_ti);
380  int8_t* p = append_datum(buf, d, elem_ti);
381  CHECK(p);
382  // Rest is filled with normal NULL sentinels
383  Datum d0 = NullDatum(elem_ti);
384  while ((p - buf) < len) {
385  p = append_datum(p, d0, elem_ti);
386  CHECK(p);
387  }
388  CHECK((p - buf) == len);
389  return ArrayDatum(len, buf, true);
390  }
391  // NULL varlen array
392  return ArrayDatum(0, NULL, true);
393 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:580
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
Datum NullDatum(const SQLTypeInfo &ti)
Definition: Datum.cpp:288
#define CHECK(condition)
Definition: Logger.h:291
Datum NullArrayDatum(SQLTypeInfo &ti)
Definition: Importer.cpp:273
Definition: Datum.h:71
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
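Usage sketch (cd is assumed to be a ColumnDescriptor for an array column obtained from the catalog):

// Hedged sketch: the datum stored when an array field is NULL.
ArrayDatum null_array = import_export::NullArray(cd->columnType);
// Fixed-length arrays get an allocated buffer whose first element is the
// NULL_ARRAY sentinel and whose remaining slots are ordinary NULL sentinels;
// variable-length arrays come back as (0, nullptr, is_null = true).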

Datum import_export::NullArrayDatum ( SQLTypeInfo &  ti )

Definition at line 273 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, Datum::doubleval, Datum::floatval, import_export::anonymous_namespace{Importer.cpp}::get_type_for_datum(), inline_fixed_encoding_null_array_val(), Datum::intval, kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_ARRAY_DOUBLE, NULL_ARRAY_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by NullArray().

273  {
274  Datum d;
275  const auto type = get_type_for_datum(ti);
276  switch (type) {
277  case kBOOLEAN:
278  d.boolval = inline_fixed_encoding_null_array_val(ti);
279  break;
280  case kBIGINT:
281  d.bigintval = inline_fixed_encoding_null_array_val(ti);
282  break;
283  case kINT:
284  d.intval = inline_fixed_encoding_null_array_val(ti);
285  break;
286  case kSMALLINT:
287  d.smallintval = inline_fixed_encoding_null_array_val(ti);
288  break;
289  case kTINYINT:
290  d.tinyintval = inline_fixed_encoding_null_array_val(ti);
291  break;
292  case kFLOAT:
293  d.floatval = NULL_ARRAY_FLOAT;
294  break;
295  case kDOUBLE:
296  d.doubleval = NULL_ARRAY_DOUBLE;
297  break;
298  case kTIME:
299  case kTIMESTAMP:
300  case kDATE:
301  d.bigintval = inline_fixed_encoding_null_array_val(ti);
302  break;
303  case kPOINT:
304  case kMULTIPOINT:
305  case kLINESTRING:
306  case kMULTILINESTRING:
307  case kPOLYGON:
308  case kMULTIPOLYGON:
309  throw std::runtime_error("Internal error: geometry type in NullArrayDatum.");
310  default:
311  throw std::runtime_error("Internal error: invalid type in NullArrayDatum.");
312  }
313  return d;
314 }
int8_t tinyintval
Definition: Datum.h:73
Definition: sqltypes.h:76
int8_t boolval
Definition: Datum.h:72
int32_t intval
Definition: Datum.h:75
float floatval
Definition: Datum.h:77
int64_t bigintval
Definition: Datum.h:76
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: Datum.h:74
Definition: sqltypes.h:80
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:72
SQLTypes get_type_for_datum(const SQLTypeInfo &ti)
Definition: Importer.cpp:260
Definition: Datum.h:71
double doubleval
Definition: Datum.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t import_export::num_import_threads ( const int32_t  copy_params_threads)
inline

Definition at line 31 of file thread_count.h.

References g_max_import_threads.

Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), foreign_storage::get_num_threads(), import_export::Importer::importDelimited(), import_export::Importer::importGDALGeo(), and import_export::Importer::importGDALRaster().

31  {
32  if (copy_params_threads > 0) {
33  return static_cast<size_t>(copy_params_threads);
34  }
35  return std::min(static_cast<size_t>(std::thread::hardware_concurrency()),
36  g_max_import_threads);
37 }
size_t g_max_import_threads
Definition: Importer.cpp:105

+ Here is the caller graph for this function:
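A sizing sketch, assuming the user-requested count arrives in a CopyParams threads field (as the COPY option suggests):

// Hedged sketch: a non-positive request means "auto", i.e.
// min(hardware_concurrency, g_max_import_threads).
const size_t num_threads = import_export::num_import_threads(copy_params.threads);
std::vector<std::future<import_export::ImportStatus>> worker_futures;
worker_futures.reserve(num_threads);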

MetadataColumnInfos import_export::parse_add_metadata_columns ( const std::string &  add_metadata_columns,
const std::string &  file_path 
)

Definition at line 35 of file MetadataColumn.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, import_export::ExpressionParser::evalAsString(), IS_INTEGER, join(), kBIGINT, kDATE, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kNULLT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, run_benchmark_import::parser, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), SQLTypeInfo::set_fixed_size(), SQLTypeInfo::set_type(), import_export::ExpressionParser::setExpression(), import_export::ExpressionParser::setStringConstant(), ColumnDescriptor::sourceName, split(), strip(), to_lower(), and to_string().

Referenced by import_export::Importer::gdalToColumnDescriptorsGeo(), import_export::Importer::gdalToColumnDescriptorsRaster(), import_export::Importer::importGDALGeo(), import_export::Importer::importGDALRaster(), and import_export::Importer::readMetadataSampleGDAL().

36  {
37  //
38  // each string is "column_name,column_type,expression"
39  //
40  // column_type can be:
41  // tinyint
42  // smallint
43  // int
44  // bigint
45  // float
46  // double
47  // date
48  // time
49  // timestamp
50  // text
51  //
52  // expression can be in terms of:
53  // filename
54  // filedir
55  // filepath
56  // etc.
57  //
58 
59  // anything to do?
60  if (add_metadata_columns.length() == 0u) {
61  return {};
62  }
63 
64  // split by ";"
65  // @TODO(se) is this safe?
66  // probably won't appear in a file name/path or a date/time string
67  std::vector<std::string> add_metadata_column_strings;
68  boost::split(add_metadata_column_strings, add_metadata_columns, boost::is_any_of(";"));
69  if (add_metadata_column_strings.size() == 0u) {
70  return {};
71  }
72 
73  ExpressionParser parser;
74 
75  // known string constants
76  auto const fn = boost::filesystem::path(file_path).filename().string();
77  auto const fd = boost::filesystem::path(file_path).parent_path().string();
78  auto const fp = file_path;
79  parser.setStringConstant("filename", fn);
80  parser.setStringConstant("filedir", fd);
81  parser.setStringConstant("filepath", fp);
82 
83  MetadataColumnInfos metadata_column_infos;
84 
85  // for each requested column...
86  for (auto const& add_metadata_column_string : add_metadata_column_strings) {
87  // strip
88  auto const add_metadata_column = strip(add_metadata_column_string);
89 
90  // tokenize and extract
91  std::vector<std::string> tokens;
92  boost::split(tokens, add_metadata_column, boost::is_any_of(","));
93  if (tokens.size() < 3u) {
94  throw std::runtime_error("Invalid metadata column info '" + add_metadata_column +
95  "' (must be of the form 'name,type,expression')");
96  }
97  auto token_itr = tokens.begin();
98  auto const column_name = strip(*token_itr++);
99  auto const data_type = strip(to_lower(*token_itr++));
100  tokens.erase(tokens.begin(), token_itr);
101  auto const expression = strip(boost::join(tokens, ","));
102 
103  // get column type
104  SQLTypes sql_type{kNULLT};
105  double range_min{0.0}, range_max{0.0};
106  if (data_type == "tinyint") {
107  sql_type = kTINYINT;
108  range_min = static_cast<double>(std::numeric_limits<int8_t>::min());
109  range_max = static_cast<double>(std::numeric_limits<int8_t>::max());
110  } else if (data_type == "smallint") {
111  sql_type = kSMALLINT;
112  range_min = static_cast<double>(std::numeric_limits<int16_t>::min());
113  range_max = static_cast<double>(std::numeric_limits<int16_t>::max());
114  } else if (data_type == "int") {
115  sql_type = kINT;
116  range_min = static_cast<double>(std::numeric_limits<int32_t>::min());
117  range_max = static_cast<double>(std::numeric_limits<int32_t>::max());
118  } else if (data_type == "bigint") {
119  sql_type = kBIGINT;
120  range_min = static_cast<double>(std::numeric_limits<int64_t>::min());
121  range_max = static_cast<double>(std::numeric_limits<int64_t>::max());
122  } else if (data_type == "float") {
123  sql_type = kFLOAT;
124  range_min = static_cast<double>(std::numeric_limits<float>::min());
125  range_max = static_cast<double>(std::numeric_limits<float>::max());
126  } else if (data_type == "double") {
127  sql_type = kDOUBLE;
128  range_min = static_cast<double>(std::numeric_limits<double>::min());
129  range_max = static_cast<double>(std::numeric_limits<double>::max());
130  } else if (data_type == "date") {
131  sql_type = kDATE;
132  } else if (data_type == "time") {
133  sql_type = kTIME;
134  } else if (data_type == "timestamp") {
135  sql_type = kTIMESTAMP;
136  } else if (data_type == "text") {
137  sql_type = kTEXT;
138  } else {
139  throw std::runtime_error("Invalid metadata column data type '" + data_type +
140  "' for column '" + column_name + "'");
141  }
142 
143  // set expression with force cast back to string
144  parser.setExpression("str(" + expression + ")");
145 
146  // evaluate
147  auto value = parser.evalAsString();
148 
149  // validate date/time/timestamp value now
150  // @TODO(se) do we need to provide for non-zero dimension?
151  try {
152  if (sql_type == kDATE) {
153  dateTimeParse<kDATE>(value, 0);
154  } else if (sql_type == kTIME) {
155  dateTimeParse<kTIME>(value, 0);
156  } else if (sql_type == kTIMESTAMP) {
157  dateTimeParse<kTIMESTAMP>(value, 0);
158  }
159  } catch (std::runtime_error& e) {
160  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
161  " value '" + value + "' for column '" + column_name + "'");
162  }
163 
164  // validate int/float/double
165  try {
166  if (IS_INTEGER(sql_type) || sql_type == kFLOAT || sql_type == kDOUBLE) {
167  size_t num_chars{0u};
168  auto const v = static_cast<double>(std::stod(value, &num_chars));
169  if (v < range_min || v > range_max) {
170  throw std::out_of_range(to_string(sql_type));
171  }
172  if (num_chars == 0u) {
173  throw std::invalid_argument("empty value");
174  }
175  }
176  } catch (std::invalid_argument& e) {
177  throw std::runtime_error("Invalid metadata column " + to_string(sql_type) +
178  " value '" + value + "' for column '" + column_name +
179  "' (" + e.what() + ")");
180  } catch (std::out_of_range& e) {
181  throw std::runtime_error("Out-of-range metadata column " + to_string(sql_type) +
182  " value '" + value + "' for column '" + column_name +
183  "' (" + e.what() + ")");
184  }
185 
186  // build column descriptor
187  ColumnDescriptor cd;
188  cd.columnName = cd.sourceName = column_name;
189  cd.columnType.set_type(sql_type);
190  cd.columnType.set_fixed_size();
191  if (sql_type == kTEXT) {
192  cd.columnType.set_compression(kENCODING_DICT);
193  cd.columnType.set_comp_param(32);
194  }
195 
196  // add to result
197  metadata_column_infos.push_back({std::move(cd), std::move(value)});
198  }
199 
200  // done
201  return metadata_column_infos;
202 }
std::string to_lower(const std::string &str)
void set_compression(EncodingType c)
Definition: sqltypes.h:481
Definition: sqltypes.h:76
SQLTypes
Definition: sqltypes.h:65
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
std::string join(T const &container, std::string const &delim)
std::string sourceName
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
void set_fixed_size()
Definition: sqltypes.h:479
specifies the content in-memory of a row in the column metadata table
void set_comp_param(int p)
Definition: sqltypes.h:482
Definition: sqltypes.h:79
Definition: sqltypes.h:80
#define IS_INTEGER(T)
Definition: sqltypes.h:304
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::string columnName
std::vector< MetadataColumnInfo > MetadataColumnInfos
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:470
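
Illustrative usage sketch (not from the generated source): the add_metadata_columns argument is a ';'-separated list of "name,type,expression" triples, and expressions may reference the bound string constants filename, filedir, and filepath. The header path, the file path, and the MetadataColumnInfo member names (column_descriptor, value) below are assumptions.

  #include <iostream>
  #include <string>

  #include "ImportExport/MetadataColumn.h"  // assumed header for parse_add_metadata_columns()

  int main() {
    // Two hypothetical metadata columns derived from the file being imported:
    // "src_file" receives the bare file name, "src_dir" its parent directory.
    const std::string add_metadata_columns =
        "src_file,text,filename;"
        "src_dir,text,filedir";
    const std::string file_path = "/data/imports/sensors.csv";  // hypothetical path

    auto const infos =
        import_export::parse_add_metadata_columns(add_metadata_columns, file_path);

    for (auto const& info : infos) {
      // member names assumed from the push_back({std::move(cd), std::move(value)}) above
      std::cout << info.column_descriptor.columnName << " = " << info.value << "\n";
    }
    return 0;
  }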


std::vector< std::unique_ptr< TypedImportBuffer > > import_export::setup_column_loaders ( const TableDescriptor td,
Loader *  loader 
)

Definition at line 6202 of file Importer.cpp.

References CHECK, import_export::Loader::get_column_descs(), and import_export::Loader::getStringDict().

Referenced by DBHandler::prepare_loader_generic().

6204  {
6205  CHECK(td);
6206  auto col_descs = loader->get_column_descs();
6207 
6208  std::vector<std::unique_ptr<TypedImportBuffer>> import_buffers;
6209  for (auto cd : col_descs) {
6210  import_buffers.emplace_back(
6211  std::make_unique<TypedImportBuffer>(cd, loader->getStringDict(cd)));
6212  }
6213 
6214  return import_buffers;
6215 }
#define CHECK(condition)
Definition: Logger.h:291
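
A minimal sketch, assuming an already-constructed Loader bound to the target table (building a Loader requires a catalog and a table descriptor and is omitted here); the header path is an assumption.

  #include <memory>
  #include <vector>

  #include "ImportExport/Importer.h"  // assumed header for setup_column_loaders();
                                      // TableDescriptor is assumed to be pulled in by it

  // Build one TypedImportBuffer per column of the target table, each wired to
  // that column's string dictionary where applicable -- the same per-column
  // loop shown in the definition above.
  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> make_import_buffers(
      const TableDescriptor* td, import_export::Loader* loader) {
    return import_export::setup_column_loaders(td, loader);
  }

The resulting buffers are typically filled row by row (e.g. via TypedImportBuffer::add_value()) and then flushed through the Loader, as in DBHandler::prepare_loader_generic().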


ArrayDatum import_export::StringToArray ( const std::string &  s,
const SQLTypeInfo ti,
const CopyParams &  copy_params 
)

Definition at line 316 of file Importer.cpp.

References append_datum(), import_export::CopyParams::array_begin, import_export::CopyParams::array_delim, import_export::CopyParams::array_end, CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), is_null(), SQLTypeInfo::is_number(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), LOG, import_export::CopyParams::null_str, NullDatum(), StringToDatum(), trim_space(), and logger::WARNING.

Referenced by import_export::TypedImportBuffer::add_value(), and import_export::TypedImportBuffer::addDefaultValues().

318  {
319  SQLTypeInfo elem_ti = ti.get_elem_type();
320  if (s == copy_params.null_str || s == "NULL" || s.empty()) {
321  return ArrayDatum(0, NULL, true);
322  }
323  if (s[0] != copy_params.array_begin || s[s.size() - 1] != copy_params.array_end) {
324  LOG(WARNING) << "Malformed array: " << s;
325  return ArrayDatum(0, NULL, true);
326  }
327  std::vector<std::string> elem_strs;
328  size_t last = 1;
329  for (size_t i = s.find(copy_params.array_delim, 1); i != std::string::npos;
330  i = s.find(copy_params.array_delim, last)) {
331  elem_strs.push_back(s.substr(last, i - last));
332  last = i + 1;
333  }
334  if (last + 1 <= s.size()) {
335  elem_strs.push_back(s.substr(last, s.size() - 1 - last));
336  }
337  if (elem_strs.size() == 1) {
338  auto str = elem_strs.front();
339  auto str_trimmed = trim_space(str.c_str(), str.length());
340  if (str_trimmed == "") {
341  elem_strs.clear(); // Empty array
342  }
343  }
344  if (!elem_ti.is_string()) {
345  size_t len = elem_strs.size() * elem_ti.get_size();
346  std::unique_ptr<int8_t, FreeDeleter> buf(
347  reinterpret_cast<int8_t*>(checked_malloc(len)));
348  int8_t* p = buf.get();
349  for (auto& es : elem_strs) {
350  auto e = trim_space(es.c_str(), es.length());
351  bool is_null = (e == copy_params.null_str) || e == "NULL";
352  if (!elem_ti.is_string() && e == "") {
353  is_null = true;
354  }
355  if (elem_ti.is_number() || elem_ti.is_time()) {
356  if (!isdigit(e[0]) && e[0] != '-') {
357  is_null = true;
358  }
359  }
360  Datum d = is_null ? NullDatum(elem_ti) : StringToDatum(e, elem_ti);
361  p = append_datum(p, d, elem_ti);
362  CHECK(p);
363  }
364  return ArrayDatum(len, buf.release(), false);
365  }
366  // must not be called for array of strings
367  CHECK(false);
368  return ArrayDatum(0, NULL, true);
369 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:580
#define LOG(tag)
Definition: Logger.h:285
bool is_number() const
Definition: sqltypes.h:576
bool is_time() const
Definition: sqltypes.h:579
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
CONSTEXPR DEVICE bool is_null(const T &value)
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:339
Datum NullDatum(const SQLTypeInfo &ti)
Definition: Datum.cpp:288
void trim_space(const char *&field_begin, const char *&field_end)
#define CHECK(condition)
Definition: Logger.h:291
bool is_string() const
Definition: sqltypes.h:561
Definition: Datum.h:71
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
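
A hedged usage sketch: parse the textual array "{1, 2, 3}" into a packed ArrayDatum of INT elements, assuming a default-constructed CopyParams supplies the '{', ',', '}' array delimiters; the header paths and the SQLTypeInfo setup are assumptions.

  #include "ImportExport/CopyParams.h"  // assumed header locations
  #include "ImportExport/Importer.h"

  void parse_int_array_example() {
    import_export::CopyParams copy_params;  // assumed defaults: array_begin '{', array_delim ',', array_end '}'

    SQLTypeInfo array_ti(kARRAY, /*notnull=*/false);
    array_ti.set_subtype(kINT);  // element type INT; get_elem_type() yields a 4-byte element

    auto datum = import_export::StringToArray("{1, 2, 3}", array_ti, copy_params);
    // On success, datum.is_null is false and datum.length == 3 * sizeof(int32_t).
  }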


ArrayDatum import_export::TDatumToArrayDatum ( const TDatum &  datum,
const SQLTypeInfo ti 
)

Definition at line 469 of file Importer.cpp.

References append_datum(), CHECK, checked_malloc(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string(), NullArray(), and TDatumToDatum().

Referenced by import_export::TypedImportBuffer::add_value().

469  {
470  SQLTypeInfo elem_ti = ti.get_elem_type();
471 
472  CHECK(!elem_ti.is_string());
473 
474  if (datum.is_null) {
475  return NullArray(ti);
476  }
477 
478  size_t len = datum.val.arr_val.size() * elem_ti.get_size();
479  int8_t* buf = (int8_t*)checked_malloc(len);
480  int8_t* p = buf;
481  for (auto& e : datum.val.arr_val) {
482  p = append_datum(p, TDatumToDatum(e, elem_ti), elem_ti);
483  CHECK(p);
484  }
485 
486  return ArrayDatum(len, buf, false);
487 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:580
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:371
Datum TDatumToDatum(const TDatum &datum, SQLTypeInfo &ti)
Definition: Importer.cpp:422
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
#define CHECK(condition)
Definition: Logger.h:291
bool is_string() const
Definition: sqltypes.h:561
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977


Datum import_export::TDatumToDatum ( const TDatum &  datum,
SQLTypeInfo ti 
)

Definition at line 422 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, decimal_to_int_type(), Datum::doubleval, Datum::floatval, SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, SQLTypeInfo::is_decimal(), kBIGINT, kBOOLEAN, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, NULL_DOUBLE, NULL_FLOAT, Datum::smallintval, Datum::tinyintval, and run_benchmark_import::type.

Referenced by TDatumToArrayDatum().

422  {
423  Datum d;
424  const auto type = ti.is_decimal() ? decimal_to_int_type(ti) : ti.get_type();
425  switch (type) {
426  case kBOOLEAN:
427  d.boolval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
428  break;
429  case kBIGINT:
430  d.bigintval =
431  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
432  break;
433  case kINT:
434  d.intval = datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
435  break;
436  case kSMALLINT:
437  d.smallintval =
438  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
439  break;
440  case kTINYINT:
441  d.tinyintval =
442  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
443  break;
444  case kFLOAT:
445  d.floatval = datum.is_null ? NULL_FLOAT : datum.val.real_val;
446  break;
447  case kDOUBLE:
448  d.doubleval = datum.is_null ? NULL_DOUBLE : datum.val.real_val;
449  break;
450  case kTIME:
451  case kTIMESTAMP:
452  case kDATE:
453  d.bigintval =
454  datum.is_null ? inline_fixed_encoding_null_val(ti) : datum.val.int_val;
455  break;
456  case kPOINT:
457  case kMULTIPOINT:
458  case kLINESTRING:
459  case kMULTILINESTRING:
460  case kPOLYGON:
461  case kMULTIPOLYGON:
462  throw std::runtime_error("Internal error: geometry type in TDatumToDatum.");
463  default:
464  throw std::runtime_error("Internal error: invalid type in TDatumToDatum.");
465  }
466  return d;
467 }
int8_t tinyintval
Definition: Datum.h:73
#define NULL_DOUBLE
Definition: sqltypes.h:76
#define NULL_FLOAT
int8_t boolval
Definition: Datum.h:72
int32_t intval
Definition: Datum.h:75
float floatval
Definition: Datum.h:77
int64_t bigintval
Definition: Datum.h:76
int16_t smallintval
Definition: Datum.h:74
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:561
Definition: sqltypes.h:80
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:72
Definition: Datum.h:71
bool is_decimal() const
Definition: sqltypes.h:570
double doubleval
Definition: Datum.h:78


static const std::string import_export::trim_space ( const char *  field,
const size_t  len 
)
static

Definition at line 247 of file Importer.cpp.

Referenced by import_export::delimited_parser::get_row(), and StringToArray().

247  {
248  size_t i = 0;
249  size_t j = len;
250  while (i < j && (field[i] == ' ' || field[i] == '\r')) {
251  i++;
252  }
253  while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
254  j--;
255  }
256  return std::string(field + i, j - i);
257 }
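
Because trim_space() is file-static in Importer.cpp it cannot be called from other translation units; the standalone sketch below restates the documented behavior (strip spaces and carriage returns from both ends of a length-delimited field) purely for illustration.

  #include <cassert>
  #include <string>

  // Mirrors the definition above: trim ' ' and '\r' from both ends.
  static std::string trim_space_copy(const char* field, const size_t len) {
    size_t i = 0;
    size_t j = len;
    while (i < j && (field[i] == ' ' || field[i] == '\r')) {
      ++i;
    }
    while (i < j && (field[j - 1] == ' ' || field[j - 1] == '\r')) {
      --j;
    }
    return std::string(field + i, j - i);
  }

  int main() {
    assert(trim_space_copy("  42 \r", 6) == "42");
    assert(trim_space_copy("\r\r", 2).empty());
    return 0;
  }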


template<class T >
bool import_export::try_cast ( const std::string &  str)

Definition at line 3207 of file Importer.cpp.

References heavydb.dtypes::T.

3207  {
3208  try {
3209  boost::lexical_cast<T>(str);
3210  } catch (const boost::bad_lexical_cast& e) {
3211  return false;
3212  }
3213  return true;
3214 }
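
try_cast<T>() simply reports whether boost::lexical_cast<T> accepts the whole string. Since it is defined in Importer.cpp, the sketch below mirrors it as a standalone template to show the expected results.

  #include <boost/lexical_cast.hpp>
  #include <iostream>
  #include <string>

  // Mirrors the definition above: true if the whole string converts cleanly to T.
  template <class T>
  bool try_cast_copy(const std::string& str) {
    try {
      boost::lexical_cast<T>(str);
    } catch (const boost::bad_lexical_cast&) {
      return false;
    }
    return true;
  }

  int main() {
    std::cout << std::boolalpha
              << try_cast_copy<int>("123") << "\n"      // true
              << try_cast_copy<int>("123abc") << "\n"   // false
              << try_cast_copy<double>("3.14") << "\n"; // true
    return 0;
  }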

Variable Documentation

std::map<std::string, ImportStatus> import_export::import_status_map
static
constexpr size_t import_export::kImportFileBufferSize = (1 << 23)
static

Definition at line 34 of file CopyParams.h.

const size_t import_export::kImportRowLimit = 10000
static
constexpr size_t import_export::max_import_buffer_resize_byte_size = 1024 * 1024 * 1024
static

Definition at line 37 of file CopyParams.h.

constexpr bool import_export::PROMOTE_LINESTRING_TO_MULTILINESTRING = false
static
constexpr bool import_export::PROMOTE_POINT_TO_MULTIPOINT = false
static
constexpr bool import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON = true
static
heavyai::shared_mutex import_export::status_mutex
static