OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryRunner::ImportDriver Class Reference

#include <QueryRunner.h>

+ Inheritance diagram for QueryRunner::ImportDriver:
+ Collaboration diagram for QueryRunner::ImportDriver:

Public Member Functions

 ImportDriver (std::shared_ptr< Catalog_Namespace::Catalog > cat, const Catalog_Namespace::UserMetadata &user, const ExecutorDeviceType dt=ExecutorDeviceType::GPU, const std::string session_id="")
 
void importGeoTable (const std::string &file_path, const std::string &table_name, const bool compression, const bool create_table, const bool explode_collections)
 
- Public Member Functions inherited from QueryRunner::QueryRunner
std::shared_ptr
< Catalog_Namespace::SessionInfo > 
getSession () const
 
void addSessionId (const std::string &session_id, ExecutorDeviceType device_type=ExecutorDeviceType::GPU)
 
void clearSessionId ()
 
std::shared_ptr
< Catalog_Namespace::Catalog > 
getCatalog () const
 
std::shared_ptr< Calcite > getCalcite () const
 
std::shared_ptr< Executor > getExecutor () const
 
Catalog_Namespace::UserMetadata & getUserMetadata () const
 
bool gpusPresent () const
 
virtual void clearGpuMemory () const
 
virtual void clearCpuMemory () const
 
std::vector< MemoryInfo > getMemoryInfo (const Data_Namespace::MemoryLevel memory_level) const
 
BufferPoolStats getBufferPoolStats (const Data_Namespace::MemoryLevel memory_level, const bool current_db_only) const
 
virtual std::unique_ptr
< Parser::Stmt > 
createStatement (const std::string &)
 
virtual void runDDLStatement (const std::string &)
 
virtual void validateDDLStatement (const std::string &)
 
virtual std::shared_ptr
< ResultSet > 
runSQL (const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
 
virtual std::shared_ptr
< ExecutionResult > 
runSelectQuery (const std::string &query_str, CompilationOptions co, ExecutionOptions eo)
 
virtual std::shared_ptr
< ResultSet > 
runSQL (const std::string &query_str, const ExecutorDeviceType device_type, const bool hoist_literals=true, const bool allow_loop_joins=true)
 
virtual std::shared_ptr
< ExecutionResult > 
runSelectQuery (const std::string &query_str, const ExecutorDeviceType device_type, const bool hoist_literals, const bool allow_loop_joins, const bool just_explain=false)
 
virtual std::shared_ptr
< ResultSet > 
runSQLWithAllowingInterrupt (const std::string &query_str, const std::string &session_id, const ExecutorDeviceType device_type, const double running_query_check_freq=0.9, const unsigned pending_query_check_freq=1000)
 
virtual std::vector
< std::shared_ptr< ResultSet > > 
runMultipleStatements (const std::string &, const ExecutorDeviceType)
 
virtual void runImport (Parser::CopyTableStmt *import_stmt)
 
virtual std::unique_ptr
< import_export::Loader > 
getLoader (const TableDescriptor *td) const
 
RegisteredQueryHint getParsedQueryHint (const std::string &)
 
std::optional
< std::unordered_map< size_t,
std::unordered_map< unsigned,
RegisteredQueryHint > > > 
getParsedQueryHints (const std::string &query_str)
 
std::shared_ptr< const RelAlgNode > getRootNodeFromParsedQuery (const std::string &query_str)
 
std::optional
< RegisteredQueryHint > 
getParsedGlobalQueryHints (const std::string &query_str)
 
RaExecutionSequence getRaExecutionSequence (const std::string &query_str)
 
virtual std::shared_ptr
< ResultSet > 
getCalcitePlan (const std::string &query_str, bool enable_watchdog, bool is_explain_as_json_str, bool is_explain_detailed) const
 
std::tuple< QueryPlanHash,
std::shared_ptr< HashTable >
, std::optional
< HashtableCacheMetaInfo > > 
getCachedHashtableWithoutCacheKey (std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
 
std::shared_ptr< CacheItemMetric > getCacheItemMetric (QueryPlanHash cache_key, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
 
size_t getNumberOfCachedItem (CacheItemStatus item_status, CacheItemType hash_table_type, bool with_bbox_intersect_tuning_param=false) const
 
void resizeDispatchQueue (const size_t num_executors)
 
QueryPlanDagInfo getQueryInfoForDataRecyclerTest (const std::string &)
 
std::shared_ptr< RelAlgTranslator > getRelAlgTranslator (const std::string &, Executor *)
 
ExtractedQueryPlanDag extractQueryPlanDag (const std::string &)
 
std::unique_ptr< RelAlgDag > getRelAlgDag (const std::string &)
 
 QueryRunner (std::unique_ptr< Catalog_Namespace::SessionInfo > session)
 
virtual ~QueryRunner ()=default
 
void setExplainType (const ExecutorExplainType explain_type)
 

Additional Inherited Members

- Static Public Member Functions inherited from QueryRunner::QueryRunner
static QueryRunner * init (const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
 
static QueryRunner * init (const File_Namespace::DiskCacheConfig *disk_cache_config, const char *db_path, const std::vector< LeafHostInfo > &string_servers={}, const std::vector< LeafHostInfo > &leaf_servers={})
 
static QueryRunner * init (const char *db_path, const std::vector< LeafHostInfo > &string_servers, const std::vector< LeafHostInfo > &leaf_servers)
 
static QueryRunner * init (const char *db_path, const std::string &user, const std::string &pass, const std::string &db_name, const std::vector< LeafHostInfo > &string_servers, const std::vector< LeafHostInfo > &leaf_servers, const std::string &udf_filename="", bool uses_gpus=true, const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20, const bool create_user=false, const bool create_db=false, const File_Namespace::DiskCacheConfig *config=nullptr)
 
static QueryRunner * init (std::unique_ptr< Catalog_Namespace::SessionInfo > &session)
 
static QueryRunner * get ()
 
static void reset ()
 
static ExecutionOptions defaultExecutionOptionsForRunSQL (bool allow_loop_joins=true, bool just_explain=false)
 
template<typename... Ts>
static std::shared_ptr
< query_state::QueryState > 
create_query_state (Ts &&...args)
 
- Static Public Attributes inherited from QueryRunner::QueryRunner
static query_state::QueryStates query_states_
 
- Protected Member Functions inherited from QueryRunner::QueryRunner
 QueryRunner (const char *db_path, const std::string &user, const std::string &pass, const std::string &db_name, const std::vector< LeafHostInfo > &string_servers, const std::vector< LeafHostInfo > &leaf_servers, const std::string &udf_filename, bool uses_gpus, const size_t max_gpu_mem, const int reserved_gpu_mem, const bool create_user, const bool create_db, const File_Namespace::DiskCacheConfig *disk_cache_config=nullptr)
 
- Protected Attributes inherited from QueryRunner::QueryRunner
ExecutorExplainType explain_type_ = ExecutorExplainType::Default
 
Catalog_Namespace::DBMetadata db_metadata_
 
std::shared_ptr
< Catalog_Namespace::SessionInfo > 
session_info_
 
std::unique_ptr
< QueryDispatchQueue > 
dispatch_queue_
 
std::shared_ptr< QueryEngine > query_engine_
 
- Static Protected Attributes inherited from QueryRunner::QueryRunner
static std::unique_ptr
< QueryRunner > 
qr_instance_ = nullptr
 

Detailed Description

Definition at line 336 of file QueryRunner.h.

Constructor & Destructor Documentation

QueryRunner::ImportDriver::ImportDriver ( std::shared_ptr< Catalog_Namespace::Catalog >  cat,
const Catalog_Namespace::UserMetadata &  user,
const ExecutorDeviceType  dt = ExecutorDeviceType::GPU,
const std::string  session_id = "" 
)

Definition at line 1224 of file QueryRunner.cpp.

1228  : QueryRunner(
1229  std::make_unique<Catalog_Namespace::SessionInfo>(cat, user, dt, session_id)) {}
std::string cat(Ts &&...args)
QueryRunner(std::unique_ptr< Catalog_Namespace::SessionInfo > session)

Member Function Documentation

void QueryRunner::ImportDriver::importGeoTable ( const std::string &  file_path,
const std::string &  table_name,
const bool  compression,
const bool  create_table,
const bool  explode_collections 
)

Definition at line 1231 of file QueryRunner.cpp.

References cat(), CHECK, measure< TimeT >::execution(), import_export::Importer::importGDAL(), logger::INFO, join(), shared::kDataDirectoryName, kDECIMAL, kENCODING_GEOINT, kENCODING_NONE, Geospatial::kGeoColumnName, import_export::kGeoFile, kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, LOG, QueryRunner::QueryRunner::runDDLStatement(), ImportHelpers::sanitize_name(), QueryRunner::QueryRunner::session_info_, and import_export::CopyParams::source_type.

1235  {
1236  using namespace import_export;
1237 
1238  static constexpr bool kIsGeoRaster{false};
1239 
1241 
1242  CopyParams copy_params;
1244  if (compression) {
1245  copy_params.geo_coords_encoding = EncodingType::kENCODING_GEOINT;
1246  copy_params.geo_coords_comp_param = 32;
1247  } else {
1248  copy_params.geo_coords_encoding = EncodingType::kENCODING_NONE;
1249  copy_params.geo_coords_comp_param = 0;
1250  }
1251  copy_params.geo_explode_collections = explode_collections;
1252 
1253  std::map<std::string, std::string> colname_to_src;
1254  auto& cat = session_info_->getCatalog();
1255  auto cds = Importer::gdalToColumnDescriptors(
1256  file_path, kIsGeoRaster, Geospatial::kGeoColumnName, copy_params);
1257 
1258  for (auto& cd : cds) {
1259  const auto col_name_sanitized = ImportHelpers::sanitize_name(cd.columnName);
1260  const auto ret =
1261  colname_to_src.insert(std::make_pair(col_name_sanitized, cd.columnName));
1262  CHECK(ret.second);
1263  cd.columnName = col_name_sanitized;
1264  }
1265 
1266  if (create_table) {
1267  const auto td = cat.getMetadataForTable(table_name);
1268  if (td != nullptr) {
1269  throw std::runtime_error(
1270  "Error: Table " + table_name +
1271  " already exists. Possible failure to correctly re-create " +
1272  shared::kDataDirectoryName + " directory.");
1273  }
1274  if (table_name != ImportHelpers::sanitize_name(table_name)) {
1275  throw std::runtime_error("Invalid characters in table name: " + table_name);
1276  }
1277 
1278  std::string stmt{"CREATE TABLE " + table_name};
1279  std::vector<std::string> col_stmts;
1280 
1281  for (auto& cd : cds) {
1282  if (cd.columnType.get_type() == SQLTypes::kINTERVAL_DAY_TIME ||
1283  cd.columnType.get_type() == SQLTypes::kINTERVAL_YEAR_MONTH) {
1284  throw std::runtime_error(
1285  "Unsupported type: INTERVAL_DAY_TIME or INTERVAL_YEAR_MONTH for col " +
1286  cd.columnName + " (table: " + table_name + ")");
1287  }
1288 
1289  if (cd.columnType.get_type() == SQLTypes::kDECIMAL) {
1290  if (cd.columnType.get_precision() == 0 && cd.columnType.get_scale() == 0) {
1291  cd.columnType.set_precision(14);
1292  cd.columnType.set_scale(7);
1293  }
1294  }
1295 
1296  std::string col_stmt;
1297  col_stmt.append(cd.columnName + " " + cd.columnType.get_type_name() + " ");
1298 
1299  if (cd.columnType.get_compression() != EncodingType::kENCODING_NONE) {
1300  col_stmt.append("ENCODING " + cd.columnType.get_compression_name() + " ");
1301  } else {
1302  if (cd.columnType.is_string()) {
1303  col_stmt.append("ENCODING NONE");
1304  } else if (cd.columnType.is_geometry()) {
1305  if (cd.columnType.get_output_srid() == 4326) {
1306  col_stmt.append("ENCODING NONE");
1307  }
1308  }
1309  }
1310  col_stmts.push_back(col_stmt);
1311  }
1312 
1313  stmt.append(" (" + boost::algorithm::join(col_stmts, ",") + ");");
1314  runDDLStatement(stmt);
1315 
1316  LOG(INFO) << "Created table: " << table_name;
1317  } else {
1318  LOG(INFO) << "Not creating table: " << table_name;
1319  }
1320 
1321  const auto td = cat.getMetadataForTable(table_name);
1322  if (td == nullptr) {
1323  throw std::runtime_error("Error: Failed to create table " + table_name);
1324  }
1325 
1326  import_export::Importer importer(cat, td, file_path, copy_params);
1327  auto ms = measure<>::execution(
1328  [&]() { importer.importGDAL(colname_to_src, session_info_.get(), kIsGeoRaster); });
1329  LOG(INFO) << "Import Time for " << table_name << ": " << (double)ms / 1000.0 << " s";
1330 }
const std::string kDataDirectoryName
std::string cat(Ts &&...args)
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
#define LOG(tag)
Definition: Logger.h:285
std::string join(T const &container, std::string const &delim)
const std::string kGeoColumnName
Definition: ColumnNames.h:23
virtual void runDDLStatement(const std::string &)
import_export::SourceType source_type
Definition: CopyParams.h:57
std::string sanitize_name(const std::string &name, const bool underscore=false)
std::shared_ptr< Catalog_Namespace::SessionInfo > session_info_
Definition: QueryRunner.h:331
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:


The documentation for this class was generated from the following files: