OmniSciDB
a5dc49c757
|
#include <Importer.h>
Public Member Functions | |
Detector (const boost::filesystem::path &fp, CopyParams &cp) | |
std::vector< std::string > | get_headers () |
std::vector< std::vector < std::string > > | get_sample_rows (size_t n) |
std::vector< SQLTypeInfo > | getBestColumnTypes () const |
Public Member Functions inherited from import_export::DataStreamSink | |
DataStreamSink () | |
DataStreamSink (const CopyParams &copy_params, const std::string file_path) | |
virtual | ~DataStreamSink () |
const CopyParams & | get_copy_params () const |
void | import_compressed (std::vector< std::string > &file_paths, const Catalog_Namespace::SessionInfo *session_info) |
Static Public Member Functions | |
static SQLTypes | detect_sqltype (const std::string &str) |
Public Attributes | |
std::vector< std::vector < std::string > > | raw_rows |
bool | has_headers = false |
Private Member Functions | |
void | init () |
void | read_file () |
void | detect_row_delimiter () |
void | split_raw_data () |
std::vector< SQLTypes > | detect_column_types (const std::vector< std::string > &row) |
void | find_best_sqltypes () |
std::vector< SQLTypes > | find_best_sqltypes (const std::vector< std::vector< std::string >> &raw_rows, const CopyParams &copy_params) |
std::vector< SQLTypes > | find_best_sqltypes (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const CopyParams &copy_params) |
std::vector< EncodingType > | find_best_encodings (const std::vector< std::vector< std::string >>::const_iterator &row_begin, const std::vector< std::vector< std::string >>::const_iterator &row_end, const std::vector< SQLTypes > &best_types) |
bool | detect_headers (const std::vector< SQLTypes > &first_types, const std::vector< SQLTypes > &rest_types) |
void | find_best_sqltypes_and_headers () |
ImportStatus | importDelimited (const std::string &file_path, const bool decompressed, const Catalog_Namespace::SessionInfo *session_info) override |
Static Private Member Functions | |
static bool | more_restrictive_sqltype (const SQLTypes a, const SQLTypes b) |
Private Attributes | |
std::string | raw_data |
boost::filesystem::path | file_path |
std::chrono::duration< double > | timeout {1} |
std::string | line1 |
std::vector< SQLTypes > | best_sqltypes |
std::vector< EncodingType > | best_encodings |
Additional Inherited Members | |
Protected Member Functions inherited from import_export::DataStreamSink | |
ImportStatus | archivePlumber (const Catalog_Namespace::SessionInfo *session_info) |
Protected Attributes inherited from import_export::DataStreamSink | |
CopyParams | copy_params |
const std::string | file_path |
FILE * | p_file = nullptr |
ImportStatus | import_status_ |
heavyai::shared_mutex | import_mutex_ |
size_t | total_file_size {0} |
std::vector< size_t > | file_offsets |
std::mutex | file_offsets_mutex |
Definition at line 729 of file Importer.h.
import_export::Detector::Detector | ( | const boost::filesystem::path & | fp, |
CopyParams & | cp | ||
) |
Definition at line 3670 of file Importer.cpp.
References g_enable_fsi, init(), import_export::kParquetFile, read_file(), and import_export::CopyParams::source_type.
|
private |
Definition at line 3305 of file Importer.cpp.
References detect_sqltype().
Referenced by find_best_sqltypes_and_headers().
|
private |
Definition at line 3449 of file Importer.cpp.
References has_headers, and kTEXT.
Referenced by find_best_sqltypes_and_headers().
|
private |
Definition at line 3156 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, import_export::CopyParams::delimiter, and file_path.
Referenced by init().
|
static |
Definition at line 3216 of file Importer.cpp.
References dateTimeParseOptional< kDATE >(), dateTimeParseOptional< kTIME >(), dateTimeParseOptional< kTIMESTAMP >(), kBIGINT, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, import_export::PROMOTE_LINESTRING_TO_MULTILINESTRING, import_export::PROMOTE_POINT_TO_MULTIPOINT, import_export::PROMOTE_POLYGON_TO_MULTIPOLYGON, shared::transform(), and run_benchmark_import::type.
Referenced by detect_column_types(), and find_best_sqltypes().
|
private |
Definition at line 3413 of file Importer.cpp.
References file_path, IS_STRING, kENCODING_DICT, kENCODING_NONE, and raw_rows.
Referenced by find_best_sqltypes_and_headers().
|
private |
Definition at line 3360 of file Importer.cpp.
References best_sqltypes, import_export::DataStreamSink::copy_params, and raw_rows.
Referenced by find_best_sqltypes(), and find_best_sqltypes_and_headers().
|
private |
Definition at line 3364 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, find_best_sqltypes(), and raw_rows.
|
private |
Definition at line 3370 of file Importer.cpp.
References detect_sqltype(), run_benchmark_import::end_time, file_path, kCHAR, kTEXT, more_restrictive_sqltype(), import_export::CopyParams::null_str, raw_rows, and timeout.
|
private |
Definition at line 3337 of file Importer.cpp.
References best_encodings, best_sqltypes, import_export::DataStreamSink::copy_params, detect_column_types(), detect_headers(), find_best_encodings(), find_best_sqltypes(), import_export::CopyParams::has_header, has_headers, import_export::kAutoDetect, import_export::kHasHeader, import_export::kNoHeader, and raw_rows.
Referenced by init().
std::vector< std::string > import_export::Detector::get_headers | ( | ) |
Definition at line 3479 of file Importer.cpp.
References best_sqltypes, has_headers, raw_rows, and to_string().
Referenced by DBHandler::detect_column_types().
std::vector< std::vector< std::string > > import_export::Detector::get_sample_rows | ( | size_t | n | ) |
Definition at line 3464 of file Importer.cpp.
References has_headers, anonymous_namespace{Utm.h}::n, and raw_rows.
Referenced by DBHandler::detect_column_types().
std::vector< SQLTypeInfo > import_export::Detector::getBestColumnTypes | ( | ) | const |
Definition at line 3498 of file Importer.cpp.
References best_encodings, best_sqltypes, and CHECK_EQ.
Referenced by DBHandler::detect_column_types().
|
override private virtual |
Implements import_export::DataStreamSink.
Definition at line 3086 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, run_benchmark_import::end_time, heavyai::fopen(), import_export::DataStreamSink::import_mutex_, import_export::DataStreamSink::import_status_, import_export::kImportRowLimit, parse_ast::line, line1, import_export::CopyParams::line_delim, import_export::ImportStatus::load_failed, anonymous_namespace{Utm.h}::n, import_export::DataStreamSink::p_file, raw_data, import_export::ImportStatus::rows_completed, and timeout.
|
private |
Definition at line 3080 of file Importer.cpp.
References detect_row_delimiter(), find_best_sqltypes_and_headers(), and split_raw_data().
Referenced by Detector().
|
static private |
Definition at line 3313 of file Importer.cpp.
References anonymous_namespace{Utm.h}::a, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, and kTIMESTAMP.
Referenced by find_best_sqltypes().
|
private |
Definition at line 3151 of file Importer.cpp.
References import_export::DataStreamSink::archivePlumber().
Referenced by Detector().
|
private |
Definition at line 3165 of file Importer.cpp.
References import_export::DataStreamSink::copy_params, import_export::delimited_parser::get_row(), raw_data, raw_rows, and import_export::CopyParams::threads.
Referenced by init().
|
private |
Definition at line 781 of file Importer.h.
Referenced by find_best_sqltypes_and_headers(), and getBestColumnTypes().
|
private |
Definition at line 780 of file Importer.h.
Referenced by find_best_sqltypes(), find_best_sqltypes_and_headers(), get_headers(), and getBestColumnTypes().
|
private |
Definition at line 774 of file Importer.h.
Referenced by detect_row_delimiter(), find_best_encodings(), and find_best_sqltypes().
bool import_export::Detector::has_headers = false |
Definition at line 741 of file Importer.h.
Referenced by detect_headers(), find_best_sqltypes_and_headers(), get_headers(), and get_sample_rows().
|
private |
Definition at line 776 of file Importer.h.
Referenced by importDelimited().
|
private |
Definition at line 773 of file Importer.h.
Referenced by importDelimited(), and split_raw_data().
std::vector<std::vector<std::string> > import_export::Detector::raw_rows |
Definition at line 739 of file Importer.h.
Referenced by find_best_encodings(), find_best_sqltypes(), find_best_sqltypes_and_headers(), get_headers(), get_sample_rows(), and split_raw_data().
|
private |
Definition at line 775 of file Importer.h.
Referenced by find_best_sqltypes(), and importDelimited().