OmniSciDB
a5dc49c757
|
#include <TextFileBufferParser.h>
Public Member Functions | |
virtual ParseBufferResult | parseBuffer (ParseBufferRequest &request, bool convert_data_blocks, bool columns_are_pre_filtered=false, bool skip_dict_encoding=false) const =0 |
virtual import_export::CopyParams | validateAndGetCopyParams (const ForeignTable *foreign_table) const =0 |
virtual size_t | findRowEndPosition (size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const import_export::CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FileReader *file_reader) const =0 |
virtual void | validateFiles (const FileReader *file_reader, const ForeignTable *foreign_table) const =0 |
Static Public Member Functions | |
static std::map< int, DataBlockPtr > | convertImportBuffersToDataBlocks (const std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, const bool skip_dict_encoding=false) |
static bool | isCoordinateScalar (const std::string_view datum) |
static void | processGeoColumn (std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, std::list< const ColumnDescriptor * >::iterator &cd_it, std::vector< std::string_view > &row, size_t &import_idx, bool is_null, size_t first_row_index, size_t row_index_plus_one, std::shared_ptr< Catalog_Namespace::Catalog > catalog) |
static void | fillRejectedRowWithInvalidData (const std::list< const ColumnDescriptor * > &columns, std::list< const ColumnDescriptor * >::iterator &cd_it, const size_t col_idx, ParseBufferRequest &request) |
static bool | isNullDatum (const std::string_view datum, const ColumnDescriptor *column, const std::string &null_indicator) |
Static Public Attributes | |
static const std::string | BUFFER_SIZE_KEY = "BUFFER_SIZE" |
Static Private Member Functions | |
static void | processInvalidGeoColumn (std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, const ColumnDescriptor *cd, std::shared_ptr< Catalog_Namespace::Catalog > catalog) |
Definition at line 94 of file TextFileBufferParser.h.
|
static |
Definition at line 59 of file TextFileBufferParser.cpp.
References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, IS_STRING, kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.
Referenced by foreign_storage::CsvFileBufferParser::parseBuffer(), foreign_storage::RegexFileBufferParser::parseBuffer(), and foreign_storage::InternalSystemDataWrapper::populateChunkBuffers().
|
static |
Fills the current row of the request with invalid (null) data,
because the row will be marked as rejected.
Definition at line 171 of file TextFileBufferParser.cpp.
References foreign_storage::ParseBufferRequest::copy_params, foreign_storage::ParseBufferRequest::getCatalog(), foreign_storage::ParseBufferRequest::import_buffers, and processInvalidGeoColumn().
Referenced by foreign_storage::CsvFileBufferParser::parseBuffer(), and foreign_storage::RegexFileBufferParser::parseBuffer().
|
pure virtual |
Finds and returns the offset of the end of the last row in the given buffer. If the buffer does not contain at least one row, the buffer is extended with more content from the file until a row is read. An exception is thrown if the buffer is extended to a maximum threshold and at least one row has still not been read.
Implemented in foreign_storage::RegexFileBufferParser, and foreign_storage::CsvFileBufferParser.
Referenced by foreign_storage::dispatch_scan_requests().
|
static |
Definition at line 121 of file TextFileBufferParser.cpp.
Referenced by foreign_storage::CsvFileBufferParser::parseBuffer(), processGeoColumn(), and foreign_storage::anonymous_namespace{TextFileBufferParser.cpp}::set_coordinates_from_separate_lon_lat_columns().
|
static |
Definition at line 332 of file TextFileBufferParser.cpp.
References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_notnull(), is_null(), ImportHelpers::is_null_datum(), and SQLTypeInfo::is_string().
Referenced by foreign_storage::CsvFileBufferParser::parseBuffer(), and foreign_storage::RegexFileBufferParser::parseBuffer().
|
pure virtual |
Parses a given file buffer and returns data blocks for each column in the file along with metadata related to rows and row offsets within the buffer.
convert_data_blocks | - convert import buffers to data blocks |
columns_are_pre_filtered | - the file buffer passed into parseBuffer contains only the columns that are being requested, not all columns. |
skip_dict_encoding | - skip dictionary encoding for encoded strings; the encoding will be required to happen later in processing |
Implemented in foreign_storage::RegexFileBufferParser, and foreign_storage::CsvFileBufferParser.
Referenced by foreign_storage::parse_file_regions(), foreign_storage::populate_chunks(), and foreign_storage::scan_metadata().
|
static |
Definition at line 244 of file TextFileBufferParser.cpp.
References CHECK, geo_promoted_type_match(), import_export::CopyParams::geo_validate_geometry, Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), IS_GEO, isCoordinateScalar(), kPOINT, import_export::CopyParams::lonlat, import_export::CopyParams::null_str, foreign_storage::anonymous_namespace{TextFileBufferParser.cpp}::set_coordinates_from_separate_lon_lat_columns(), import_export::Importer::set_geo_physical_import_buffer(), import_export::CopyParams::source_srid, and to_string().
Referenced by foreign_storage::CsvFileBufferParser::parseBuffer(), and foreign_storage::RegexFileBufferParser::parseBuffer().
|
static private |
Definition at line 209 of file TextFileBufferParser.cpp.
References CHECK, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), Geospatial::GeoTypesFactory::getNullGeoColumns(), IS_GEO, import_export::CopyParams::null_str, and import_export::Importer::set_geo_physical_import_buffer().
Referenced by fillRejectedRowWithInvalidData().
|
pure virtual |
Validates foreign table parse options and returns a CopyParams object upon successful validation. An exception is thrown if validation fails.
Implemented in foreign_storage::RegexFileBufferParser, and foreign_storage::CsvFileBufferParser.
Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan(), foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata(), foreign_storage::AbstractTextFileDataWrapper::populateChunks(), and foreign_storage::AbstractTextFileDataWrapper::restoreDataWrapperInternals().
|
pure virtual |
Performs basic validation of files to be parsed.
Implemented in foreign_storage::CsvFileBufferParser, and foreign_storage::RegexFileBufferParser.
Referenced by foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::initialize_non_append_mode_scan().
|
inline static |