OmniSciDB
a5dc49c757
|
#include <RegexFileBufferParser.h>
Public Member Functions | |
RegexFileBufferParser (const ForeignTable *foreign_table) | |
ParseBufferResult | parseBuffer (ParseBufferRequest &request, bool convert_data_blocks, bool columns_are_pre_filtered=false, bool skip_dict_encoding=false) const override |
import_export::CopyParams | validateAndGetCopyParams (const ForeignTable *foreign_table) const override |
size_t | findRowEndPosition (size_t &alloc_size, std::unique_ptr< char[]> &buffer, size_t &buffer_size, const import_export::CopyParams &copy_params, const size_t buffer_first_row_index, unsigned int &num_rows_in_buffer, FileReader *file_reader) const override |
void | validateFiles (const FileReader *file_reader, const ForeignTable *foreign_table) const override |
Static Public Member Functions | |
static void | setMaxBufferResize (size_t max_buffer_resize) |
Static Public Member Functions inherited from foreign_storage::TextFileBufferParser | |
static std::map< int, DataBlockPtr > | convertImportBuffersToDataBlocks (const std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, const bool skip_dict_encoding=false) |
static bool | isCoordinateScalar (const std::string_view datum) |
static void | processGeoColumn (std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, size_t &col_idx, const import_export::CopyParams &copy_params, std::list< const ColumnDescriptor * >::iterator &cd_it, std::vector< std::string_view > &row, size_t &import_idx, bool is_null, size_t first_row_index, size_t row_index_plus_one, std::shared_ptr< Catalog_Namespace::Catalog > catalog) |
static void | fillRejectedRowWithInvalidData (const std::list< const ColumnDescriptor * > &columns, std::list< const ColumnDescriptor * >::iterator &cd_it, const size_t col_idx, ParseBufferRequest &request) |
static bool | isNullDatum (const std::string_view datum, const ColumnDescriptor *column, const std::string &null_indicator) |
Static Public Attributes | |
static const std::string | LINE_REGEX_KEY = "LINE_REGEX" |
static const std::string | LINE_START_REGEX_KEY = "LINE_START_REGEX" |
static const std::string | HEADER_KEY = "HEADER" |
Static Public Attributes inherited from foreign_storage::TextFileBufferParser | |
static const std::string | BUFFER_SIZE_KEY = "BUFFER_SIZE" |
Protected Member Functions | |
virtual bool | regexMatchColumns (const std::string &row_str, const boost::regex &line_regex, size_t logical_column_count, std::vector< std::string > &parsed_columns_str, std::vector< std::string_view > &parsed_columns_sv, const std::string &file_path) const |
virtual bool | shouldRemoveNonMatches () const |
virtual bool | shouldTruncateStringValues () const |
Static Private Member Functions | |
static size_t | getMaxBufferResize () |
Private Attributes | |
boost::regex | line_regex_ |
std::optional< boost::regex > | line_start_regex_ |
Static Private Attributes | |
static size_t | max_buffer_resize_ |
static bool | skip_first_line_ {false} |
Definition at line 23 of file RegexFileBufferParser.h.
foreign_storage::RegexFileBufferParser::RegexFileBufferParser | ( | const ForeignTable * | foreign_table | ) |
Definition at line 152 of file RegexFileBufferParser.cpp.
|
overridevirtual |
Finds and returns the offset of the end of the last row in the given buffer. If the buffer does not contain at least one row, the buffer is extended with more content from the file until a row is read. An exception is thrown if the buffer is extended to a maximum threshold and at least one row has still not been read.
Implements foreign_storage::TextFileBufferParser.
Definition at line 388 of file RegexFileBufferParser.cpp.
References CHECK, CHECK_EQ, CHECK_GT, import_export::delimited_parser::extend_buffer(), foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::find_last_end_of_line(), foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_row_count(), getMaxBufferResize(), foreign_storage::FileReader::isEndOfLastFile(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, line_regex_, line_start_regex_, foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex(), import_export::delimited_parser::max_buffer_resize, and shouldRemoveNonMatches().
|
staticprivate |
Definition at line 479 of file RegexFileBufferParser.cpp.
References max_buffer_resize_.
Referenced by findRowEndPosition().
|
overridevirtual |
Parses a given file buffer and returns data blocks for each column in the file along with metadata related to rows and row offsets within the buffer.
Implements foreign_storage::TextFileBufferParser.
Definition at line 160 of file RegexFileBufferParser.cpp.
References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, CHECK, foreign_storage::TextFileBufferParser::convertImportBuffersToDataBlocks(), foreign_storage::ParseBufferRequest::copy_params, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::TextFileBufferParser::fillRejectedRowWithInvalidData(), foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::foreign_table_schema, foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_next_row(), foreign_storage::ParseBufferRequest::getCatalog(), foreign_storage::ParseBufferRequest::getColumns(), foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::import_buffers, is_null(), foreign_storage::TextFileBufferParser::isNullDatum(), import_export::CopyParams::line_delim, line_regex_, line_start_regex_, StringDictionary::MAX_STRLEN, import_export::CopyParams::null_str, foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::TextFileBufferParser::processGeoColumn(), regexMatchColumns(), run_benchmark_import::result, shouldRemoveNonMatches(), shouldTruncateStringValues(), and foreign_storage::ParseBufferRequest::track_rejected_rows.
|
protectedvirtual |
Reimplemented in foreign_storage::LogFileBufferParser.
Definition at line 330 of file RegexFileBufferParser.cpp.
References CHECK_GT, and foreign_storage::throw_number_of_columns_mismatch_error().
Referenced by parseBuffer(), and foreign_storage::LogFileBufferParser::regexMatchColumns().
|
static |
Definition at line 475 of file RegexFileBufferParser.cpp.
References import_export::delimited_parser::max_buffer_resize, and max_buffer_resize_.
|
protectedvirtual |
Reimplemented in foreign_storage::LogFileBufferParser.
Definition at line 483 of file RegexFileBufferParser.cpp.
Referenced by findRowEndPosition(), and parseBuffer().
|
protectedvirtual |
Reimplemented in foreign_storage::LogFileBufferParser.
Definition at line 487 of file RegexFileBufferParser.cpp.
Referenced by parseBuffer().
|
overridevirtual |
Validates foreign table parse options and returns a CopyParams object upon successful validation. An exception is thrown if validation fails.
Implements foreign_storage::TextFileBufferParser.
Definition at line 359 of file RegexFileBufferParser.cpp.
References foreign_storage::TextFileBufferParser::BUFFER_SIZE_KEY, foreign_storage::ForeignTable::GEO_VALIDATE_GEOMETRY_KEY, HEADER_KEY, import_export::kHasHeader, import_export::kNoHeader, foreign_storage::OptionsContainer::options, import_export::CopyParams::plain_text, foreign_storage::AbstractFileStorageDataWrapper::THREADS_KEY, and foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_and_get_bool_value().
|
overridevirtual |
Performs basic validation of files to be parsed.
Implements foreign_storage::TextFileBufferParser.
Definition at line 455 of file RegexFileBufferParser.cpp.
References CHECK, foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::get_line_start_regex(), foreign_storage::FileReader::getFirstLineForEachFile(), parse_ast::line, line_start_regex_, and foreign_storage::anonymous_namespace{RegexFileBufferParser.cpp}::line_starts_with_regex().
|
inlinestatic |
Definition at line 51 of file RegexFileBufferParser.h.
Referenced by validateAndGetCopyParams().
|
private |
Definition at line 74 of file RegexFileBufferParser.h.
Referenced by findRowEndPosition(), and parseBuffer().
|
inlinestatic |
|
private |
Definition at line 75 of file RegexFileBufferParser.h.
Referenced by findRowEndPosition(), parseBuffer(), and validateFiles().
|
inlinestatic |
|
inlinestaticprivate |
Definition at line 68 of file RegexFileBufferParser.h.
Referenced by getMaxBufferResize(), and setMaxBufferResize().
|
inlinestaticprivate |
Definition at line 72 of file RegexFileBufferParser.h.