OmniSciDB
a5dc49c757
|
#include <AbstractTextFileDataWrapper.h>
Classes | |
struct | ResidualBuffer |
Public Member Functions | |
AbstractTextFileDataWrapper () | |
AbstractTextFileDataWrapper (const int db_id, const ForeignTable *foreign_table) | |
AbstractTextFileDataWrapper (const int db_id, const ForeignTable *foreign_table, const UserMapping *user_mapping, const bool disable_cache) | |
void | populateChunkMetadata (ChunkMetadataVector &chunk_metadata_vector) override |
void | populateChunkBuffers (const ChunkToBufferMap &required_buffers, const ChunkToBufferMap &optional_buffers, AbstractBuffer *delete_buffer) override |
std::string | getSerializedDataWrapper () const override |
void | restoreDataWrapperInternals (const std::string &file_path, const ChunkMetadataVector &chunk_metadata) override |
bool | isRestored () const override |
ParallelismLevel | getCachedParallelismLevel () const override |
ParallelismLevel | getNonCachedParallelismLevel () const override |
bool | isLazyFragmentFetchingEnabled () const override |
Public Member Functions inherited from foreign_storage::AbstractFileStorageDataWrapper | |
AbstractFileStorageDataWrapper () | |
void | validateServerOptions (const ForeignServer *foreign_server) const override |
void | validateTableOptions (const ForeignTable *foreign_table) const override |
const std::set < std::string_view > & | getSupportedTableOptions () const override |
void | validateUserMappingOptions (const UserMapping *user_mapping, const ForeignServer *foreign_server) const override |
const std::set < std::string_view > & | getSupportedUserMappingOptions () const override |
const std::set< std::string > | getAlterableTableOptions () const override |
Public Member Functions inherited from foreign_storage::ForeignDataWrapper | |
ForeignDataWrapper ()=default | |
virtual | ~ForeignDataWrapper ()=default |
virtual void | validateSchema (const std::list< ColumnDescriptor > &columns) const |
Protected Member Functions | |
virtual const TextFileBufferParser & | getFileBufferParser () const =0 |
virtual std::optional< size_t > | getMaxFileCount () const |
Private Member Functions | |
AbstractTextFileDataWrapper (const ForeignTable *foreign_table) | |
void | iterativeFileScan (ChunkMetadataVector &chunk_metadata_vector, IterativeFileScanParameters &file_scan_param) |
void | populateChunks (std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, int fragment_id, AbstractBuffer *delete_buffer) |
void | populateChunkMapForColumns (const std::set< const ColumnDescriptor * > &columns, const int fragment_id, const ChunkToBufferMap &buffers, std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map) |
void | updateMetadata (std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, int fragment_id) |
void | updateRolledOffChunks (const std::set< std::string > &rolled_off_files, const std::map< int32_t, const ColumnDescriptor * > &column_by_id) |
Private Attributes | |
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > | chunk_metadata_map_ |
std::map< int, FileRegions > | fragment_id_to_file_regions_map_ |
std::unique_ptr< FileReader > | file_reader_ |
const int | db_id_ |
const ForeignTable * | foreign_table_ |
std::map< ChunkKey, std::unique_ptr < ForeignStorageBuffer > > | chunk_encoder_buffers_ |
size_t | num_rows_ |
size_t | append_start_offset_ |
bool | is_restored_ |
const UserMapping * | user_mapping_ |
const bool | disable_cache_ |
bool | is_first_file_scan_call_ |
bool | is_file_scan_in_progress_ |
int | iterative_scan_last_fragment_id_ |
MetadataScanMultiThreadingParams | multi_threading_params_ |
size_t | buffer_size_ |
size_t | thread_count_ |
ResidualBuffer | residual_buffer_ |
Additional Inherited Members | |
Public Types inherited from foreign_storage::ForeignDataWrapper | |
enum | ParallelismLevel { NONE, INTRA_FRAGMENT, INTER_FRAGMENT } |
Static Public Member Functions inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static shared::FilePathOptions | getFilePathOptions (const ForeignTable *foreign_table) |
Static Public Attributes inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static const std::string | STORAGE_TYPE_KEY = "STORAGE_TYPE" |
static const std::string | BASE_PATH_KEY = "BASE_PATH" |
static const std::string | FILE_PATH_KEY = "FILE_PATH" |
static const std::string | REGEX_PATH_FILTER_KEY = "REGEX_PATH_FILTER" |
static const std::string | LOCAL_FILE_STORAGE_TYPE = "LOCAL_FILE" |
static const std::string | S3_STORAGE_TYPE = "AWS_S3" |
static const std::string | FILE_SORT_ORDER_BY_KEY = shared::FILE_SORT_ORDER_BY_KEY |
static const std::string | FILE_SORT_REGEX_KEY = shared::FILE_SORT_REGEX_KEY |
static const std::string | ALLOW_FILE_ROLL_OFF_KEY = "ALLOW_FILE_ROLL_OFF" |
static const std::string | THREADS_KEY = "THREADS" |
static const std::array < std::string, 1 > | supported_storage_types |
Static Protected Member Functions inherited from foreign_storage::AbstractFileStorageDataWrapper | |
static std::string | getFullFilePath (const ForeignTable *foreign_table) |
Returns the path to the source file/dir of the table. Depending on options this may result from a concatenation of server and table path options. More... | |
static bool | allowFileRollOff (const ForeignTable *foreign_table) |
Definition at line 92 of file AbstractTextFileDataWrapper.h.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | ) |
Definition at line 36 of file AbstractTextFileDataWrapper.cpp.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | const int | db_id, |
const ForeignTable * | foreign_table | ||
) |
Definition at line 45 of file AbstractTextFileDataWrapper.cpp.
foreign_storage::AbstractTextFileDataWrapper::AbstractTextFileDataWrapper | ( | const int | db_id, |
const ForeignTable * | foreign_table, | ||
const UserMapping * | user_mapping, | ||
const bool | disable_cache | ||
) |
Definition at line 57 of file AbstractTextFileDataWrapper.cpp.
|
private |
|
inline override virtual |
Gets the desired level of parallelism for the data wrapper when a cache is in use. This affects the optional buffers that the data wrapper is made aware of during data requests.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 115 of file AbstractTextFileDataWrapper.h.
References foreign_storage::ForeignDataWrapper::INTRA_FRAGMENT.
|
protected pure virtual |
Implemented in foreign_storage::CsvDataWrapper, foreign_storage::RegexParserDataWrapper, and foreign_storage::InternalLogsDataWrapper.
Referenced by iterativeFileScan(), populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
protected virtual |
Reimplemented in foreign_storage::InternalLogsDataWrapper.
Definition at line 1763 of file AbstractTextFileDataWrapper.cpp.
Referenced by iterativeFileScan(), and populateChunkMetadata().
|
inline override virtual |
Gets the desired level of parallelism for the data wrapper when no cache is in use. This affects the optional buffers that the data wrapper is made aware of during data requests.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 117 of file AbstractTextFileDataWrapper.h.
References foreign_storage::ForeignDataWrapper::INTRA_FRAGMENT.
|
override virtual |
Serializes the internal state of the wrapper into a file at the given path, if implemented.
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1690 of file AbstractTextFileDataWrapper.cpp.
References json_utils::add_value_to_object(), append_start_offset_, file_reader_, fragment_id_to_file_regions_map_, num_rows_, and json_utils::write_to_string().
|
inline override virtual |
If true, the data wrapper implements a lazy fragment fetching mode. This mode allows requests for fragments to be issued to populateChunks without the prerequisite that populateChunkMetadata has successfully finished execution. This is an optimization that has some specific use-cases and is not required.
NOTE: this mode is not guaranteed to work as expected when combined with certain types of refresh modes such as append. This is subject to change in the future, but has no impact on the intended use-cases of this mode.
Reimplemented from foreign_storage::ForeignDataWrapper.
Definition at line 121 of file AbstractTextFileDataWrapper.h.
|
override virtual |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1759 of file AbstractTextFileDataWrapper.cpp.
References is_restored_.
|
private |
Implements an iterative file scan that enables populating chunks fragment by fragment.
Definition at line 1527 of file AbstractTextFileDataWrapper.cpp.
References append_start_offset_, threading_serial::async(), buffer_size_, CHECK, chunk_metadata_map_, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, db_id_, DEBUG_TIMER, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, disable_cache_, foreign_storage::dispatch_scan_requests_with_exception_handling(), file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_buffer_size(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_thread_count(), Catalog_Namespace::SysCatalog::getCatalog(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFilePathOptions(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), getMaxFileCount(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::initialize_non_append_mode_scan(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::is_file_scan_finished(), is_file_scan_in_progress_, is_first_file_scan_call_, foreign_storage::ForeignTable::isAppendMode(), iterative_scan_last_fragment_id_, multi_threading_params_, num_rows_, foreign_storage::OptionsContainer::options, run_benchmark_import::parser, foreign_storage::populate_chunks(), foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::reset_multithreading_params(), residual_buffer_, TableDescriptor::tableId, thread_count_, user_mapping_, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
Referenced by populateChunks().
|
override virtual |
Populates given chunk buffers identified by chunk keys. All provided chunk buffers are expected to be for the same fragment.
required_buffers | - chunk buffers that must always be populated |
optional_buffers | - chunk buffers that can be optionally populated, if the data wrapper has to scan through chunk data anyway (typically for row-wise data formats) |
delete_buffer | - chunk buffer for the fragment's delete column; if non-null, the data wrapper is expected to mark deleted rows in the buffer and continue processing |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 118 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHUNK_KEY_FRAGMENT_IDX, db_id_, DEBUG_TIMER, foreign_table_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_columns(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), is_file_scan_in_progress_, populateChunkMapForColumns(), populateChunks(), TableDescriptor::tableId, and updateMetadata().
|
private |
Definition at line 103 of file AbstractTextFileDataWrapper.cpp.
References chunk_metadata_map_, db_id_, foreign_table_, foreign_storage::init_chunk_for_column(), and TableDescriptor::tableId.
Referenced by populateChunkBuffers().
|
override virtual |
Populates provided chunk metadata vector with metadata for table specified in given chunk key. Metadata scan for text file(s) configured for foreign table occurs in parallel whenever appropriate. Parallel processing involves the main thread creating ParseBufferRequest objects, which contain buffers with text content read from file and adding these request objects to a queue that is consumed by a fixed number of threads. After request processing, request objects are put back into a pool for reuse for subsequent requests in order to avoid unnecessary allocation of new buffers.
chunk_metadata_vector | - vector to be populated with chunk metadata |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1373 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), foreign_storage::AbstractFileStorageDataWrapper::allowFileRollOff(), append_start_offset_, threading_serial::async(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, CHECK, foreign_storage::MultiFileReader::checkForRolledOffFiles(), foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, chunk_encoder_buffers_, CHUNK_KEY_COLUMN_IDX, chunk_metadata_map_, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, db_id_, DEBUG_TIMER, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, disable_cache_, foreign_storage::dispatch_scan_requests_with_exception_handling(), file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_buffer_size(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_thread_count(), Catalog_Namespace::SysCatalog::getCatalog(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFilePathOptions(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), getMaxFileCount(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::initialize_non_append_mode_scan(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::ForeignTable::isAppendMode(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, num_rows_, foreign_storage::OptionsContainer::options, run_benchmark_import::parser, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::scan_metadata(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::skip_metadata_scan(), foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, TableDescriptor::tableId, UNREACHABLE, updateRolledOffChunks(), user_mapping_, and 
foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
|
private |
Populates provided chunks with appropriate data by parsing all file regions containing chunk data.
column_id_to_chunk_map | - map of column id to chunks to be populated |
fragment_id | - fragment id of given chunks |
delete_buffer | - optional buffer to store deleted row indices |
Definition at line 339 of file AbstractTextFileDataWrapper.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, db_id_, file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_buffer_size(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_thread_count(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), Data_Namespace::AbstractBuffer::getMemoryPtr(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::is_file_scan_finished(), is_file_scan_in_progress_, is_first_file_scan_call_, iterativeFileScan(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, multi_threading_params_, foreign_storage::OptionsContainer::options, foreign_storage::parse_file_regions(), run_benchmark_import::parser, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::resize_delete_buffer(), run_benchmark_import::result, foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::throw_fragment_id_out_of_bounds_error(), UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
Referenced by populateChunkBuffers().
|
override virtual |
Restores the internal state of the data wrapper.
file_path | - location of file created by serializeMetadata |
chunk_metadata | - vector of chunk metadata recovered from disk |
Implements foreign_storage::ForeignDataWrapper.
Definition at line 1712 of file AbstractTextFileDataWrapper.cpp.
References append_start_offset_, CHECK, chunk_encoder_buffers_, chunk_metadata_map_, file_reader_, foreign_storage::ForeignTable::foreign_server, foreign_table_, fragment_id_to_file_regions_map_, json_utils::get_value_from_object(), getFileBufferParser(), foreign_storage::AbstractFileStorageDataWrapper::getFullFilePath(), is_restored_, foreign_storage::ForeignTable::isAppendMode(), foreign_storage::AbstractFileStorageDataWrapper::LOCAL_FILE_STORAGE_TYPE, num_rows_, foreign_storage::OptionsContainer::options, json_utils::read_from_file(), foreign_storage::AbstractFileStorageDataWrapper::STORAGE_TYPE_KEY, UNREACHABLE, and foreign_storage::TextFileBufferParser::validateAndGetCopyParams().
|
private |
Definition at line 147 of file AbstractTextFileDataWrapper.cpp.
References CHECK, chunk_metadata_map_, db_id_, foreign_table_, shared::get_from_map(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::skip_metadata_scan(), and TableDescriptor::tableId.
Referenced by populateChunkBuffers().
|
private |
Definition at line 1645 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_FRAGMENT_IDX, chunk_metadata_map_, shared::contains(), fragment_id_to_file_regions_map_, shared::get_from_map(), and foreign_storage::get_placeholder_metadata().
Referenced by populateChunkMetadata().
|
private |
Definition at line 181 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 198 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 177 of file AbstractTextFileDataWrapper.h.
Referenced by populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 168 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), populateChunkMapForColumns(), populateChunkMetadata(), restoreDataWrapperInternals(), updateMetadata(), and updateRolledOffChunks().
|
private |
Definition at line 173 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), populateChunkBuffers(), populateChunkMapForColumns(), populateChunkMetadata(), populateChunks(), and updateMetadata().
|
private |
Definition at line 188 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunkMetadata().
|
private |
Definition at line 171 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), populateChunks(), and restoreDataWrapperInternals().
|
private |
Definition at line 174 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), populateChunkBuffers(), populateChunkMapForColumns(), populateChunkMetadata(), populateChunks(), restoreDataWrapperInternals(), and updateMetadata().
|
private |
Definition at line 169 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), populateChunks(), restoreDataWrapperInternals(), and updateRolledOffChunks().
|
private |
Definition at line 191 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), populateChunkBuffers(), and populateChunks().
|
private |
Definition at line 190 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunks().
|
private |
Definition at line 183 of file AbstractTextFileDataWrapper.h.
Referenced by isRestored(), and restoreDataWrapperInternals().
|
private |
Definition at line 194 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 197 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunks().
|
private |
Definition at line 179 of file AbstractTextFileDataWrapper.h.
Referenced by getSerializedDataWrapper(), iterativeFileScan(), populateChunkMetadata(), and restoreDataWrapperInternals().
|
private |
Definition at line 201 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 199 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan().
|
private |
Definition at line 185 of file AbstractTextFileDataWrapper.h.
Referenced by iterativeFileScan(), and populateChunkMetadata().