OmniSciDB  a5dc49c757
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::MultiFileReader Class Reference (abstract)

#include <FileReader.h>

+ Inheritance diagram for foreign_storage::MultiFileReader:
+ Collaboration diagram for foreign_storage::MultiFileReader:

Public Member Functions

 MultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 MultiFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value)
 
size_t getRemainingSize () override
 
bool isRemainingSizeKnown () override
 
size_t read (void *buffer, size_t max_size) override
 
size_t readRegion (void *buffer, size_t offset, size_t size) override
 
bool isScanFinished () const override
 
void serialize (rapidjson::Value &value, rapidjson::Document::AllocatorType &allocator) const override
 
FirstLineByFilePath getFirstLineForEachFile () const override
 
bool isEndOfLastFile () override
 
std::string getCurrentFilePath () const override
 
virtual std::set< std::string > checkForRolledOffFiles (const shared::FilePathOptions &file_path_options)
 
- Public Member Functions inherited from foreign_storage::FileReader
 FileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
virtual ~FileReader ()=default
 
virtual void checkForMoreRows (size_t file_offset, const shared::FilePathOptions &options, const ForeignServer *server_options=nullptr, const UserMapping *user_mapping=nullptr)
 

Protected Member Functions

virtual std::vector< std::string > getAllFilePaths (const shared::FilePathOptions &file_path_options) const =0
 

Protected Attributes

std::vector< std::unique_ptr< FileReader > > files_
 
std::vector< std::string > file_locations_
 
std::vector< size_t > cumulative_sizes_
 
size_t current_index_
 
size_t current_offset_
 
size_t starting_offset_
 
bool is_end_of_last_file_
 
- Protected Attributes inherited from foreign_storage::FileReader
import_export::CopyParams copy_params_
 
std::string file_path_
 

Detailed Description

Definition at line 346 of file FileReader.h.

Constructor & Destructor Documentation

foreign_storage::MultiFileReader::MultiFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 530 of file FileReader.cpp.

532  : FileReader(file_path, copy_params)
533  , current_index_(0)
534  , current_offset_(0)
535  , starting_offset_(0)
536  , is_end_of_last_file_(false) {}
FileReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: FileReader.h:45
foreign_storage::MultiFileReader::MultiFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const rapidjson::Value &  value 
)

Definition at line 538 of file FileReader.cpp.

References CHECK, cumulative_sizes_, current_index_, current_offset_, file_locations_, json_utils::get_value_from_object(), and starting_offset_.

541  : FileReader(file_path, copy_params)
542  , current_index_(0)
543  , current_offset_(0)
544  , starting_offset_(0)
545  , is_end_of_last_file_(false) {
546  json_utils::get_value_from_object(value, file_locations_, "file_locations");
547  json_utils::get_value_from_object(value, cumulative_sizes_, "cumulative_sizes");
548  json_utils::get_value_from_object(value, current_offset_, "current_offset");
549  json_utils::get_value_from_object(value, current_index_, "current_index");
550  if (value.HasMember("starting_offset")) {
551  json_utils::get_value_from_object(value, starting_offset_, "starting_offset");
552  }
553 
554  // Validate files_metadata here, but objects will be recreated by child class
555  CHECK(value.HasMember("files_metadata"));
556  CHECK(value["files_metadata"].IsArray());
557  CHECK(file_locations_.size() == value["files_metadata"].GetArray().Size());
558 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: JsonUtils.h:270
FileReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: FileReader.h:45
#define CHECK(condition)
Definition: Logger.h:291
std::vector< std::string > file_locations_
Definition: FileReader.h:381
std::vector< size_t > cumulative_sizes_
Definition: FileReader.h:384

+ Here is the call graph for this function:

Member Function Documentation

std::set< std::string > foreign_storage::MultiFileReader::checkForRolledOffFiles ( const shared::FilePathOptions &  file_path_options)
virtual

Definition at line 615 of file FileReader.cpp.

References shared::check_for_rolled_off_file_paths(), CHECK_LE, cumulative_sizes_, current_index_, file_locations_, files_, getAllFilePaths(), and starting_offset_.

Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().

616  {
617  auto all_file_paths = getAllFilePaths(file_path_options);
618  auto rolled_off_files =
619  shared::check_for_rolled_off_file_paths(all_file_paths, file_locations_);
620  if (!rolled_off_files.empty()) {
621  files_.erase(files_.begin(), files_.begin() + rolled_off_files.size());
622  CHECK_LE(rolled_off_files.size(), cumulative_sizes_.size());
623  starting_offset_ = cumulative_sizes_[rolled_off_files.size() - 1];
624  cumulative_sizes_.erase(cumulative_sizes_.begin(),
625  cumulative_sizes_.begin() + rolled_off_files.size());
626  current_index_ -= rolled_off_files.size();
627  }
628  return rolled_off_files;
629 }
virtual std::vector< std::string > getAllFilePaths(const shared::FilePathOptions &file_path_options) const =0
std::set< std::string > check_for_rolled_off_file_paths(const std::vector< std::string > &all_file_paths, std::vector< std::string > &processed_file_paths)
#define CHECK_LE(x, y)
Definition: Logger.h:304
std::vector< std::string > file_locations_
Definition: FileReader.h:381
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
std::vector< size_t > cumulative_sizes_
Definition: FileReader.h:384

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual std::vector<std::string> foreign_storage::MultiFileReader::getAllFilePaths ( const shared::FilePathOptions &  file_path_options) const
protected pure virtual

Implemented in foreign_storage::LocalMultiFileReader.

Referenced by checkForRolledOffFiles().

+ Here is the caller graph for this function:

std::string foreign_storage::MultiFileReader::getCurrentFilePath ( ) const
override virtual

Returns the path of the currently processed file.

Implements foreign_storage::FileReader.

Definition at line 607 of file FileReader.cpp.

References CHECK_LT, current_index_, files_, and isScanFinished().

607  {
608  if (isScanFinished()) {
609  return files_.back()->getCurrentFilePath();
610  }
611  CHECK_LT(current_index_, files_.size());
612  return files_[current_index_]->getCurrentFilePath();
613 }
bool isScanFinished() const override
Definition: FileReader.h:362
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380

+ Here is the call graph for this function:

FirstLineByFilePath foreign_storage::MultiFileReader::getFirstLineForEachFile ( ) const
override virtual

Returns a map containing the first line for each file that will be read.

Implements foreign_storage::FileReader.

Definition at line 595 of file FileReader.cpp.

References files_.

595  {
596  FirstLineByFilePath first_line_by_file_path;
597  for (const auto& file : files_) {
598  first_line_by_file_path.merge(file->getFirstLineForEachFile());
599  }
600  return first_line_by_file_path;
601 }
std::map< std::string, std::string > FirstLineByFilePath
Definition: FileReader.h:37
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
size_t foreign_storage::MultiFileReader::getRemainingSize ( )
override virtual
Returns
size of the remaining content to be read

Implements foreign_storage::FileReader.

Definition at line 579 of file FileReader.cpp.

References current_index_, and files_.

579  {
580  size_t total_size = 0;
581  for (size_t index = current_index_; index < files_.size(); index++) {
582  total_size += files_[index]->getRemainingSize();
583  }
584  return total_size;
585 }
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
bool foreign_storage::MultiFileReader::isEndOfLastFile ( )
override virtual

Returns a boolean indicating whether the reader is at the end of the last file that was read.

Implements foreign_storage::FileReader.

Definition at line 603 of file FileReader.cpp.

References is_end_of_last_file_, and isScanFinished().

603  {
604  return (isScanFinished() || is_end_of_last_file_);
605 }
bool isScanFinished() const override
Definition: FileReader.h:362

+ Here is the call graph for this function:

bool foreign_storage::MultiFileReader::isRemainingSizeKnown ( )
override virtual
Returns
true if remaining size is known

Implements foreign_storage::FileReader.

Definition at line 587 of file FileReader.cpp.

References current_index_, and files_.

587  {
588  bool size_known = true;
589  for (size_t index = current_index_; index < files_.size(); index++) {
590  size_known = size_known && files_[index]->isRemainingSizeKnown();
591  }
592  return size_known;
593 }
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
bool foreign_storage::MultiFileReader::isScanFinished ( ) const
inline override virtual
Returns
true if the entire file has been read

Implements foreign_storage::FileReader.

Definition at line 362 of file FileReader.h.

References current_index_, and files_.

Referenced by foreign_storage::LocalMultiFileReader::checkForMoreRows(), getCurrentFilePath(), isEndOfLastFile(), read(), and readRegion().

362 { return (current_index_ >= files_.size()); }
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380

+ Here is the caller graph for this function:

size_t foreign_storage::MultiFileReader::read ( void *  buffer,
size_t  max_size 
)
override virtual

Read up to max_size bytes from archive into buffer, starting from the end of the last read.

Parameters
buffer- buffer to load into
max_size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::FileReader.

Definition at line 734 of file FileReader.cpp.

References foreign_storage::anonymous_namespace{FileReader.cpp}::adjust_eof(), foreign_storage::FileReader::copy_params_, cumulative_sizes_, current_index_, current_offset_, files_, is_end_of_last_file_, isScanFinished(), and import_export::CopyParams::line_delim.

734  {
735  if (isScanFinished()) {
736  return 0;
737  }
738  // Leave one extra char in case we need to insert a delimiter
739  size_t bytes_read = files_[current_index_].get()->read(buffer, max_size - 1);
740  if (files_[current_index_].get()->isScanFinished()) {
741  adjust_eof(bytes_read, max_size, static_cast<char*>(buffer), copy_params_.line_delim);
742  }
743  current_offset_ += bytes_read;
744  if (current_index_ < files_.size() && files_[current_index_].get()->isScanFinished()) {
745  cumulative_sizes_.push_back(current_offset_);
746  current_index_++;
747  is_end_of_last_file_ = true;
748  } else {
749  is_end_of_last_file_ = false;
750  }
751  return bytes_read;
752 }
import_export::CopyParams copy_params_
Definition: FileReader.h:128
bool isScanFinished() const override
Definition: FileReader.h:362
void adjust_eof(size_t &read_size, const size_t buffer_size, char *buffer, const char line_delim)
Definition: FileReader.cpp:37
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
std::vector< size_t > cumulative_sizes_
Definition: FileReader.h:384

+ Here is the call graph for this function:

size_t foreign_storage::MultiFileReader::readRegion ( void *  buffer,
size_t  offset,
size_t  size 
)
override virtual

Read up to size bytes from archive, starting at the given offset. isScanFinished() must return true to use readRegion.

Parameters
buffer- buffer to load into
offset- starting point into the archive to read
size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::FileReader.

Definition at line 754 of file FileReader.cpp.

References CHECK, foreign_storage::FileReader::copy_params_, cumulative_sizes_, files_, isScanFinished(), import_export::CopyParams::line_delim, foreign_storage::anonymous_namespace{FileReader.cpp}::offset_to_index(), and starting_offset_.

754  {
755  CHECK(isScanFinished());
756  // Get file index
757  auto index = offset_to_index(cumulative_sizes_, offset);
758  // Get offset into this file
759  size_t base = starting_offset_;
760  if (index > 0) {
761  base = cumulative_sizes_[index - 1];
762  }
763 
764  size_t read_size = size;
765  if (offset + size == cumulative_sizes_[index]) {
766  // Skip the last byte as it may have been an inserted delimiter
767  read_size--;
768  }
769  size_t bytes_read = files_[index].get()->readRegion(buffer, offset - base, read_size);
770 
771  if (offset + size == cumulative_sizes_[index]) {
772  // Re-insert delimiter
773  static_cast<char*>(buffer)[size - 1] = copy_params_.line_delim;
774  bytes_read++;
775  }
776 
777  return bytes_read;
778 }
size_t offset_to_index(const std::vector< size_t > &cumulative_sizes, size_t byte_offset)
Definition: FileReader.cpp:57
import_export::CopyParams copy_params_
Definition: FileReader.h:128
bool isScanFinished() const override
Definition: FileReader.h:362
#define CHECK(condition)
Definition: Logger.h:291
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
std::vector< size_t > cumulative_sizes_
Definition: FileReader.h:384

+ Here is the call graph for this function:

void foreign_storage::MultiFileReader::serialize ( rapidjson::Value &  value,
rapidjson::Document::AllocatorType &  allocator 
) const
override virtual

Serialize internal state to the given json object. This json will later be used to restore the reader state through a constructor. Must be called when isScanFinished() is true.

Parameters
value- json object to store needed state to; this function can store any needed data or none
allocator- allocator to use for json construction

Implements foreign_storage::FileReader.

Definition at line 560 of file FileReader.cpp.

References json_utils::add_value_to_object(), cumulative_sizes_, current_index_, current_offset_, file_locations_, files_, and starting_offset_.

561  {
562  json_utils::add_value_to_object(value, file_locations_, "file_locations", allocator);
563  json_utils::add_value_to_object(
564  value, cumulative_sizes_, "cumulative_sizes", allocator);
565  json_utils::add_value_to_object(value, current_offset_, "current_offset", allocator);
566  json_utils::add_value_to_object(value, current_index_, "current_index", allocator);
567  json_utils::add_value_to_object(value, starting_offset_, "starting_offset", allocator);
568 
569  // Serialize metadata from all files
570  rapidjson::Value files_metadata(rapidjson::kArrayType);
571  for (size_t index = 0; index < files_.size(); index++) {
572  rapidjson::Value file_metadata(rapidjson::kObjectType);
573  files_[index]->serialize(file_metadata, allocator);
574  files_metadata.PushBack(file_metadata, allocator);
575  }
576  value.AddMember("files_metadata", files_metadata, allocator);
577 };
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: JsonUtils.h:255
std::vector< std::string > file_locations_
Definition: FileReader.h:381
std::vector< std::unique_ptr< FileReader > > files_
Definition: FileReader.h:380
std::vector< size_t > cumulative_sizes_
Definition: FileReader.h:384

+ Here is the call graph for this function:

Member Data Documentation

std::vector<size_t> foreign_storage::MultiFileReader::cumulative_sizes_
protected
size_t foreign_storage::MultiFileReader::current_offset_
protected
bool foreign_storage::MultiFileReader::is_end_of_last_file_
protected
size_t foreign_storage::MultiFileReader::starting_offset_
protected

The documentation for this class was generated from the following files: