OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::SingleTextFileReader Class Reference

#include <FileReader.h>

+ Inheritance diagram for foreign_storage::SingleTextFileReader:
+ Collaboration diagram for foreign_storage::SingleTextFileReader:

Public Member Functions

 SingleTextFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 SingleTextFileReader (const std::string &file_path, const import_export::CopyParams &copy_params, const rapidjson::Value &value)
 
 ~SingleTextFileReader () override
 
 SingleTextFileReader (const SingleTextFileReader &)=delete
 
SingleTextFileReader & operator= (const SingleTextFileReader &)=delete
 
size_t read (void *buffer, size_t max_size) override
 
size_t readRegion (void *buffer, size_t offset, size_t size) override
 
bool isScanFinished () const override
 
size_t getRemainingSize () override
 
bool isRemainingSizeKnown () override
 
void checkForMoreRows (size_t file_offset, const shared::FilePathOptions &options, const ForeignServer *server_options, const UserMapping *user_mapping) override
 
void serialize (rapidjson::Value &value, rapidjson::Document::AllocatorType &allocator) const override
 
- Public Member Functions inherited from foreign_storage::SingleFileReader
 SingleFileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
 ~SingleFileReader () override=default
 
FirstLineByFilePath getFirstLineForEachFile () const override
 
bool isEndOfLastFile () override
 
std::string getCurrentFilePath () const override
 
- Public Member Functions inherited from foreign_storage::FileReader
 FileReader (const std::string &file_path, const import_export::CopyParams &copy_params)
 
virtual ~FileReader ()=default
 

Private Member Functions

std::string getFirstLine () const override
 
void skipHeader () override
 

Private Attributes

std::FILE * file_
 
size_t data_size_
 
bool scan_finished_
 
size_t header_offset_
 
size_t total_bytes_read_
 

Additional Inherited Members

- Protected Attributes inherited from foreign_storage::FileReader
import_export::CopyParams copy_params_
 
std::string file_path_
 
- Static Protected Attributes inherited from foreign_storage::SingleFileReader
static constexpr size_t DEFAULT_HEADER_READ_SIZE {1024}
 

Detailed Description

Definition at line 153 of file FileReader.h.

Constructor & Destructor Documentation

foreign_storage::SingleTextFileReader::SingleTextFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params 
)

Definition at line 89 of file FileReader.cpp.

References data_size_, file_, heavyai::fopen(), foreign_storage::anonymous_namespace{FileReader.cpp}::get_data_size(), header_offset_, scan_finished_, and skipHeader().

91  : SingleFileReader(file_path, copy_params)
92  , scan_finished_(false)
93  , header_offset_(0)
94  , total_bytes_read_(0) {
95  file_ = fopen(file_path.c_str(), "rb");
96  if (!file_) {
97  throw std::runtime_error{"An error occurred when attempting to open file \"" +
98  file_path + "\". " + strerror(errno)};
99  }
100 
101  // Skip header and record offset
102  skipHeader();
103  fseek(file_, 0, SEEK_END);
104 
105  data_size_ = get_data_size(ftell(file_), header_offset_);
106  // Empty file
107  if (data_size_ == 0) {
108  scan_finished_ = true;
109  }
110 
111  if (fseek(file_, static_cast<long int>(header_offset_), SEEK_SET) != 0) {
112  throw std::runtime_error{"An error occurred when attempting to open file \"" +
113  file_path + "\". " + strerror(errno)};
114  };
115 }
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
size_t get_data_size(size_t file_size, size_t header_size)
Definition: FileReader.cpp:66
SingleFileReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: FileReader.cpp:73

+ Here is the call graph for this function:

foreign_storage::SingleTextFileReader::SingleTextFileReader ( const std::string &  file_path,
const import_export::CopyParams &  copy_params,
const rapidjson::Value &  value 
)

Definition at line 117 of file FileReader.cpp.

References data_size_, file_, heavyai::fopen(), json_utils::get_value_from_object(), header_offset_, and total_bytes_read_.

120  : SingleFileReader(file_path, copy_params)
121  , scan_finished_(true)
122  , header_offset_(0)
123  , total_bytes_read_(0) {
124  file_ = fopen(file_path.c_str(), "rb");
125  if (!file_) {
126  throw std::runtime_error{"An error occurred when attempting to open file \"" +
127  file_path + "\". " + strerror(errno)};
128  }
129  json_utils::get_value_from_object(value, header_offset_, "header_offset");
130  json_utils::get_value_from_object(value, total_bytes_read_, "total_bytes_read");
131  json_utils::get_value_from_object(value, data_size_, "data_size");
132 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: JsonUtils.h:270
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
SingleFileReader(const std::string &file_path, const import_export::CopyParams &copy_params)
Definition: FileReader.cpp:73

+ Here is the call graph for this function:

foreign_storage::SingleTextFileReader::~SingleTextFileReader ( )
inline override

Definition at line 160 of file FileReader.h.

References file_.

160 { fclose(file_); }
foreign_storage::SingleTextFileReader::SingleTextFileReader ( const SingleTextFileReader & )
delete

Member Function Documentation

void foreign_storage::SingleTextFileReader::checkForMoreRows ( size_t  file_offset,
const shared::FilePathOptions &  options,
const ForeignServer *  server_options,
const UserMapping *  user_mapping 
)
override virtual

Rescan the target files. Throws an exception if the rescan fails (i.e. the files are not in a valid appended state or are not supported).

Parameters
file_offset- where to resume the scan from (end of the last row), as not all of the bytes may have been consumed by the upstream component
server_options- only needed for S3 backed files
user_mapping- only needed for S3 backed files

Reimplemented from foreign_storage::FileReader.

Definition at line 144 of file FileReader.cpp.

References CHECK, data_size_, file_, foreign_storage::FileReader::file_path_, heavyai::fopen(), foreign_storage::anonymous_namespace{FileReader.cpp}::get_data_size(), header_offset_, isScanFinished(), scan_finished_, foreign_storage::throw_removed_row_in_file_error(), to_string(), and total_bytes_read_.

147  {
148  CHECK(isScanFinished());
149  // Re-open file and check if there is any new data in it
150  fclose(file_);
151  file_ = fopen(file_path_.c_str(), "rb");
152  if (!file_) {
153  throw std::runtime_error{"An error occurred when attempting to open file \"" +
154  file_path_ + "\". " + strerror(errno)};
155  }
156  fseek(file_, 0, SEEK_END);
157  size_t new_file_size = ftell(file_);
158  size_t new_data_size = get_data_size(new_file_size, header_offset_);
159  if (new_data_size < data_size_) {
160  throw_removed_row_in_file_error(file_path_);
161  }
162  if (fseek(file_, static_cast<long int>(file_offset + header_offset_), SEEK_SET) != 0) {
163  throw std::runtime_error{"An error occurred when attempting to read offset " +
164  std::to_string(file_offset + header_offset_) +
165  " in file: \"" + file_path_ + "\". " + strerror(errno)};
166  }
167  if (new_data_size > data_size_) {
168  scan_finished_ = false;
169  total_bytes_read_ = file_offset;
170  data_size_ = new_data_size;
171  }
172 }
bool isScanFinished() const override
Definition: FileReader.h:186
void throw_removed_row_in_file_error(const std::string &file_path)
std::string to_string(char const *&&v)
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
size_t get_data_size(size_t file_size, size_t header_size)
Definition: FileReader.cpp:66
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::string foreign_storage::SingleTextFileReader::getFirstLine ( ) const
override private virtual

Implements foreign_storage::SingleFileReader.

Definition at line 180 of file FileReader.cpp.

References CHECK, foreign_storage::FileReader::copy_params_, foreign_storage::FileReader::file_path_, parse_ast::line, and import_export::CopyParams::line_delim.

Referenced by skipHeader().

180  {
181  std::ifstream file{file_path_};
182  CHECK(file.good());
183  std::string line;
184  std::getline(file, line, copy_params_.line_delim);
185  file.close();
186  return line;
187 }
import_export::CopyParams copy_params_
Definition: FileReader.h:128
tuple line
Definition: parse_ast.py:10
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

size_t foreign_storage::SingleTextFileReader::getRemainingSize ( )
inline override virtual
Returns
size of the remaining content to be read

Implements foreign_storage::FileReader.

Definition at line 188 of file FileReader.h.

References data_size_, and total_bytes_read_.

bool foreign_storage::SingleTextFileReader::isRemainingSizeKnown ( )
inline override virtual
Returns
if remaining size is known

Implements foreign_storage::FileReader.

Definition at line 190 of file FileReader.h.

190 { return true; };
bool foreign_storage::SingleTextFileReader::isScanFinished ( ) const
inline override virtual
Returns
true if the entire file has been read

Implements foreign_storage::FileReader.

Definition at line 186 of file FileReader.h.

References scan_finished_.

Referenced by checkForMoreRows(), and readRegion().

+ Here is the caller graph for this function:

SingleTextFileReader& foreign_storage::SingleTextFileReader::operator= ( const SingleTextFileReader & )
delete
size_t foreign_storage::SingleTextFileReader::read ( void *  buffer,
size_t  max_size 
)
inline override virtual

Read up to max_size bytes from archive into buffer, starting from the end of the last read.

Parameters
buffer- buffer to load into
max_size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::FileReader.

Definition at line 166 of file FileReader.h.

References file_, scan_finished_, and total_bytes_read_.

166  {
167  size_t bytes_read = fread(buffer, 1, max_size, file_);
168  if (!scan_finished_) {
169  scan_finished_ = feof(file_);
170  }
171 
172  total_bytes_read_ += bytes_read;
173  return bytes_read;
174  }
size_t foreign_storage::SingleTextFileReader::readRegion ( void *  buffer,
size_t  offset,
size_t  size 
)
inline override virtual

Read up to size bytes from archive, starting at the given offset. isScanFinished() must return true to use readRegion.

Parameters
buffer- buffer to load into
offset- starting point into the archive to read
size- maximum number of bytes to read into the buffer
Returns
number of bytes actually read

Implements foreign_storage::FileReader.

Definition at line 176 of file FileReader.h.

References CHECK, file_, foreign_storage::FileReader::file_path_, header_offset_, isScanFinished(), and to_string().

176  {
177  CHECK(isScanFinished());
178  if (fseek(file_, static_cast<long int>(offset + header_offset_), SEEK_SET) != 0) {
179  throw std::runtime_error{"An error occurred when attempting to read offset " +
180  std::to_string(offset) + " in file: \"" + file_path_ +
181  "\". " + strerror(errno)};
182  }
183  return fread(buffer, 1, size, file_);
184  }
bool isScanFinished() const override
Definition: FileReader.h:186
std::string to_string(char const *&&v)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::SingleTextFileReader::serialize ( rapidjson::Value &  value,
rapidjson::Document::AllocatorType &  allocator 
) const
override virtual

Serialize internal state to the given JSON object. This JSON will later be used to restore the reader state through a constructor. Must be called when isScanFinished() is true.

Parameters
value- JSON object to store needed state to; this function can store any needed data or none
allocator- allocator to use for JSON construction

Implements foreign_storage::FileReader.

Definition at line 134 of file FileReader.cpp.

References json_utils::add_value_to_object(), CHECK, data_size_, header_offset_, scan_finished_, and total_bytes_read_.

136  {
137  CHECK(scan_finished_);
138  json_utils::add_value_to_object(value, header_offset_, "header_offset", allocator);
139  json_utils::add_value_to_object(
140  value, total_bytes_read_, "total_bytes_read", allocator);
141  json_utils::add_value_to_object(value, data_size_, "data_size", allocator);
142 }
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: JsonUtils.h:255
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::SingleTextFileReader::skipHeader ( )
override private virtual

Implements foreign_storage::SingleFileReader.

Definition at line 174 of file FileReader.cpp.

References foreign_storage::FileReader::copy_params_, getFirstLine(), import_export::CopyParams::has_header, header_offset_, and import_export::kNoHeader.

Referenced by SingleTextFileReader().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

size_t foreign_storage::SingleTextFileReader::data_size_
private

Definition at line 206 of file FileReader.h.

Referenced by checkForMoreRows(), getRemainingSize(), serialize(), and SingleTextFileReader().

std::FILE* foreign_storage::SingleTextFileReader::file_
private
size_t foreign_storage::SingleTextFileReader::header_offset_
private
bool foreign_storage::SingleTextFileReader::scan_finished_
private

Definition at line 208 of file FileReader.h.

Referenced by checkForMoreRows(), isScanFinished(), read(), serialize(), and SingleTextFileReader().

size_t foreign_storage::SingleTextFileReader::total_bytes_read_
private

The documentation for this class was generated from the following files: