OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::RowGroupIntervalTracker Class Reference
+ Inheritance diagram for foreign_storage::RowGroupIntervalTracker:
+ Collaboration diagram for foreign_storage::RowGroupIntervalTracker:

Public Member Functions

 RowGroupIntervalTracker (const std::set< std::string > &file_paths, FileReaderMap *file_reader_cache, std::shared_ptr< arrow::fs::FileSystem > file_system)
 
std::optional< RowGroupInterval > getNextRowGroupInterval () override
 
- Public Member Functions inherited from foreign_storage::AbstractRowGroupIntervalTracker
virtual ~AbstractRowGroupIntervalTracker ()=default
 

Private Member Functions

bool filesAreExhausted ()
 
void advanceToNextRowGroup ()
 

Private Attributes

std::set< std::string > file_paths_
 
FileReaderMap * file_reader_cache_
 
std::shared_ptr< arrow::fs::FileSystem > file_system_
 
bool is_initialized_
 
int num_row_groups_
 
int current_row_group_index_
 
std::set< std::string >::const_iterator current_file_iter_
 

Detailed Description

Definition at line 36 of file ParquetImporter.cpp.

Constructor & Destructor Documentation

foreign_storage::RowGroupIntervalTracker::RowGroupIntervalTracker ( const std::set< std::string > &  file_paths,
FileReaderMap *  file_reader_cache,
std::shared_ptr< arrow::fs::FileSystem >  file_system 
)
inline

Definition at line 38 of file ParquetImporter.cpp.

41  : file_paths_(file_paths)
42  , file_reader_cache_(file_reader_cache)
43  , file_system_(file_system)
44  , is_initialized_(false)
45  , num_row_groups_(0)
47  , current_file_iter_(file_paths_.begin()) {}
std::set< std::string >::const_iterator current_file_iter_
std::shared_ptr< arrow::fs::FileSystem > file_system_

Member Function Documentation

void foreign_storage::RowGroupIntervalTracker::advanceToNextRowGroup ( )
inline private

Definition at line 61 of file ParquetImporter.cpp.

References current_file_iter_, current_row_group_index_, file_paths_, file_reader_cache_, file_system_, filesAreExhausted(), foreign_storage::FileReaderMap::getOrInsert(), is_initialized_, and num_row_groups_.

Referenced by getNextRowGroupInterval().

61  {
64  return;
65  }
66  if (!is_initialized_) {
68  is_initialized_ = true;
69  } else {
70  if (filesAreExhausted()) { // can be possible if many concurrent requests
71  return;
72  }
73  current_file_iter_++; // advance iterator
74  }
76  if (filesAreExhausted()) {
77  num_row_groups_ = 0;
78  } else {
79  auto file_reader =
81  num_row_groups_ = file_reader->parquet_reader()->metadata()->num_row_groups();
82  }
83  }
std::set< std::string >::const_iterator current_file_iter_
const ReaderPtr getOrInsert(const std::string &path, std::shared_ptr< arrow::fs::FileSystem > &file_system)
Definition: ParquetShared.h:70
std::shared_ptr< arrow::fs::FileSystem > file_system_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::RowGroupIntervalTracker::filesAreExhausted ( )
inline private

Definition at line 59 of file ParquetImporter.cpp.

References current_file_iter_, and file_paths_.

Referenced by advanceToNextRowGroup(), and getNextRowGroupInterval().

59 { return current_file_iter_ == file_paths_.end(); }
std::set< std::string >::const_iterator current_file_iter_

+ Here is the caller graph for this function:

std::optional<RowGroupInterval> foreign_storage::RowGroupIntervalTracker::getNextRowGroupInterval ( )
inline override virtual

Implements foreign_storage::AbstractRowGroupIntervalTracker.

Definition at line 49 of file ParquetImporter.cpp.

References advanceToNextRowGroup(), current_file_iter_, current_row_group_index_, and filesAreExhausted().

49  {
51  if (filesAreExhausted()) {
52  return {};
53  }
54  return RowGroupInterval{
55  *current_file_iter_, current_row_group_index_, current_row_group_index_};
56  }
std::set< std::string >::const_iterator current_file_iter_

+ Here is the call graph for this function:

Member Data Documentation

std::set<std::string>::const_iterator foreign_storage::RowGroupIntervalTracker::current_file_iter_
private
int foreign_storage::RowGroupIntervalTracker::current_row_group_index_
private

Definition at line 91 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup(), and getNextRowGroupInterval().

std::set<std::string> foreign_storage::RowGroupIntervalTracker::file_paths_
private

Definition at line 85 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup(), and filesAreExhausted().

FileReaderMap* foreign_storage::RowGroupIntervalTracker::file_reader_cache_
private

Definition at line 86 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

std::shared_ptr<arrow::fs::FileSystem> foreign_storage::RowGroupIntervalTracker::file_system_
private

Definition at line 87 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

bool foreign_storage::RowGroupIntervalTracker::is_initialized_
private

Definition at line 89 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().

int foreign_storage::RowGroupIntervalTracker::num_row_groups_
private

Definition at line 90 of file ParquetImporter.cpp.

Referenced by advanceToNextRowGroup().


The documentation for this class was generated from the following file: