20 #include <unordered_set>
38 namespace foreign_storage {
48 const bool do_metadata_stats_validation =
true);
54 std::shared_ptr<arrow::fs::FileSystem> file_system);
65 const std::string& file_path,
84 const bool reserve_buffers_and_set_stats =
false);
87 const int fragment_id,
99 bool isNewFile(
const std::string& file_path)
const;
101 void addNewFile(
const std::string& file_path);
110 const std::list<RowGroupMetadata>& row_group_metadata);
113 const std::vector<std::string>& file_paths)
const;
116 const std::vector<std::string>& file_paths)
const;
120 const std::list<std::shared_ptr<ChunkMetadata>>& column_chunk_metadata,
121 int32_t fragment_id);
124 const std::vector<RowGroupInterval>& row_group_intervals);
std::string getSerializedDataWrapper() const override
size_t last_fragment_row_count_
void finalizeFragmentMap()
std::unique_ptr< FileReaderMap > file_reader_cache_
void updateMetadataForRolledOffFiles(const std::set< std::string > &rolled_off_files)
std::vector< std::string > getOrderedProcessedFilePaths()
void setLastFileRowCount(const std::string &file_path)
void restoreDataWrapperInternals(const std::string &file_path, const ChunkMetadataVector &chunk_metadata_vector) override
std::vector< std::string > getAllFilePaths()
std::unique_ptr< ForeignTableSchema > schema_
void populateChunkBuffers(const ChunkToBufferMap &required_buffers, const ChunkToBufferMap &optional_buffers, AbstractBuffer *delete_buffer) override
std::map< ChunkKey, AbstractBuffer * > ChunkToBufferMap
std::mutex delete_buffer_mutex_
void metadataScanRowGroupIntervals(const std::vector< RowGroupInterval > &row_group_intervals)
std::map< int, std::vector< RowGroupInterval > > fragment_to_row_group_interval_map_
void resetParquetMetadata()
std::map< ChunkKey, std::shared_ptr< ChunkMetadata > > chunk_metadata_map_
ParallelismLevel getCachedParallelismLevel() const override
const bool do_metadata_stats_validation_
void addNewFile(const std::string &file_path)
void fetchChunkMetadata()
DataPreview getDataPreview(const size_t num_rows)
const ForeignTable * foreign_table_
void metadataScanRowGroupMetadata(const std::list< RowGroupMetadata > &row_group_metadata)
void loadBuffersUsingLazyParquetChunkLoader(const int logical_column_id, const int fragment_id, const ChunkToBufferMap &required_buffers, AbstractBuffer *delete_buffer)
void addNewFragment(int row_group, const std::string &file_path)
void initializeChunkBuffers(const int fragment_index, const Interval< ColumnType > &column_interval, const ChunkToBufferMap &required_buffers, const bool reserve_buffers_and_set_stats=false)
void updateChunkMetadataForFragment(const Interval< ColumnType > &column_interval, const std::list< std::shared_ptr< ChunkMetadata >> &column_chunk_metadata, int32_t fragment_id)
size_t last_file_row_count_
An AbstractBuffer is a unit of data management for a data manager.
bool isNewFile(const std::string &file_path) const
bool moveToNextFragment(size_t new_rows_count) const
std::shared_ptr< arrow::fs::FileSystem > file_system_
void removeMetadataForLastFile(const std::string &last_file_path)
std::list< const ColumnDescriptor * > getColumnsToInitialize(const Interval< ColumnType > &column_interval)
ParallelismLevel getNonCachedParallelismLevel() const override
void metadataScanFiles(const std::vector< std::string > &file_paths)
void populateChunkMetadata(ChunkMetadataVector &chunk_metadata_vector) override
std::map< FilePathAndRowGroup, RowGroupMetadata > getRowGroupMetadataMap(const std::vector< std::string > &file_paths) const
std::pair< std::string, int32_t > FilePathAndRowGroup
bool isRestored() const override
std::list< RowGroupMetadata > getRowGroupMetadataForFilePaths(const std::vector< std::string > &file_paths) const