40 namespace File_Namespace {
91 std::stringstream file_name;
92 file_name <<
"table_" << db_id <<
"_" << tb_id <<
"/";
93 return file_name.str();
162 const size_t numBytes,
163 const size_t offset = 0,
165 const int32_t deviceId = -1)
override {
166 UNREACHABLE() <<
"Cache buffers support append(), but not write()";
// Reports the type of this manager: always returns CACHING_FILE_MGR.
200 inline MgrType
getMgrType()
override {
return CACHING_FILE_MGR; };
261 void checkpoint(
const int32_t db_id,
const int32_t tb_id)
override;
266 int32_t
epoch(int32_t db_id, int32_t tb_id)
const override;
273 const size_t numBytes = 0)
override;
280 const size_t num_bytes = 0)
override;
283 const std::vector<HeaderInfo>::const_iterator& headerStartIt,
284 const std::vector<HeaderInfo>::const_iterator& headerEndIt)
override;
293 int32_t page_num)
override;
// Initializes a new CFM using the initialization values in the current CFM.
// The new manager is handed back as a std::unique_ptr<CachingFileMgr>.
324 std::unique_ptr<CachingFileMgr>
reconstruct()
const;
// Unlike the FileMgr, the CFM frees pages immediately instead of holding them
// until the next checkpoint.
// NOTE(review): the description available for this member is truncated after
// "checkpoint" -- confirm the remainder against the full header.
364 void free_page(std::pair<FileInfo*, int32_t>&& page)
override;
367 const ChunkKey& keyPrefix)
override;
374 std::string
dump()
const;
// Initializes a CFM, parsing any existing files and initializing data
// structures appropriately.
// NOTE(review): num_reader_threads is presumably the number of reader threads
// to configure -- its use is not visible here; confirm against the definition.
399 void init(
const size_t num_reader_threads);
417 const std::vector<HeaderInfo>::const_iterator& startIt,
418 const std::vector<HeaderInfo>::const_iterator& endIt)
override;
425 const size_t numBytes = 0)
override;
// Returns the keys contained in chunkIndex_ that match the given table prefix.
// The result is a std::vector<ChunkKey>.
// NOTE(review): the prefix is presumably built from (db_id, tb_id) -- confirm
// against the definition.
462 std::vector<ChunkKey>
getKeysForTable(int32_t db_id, int32_t tb_id)
const;
492 const size_t numBytes = 0)
const override;
495 const ChunkKeyToChunkMap::iterator chunk_it,
496 const bool purge =
true)
override;
500 const std::optional<std::string>& file_name = {})
const override{};
const size_t metadata_page_size_
size_t getTableFileMgrSpaceReserved(int32_t db_id, int32_t tb_id) const
size_t getMaxDataFilesSize() const
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
std::vector< int > ChunkKey
std::ostream & operator<<(std::ostream &os, DiskCacheLevel disk_cache_level)
LRUEvictionAlgorithm table_evict_alg_
std::string dumpTableQueue() const
static size_t getMinimumSize()
void removeDiskContent() const
Removes all disk data for the subdir.
std::string getStringMgrType() override
static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE
static constexpr float METADATA_SPACE_PERCENTAGE
DiskCacheLevel enabled_level
heavyai::shared_mutex table_dirs_mutex_
const std::string kDefaultDiskCacheDirName
std::string get_dir_name_for_table(int db_id, int tb_id)
void writeWrapperFile(const std::string &doc, int32_t db, int32_t tb)
Writes a wrapper file to a table subdir.
A logical page (Page) belongs to a file on disk.
size_t getFilesSize() const
Get the total size of page files (data and metadata files). This includes allocated, but unused space.
std::vector< ChunkKey > getChunkKeysForPrefix(const ChunkKey &prefix) const
Returns the keys for chunks with chunk data that match the given prefix.
void setMaxSizes()
Sets the maximum number of files/space for each type of storage based on the maximum size...
size_t max_wrapper_space_
void writeAndSyncEpochToDisk()
Write and flush the epoch to the epoch file on disk.
~CachingFileMgr() override
This file includes the class specification for the FILE manager (FileMgr), and related data structure...
std::string describeSelf() const override
describes this FileMgr for logging purposes.
size_t getSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
void closeRemovePhysical() override
Closes files and removes the caching directory.
size_t max_num_meta_files_
void touchKey(const ChunkKey &key) const
Used to track which tables/chunks were least recently used.
size_t getMaxSize() override
#define DEFAULT_METADATA_PAGE_SIZE
size_t getMetadataSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
size_t getMaxMetaFiles() const
void createTableFileMgrIfNoneExists(const int32_t db_id, const int32_t tb_id)
Create and initialize a subdirectory for a table if none exists.
Represents/provides access to contiguous data stored in the file system.
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Page requestFreePage(size_t pagesize, const bool isMetadata) override
requests a free page similar to FileMgr, but this override will also evict existing pages to make spa...
bool hasWrapperFile() const
void deleteWrapperFile(int32_t db, int32_t tb)
Deletes the wrapper file from a table subdir.
ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true) override
CachingFileMgr(const DiskCacheConfig &config)
static size_t num_pages_per_data_file_
std::optional< FileBuffer * > getBufferIfExists(const ChunkKey &key)
an optional version of get buffer if we are not sure a chunk exists.
std::set< ChunkKey > getKeysWithMetadata() const
std::string dumpEvictionQueue()
bool failOnReadError() const override
True if a read error should cause a fatal error.
FileInfo * evictPages()
evicts all data pages for the least recently used Chunk (metadata pages persist). Returns the first F...
FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a buffer.
void deleteCacheIfTooLarge()
When the cache is read from disk, we don't know which chunks were least recently used. Rather than try to evict random pages to get down to size we just reset the cache to make sure we have space.
heavyai::shared_mutex table_mutex_
void incrementAllEpochs()
Increment epochs for each table in the CFM.
CachingFileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0) override
allocates a new CachingFileBuffer and tracks its use in the eviction algorithms. ...
std::string wrapper_file_path_
size_t getNumDataChunks() const
Returns the number of buffers with chunk data in the CFM.
std::string epoch_file_path_
bool hasWrapperFile(int32_t db_id, int32_t table_id) const
std::unique_ptr< CachingFileMgr > reconstruct() const
Initializes a new CFM using the initialization values in the current CFM.
size_t getTableFileMgrsSize() const
Returns the total size of all subdirectory files. Each table represented in the CFM has a subdirector...
void removeTableBuffers(int32_t db_id, int32_t tb_id)
Erases and cleans up all buffers for a table.
TableFileMgr(const std::string &table_path)
size_t getNumMetaFiles() const
static constexpr size_t DEFAULT_MAX_SIZE
bool updatePageIfDeleted(FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num) override
checks whether a page should be deleted.
void deleteWrapperFile() const
Deletes only the wrapper file on disk.
An AbstractBuffer is a unit of data management for a data manager.
void incrementEpoch()
increment the epoch for this subdir (not synced to disk).
void setMaxNumDataFiles(size_t max)
void deleteBufferIfExists(const ChunkKey &key)
deletes a buffer if it exists in the mgr. Otherwise do nothing.
static size_t num_pages_per_metadata_file_
void writeAndSyncEpochToDisk()
void removeTableFileMgr(int32_t db_id, int32_t tb_id)
Removes the subdirectory content for a table.
std::optional< size_t > limit_data_size_
size_t getNumDataFiles() const
size_t getAvailableSpace()
#define DEFAULT_PAGE_SIZE
bool isEnabledForFSI() const
size_t max_num_data_files_
void setDataSizeLimit(size_t max)
void setMaxNumMetadataFiles(size_t max)
void removeChunkKeepMetadata(const ChunkKey &key)
Free pages for chunk and remove it from the chunk eviction algorithm.
MgrType getMgrType() override
void writeWrapperFile(const std::string &doc) const
Writes wrapper file to disk.
bool hasFileMgrKey() const override
Query to determine if the contained pages will have their database and table ids overridden by the fil...
std::string dumpKeysWithChunkData() const
void write(int8_t *src, const size_t numBytes, const size_t offset=0, const MemoryLevel srcMemoryLevel=CPU_LEVEL, const int32_t deviceId=-1) override
bool isEnabledForMutableTables() const
static constexpr char WRAPPER_FILE_NAME[]
size_t getReservedSpace() const
Returns the disk space used (in bytes) for the subdir.
std::map< TablePair, std::unique_ptr< TableFileMgr > > table_dirs_
std::string dumpKeysWithMetadata() const
std::vector< ChunkKey > getKeysForTable(int32_t db_id, int32_t tb_id) const
returns set of keys contained in chunkIndex_ that match the given table prefix.
void readTableFileMgrs()
Checks for any sub-directories containing table-specific data and creates epochs from found files...
FileInfo * evictMetadataPages()
evicts all metadata pages for the least recently used table. Returns the first FileInfo that a page w...
size_t getMetadataFileSize() const
FileBuffer(FileMgr *fm, const size_t pageSize, const ChunkKey &chunkKey, const size_t initialSize=0)
Constructs a FileBuffer object.
size_t getAvailableWrapperSpace()
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const override
size_t getMaxDataFiles() const
void init(const size_t num_reader_threads)
Initializes a CFM, parsing any existing files and initializing data structures appropriately (current...
static std::string getDefaultPath(const std::string &base_path)
size_t getDataFileSize() const
size_t num_reader_threads
std::shared_timed_mutex shared_mutex
void free_page(std::pair< FileInfo *, int32_t > &&page) override
Unlike the FileMgr, the CFM frees pages immediately instead of holding them until the next checkpoint...
size_t getNumChunksWithMetadata() const
Returns the number of buffers with metadata in the CFM. Any buffer with an encoder counts...
size_t getChunkSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const override
static constexpr float METADATA_FILE_SPACE_PERCENTAGE
int32_t getEpoch() const
Returns the current epoch (locked)
std::string getTableFileMgrPath(int32_t db, int32_t tb) const
A selection of helper methods for File I/O.
void clearForTable(int32_t db_id, int32_t tb_id)
Removes all data related to the given table (pages and subdirectories).
void removeKey(const ChunkKey &key) const
This file includes the class specification for the Least Recently Used cache eviction algorithm used ...
A FileMgr capable of limiting its size and storing data from multiple tables in a shared directory...
void setMaxWrapperSpace(size_t max)
LRUEvictionAlgorithm chunk_evict_alg_
std::string levelAsString() const
FileBuffer * putBuffer(const ChunkKey &key, AbstractBuffer *srcBuffer, const size_t numBytes=0) override
deletes any existing buffer for the given key then copies in a new one.
std::string dumpEvictionQueue() const
FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &startIt, const std::vector< HeaderInfo >::const_iterator &endIt) override
Creates a buffer and initializes it with info read from files on disk.
size_t getAllocated() override
size_t getMaxWrapperSize() const