OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GlobalFileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #pragma once
26 
27 #include <iostream>
28 #include <map>
29 #include <mutex>
30 #include <set>
31 #include "DataMgr/AbstractBuffer.h"
33 #include "FileMgr.h"
35 
37 
38 using namespace Data_Namespace;
39 
40 namespace File_Namespace {
41 
// Parameter bundle accepted by GlobalFileMgr::setFileMgrParams for a (db_id, tb_id) pair.
// The default constructor sets epoch to -1.
// NOTE(review): the constructor also initializes max_rollback_epochs to -1, but the
// declaration of that member (listing line 45) is absent from this extract -- confirm
// its type against the original header.
42 struct FileMgrParams {
43  FileMgrParams() : epoch(-1), max_rollback_epochs(-1) {}
44  int32_t epoch;
46 };
47 
// AbstractBufferMgr implementation whose per-chunk operations are forwarded to the
// manager returned by getFileMgr(key); that lookup uses key[0] as db_id and key[1]
// as tb_id.
// NOTE(review): this listing is a documentation extract. The embedded listing numbers
// skip wherever lines were dropped, so a few bodies below appear without their
// signature line and a few members without their declaration.
52 class GlobalFileMgr : public AbstractBufferMgr { // implements
53 
54  public:
 // Constructor, defined out of line. Defaults: base_path ".", num_reader_threads 0,
 // page_size DEFAULT_PAGE_SIZE, metadata_page_size DEFAULT_METADATA_PAGE_SIZE.
56  GlobalFileMgr(const int32_t device_id,
57  std::shared_ptr<ForeignStorageInterface> fsi,
58  std::string base_path = ".",
59  const size_t num_reader_threads = 0,
60  const size_t page_size = DEFAULT_PAGE_SIZE,
61  const size_t metadata_page_size = DEFAULT_METADATA_PAGE_SIZE);
62 
63  ~GlobalFileMgr() override {}
64 
 // NOTE(review): the first signature line of this override (listing 65-66) is missing
 // from the extract. The body forwards createBuffer to the manager selected by key.
67  size_t pageSize = 0,
68  const size_t numBytes = 0) override {
69  return getFileMgr(key)->createBuffer(key, pageSize, numBytes);
70  }
71 
 // Forwards the query to the manager selected by key.
72  bool isBufferOnDevice(const ChunkKey& key) override {
73  return getFileMgr(key)->isBufferOnDevice(key);
74  }
75 
77  // Purge == true means delete the data chunks -
78  // can't undelete and revert to previous
79  // state - reclaims disk space for chunk
80  void deleteBuffer(const ChunkKey& key, const bool purge = true) override {
81  return getFileMgr(key)->deleteBuffer(key, purge);
82  }
83 
 // Declared here only; the definition is not part of this header listing.
84  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
85  const bool purge = true) override;
86 
 // Forwards the lookup to the manager selected by key and returns its result.
88  AbstractBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override {
89  return getFileMgr(key)->getBuffer(key, numBytes);
90  }
91 
 // Forwards the fetch into destBuffer to the manager selected by key.
92  void fetchBuffer(const ChunkKey& key,
93  AbstractBuffer* destBuffer,
94  const size_t numBytes) override {
95  return getFileMgr(key)->fetchBuffer(key, destBuffer, numBytes);
96  }
97 
 // NOTE(review): the first signature line of this override (listing 98-104) is missing
 // from the extract. The body forwards putBuffer to the manager selected by key.
105  AbstractBuffer* d,
106  const size_t numBytes = 0) override {
107  return getFileMgr(key)->putBuffer(key, d, numBytes);
108  }
109 
110  // Buffer API
 // alloc and free are not supported by this manager: both log
 // "Operation not supported" at FATAL level.
111  AbstractBuffer* alloc(const size_t numBytes) override {
112  LOG(FATAL) << "Operation not supported";
113  return nullptr; // satisfy return-type warning
114  }
115 
116  void free(AbstractBuffer* buffer) override { LOG(FATAL) << "Operation not supported"; }
117 
 // Reporting overrides with fixed results: the manager type is GLOBAL_FILE_MGR,
 // printSlabs returns "Not Implemented", the size queries return 0 and
 // isAllocationCapped returns false.
118  inline MgrType getMgrType() override { return GLOBAL_FILE_MGR; };
119  inline std::string getStringMgrType() override { return ToString(GLOBAL_FILE_MGR); }
120  inline std::string printSlabs() override { return "Not Implemented"; }
121  inline size_t getMaxSize() override { return 0; }
122  inline size_t getInUseSize() override { return 0; }
123  inline size_t getAllocated() override { return 0; }
124  inline bool isAllocationCapped() override { return false; }
125 
126  void init();
127 
 // NOTE(review): the first signature line of this override (listing 128) is missing
 // from the extract. The body forwards getChunkMetadataVecForKeyPrefix to the manager
 // selected by keyPrefix.
129  const ChunkKey& keyPrefix) override {
130  return getFileMgr(keyPrefix)->getChunkMetadataVecForKeyPrefix(chunkMetadataVec,
131  keyPrefix);
132  }
133 
 // Two checkpoint overloads: one without arguments and one for a single
 // (db_id, tb_id) pair. Both are defined out of line.
138  void checkpoint() override;
139  void checkpoint(const int32_t db_id, const int32_t tb_id) override;
140 
 // Returns the stored num_reader_threads_ value.
145  inline size_t getNumReaderThreads() { return num_reader_threads_; }
146 
147  size_t getNumChunks() override;
148 
149  void compactDataFiles(const int32_t db_id, const int32_t tb_id);
150 
 // Compile-time constant, currently 2.
151  static constexpr int32_t db_version_{2};
152 
153  private:
 // Lookup variant that takes no lock itself; findFileMgr below calls it while
 // holding a shared lock on fileMgrs_mutex_.
154  AbstractBufferMgr* findFileMgrUnlocked(const int32_t db_id, const int32_t tb_id);
155  void deleteFileMgr(const int32_t db_id, const int32_t tb_id);
156 
157  public:
 // Acquires a shared lock on fileMgrs_mutex_ for the duration of the call and
 // returns the result of findFileMgrUnlocked(db_id, tb_id).
158  AbstractBufferMgr* findFileMgr(const int32_t db_id, const int32_t tb_id) {
159  heavyai::shared_lock<heavyai::shared_mutex> read_lock(fileMgrs_mutex_);
160  return findFileMgrUnlocked(db_id, tb_id);
161  }
162  void setFileMgrParams(const int32_t db_id,
163  const int32_t tb_id,
164  const FileMgrParams& file_mgr_params);
165  AbstractBufferMgr* getFileMgr(const int32_t db_id, const int32_t tb_id);
 // NOTE(review): the signature line of this overload (listing 166) is missing from
 // the extract. The body passes key[0] and key[1] to the (db_id, tb_id) overload;
 // no size check on key is visible here.
167  return getFileMgr(key[0], key[1]);
168  }
169 
 // Simple accessors for the stored base path and page sizes.
170  std::string getBasePath() const { return basePath_; }
171  size_t getPageSize() const { return page_size_; }
172  size_t getMetadataPageSize() const { return metadata_page_size_; }
173 
174  void writeFileMgrData(FileMgr* fileMgr = 0);
175 
 // Getter and setter for the dbConvert_ flag.
176  inline bool getDBConvert() const { return dbConvert_; }
177  inline void setDBConvert(bool val) { dbConvert_ = val; }
178 
 // Per-table operations, each addressed by (db_id, tb_id) and defined out of line.
179  void removeTableRelatedDS(const int32_t db_id, const int32_t tb_id) override;
180  void setTableEpoch(const int32_t db_id, const int32_t tb_id, const int32_t start_epoch);
181  size_t getTableEpoch(const int32_t db_id, const int32_t tb_id);
182  void resetTableEpochFloor(const int32_t db_id, const int32_t tb_id);
183  StorageStats getStorageStats(const int32_t db_id, const int32_t tb_id);
184 
185  // For testing purposes only
186  std::shared_ptr<FileMgr> getSharedFileMgr(const int db_id, const int table_id);
187 
188  // For testing purposes only
189  void setFileMgr(const int db_id, const int table_id, std::shared_ptr<FileMgr> file_mgr);
190  void closeFileMgr(const int32_t db_id,
191  const int32_t tb_id); // A locked public wrapper for deleteFileMgr,
192  // for now for unit testing
193  protected:
194  std::shared_ptr<ForeignStorageInterface> fsi_;
195 
196  private:
197  bool existsDiffBetweenFileMgrParamsAndFileMgr(
198  FileMgr* file_mgr,
199  const FileMgrParams& file_mgr_params) const;
200  std::string basePath_;
 // NOTE(review): listing line 201 is missing here. num_reader_threads_ is read by
 // getNumReaderThreads() above, but its declaration is not visible in this extract.
202  int32_t
203  epoch_; /* the current epoch (time of last checkpoint) will be used for all
204  * tables except for the one for which the value of the epoch has been reset
205  * using --start-epoch option at start up to rollback this table's updates.
206  */
207  const size_t page_size_;
208  const size_t metadata_page_size_;
209  bool dbConvert_;
210 
 // Maps keyed by TablePair.
 // NOTE(review): the TablePair declaration (listing 211) and the fileMgrs_mutex_
 // declaration (listing 217) are missing from this extract -- confirm against the
 // original header.
212  std::map<TablePair, std::shared_ptr<FileMgr>> ownedFileMgrs_;
213  std::map<TablePair, AbstractBufferMgr*> allFileMgrs_;
214  std::map<TablePair, int32_t> max_rollback_epochs_per_table_;
215  std::map<TablePair, StorageStats> lazy_initialized_stats_;
216 
218 };
219 
220 } // namespace File_Namespace
AbstractBuffer * putBuffer(const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
Puts the contents of d into the Chunk with the given key.
std::vector< int > ChunkKey
Definition: types.h:36
std::string getBasePath() const
int32_t epoch_
number of threads used when loading data
std::shared_ptr< ForeignStorageInterface > fsi_
bool isBufferOnDevice(const ChunkKey &key) override
Definition: GlobalFileMgr.h:72
AbstractBuffer * createBuffer(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a chunk with the specified key and page size.
Definition: GlobalFileMgr.h:66
std::map< TablePair, std::shared_ptr< FileMgr > > ownedFileMgrs_
#define LOG(tag)
Definition: Logger.h:285
bool isAllocationCapped() override
AbstractBuffer * alloc(const size_t numBytes) override
This file includes the class specification for the FILE manager (FileMgr), and related data structure...
AbstractBufferMgr * getFileMgr(const ChunkKey &key)
#define DEFAULT_METADATA_PAGE_SIZE
std::string getStringMgrType() override
std::shared_lock< T > shared_lock
std::map< TablePair, AbstractBufferMgr * > allFileMgrs_
size_t getAllocated() override
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
const size_t metadata_page_size_
used to set FileMgr page_size_
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
An AbstractBuffer is a unit of data management for a data manager.
size_t getMetadataPageSize() const
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
std::string printSlabs() override
#define DEFAULT_PAGE_SIZE
void deleteBuffer(const ChunkKey &key, const bool purge=true) override
Deletes the chunk with the specified key.
Definition: GlobalFileMgr.h:80
MgrType getMgrType() override
size_t getMaxSize() override
AbstractBuffer * getBuffer(const ChunkKey &key, const size_t numBytes=0) override
Returns a pointer to the chunk with the specified key.
Definition: GlobalFileMgr.h:88
void free(AbstractBuffer *buffer) override
size_t num_reader_threads_
The OS file system path containing the files.
std::map< TablePair, int32_t > max_rollback_epochs_per_table_
AbstractBufferMgr * findFileMgr(const int32_t db_id, const int32_t tb_id)
std::map< TablePair, StorageStats > lazy_initialized_stats_
void fetchBuffer(const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
Definition: GlobalFileMgr.h:92
std::shared_timed_mutex shared_mutex
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
size_t getInUseSize() override
heavyai::shared_mutex fileMgrs_mutex_
bool dbConvert_
used to set FileMgr metadata_page_size_