#include <sys/sysctl.h>
#include <sys/types.h>

#include <boost/container/small_vector.hpp>
#include <boost/filesystem.hpp>

#include <string_view>
bool g_enable_tiered_cpu_mem{false};
namespace Data_Namespace {
DataMgr::DataMgr(const std::string& dataDir,
                 const SystemParameters& system_parameters,
                 std::unique_ptr<CudaMgr_Namespace::CudaMgr> cudaMgr,
                 const bool useGpus,
                 const size_t reservedGpuMem,
                 const size_t numReaderThreads,
                 const File_Namespace::DiskCacheConfig cache_config)
    : cudaMgr_{std::move(cudaMgr)}
    , reservedGpuMem_{reservedGpuMem} {
  std::atexit(atExitHandler);
  if (useGpus && !cudaMgr_) {
    LOG(ERROR) << "CudaMgr instance is invalid, falling back to CPU-only mode.";
  }
  populateMgrs(system_parameters, numReaderThreads, cache_config);
  createTopLevelMetadata();
}
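
// Teardown sketch (reading of the destructor below): buffer managers are
// deleted from the highest level down (GPU pools, then the CPU pool, then
// disk), since each level holds a raw parent pointer to the level beneath it.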
DataMgr::~DataMgr() {
  int numLevels = bufferMgrs_.size();
  for (int level = numLevels - 1; level >= 0; --level) {
    for (size_t device = 0; device < bufferMgrs_[level].size(); device++) {
      delete bufferMgrs_[level][device];
    }
  }
}
DataMgr::SystemMemoryUsage DataMgr::getSystemMemoryUsage() const {
  SystemMemoryUsage usage;
  ProcMeminfoParser mi;  // parses /proc/meminfo into key/value pairs
  usage.free = mi["MemAvailable"];
  usage.total = mi["MemTotal"];
  // /proc/self/statm reports its fields in pages: total program size,
  // resident set size, and resident shared pages, in that order.
  int64_t size = 0;
  int64_t resident = 0;
  int64_t shared = 0;

  std::ifstream fstatm("/proc/self/statm");
  fstatm >> size >> resident >> shared;

  const long page_size = sysconf(_SC_PAGE_SIZE);
  usage.resident = resident * page_size;
  usage.vtotal = size * page_size;
  usage.regular = (resident - shared) * page_size;
  usage.shared = shared * page_size;
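
  // Reading of the fields above (sketch): resident covers all of this
  // process's pages currently in RAM, regular excludes pages shared with
  // other processes, and vtotal is the full virtual size, swap included.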
  ProcBuddyinfoParser bi;
  bi.parseBuddyinfo();
  usage.frag = bi.getFragmentationPercent();

  return usage;
}
size_t DataMgr::getTotalSystemMemory() {
#ifdef __APPLE__
  int mib[2];
  size_t physical_memory;
  size_t length;
  mib[0] = CTL_HW;
  mib[1] = HW_MEMSIZE;
  length = sizeof(size_t);
  sysctl(mib, 2, &physical_memory, &length, NULL, 0);
  return physical_memory;
#elif defined(_MSC_VER)
  MEMORYSTATUSEX status;
  status.dwLength = sizeof(status);
  GlobalMemoryStatusEx(&status);
  return status.ullTotalPhys;
#else  // Linux
  long pages = sysconf(_SC_PHYS_PAGES);
  long page_size = sysconf(_SC_PAGE_SIZE);
  return pages * page_size;
#endif
}
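
// Example (Linux branch): on a host with 4 KiB pages and 16 GiB of RAM,
// _SC_PHYS_PAGES is 4'194'304 and the product is 17'179'869'184 bytes.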
void DataMgr::allocateCpuBufferMgr(int32_t device_id,
                                   size_t total_cpu_size,
                                   size_t min_cpu_slab_size,
                                   size_t max_cpu_slab_size,
                                   size_t default_cpu_slab_size,
                                   size_t page_size,
                                   const CpuTierSizeVector& cpu_tier_sizes) {
#ifdef ENABLE_MEMKIND
  if (g_enable_tiered_cpu_mem) {
    bufferMgrs_[1].push_back(new Buffer_Namespace::TieredCpuBufferMgr(
        device_id, total_cpu_size, cudaMgr_.get(), min_cpu_slab_size,
        max_cpu_slab_size, default_cpu_slab_size, page_size, cpu_tier_sizes,
        bufferMgrs_[0][0]));
    return;
  }
#endif
  bufferMgrs_[1].push_back(new Buffer_Namespace::CpuBufferMgr(
      device_id, total_cpu_size, cudaMgr_.get(), min_cpu_slab_size,
      max_cpu_slab_size, default_cpu_slab_size, page_size, bufferMgrs_[0][0]));
}
void DataMgr::resetBufferMgrs(const File_Namespace::DiskCacheConfig& cache_config,
                              const size_t num_reader_threads,
                              const SystemParameters& sys_params) {
  int numLevels = bufferMgrs_.size();
  for (int level = numLevels - 1; level >= 0; --level) {
    for (size_t device = 0; device < bufferMgrs_[level].size(); device++) {
      delete bufferMgrs_[level][device];
    }
  }
  bufferMgrs_.clear();
  populateMgrs(sys_params, num_reader_threads, cache_config);
}
size_t get_slab_size(size_t initial_slab_size,
                     size_t buffer_pool_size,
                     size_t page_size) {
  // Clamp to the pool size, then round down to a whole number of pages.
  auto slab_size = std::min(initial_slab_size, buffer_pool_size);
  slab_size = (slab_size / page_size) * page_size;
  return slab_size;
}
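
// Worked example (illustrative numbers): initial_slab_size = 4'294'967'296
// (4 GiB), buffer_pool_size = 1'000'000'000, page_size = 4096. The min()
// clamps to 1'000'000'000, and the integer division rounds that down to
// 999'997'440, the largest multiple of 4096 that fits.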
void DataMgr::populateMgrs(const SystemParameters& system_parameters,
                           const size_t userSpecifiedNumReaderThreads,
                           const File_Namespace::DiskCacheConfig& cache_config) {
  bufferMgrs_.resize(2);
  bufferMgrs_[0].push_back(
      new PersistentStorageMgr(dataDir_, userSpecifiedNumReaderThreads, cache_config));
  levelSizes_.push_back(1);

  auto page_size = system_parameters.buffer_page_size;
  size_t cpu_buffer_size = system_parameters.cpu_buffer_mem_bytes;
  if (cpu_buffer_size == 0) {  // size was not specified
    const auto total_system_memory = getTotalSystemMemory();
    VLOG(1) << "Detected " << (float)total_system_memory / (1024 * 1024)
            << "M of total system memory.";
    cpu_buffer_size = total_system_memory * 0.8;
  }
  auto min_cpu_slab_size =
      get_slab_size(system_parameters.min_cpu_slab_size, cpu_buffer_size, page_size);
  auto max_cpu_slab_size =
      g_use_cpu_mem_pool_size_for_max_cpu_slab_size
          ? cpu_buffer_size
          : get_slab_size(
                system_parameters.max_cpu_slab_size, cpu_buffer_size, page_size);
  auto default_cpu_slab_size = get_slab_size(
      system_parameters.default_cpu_slab_size, cpu_buffer_size, page_size);
  LOG(INFO) << "Min CPU Slab Size is " << float(min_cpu_slab_size) / (1024 * 1024)
            << "MB";
  LOG(INFO) << "Max CPU Slab Size is " << float(max_cpu_slab_size) / (1024 * 1024)
            << "MB";
  LOG(INFO) << "Default CPU Slab Size is "
            << float(default_cpu_slab_size) / (1024 * 1024) << "MB";
  LOG(INFO) << "Max memory pool size for CPU is "
            << float(cpu_buffer_size) / (1024 * 1024) << "MB";

  size_t total_cpu_size = 0;
  CpuTierSizeVector cpu_tier_sizes;
#ifdef ENABLE_MEMKIND
  cpu_tier_sizes.resize(numCpuTiers, 0);
  cpu_tier_sizes[CpuTierType::DRAM] = cpu_buffer_size;
  if (g_enable_tiered_cpu_mem) {
    cpu_tier_sizes[CpuTierType::PMEM] = g_pmem_size;
    LOG(INFO) << "Max memory pool size for PMEM is "
              << (float)g_pmem_size / (1024 * 1024) << "MB";
  }
  for (auto cpu_tier_size : cpu_tier_sizes) {
    total_cpu_size += cpu_tier_size;
  }
#else
  total_cpu_size = cpu_buffer_size;
#endif

  if (hasGpus_ || cudaMgr_) {
    LOG(INFO) << "Reserved GPU memory is " << (float)reservedGpuMem_ / (1024 * 1024)
              << "MB includes render buffer allocation";
    bufferMgrs_.resize(3);
    allocateCpuBufferMgr(0, total_cpu_size, min_cpu_slab_size, max_cpu_slab_size,
                         default_cpu_slab_size, page_size, cpu_tier_sizes);
    levelSizes_.push_back(1);
    auto num_gpus = cudaMgr_->getDeviceCount();
    for (int gpu_num = 0; gpu_num < num_gpus; ++gpu_num) {
      auto gpu_max_mem_size =
          system_parameters.gpu_buffer_mem_bytes != 0
              ? system_parameters.gpu_buffer_mem_bytes
              : (cudaMgr_->getDeviceProperties(gpu_num)->globalMem) - reservedGpuMem_;
      auto min_gpu_slab_size =
          get_slab_size(system_parameters.min_gpu_slab_size, gpu_max_mem_size, page_size);
      auto max_gpu_slab_size =
          get_slab_size(system_parameters.max_gpu_slab_size, gpu_max_mem_size, page_size);
      auto default_gpu_slab_size = get_slab_size(
          system_parameters.default_gpu_slab_size, gpu_max_mem_size, page_size);
      LOG(INFO) << "Min GPU Slab size for GPU " << gpu_num << " is "
                << float(min_gpu_slab_size) / (1024 * 1024) << "MB";
      LOG(INFO) << "Max GPU Slab size for GPU " << gpu_num << " is "
                << float(max_gpu_slab_size) / (1024 * 1024) << "MB";
      LOG(INFO) << "Default GPU Slab size for GPU " << gpu_num << " is "
                << float(default_gpu_slab_size) / (1024 * 1024) << "MB";
      LOG(INFO) << "Max memory pool size for GPU " << gpu_num << " is "
                << float(gpu_max_mem_size) / (1024 * 1024) << "MB";
      bufferMgrs_[2].push_back(new Buffer_Namespace::GpuCudaBufferMgr(
          gpu_num, gpu_max_mem_size, cudaMgr_.get(), min_gpu_slab_size,
          max_gpu_slab_size, default_gpu_slab_size, page_size, bufferMgrs_[1][0]));
    }
    levelSizes_.push_back(num_gpus);
  } else {
    allocateCpuBufferMgr(0, total_cpu_size, min_cpu_slab_size, max_cpu_slab_size,
                         default_cpu_slab_size, page_size, cpu_tier_sizes);
    levelSizes_.push_back(1);
  }
}
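
// Resulting hierarchy (sketch): bufferMgrs_[0] holds the persistent storage
// manager, bufferMgrs_[1] a single CPU pool, and bufferMgrs_[2] one
// GpuCudaBufferMgr per device; each level treats the one below it as the
// parent it faults missing chunks from.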
  boost::filesystem::path path(mapdDataPath);
  if (boost::filesystem::exists(path)) {
    if (!boost::filesystem::is_directory(path)) {
      LOG(FATAL) << "Path \"" << mapdDataPath
                 << "\" to convert DB is not a directory.";
    }
  } else {  // data directory does not exist
    LOG(FATAL) << "Path \"" << mapdDataPath
               << "\" to convert DB does not exist.";
  }
  LOG(INFO) << "Database conversion started.";
  LOG(INFO) << "Database conversion completed.";
  auto fm_top = gfm->getFileMgr(chunkKey);
  if (auto fm = dynamic_cast<File_Namespace::FileMgr*>(fm_top)) {
    fm->createOrMigrateTopLevelMetadata();
  }
  std::vector<MemoryInfo> mem_info;
  for (size_t slab_num = 0; slab_num < slab_segments.size(); ++slab_num) {
    for (auto const& segment : slab_segments[slab_num]) {
      MemoryData md;
      md.touch = segment.last_touched;
      md.chunk_key.insert(
          md.chunk_key.end(), segment.chunk_key.begin(), segment.chunk_key.end());
    }
  }
  mem_info.push_back(mi);
  int numGpus = cudaMgr_->getDeviceCount();
  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
    for (size_t slab_num = 0; slab_num < slab_segments.size(); ++slab_num) {
      for (auto const& segment : slab_segments[slab_num]) {
        MemoryData md;
        md.touch = segment.last_touched;
        md.chunk_key.insert(
            md.chunk_key.end(), segment.chunk_key.begin(), segment.chunk_key.end());
      }
    }
    mem_info.push_back(mi);
  }
  return mem_info;
}
  int numGpus = cudaMgr_->getDeviceCount();
  std::ostringstream tss;
  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
    tss << bufferMgrs_[memLevel][gpuNum]->printSlabs();
  }
  return tss.str();
  if (memLevel == MemoryLevel::GPU_LEVEL) {
    if (cudaMgr_) {
      int numGpus = cudaMgr_->getDeviceCount();
      for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
        auto buffer_mgr_for_gpu = getGpuBufferMgr(gpuNum);
        CHECK(buffer_mgr_for_gpu);
        buffer_mgr_for_gpu->clearSlabs();
      }
    } else {
      LOG(WARNING) << "Unable to clear GPU memory: No GPUs detected";
    }
  } else {
    auto buffer_mgr_for_cpu = getCpuBufferMgr();
    CHECK(buffer_mgr_for_cpu);
    buffer_mgr_for_cpu->clearSlabs();
  }
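
// Hedged reading of the intent above: clearSlabs() evicts cached buffers that
// are not currently pinned, leaving in-use buffers in place; see
// Buffer_Namespace::BufferMgr for the exact eviction rules.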
bool DataMgr::isBufferOnDevice(const ChunkKey& key,
                               const MemoryLevel memLevel,
                               const int deviceId) {
  std::lock_guard<std::mutex> buffer_lock(buffer_access_mutex_);
  return bufferMgrs_[memLevel][deviceId]->isBufferOnDevice(key);
}
  bufferMgrs_[0][0]->getChunkMetadataVecForKeyPrefix(chunkMetadataVec, keyPrefix);
AbstractBuffer* DataMgr::createChunkBuffer(const ChunkKey& key,
                                           const MemoryLevel memoryLevel,
                                           const int deviceId,
                                           const size_t page_size) {
  std::lock_guard<std::mutex> buffer_lock(buffer_access_mutex_);
  int level = static_cast<int>(memoryLevel);
  return bufferMgrs_[level][deviceId]->createBuffer(key, page_size);
}
AbstractBuffer* DataMgr::getChunkBuffer(const ChunkKey& key,
                                        const MemoryLevel memoryLevel,
                                        const int deviceId,
                                        const size_t numBytes) {
  std::lock_guard<std::mutex> buffer_lock(buffer_access_mutex_);
  const auto level = static_cast<size_t>(memoryLevel);
  CHECK_LT(level, levelSizes_.size());  // make sure we have a legit buffer mgr
  return bufferMgrs_[level][deviceId]->getBuffer(key, numBytes);
}
  for (int level = numLevels - 1; level >= 0; --level) {
    for (int device = 0; device < levelSizes_[level]; ++device) {
      bufferMgrs_[level][device]->deleteBuffersWithPrefix(keyPrefix);
    }
  }
  for (int device = 0; device < levelSizes_[memLevel]; ++device) {
    bufferMgrs_[memLevel][device]->deleteBuffersWithPrefix(keyPrefix);
  }
void DataMgr::deleteChunk(const ChunkKey& key,
                          const MemoryLevel memLevel,
                          const int device_id) {
  bufferMgrs_[memLevel][device_id]->deleteBuffer(key);
}
AbstractBuffer* DataMgr::alloc(const MemoryLevel memoryLevel,
                               const int deviceId,
                               const size_t numBytes) {
  std::lock_guard<std::mutex> buffer_lock(buffer_access_mutex_);
  const auto level = static_cast<int>(memoryLevel);
  CHECK_LT(deviceId, levelSizes_[level]);
  return bufferMgrs_[level][deviceId]->alloc(numBytes);
}
  int level = static_cast<int>(buffer->getType());
  for (auto deviceIt = levelIt->begin(); deviceIt != levelIt->end(); ++deviceIt) {
    (*deviceIt)->checkpoint(db_id, tb_id);
  }
  for (int device_id = 0; device_id < levelSizes_[memory_level]; device_id++) {
    bufferMgrs_[memory_level][device_id]->checkpoint(db_id, table_id);
  }
  for (auto deviceIt = levelIt->begin(); deviceIt != levelIt->end(); ++deviceIt) {
    (*deviceIt)->checkpoint();
  }
  bufferMgrs_[0][0]->removeTableRelatedDS(db_id, tb_id);
  gfm->setTableEpoch(db_id, tb_id, start_epoch);
  return gfm->getTableEpoch(db_id, tb_id);
  gfm->resetTableEpochFloor(db_id, tb_id);
  CHECK(global_file_mgr);
  return global_file_mgr;
std::ostream& operator<<(std::ostream& os, const DataMgr::SystemMemoryUsage& mem_info) {
  os << " \"name\": \"CPU Memory Info\",";
  os << " \"TotalMB\": " << mem_info.total / (1024. * 1024.) << ",";
  os << " \"FreeMB\": " << mem_info.free / (1024. * 1024.) << ",";
  os << " \"ProcessMB\": " << mem_info.resident / (1024. * 1024.) << ",";
  os << " \"VirtualMB\": " << mem_info.vtotal / (1024. * 1024.) << ",";
  os << " \"ProcessPlusSwapMB\": " << mem_info.regular / (1024. * 1024.) << ",";
  os << " \"ProcessSharedMB\": " << mem_info.shared / (1024. * 1024.) << ",";
  os << " \"FragmentationPercent\": " << mem_info.frag;
  os << ", \"BuddyinfoHighBlocks\": " << mem_info.high_blocks;
  os << ", \"BuddyinfoAvailPages\": " << mem_info.avail_pages;
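
  // Example emitted fragment (illustrative values only):
  //   "TotalMB": 64219.9, "FreeMB": 51034.2, "ProcessMB": 1332.4, ...,
  //   "FragmentationPercent": 17.3, "BuddyinfoHighBlocks": 212, ...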
size_t DataMgr::getGpuBufferPoolSize() const {
  if (bufferMgrs_.size() <= 2) {  // no GPU level allocated
    return static_cast<size_t>(0);
  }
  size_t total_gpu_buffer_pools_size{0};
  for (auto const* gpu_buffer_mgr : bufferMgrs_[2]) {
    total_gpu_buffer_pools_size += gpu_buffer_mgr->getMaxSize();
  }
  return total_gpu_buffer_pools_size;
}
template <typename T, std::size_t N>
using small_vector = boost::container::small_vector<T, N>;
  size_t operator()(size_t sum, size_t blocks) const { return 2 * sum + blocks; }
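
  // Horner's-rule reading (assuming the fold visits counts from highest order
  // down to order 0): ((b_n*2 + b_{n-1})*2 + ...) + b_0 = sum of b_k * 2^k,
  // i.e. the total number of free base pages across all block orders.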
    for (size_t i = 0; i < num_blocks; ++i) {
      size_t block{0};
      std::from_chars(tokens[i].data(), tokens[i].data() + tokens[i].size(), block);
      blocks.push_back(block);
    }
    if (blocks.size() < rhs.blocks.size()) {
      blocks.resize(rhs.blocks.size(), 0u);
    }
    for (size_t i = 0; i < rhs.blocks.size(); ++i) {
      blocks[i] += rhs.blocks[i];
    }
    if (blocks.size() < 2u) {
      return 0.0;  // too few orders to measure fragmentation
    }
    size_t scaled = 0;
    for (size_t order = 0; order < blocks.size(); ++order) {
      size_t const pages = blocks[order] << order;
      scaled += pages * (blocks.size() - 1 - order) / (blocks.size() - 1);
    }
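
    // Illustrative example with block counts {4, 2, 1} over orders 0..2:
    // each order holds 4 base pages (4<<0, 2<<1, 1<<2), weighted by
    // (2 - order) / 2, giving scaled = 4 + 2 + 0 = 6 of 12 pages, i.e. 50%.
    // Low orders dominate, so higher percentages mean more fragmentation.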
  size_t highestBlock() const { return blocks.empty() ? 0 : blocks.back(); }
  while (start < str.size()) {
    // Skip any run of spaces before the next token.
    start = str.find_first_not_of(' ', start);
    if (start == std::string_view::npos) {
      break;
    }
    // The token extends to the next space, or to the end of the line.
    size_t end = str.find(' ', start);
    tokens.push_back(str.substr(start, end - start));
    start = end;  // npos ends the loop on the next iteration
  }
  std::ifstream file("/proc/buddyinfo");
  if (!file.is_open()) {
    return;
  }
  constexpr unsigned max_line_size = 256;
  char line[max_line_size];
  BuddyinfoBlocks frag;
  while (file.getline(line, max_line_size)) {
    auto const tokens = tokenize(line);
    // Expected row shape: "Node 0, zone   Normal  <free-block counts...>".
    if (5u <= tokens.size() && tokens[0] == "Node" && tokens[2] == "zone") {
      BuddyinfoBlocks row(tokens.data() + 4, tokens.size() - 4);
      // Skip the small, special-purpose DMA/DMA32 zones.
      if (tokens[3].substr(0, 3) != "DMA") {