OmniSciDB
a5dc49c757
|
#include <CudaMgr.h>
Public Member Functions | |
CudaMgr (const int num_gpus, const int start_gpu=0) | |
virtual | ~CudaMgr () |
void | synchronizeDevices () const |
int | getDeviceCount () const |
int | getStartGpu () const |
const heavyai::DeviceGroup & | getDeviceGroup () const |
size_t | computePaddedBufferSize (size_t buf_size, size_t granularity) const |
size_t | getGranularity (const int device_num) const |
void | copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
void | copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, CUstream cuda_stream=0) |
void | copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num, CUstream cuda_stream=0) |
int8_t * | allocatePinnedHostMem (const size_t num_bytes) |
virtual int8_t * | allocateDeviceMem (const size_t num_bytes, const int device_num, const bool is_slab=false) |
void | freePinnedHostMem (int8_t *host_ptr) |
void | freeDeviceMem (int8_t *device_ptr) |
void | zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
void | setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
size_t | getMinSharedMemoryPerBlockForAllDevices () const |
size_t | getMinNumMPsForAllDevices () const |
const std::vector < DeviceProperties > & | getAllDeviceProperties () const |
const DeviceProperties * | getDeviceProperties (const size_t device_num) const |
bool | isArchMaxwell () const |
bool | isArchMaxwellOrLater () const |
bool | isArchPascal () const |
bool | isArchPascalOrLater () const |
bool | isArchMaxwellOrLaterForAll () const |
bool | isArchVoltaOrGreaterForAll () const |
NvidiaDeviceArch | getDeviceArch () const |
void | setContext (const int device_num) const |
int | getContext () const |
Static Public Member Functions | |
static std::string | deviceArchToSM (const NvidiaDeviceArch arch) |
Private Attributes | |
int | device_count_ |
int | start_gpu_ |
size_t | min_shared_memory_per_block_for_all_devices |
size_t | min_num_mps_for_all_devices |
std::vector< DeviceProperties > | device_properties_ |
heavyai::DeviceGroup | device_group_ |
std::vector< CUcontext > | device_contexts_ |
std::mutex | device_mutex_ |
CudaMgr_Namespace::CudaMgr::CudaMgr | ( | const int | num_gpus, |
const int | start_gpu = 0 |
||
) |
Definition at line 48 of file CudaMgr.cpp.
References CHECK_EQ, device_count_, device_group_, device_properties_, logger::INFO, LOG, nvidia_jit_warmup(), setContext(), and start_gpu_.
|
virtual |
Definition at line 81 of file CudaMgr.cpp.
References CHECK, device_contexts_, device_count_, device_mutex_, logger::ERROR, LOG, and synchronizeDevices().
|
virtual |
Definition at line 333 of file CudaMgr.cpp.
References computePaddedBufferSize(), device_mutex_, getDeviceProperties(), getGranularity(), setContext(), start_gpu_, and CudaMgr_Namespace::DeviceProperties::uuid.
Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().
int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem | ( | const size_t | num_bytes | ) |
Definition at line 326 of file CudaMgr.cpp.
References setContext().
size_t CudaMgr_Namespace::CudaMgr::computePaddedBufferSize | ( | size_t | buf_size, |
size_t | granularity | ||
) | const |
Definition at line 105 of file CudaMgr.cpp.
Referenced by allocateDeviceMem().
void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice | ( | int8_t * | dest_ptr, |
int8_t * | src_ptr, | ||
const size_t | num_bytes, | ||
const int | dest_device_num, | ||
const int | src_device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 164 of file CudaMgr.cpp.
References device_contexts_, and setContext().
Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().
void CudaMgr_Namespace::CudaMgr::copyDeviceToHost | ( | int8_t * | host_ptr, |
const int8_t * | device_ptr, | ||
const size_t | num_bytes, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 143 of file CudaMgr.cpp.
References CHECK_LE, device_mutex_, and setContext().
Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().
void CudaMgr_Namespace::CudaMgr::copyHostToDevice | ( | int8_t * | device_ptr, |
const int8_t * | host_ptr, | ||
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 127 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().
|
inline static |
Definition at line 162 of file CudaMgr.h.
References CudaMgr_Namespace::Ampere, CudaMgr_Namespace::Kepler, LOG, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, UNREACHABLE, CudaMgr_Namespace::Volta, and logger::WARNING.
void CudaMgr_Namespace::CudaMgr::freeDeviceMem | ( | int8_t * | device_ptr | ) |
Definition at line 392 of file CudaMgr.cpp.
References device_mutex_.
Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().
void CudaMgr_Namespace::CudaMgr::freePinnedHostMem | ( | int8_t * | host_ptr | ) |
Definition at line 74 of file CudaMgrNoCuda.cpp.
References CHECK.
|
inline |
Definition at line 134 of file CudaMgr.h.
References device_properties_.
Referenced by Executor::blockSize(), Executor::deviceCycles(), and Executor::warpSize().
int CudaMgr_Namespace::CudaMgr::getContext | ( | ) | const |
Definition at line 517 of file CudaMgr.cpp.
References device_contexts_.
Referenced by QueryEngine::getCudaStream(), and QueryEngine::QueryEngine().
|
inline |
Definition at line 186 of file CudaMgr.h.
References CudaMgr_Namespace::Ampere, device_properties_, CudaMgr_Namespace::Kepler, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, and CudaMgr_Namespace::Volta.
|
inline |
Definition at line 90 of file CudaMgr.h.
References device_count_.
Referenced by Executor::deviceCount(), get_available_gpus(), isArchMaxwell(), isArchMaxwellOrLater(), isArchPascal(), isArchPascalOrLater(), and QueryEngine::QueryEngine().
|
inline |
Definition at line 92 of file CudaMgr.h.
References device_group_.
|
inline |
Definition at line 137 of file CudaMgr.h.
References device_properties_, and to_string().
Referenced by allocateDeviceMem().
size_t CudaMgr_Namespace::CudaMgr::getGranularity | ( | const int | device_num | ) | const |
Definition at line 109 of file CudaMgr.cpp.
Referenced by allocateDeviceMem().
|
inline |
Definition at line 132 of file CudaMgr.h.
References min_num_mps_for_all_devices.
|
inline |
Definition at line 128 of file CudaMgr.h.
References min_shared_memory_per_block_for_all_devices.
|
inline |
|
inline |
Definition at line 147 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
|
inline |
Definition at line 150 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll | ( | ) | const |
Returns true if all devices have the Maxwell micro-architecture or later. Returns false if any device has a compute capability lower than 5.0.
Definition at line 437 of file CudaMgr.cpp.
References device_count_, and device_properties_.
|
inline |
Definition at line 153 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
|
inline |
Definition at line 156 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
Referenced by Executor::isArchPascalOrLater().
bool CudaMgr_Namespace::CudaMgr::isArchVoltaOrGreaterForAll | ( | ) | const |
Returns true if all devices have the Volta micro-architecture or later. Returns false if any pre-Volta device is available.
Definition at line 450 of file CudaMgr.cpp.
References device_count_, and device_properties_.
void CudaMgr_Namespace::CudaMgr::setContext | ( | const int | device_num | ) | const |
Definition at line 511 of file CudaMgr.cpp.
References CHECK_LT, and device_contexts_.
Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), CudaMgr(), QueryEngine::QueryEngine(), setDeviceMem(), and synchronizeDevices().
void CudaMgr_Namespace::CudaMgr::setDeviceMem | ( | int8_t * | device_ptr, |
const unsigned char | uc, | ||
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 418 of file CudaMgr.cpp.
References setContext().
Referenced by zeroDeviceMem().
void CudaMgr_Namespace::CudaMgr::synchronizeDevices | ( | ) | const |
Definition at line 120 of file CudaMgr.cpp.
References device_count_, and setContext().
Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().
void CudaMgr_Namespace::CudaMgr::zeroDeviceMem | ( | int8_t * | device_ptr, |
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 411 of file CudaMgr.cpp.
References setDeviceMem().
|
private |
Definition at line 267 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), getContext(), setContext(), and ~CudaMgr().
|
private |
Definition at line 261 of file CudaMgr.h.
Referenced by CudaMgr(), getDeviceCount(), isArchMaxwellOrLaterForAll(), isArchVoltaOrGreaterForAll(), synchronizeDevices(), and ~CudaMgr().
|
private |
Definition at line 266 of file CudaMgr.h.
Referenced by CudaMgr(), and getDeviceGroup().
|
mutable private |
Definition at line 268 of file CudaMgr.h.
Referenced by allocateDeviceMem(), copyDeviceToHost(), freeDeviceMem(), and ~CudaMgr().
|
private |
Definition at line 265 of file CudaMgr.h.
Referenced by CudaMgr(), getAllDeviceProperties(), getDeviceArch(), getDeviceProperties(), isArchMaxwell(), isArchMaxwellOrLater(), isArchMaxwellOrLaterForAll(), isArchPascal(), isArchPascalOrLater(), and isArchVoltaOrGreaterForAll().
|
private |
Definition at line 264 of file CudaMgr.h.
Referenced by getMinNumMPsForAllDevices().
|
private |
Definition at line 263 of file CudaMgr.h.
Referenced by getMinSharedMemoryPerBlockForAllDevices().
|
private |
Definition at line 262 of file CudaMgr.h.
Referenced by allocateDeviceMem(), CudaMgr(), and getStartGpu().