allocateDeviceMem(const size_t num_bytes, const int device_num, const bool is_slab=false) | CudaMgr_Namespace::CudaMgr | virtual |
allocatePinnedHostMem(const size_t num_bytes) | CudaMgr_Namespace::CudaMgr | |
computePaddedBufferSize(size_t buf_size, size_t granularity) const | CudaMgr_Namespace::CudaMgr | |
copyDeviceToDevice(int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num, CUstream cuda_stream=0) | CudaMgr_Namespace::CudaMgr | |
copyDeviceToHost(int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, CUstream cuda_stream=0) | CudaMgr_Namespace::CudaMgr | |
copyHostToDevice(int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) | CudaMgr_Namespace::CudaMgr | |
CudaMgr(const int num_gpus, const int start_gpu=0) | CudaMgr_Namespace::CudaMgr | |
device_contexts_ | CudaMgr_Namespace::CudaMgr | private |
device_count_ | CudaMgr_Namespace::CudaMgr | private |
device_group_ | CudaMgr_Namespace::CudaMgr | private |
device_mutex_ | CudaMgr_Namespace::CudaMgr | mutable private |
device_properties_ | CudaMgr_Namespace::CudaMgr | private |
deviceArchToSM(const NvidiaDeviceArch arch) | CudaMgr_Namespace::CudaMgr | inline static |
freeDeviceMem(int8_t *device_ptr) | CudaMgr_Namespace::CudaMgr | |
freePinnedHostMem(int8_t *host_ptr) | CudaMgr_Namespace::CudaMgr | |
getAllDeviceProperties() const | CudaMgr_Namespace::CudaMgr | inline |
getContext() const | CudaMgr_Namespace::CudaMgr | |
getDeviceArch() const | CudaMgr_Namespace::CudaMgr | inline |
getDeviceCount() const | CudaMgr_Namespace::CudaMgr | inline |
getDeviceGroup() const | CudaMgr_Namespace::CudaMgr | inline |
getDeviceProperties(const size_t device_num) const | CudaMgr_Namespace::CudaMgr | inline |
getGranularity(const int device_num) const | CudaMgr_Namespace::CudaMgr | |
getMinNumMPsForAllDevices() const | CudaMgr_Namespace::CudaMgr | inline |
getMinSharedMemoryPerBlockForAllDevices() const | CudaMgr_Namespace::CudaMgr | inline |
getStartGpu() const | CudaMgr_Namespace::CudaMgr | inline |
isArchMaxwell() const | CudaMgr_Namespace::CudaMgr | inline |
isArchMaxwellOrLater() const | CudaMgr_Namespace::CudaMgr | inline |
isArchMaxwellOrLaterForAll() const | CudaMgr_Namespace::CudaMgr | |
isArchPascal() const | CudaMgr_Namespace::CudaMgr | inline |
isArchPascalOrLater() const | CudaMgr_Namespace::CudaMgr | inline |
isArchVoltaOrGreaterForAll() const | CudaMgr_Namespace::CudaMgr | |
min_num_mps_for_all_devices | CudaMgr_Namespace::CudaMgr | private |
min_shared_memory_per_block_for_all_devices | CudaMgr_Namespace::CudaMgr | private |
setContext(const int device_num) const | CudaMgr_Namespace::CudaMgr | |
setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) | CudaMgr_Namespace::CudaMgr | |
start_gpu_ | CudaMgr_Namespace::CudaMgr | private |
synchronizeDevices() const | CudaMgr_Namespace::CudaMgr | |
zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) | CudaMgr_Namespace::CudaMgr | |
~CudaMgr() | CudaMgr_Namespace::CudaMgr | virtual |