25 #include <level_zero/ze_api.h>
29 L0Driver::L0Driver(ze_driver_handle_t handle) : driver_(handle) {
30 ze_context_desc_t ctx_desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC,
nullptr, 0};
31 L0_SAFE_CALL(zeContextCreate(driver_, &ctx_desc, &context_));
33 uint32_t device_count = 0;
34 L0_SAFE_CALL(zeDeviceGet(driver_, &device_count,
nullptr));
36 std::vector<ze_device_handle_t> devices(device_count);
37 L0_SAFE_CALL(zeDeviceGet(driver_, &device_count, devices.data()));
39 for (
auto device : devices) {
40 ze_device_properties_t device_properties;
41 L0_SAFE_CALL(zeDeviceGetProperties(device, &device_properties));
42 if (ZE_DEVICE_TYPE_GPU == device_properties.type) {
43 devices_.push_back(std::make_shared<L0Device>(*
this, device));
// Destructor: destroys context_ with zeContextDestroy and keeps the returned
// status. The condition guarding the std::cerr message is not visible in this
// excerpt; presumably it is printed when status is non-zero — TODO confirm.
48 L0Driver::~L0Driver() {
49 auto status = (zeContextDestroy(context_));
51 std::cerr <<
"Non-zero status for context destructor" << std::endl;
// Const accessor returning a ze_context_handle_t. The body is not visible in
// this excerpt; presumably it returns context_ — TODO confirm.
55 ze_context_handle_t L0Driver::ctx()
const {
// Const accessor returning a ze_driver_handle_t. The body is not visible in
// this excerpt; presumably it returns driver_ — TODO confirm.
59 ze_driver_handle_t L0Driver::driver()
const {
// Driver enumeration (function header not visible in this excerpt; presumably
// the body of get_drivers() — TODO confirm). The first zeDriverGet call, with a
// null buffer, fills in driver_count; the second fills `handles`. Each handle
// is then wrapped in a shared L0Driver.
// NOTE(review): both zeDriverGet return codes are discarded, unlike the
// L0_SAFE_CALL-wrapped calls elsewhere in this file.
// NOTE(review): the loop index is a signed int compared against the uint32_t
// driver_count.
69 uint32_t driver_count = 0;
70 zeDriverGet(&driver_count,
nullptr);
72 std::vector<ze_driver_handle_t> handles(driver_count);
73 zeDriverGet(&driver_count, handles.data());
75 std::vector<std::shared_ptr<L0Driver>>
result(driver_count);
76 for (
int i = 0; i < driver_count; i++) {
77 result[i] = std::make_shared<L0Driver>(handles[i]);
82 L0CommandList::L0CommandList(ze_command_list_handle_t handle) : handle_(handle) {}
// Submission fragment (function header and the start of the second statement
// are not visible in this excerpt): executes this single command list on
// `queue`, then synchronizes on the same queue.
// NOTE(review): the timeout passed to zeCommandQueueSynchronize is the uint32_t
// maximum. In Level Zero 1.x the parameter is a uint64_t and only UINT64_MAX
// means "wait indefinitely", so this would be a finite timeout — confirm
// against the ze_api.h in use.
86 L0_SAFE_CALL(zeCommandQueueExecuteCommandLists(queue.handle(), 1, &handle_,
nullptr));
88 zeCommandQueueSynchronize(queue.handle(), std::numeric_limits<uint32_t>::max()));
// Const accessor returning a ze_command_list_handle_t. The body is not visible
// in this excerpt; presumably it returns handle_ — TODO confirm.
91 ze_command_list_handle_t L0CommandList::handle()
const {
// Destructor of L0CommandList. The body is not visible in this excerpt.
95 L0CommandList::~L0CommandList() {
// Copy fragment (function header and the opening of the first statement are
// not visible in this excerpt): appends a copy of num_bytes from src to dst to
// this command list, with no signal event and no wait events, then appends a
// barrier to the same list.
101 zeCommandListAppendMemoryCopy(handle_, dst, src, num_bytes,
nullptr, 0,
nullptr));
102 L0_SAFE_CALL(zeCommandListAppendBarrier(handle_,
nullptr, 0,
nullptr));
// Device allocation fragment (function header and the name of the allocation
// call are not visible in this excerpt). Fills a device memory allocation
// descriptor field by field (no extension chain, flags 0, ordinal 0) and passes
// it, together with num_bytes and the device's context and handle, to a call
// that writes its result into `mem`. The literal 0 after num_bytes is
// presumably the alignment argument — TODO confirm.
106 ze_device_mem_alloc_desc_t alloc_desc;
107 alloc_desc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
108 alloc_desc.pNext =
nullptr;
109 alloc_desc.flags = 0;
110 alloc_desc.ordinal = 0;
114 device.ctx(), &alloc_desc, num_bytes, 0 , device.device(), &mem));
// Constructs a device wrapper: stores the device handle and the owning driver,
// creates a command queue on the driver's context for this device and wraps the
// queue handle in a shared L0CommandQueue (command_queue_).
// Some descriptor fields and the opening of the zeCommandQueueCreate statement
// are not visible in this excerpt; the visible fields select the default queue
// mode and normal priority.
// NOTE(review): driver_ is initialised from a const reference; if it is stored
// as a reference, the L0Driver must outlive this object — confirm in the header.
118 L0Device::L0Device(
const L0Driver& driver, ze_device_handle_t device)
119 : device_(device), driver_(driver) {
120 ze_command_queue_handle_t queue_handle;
121 ze_command_queue_desc_t command_queue_desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
126 ZE_COMMAND_QUEUE_MODE_DEFAULT,
127 ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
129 zeCommandQueueCreate(driver_.ctx(), device_, &command_queue_desc, &queue_handle));
131 command_queue_ = std::make_shared<L0CommandQueue>(queue_handle);
134 L0Device::~L0Device() {}
136 ze_context_handle_t L0Device::ctx()
const {
137 return driver_.ctx();
// Const accessor returning a ze_device_handle_t. The body is not visible in
// this excerpt; presumably it returns device_ — TODO confirm.
139 ze_device_handle_t L0Device::device()
const {
142 std::shared_ptr<L0CommandQueue> L0Device::command_queue()
const {
143 return command_queue_;
// Creates a new command list on this device's context and returns it wrapped in
// a uniquely owned L0CommandList. Most descriptor fields are not visible in
// this excerpt.
// NOTE(review): the result of zeCommandListCreate is not checked, and `res` has
// no initial value, so a failed call would wrap an indeterminate handle.
146 std::unique_ptr<L0CommandList> L0Device::create_command_list()
const {
147 ze_command_list_desc_t desc = {
148 ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC,
153 ze_command_list_handle_t
res;
154 zeCommandListCreate(ctx(), device_, &desc, &res);
155 return std::make_unique<L0CommandList>(
res);
158 L0CommandQueue::L0CommandQueue(ze_command_queue_handle_t handle) : handle_(handle) {}
// Const accessor returning a ze_command_queue_handle_t. The body is not visible
// in this excerpt; presumably it returns handle_ — TODO confirm.
160 ze_command_queue_handle_t L0CommandQueue::handle()
const {
// Destructor: destroys the wrapped queue with zeCommandQueueDestroy and keeps
// the returned status. The condition guarding the std::cerr message is not
// visible in this excerpt; presumably it is printed when status is non-zero —
// TODO confirm.
164 L0CommandQueue::~L0CommandQueue() {
165 auto status = (zeCommandQueueDestroy(handle_));
167 std::cerr <<
"Non-zero status for command queue destructor" << std::endl;
// Module creation fragment (function header, several descriptor fields and all
// conditionals are not visible in this excerpt). Describes `code` as a SPIR-V
// IL module with no specialization constants, creates the module on this
// device's context while requesting a build log, reads the log text with the
// usual size-then-content pair of calls, appends it to "log.txt" (reporting on
// std::cerr when the file cannot be opened), and returns the module handle
// wrapped in a shared L0Module.
// NOTE(review): how `status` is handled, and whether the build log handle is
// destroyed, cannot be seen here — confirm in the full source.
174 ze_module_desc_t desc{
175 .stype = ZE_STRUCTURE_TYPE_MODULE_DESC,
177 .format = ZE_MODULE_FORMAT_IL_SPIRV,
179 .pInputModule = code,
181 .pConstants =
nullptr,
183 ze_module_handle_t handle;
184 ze_module_build_log_handle_t buildlog =
nullptr;
186 auto status = zeModuleCreate(ctx(), device_, &desc, &handle, &buildlog);
189 L0_SAFE_CALL(zeModuleBuildLogGetString(buildlog, &logSize,
nullptr));
190 std::vector<char> strLog(logSize);
191 L0_SAFE_CALL(zeModuleBuildLogGetString(buildlog, &logSize, strLog.data()));
193 out.open(
"log.txt", std::ios::app);
195 std::cerr <<
"Unable to open log file.\n";
197 out << std::string(strLog.begin(), strLog.end());
204 return std::make_shared<L0Module>(handle);
// Picks device number device_id of the first driver (enclosing function not
// visible in this excerpt).
// NOTE(review): no check is visible that drivers_ is non-empty or that
// device_id is within range.
214 auto& device =
drivers_[0]->devices()[device_id];
218 L0Module::L0Module(ze_module_handle_t handle) : handle_(handle) {}
// Const accessor returning a ze_module_handle_t. The body is not visible in
// this excerpt; presumably it returns handle_ — TODO confirm.
220 ze_module_handle_t L0Module::handle()
const {
// Destructor: destroys the wrapped module with zeModuleDestroy and keeps the
// returned status. The condition guarding the std::cerr message is not visible
// in this excerpt; presumably it is printed when status is non-zero — TODO
// confirm.
224 L0Module::~L0Module() {
225 auto status = zeModuleDestroy(handle_);
227 std::cerr <<
"Non-zero status for command module destructor" << std::endl;
// Kernel creation fragment (function header and the remaining descriptor fields
// are not visible in this excerpt): creates a kernel from this module's handle
// and returns it wrapped in a shared L0Kernel constructed with x, y and z.
235 ze_kernel_desc_t desc{
236 .stype = ZE_STRUCTURE_TYPE_KERNEL_DESC,
241 ze_kernel_handle_t handle;
242 L0_SAFE_CALL(zeKernelCreate(this->handle_, &desc, &handle));
243 return std::make_shared<L0Kernel>(handle, x, y, z);
246 L0Kernel::L0Kernel(ze_kernel_handle_t handle, uint32_t x, uint32_t y, uint32_t z)
247 : handle_(handle), group_size_({x, y, z}) {
248 zeKernelSetGroupSize(handle_, x, y, z);
// Non-const accessor returning a mutable ze_group_count_t reference. The body
// is not visible in this excerpt; presumably it returns group_size_ — TODO
// confirm.
251 ze_group_count_t& L0Kernel::group_size() {
// Const accessor returning a ze_kernel_handle_t. The body is not visible in
// this excerpt; presumably it returns handle_ — TODO confirm.
255 ze_kernel_handle_t L0Kernel::handle()
const {
// Destructor: destroys the wrapped kernel with zeKernelDestroy and keeps the
// returned status. The condition guarding the std::cerr message is not visible
// in this excerpt; presumably it is printed when status is non-zero — TODO
// confirm.
259 L0Kernel::~L0Kernel() {
260 auto status = zeKernelDestroy(handle_);
262 std::cerr <<
"Non-zero status for command kernel destructor" << std::endl;
267 const int8_t* host_ptr,
268 const size_t num_bytes,
269 const int device_num) {
// Host-to-device copy (start of the signature and the end of the body are not
// visible in this excerpt): selects device device_num of the first driver,
// creates a command list on it, fetches the device's command queue and records
// a copy of num_bytes from host_ptr into device_ptr.
// NOTE(review): no range check on device_num is visible.
270 auto& device =
drivers()[0]->devices()[device_num];
271 auto cl = device->create_command_list();
272 auto queue = device->command_queue();
274 cl->copy(device_ptr, host_ptr, num_bytes);
279 const int8_t* device_ptr,
280 const size_t num_bytes,
281 const int device_num) {
// Device-to-host copy (start of the signature and the end of the body are not
// visible in this excerpt): selects device device_num of the first driver,
// creates a command list on it, fetches the device's command queue and records
// a copy of num_bytes from device_ptr into host_ptr.
// NOTE(review): no range check on device_num is visible.
282 auto& device =
drivers_[0]->devices()[device_num];
283 auto cl = device->create_command_list();
284 auto queue = device->command_queue();
286 cl->copy(host_ptr, device_ptr, num_bytes);
292 const size_t num_bytes,
293 const int dest_device_num,
294 const int src_device_num) {
313 const size_t num_bytes,
314 const int device_num) {
318 const unsigned char uc,
319 const size_t num_bytes,
320 const int device_num) {
// Device memory fill (start of the signature and the name of the fill call are
// not visible in this excerpt): selects device device_num of the first driver,
// creates a command list, records an operation on device_ptr that takes the
// address of uc with a size of 1 and num_bytes, then submits the list on the
// device's command queue.
// NOTE(review): no range check on device_num is visible.
321 auto& device =
drivers_[0]->devices()[device_num];
322 auto cl = device->create_command_list();
324 cl->handle(), device_ptr, &uc, 1, num_bytes,
nullptr, 0,
nullptr));
325 cl->submit(*device->command_queue());
// Synchronization fragment (function header not visible in this excerpt):
// synchronizes on the command queue of every device of the first driver.
// NOTE(review): the timeout is the uint32_t maximum. In Level Zero 1.x the
// parameter is a uint64_t and only UINT64_MAX means "wait indefinitely", so
// this would be a finite timeout — confirm against the ze_api.h in use.
329 for (
auto& device :
drivers_[0]->devices()) {
330 L0_SAFE_CALL(zeCommandQueueSynchronize(device->command_queue()->handle(),
331 std::numeric_limits<uint32_t>::max()));
void freeDeviceMem(int8_t *device_ptr)
void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes, const int device_num)
#define L0_SAFE_CALL(call)
void copyHostToDevice(int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
std::shared_ptr< L0Module > create_module(uint8_t *code, size_t len, bool log=false) const
std::vector< std::shared_ptr< L0Driver > > get_drivers()
DEVICE auto copy(ARGS &&...args)
void copyDeviceToDevice(int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
std::vector< std::shared_ptr< L0Device > > devices_
void synchronizeDevices() const
void copyDeviceToHost(int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
void freePinnedHostMem(int8_t *host_ptr)
const std::vector< std::shared_ptr< L0Driver > > & drivers() const
int8_t * allocatePinnedHostMem(const size_t num_bytes)
std::vector< std::shared_ptr< L0Driver > > drivers_
void * allocate_device_mem(const size_t num_bytes, L0Device &device)
int8_t * allocateDeviceMem(const size_t num_bytes, const int device_num)
const std::vector< std::shared_ptr< L0Device > > & devices() const
std::shared_ptr< L0Kernel > create_kernel(const char *name, uint32_t x, uint32_t y, uint32_t z) const