OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sample.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
#include "Logger/Logger.h"
#include "cuda.h"
25 
// Small timing helper.
//
// TimeT is the std::chrono duration type the elapsed time is reported in
// (milliseconds by default).
template <typename TimeT = std::chrono::milliseconds>
struct measure {
  // Invokes `func` with `args` once and returns the elapsed time, measured with
  // std::chrono::steady_clock, as a tick count of TimeT (duration_cast, so any
  // fractional tick is truncated).
  //
  // The callable is taken by forwarding reference so it is never copied: this
  // accepts non-copyable callables and lets a stateful functor passed as an
  // lvalue keep whatever state the call changed.
  template <typename F, typename... Args>
  static typename TimeT::rep execution(F&& func, Args&&... args) {
    const auto start = std::chrono::steady_clock::now();
    std::forward<F>(func)(std::forward<Args>(args)...);
    const auto elapsed =
        std::chrono::duration_cast<TimeT>(std::chrono::steady_clock::now() - start);
    return elapsed.count();
  }
};
37 
39  if (err != CUDA_SUCCESS) {
40  std::cout << err << std::endl;
41  }
42  assert(err == CUDA_SUCCESS);
43 }
44 
46 int main(int argc, char** argv) {
47  CUdevice device;
48  CUmodule cudaModule;
49  CUcontext context;
50  CUfunction function;
51 
52  // CUDA initialization
53  checkCudaErrors(cuInit(0));
54  checkCudaErrors(cuDeviceGet(&device, 0));
55 
56  std::ifstream t("kernel.ptx");
57  std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
58 
59  // Create driver context
60  checkCudaErrors(cuCtxCreate(&context, 0, device));
61 
62  // Create module for object
63  checkCudaErrors(cuModuleLoadDataEx(&cudaModule, str.c_str(), 0, 0, 0));
64 
65  // Get kernel function
66  checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "kernel"));
67 
68  int64_t N = 1000000000LL;
69  int8_t* byte_stream_col_0 = new int8_t[N];
70  memset(byte_stream_col_0, 42, N);
71 
72  CUdeviceptr devBufferA;
73  checkCudaErrors(cuMemAlloc(&devBufferA, sizeof(int8_t) * N));
74  checkCudaErrors(cuMemcpyHtoD(devBufferA, byte_stream_col_0, sizeof(int8_t) * N));
75 
76  CUdeviceptr devBufferAA;
77  checkCudaErrors(cuMemAlloc(&devBufferAA, sizeof(CUdeviceptr)));
78  checkCudaErrors(cuMemcpyHtoD(devBufferAA, &devBufferA, sizeof(CUdeviceptr)));
79 
80  unsigned blockSizeX = 128;
81  unsigned blockSizeY = 1;
82  unsigned blockSizeZ = 1;
83  unsigned gridSizeX = 128;
84  unsigned gridSizeY = 1;
85  unsigned gridSizeZ = 1;
86 
87  CUdeviceptr devBufferB;
88  int64_t* result_vec = new int64_t[blockSizeX * gridSizeX * sizeof(int64_t)];
89  checkCudaErrors(cuMemAlloc(&devBufferB, blockSizeX * gridSizeX * sizeof(int64_t)));
90 
91  CUdeviceptr devBufferN;
92  int64_t row_count = N;
93  checkCudaErrors(cuMemAlloc(&devBufferN, sizeof(int64_t)));
94  checkCudaErrors(cuMemcpyHtoD(devBufferN, &row_count, sizeof(int64_t)));
95 
96  CUdeviceptr devBufferI;
97  int64_t init_agg_val = 0;
98  checkCudaErrors(cuMemAlloc(&devBufferI, sizeof(int64_t)));
99  checkCudaErrors(cuMemcpyHtoD(devBufferI, &init_agg_val, sizeof(int64_t)));
100 
101  void* KernelParams[] = {&devBufferAA, &devBufferN, &devBufferI, &devBufferB};
102 
104  checkCudaErrors(cuLaunchKernel(function,
105  gridSizeX,
106  gridSizeY,
107  gridSizeZ,
108  blockSizeX,
109  blockSizeY,
110  blockSizeZ,
111  0,
112  NULL,
113  KernelParams,
114  NULL));
116  cuMemcpyDtoH(result_vec, devBufferB, blockSizeX * gridSizeX * sizeof(int64_t)));
117  });
118 
119  int64_t result = 0;
120  for (size_t i = 0; i < blockSizeX * gridSizeX; ++i) {
121  result += result_vec[i];
122  }
123  std::cout << result << std::endl;
124 
125  delete[] result_vec;
126  delete[] byte_stream_col_0;
127 
128  // Clean-up
129  checkCudaErrors(cuMemFree(devBufferA));
130  checkCudaErrors(cuMemFree(devBufferAA));
131  checkCudaErrors(cuMemFree(devBufferB));
132  checkCudaErrors(cuMemFree(devBufferN));
133  checkCudaErrors(cuModuleUnload(cudaModule));
134  checkCudaErrors(cuCtxDestroy(context));
135 
136  return 0;
137 }
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
int CUcontext
Definition: nocuda.h:22
#define LOG(tag)
Definition: Logger.h:285
void checkCudaErrors(CUresult err)
Definition: sample.cpp:38
unsigned long long CUdeviceptr
Definition: nocuda.h:28
void * CUfunction
Definition: nocuda.h:25
int CUresult
Definition: nocuda.h:21
int CUdevice
Definition: nocuda.h:20
constexpr unsigned N
Definition: Utm.h:110
void * CUmodule
Definition: nocuda.h:24