21 #include <boost/filesystem/operations.hpp>
// Constructor fragment: the declarator line is not visible in this excerpt, only the
// member-initializer list and part of the body (closing braces are also not visible).
// NOTE(review): the members set here match those read through `cubin_result` elsewhere
// in the file, so this is presumably the CubinResult constructor -- confirm.
//
// Starts with a null cubin pointer, a value-initialized CUlinkState, and zero for both
// cubin_size and jit_wall_time_idx, then builds the JIT option key/value vectors.
28 : cubin(nullptr), link_state(
CUlinkState{}), cubin_size(0u), jit_wall_time_idx(0u) {
// Size, in bytes, reported for each JIT log buffer in the option table below.
29 constexpr
size_t JIT_LOG_SIZE = 8192u;
// Guards the `JIT_LOG_SIZE - 1u` computations below against unsigned wrap-around.
30 static_assert(0u < JIT_LOG_SIZE);
// Both logs are resized to one character less than the advertised buffer size.
// NOTE(review): presumably the spare byte accounts for the string's null terminator
// -- confirm.
31 info_log.resize(JIT_LOG_SIZE - 1u);
32 error_log.resize(JIT_LOG_SIZE - 1u);
// Table of (option key, option value) pairs. Every value is carried as a void*:
// integer settings are cast into the pointer itself, buffer settings are the address
// of the first character of the corresponding log.
33 std::pair<CUjit_option, void*> options[] = {
34 {CU_JIT_LOG_VERBOSE,
reinterpret_cast<void*
>(1)},
38 {CU_JIT_THREADS_PER_BLOCK,
reinterpret_cast<void*
>(1024)},
// The wall-time slot starts out as nullptr; its index is recorded in the loop below.
// NOTE(review): presumably this slot is overwritten by the driver and read back via
// jitWallTime() -- confirm against the CUjit_option documentation.
39 {CU_JIT_WALL_TIME,
nullptr},
40 {CU_JIT_INFO_LOG_BUFFER,
reinterpret_cast<void*
>(&info_log[0])},
41 {CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
reinterpret_cast<void*
>(JIT_LOG_SIZE)},
42 {CU_JIT_ERROR_LOG_BUFFER,
reinterpret_cast<void*
>(&error_log[0])},
43 {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
reinterpret_cast<void*
>(JIT_LOG_SIZE)}};
// Element count of the table, computed at compile time.
44 constexpr
size_t n_options =
sizeof(options) /
sizeof(*options);
45 option_keys.reserve(n_options);
46 option_values.reserve(n_options);
// Split the table into the parallel option_keys / option_values vectors, remembering
// the position of the CU_JIT_WALL_TIME entry along the way.
47 for (
size_t i = 0; i < n_options; ++i) {
48 option_keys.push_back(options[i].first);
49 option_values.push_back(options[i].second);
50 if (options[i].first == CU_JIT_WALL_TIME) {
51 jit_wall_time_idx = i;
// Sanity check that the recorded index really refers to the CU_JIT_WALL_TIME key; the
// index itself is streamed into the failure message.
54 CHECK_EQ(CU_JIT_WALL_TIME, option_keys[jit_wall_time_idx]) << jit_wall_time_idx;
// Builds the path to the GPU runtime fatbinary by appending "QueryEngine" and
// "cuda_mapd_rt.fatbin" to gpu_rt_path, and throws std::runtime_error (with the path in
// the message) when nothing exists at that location.
// NOTE(review): the line that declares and seeds gpu_rt_path, and the return statement,
// are not visible in this excerpt -- presumably the path is rooted at the installation
// directory and returned to the caller; confirm.
59 boost::filesystem::path get_gpu_rt_path() {
61 gpu_rt_path /=
"QueryEngine";
62 gpu_rt_path /=
"cuda_mapd_rt.fatbin";
// Fail early with a descriptive message rather than handing back a path to a missing file.
63 if (!boost::filesystem::exists(gpu_rt_path)) {
64 throw std::runtime_error(
"HeavyDB GPU runtime library not found at " +
65 gpu_rt_path.string());
// Builds the path to the GPU table functions archive by appending "QueryEngine" and
// "CudaTableFunctions.a" to cuda_table_functions_path, throws std::runtime_error (with
// the path in the message) when nothing exists at that location, and otherwise returns
// the path.
// NOTE(review): the line that declares and seeds cuda_table_functions_path is not
// visible in this excerpt -- presumably it starts at the installation root; confirm.
70 boost::filesystem::path get_cuda_table_functions_path() {
72 cuda_table_functions_path /=
"QueryEngine";
73 cuda_table_functions_path /=
"CudaTableFunctions.a";
// Fail early with a descriptive message rather than handing back a path to a missing file.
74 if (!boost::filesystem::exists(cuda_table_functions_path)) {
75 throw std::runtime_error(
"HeavyDB GPU table functions module not found at " +
76 cuda_table_functions_path.string());
79 return cuda_table_functions_path;
// Fragment of a function whose header, and the opening of each call below, are not
// visible in this excerpt. The visible lines show a sequence of checked steps on
// `cubin_result`; after each step a VLOG(1) line reports cubin_result.jitWallTime(), and
// each failure message appends cubin_result.error_log.
//
// The key and value vectors must be the same length because a single count
// (num_options) is taken for both.
86 CHECK_EQ(cubin_result.option_values.size(), cubin_result.option_keys.size());
87 unsigned const num_options = cubin_result.option_keys.size();
// Trailing arguments of a call whose beginning is not visible: the option arrays and
// the address of the link state. NOTE(review): the log line below says "create link",
// so this is presumably the link-creation call -- confirm.
89 cubin_result.option_keys.data(),
90 cubin_result.option_values.data(),
91 &cubin_result.link_state))
92 <<
": " << cubin_result.error_log.c_str();
93 VLOG(1) <<
"CUDA JIT time to create link: " << cubin_result.jitWallTime();
// Resolve both on-disk inputs; the helpers throw when a file is missing, and the
// CHECKs additionally reject empty paths.
94 boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
95 boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
96 CHECK(!gpu_rt_path.empty());
97 CHECK(!cuda_table_functions_path.empty());
// Input-type argument of a call not fully visible; per the log line below this step
// adds the RT fatbinary.
99 CU_JIT_INPUT_FATBINARY,
104 <<
": " << cubin_result.error_log.c_str();
105 VLOG(1) <<
"CUDA JIT time to add RT fatbinary: " << cubin_result.jitWallTime();
// Input-type and path arguments of a call not fully visible; per the log line below
// this step adds the GPU table functions library.
107 CU_JIT_INPUT_LIBRARY,
108 cuda_table_functions_path.c_str(),
112 <<
": " << cubin_result.error_log.c_str();
113 VLOG(1) <<
"CUDA JIT time to add GPU table functions library: "
114 << cubin_result.jitWallTime();
// Failure-message tail of a final checked call that is not visible in this excerpt.
116 <<
": " << cubin_result.error_log.c_str();
// Takes `text` by const reference and returns a std::string. The visible lines wrap the
// text in a std::stringstream and read it with std::getline using '\n' as the delimiter.
// NOTE(review): the rest of the body (loop, output formatting, return) is not visible in
// this excerpt; going by the name and the "PTX:" dump that calls it, it presumably
// prefixes each line with its line number -- confirm.
119 std::string add_line_numbers(
const std::string& text) {
120 std::stringstream iss(text);
125 std::getline(iss, line,
'\n');
// Fragment of a function whose header, and the opening of each call below, are not
// visible in this excerpt. NOTE(review): it reads a string named `ptx` and fills
// `cubin_result`, so it is presumably the body of ptx_to_cubin -- confirm.
//
// The visible lines show a sequence of checked steps; after each step a VLOG(1) line
// reports cubin_result.jitWallTime(), and each failure message appends
// cubin_result.error_log.
//
// The option key and value vectors must be the same length.
139 CHECK_EQ(cubin_result.option_values.size(), cubin_result.option_keys.size());
// Trailing arguments of a call whose beginning is not visible: the option arrays and
// the address of the link state. NOTE(review): the log line below says "create link",
// so this is presumably the link-creation call -- confirm.
141 cubin_result.option_keys.data(),
142 cubin_result.option_values.data(),
143 &cubin_result.link_state))
144 <<
": " << cubin_result.error_log.c_str();
145 VLOG(1) <<
"CUDA JIT time to create link: " << cubin_result.jitWallTime();
// Resolve both on-disk inputs; the helpers throw when a file is missing, and the
// CHECKs additionally reject empty paths.
147 boost::filesystem::path gpu_rt_path = get_gpu_rt_path();
148 boost::filesystem::path cuda_table_functions_path = get_cuda_table_functions_path();
149 CHECK(!gpu_rt_path.empty());
150 CHECK(!cuda_table_functions_path.empty());
// Input-type argument of a call not fully visible; per the log line below this step
// adds the RT fatbinary.
156 CU_JIT_INPUT_FATBINARY,
161 <<
": " << cubin_result.error_log.c_str();
162 VLOG(1) <<
"CUDA JIT time to add RT fatbinary: " << cubin_result.jitWallTime();
// Input-type and path arguments of a call not fully visible; per the log line below
// this step adds the GPU table functions library.
164 CU_JIT_INPUT_LIBRARY,
165 cuda_table_functions_path.c_str(),
169 <<
": " << cubin_result.error_log.c_str();
170 VLOG(1) <<
"CUDA JIT time to add GPU table functions library: "
171 << cubin_result.jitWallTime();
// The PTX text is passed as a mutable void*, so constness is cast away from c_str().
// NOTE(review): presumably the receiving parameter is a non-const void* that is only
// read -- confirm against the callee's signature.
176 static_cast<void*>(const_cast<char*>(ptx.c_str())),
// On failure the message carries the error log followed by the full PTX with line
// numbers, bracketed by "PTX:" and "EOF PTX" markers.
182 <<
": " << cubin_result.error_log.c_str() <<
"\nPTX:\n"
183 << add_line_numbers(ptx) <<
"\nEOF PTX";
184 VLOG(1) <<
"CUDA JIT time to add generated code: " << cubin_result.jitWallTime();
// Trailing arguments of a call whose beginning is not visible: it receives the link
// state and the addresses of cubin / cubin_size, which are validated just below.
186 cubin_result.link_state, &cubin_result.cubin, &cubin_result.cubin_size))
187 <<
": " << cubin_result.error_log.c_str();
188 VLOG(1) <<
"CUDA Linker completed: " << cubin_result.info_log.c_str();
// The result must be a non-null image with a non-zero size.
189 CHECK(cubin_result.cubin);
190 CHECK_LT(0u, cubin_result.cubin_size);
191 VLOG(1) <<
"Generated GPU binary code size: " << cubin_result.cubin_size <<
" bytes";
// Constructor fragment: the declarator line, some parameters and some member
// initializers are not visible in this excerpt.
// NOTE(review): the visible parameters match the GpuDeviceCompilationContext
// constructor signature listed elsewhere in this file -- confirm.
//
// Stores the module size, kernel name, device id and CUDA manager, loads the module
// image through the manager, then looks up the kernel function by name.
196 const size_t module_size,
197 const std::string& kernel_name,
// The manager arrives type-erased as const void* and is cast back in the initializer
// list below.
199 const void* cuda_mgr,
200 unsigned int num_options,
204 , module_size_(module_size)
206 , kernel_name_(kernel_name)
207 , device_id_(device_id)
208 , cuda_mgr_(static_cast<const CudaMgr_Namespace::CudaMgr*>(cuda_mgr)) {
// Message tail of a check whose condition is not visible in this excerpt.
// NOTE(review): presumably it fires when cuda_mgr_ is null -- confirm.
210 <<
"Unable to initialize GPU compilation context without CUDA manager";
// Load the module image into module_ for device_id_, forwarding the JIT options.
211 cuda_mgr_->loadGpuModuleData(
212 &module_, image, num_options, options, option_vals, device_id_);
// Resolve the kernel entry point by name from the loaded module; the CUresult is
// passed to checkCudaErrors.
214 checkCudaErrors(cuModuleGetFunction(&kernel_, module_, kernel_name_.c_str()));
// Asks the CUDA manager to unload module_ for device_id_.
// NOTE(review): the enclosing function header is not visible in this excerpt; this is
// presumably the GpuDeviceCompilationContext destructor body, mirroring the
// loadGpuModuleData call made in the constructor -- confirm.
221 cuda_mgr_->unloadGpuModuleData(&
module_, device_id_);
std::string get_root_abs_path()
void checkCudaErrors(CUresult err)
void setContext(const int device_num) const
#define LOG_IF(severity, condition)
int getDeviceCount() const
CubinResult ptx_to_cubin(const std::string &ptx, const CudaMgr_Namespace::CudaMgr *cuda_mgr)
GpuDeviceCompilationContext(const void *image, const size_t module_size, const std::string &kernel_name, const int device_id, const void *cuda_mgr, unsigned int num_options, CUjit_option *options, void **option_vals)
#define DEBUG_TIMER(name)
~GpuDeviceCompilationContext()