Skip to content

Commit

Permalink
update save load and build function
Browse files Browse the repository at this point in the history
  • Loading branch information
StRigaud committed Oct 6, 2023
1 parent 47a6f94 commit 4b19066
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 96 deletions.
14 changes: 2 additions & 12 deletions clic/include/backend.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef __INCLUDE_BACKEND_HPP
#define __INCLUDE_BACKEND_HPP

#include "cache.hpp"
#include "clic.hpp"
#include "device.hpp"
#include "utils.hpp"
Expand Down Expand Up @@ -128,10 +129,7 @@ class Backend
const std::string & kernel_source,
const std::string & kernel_name,
void * kernel) const -> void = 0;
virtual auto
loadProgramFromCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void = 0;
virtual auto
saveProgramToCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void = 0;

virtual auto
executeKernel(const Device::Pointer & device,
const std::string & kernel_source,
Expand Down Expand Up @@ -296,10 +294,6 @@ class CUDABackend : public Backend
const dType & dtype,
const float & value) -> void;

auto
loadProgramFromCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void override;
auto
saveProgramToCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void override;
auto
buildKernel(const Device::Pointer & device,
const std::string & kernel_source,
Expand Down Expand Up @@ -476,10 +470,6 @@ class OpenCLBackend : public Backend
const dType & dtype,
const float & value) -> void;

auto
loadProgramFromCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void override;
auto
saveProgramToCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void override;
auto
buildKernel(const Device::Pointer & device,
const std::string & kernel_source,
Expand Down
110 changes: 67 additions & 43 deletions clic/src/cudabackend.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "backend.hpp"
#include "cle_preamble_cu.h"

#include <string>

namespace cle
{

Expand Down Expand Up @@ -706,37 +708,53 @@ CUDABackend::setMemory(const Device::Pointer & device,
#endif
}

auto
CUDABackend::loadProgramFromCache(const Device::Pointer & device, const std::string & hash, void * program) const
-> void
{
#if USE_CUDA
if (auto cuda_device = std::dynamic_pointer_cast<CUDADevice>(device))
static auto
saveBinaryToCache(const std::string & hash, const std::string & program) -> void
{
std::filesystem::path binary_path = CU_CACHE_FOLDER_PATH / std::filesystem::path(hash + ".ptx");
std::ofstream outfile(binary_path);
if (!outfile)
{
const auto & cache = cuda_device->getCache();
auto ite = cache.find(hash);
if (ite != cache.end())
{
*static_cast<CUmodule *>(program) = ite->second;
}
throw std::runtime_error("Error: Fail to open binary cache file.");
}
outfile.write(program.c_str(), program.size());
if (!outfile.good())
{
throw std::runtime_error("Error: Fail to write binary cache file.");
}
#else
throw std::runtime_error("Error: CUDA is not enabled");
#endif
}

auto
CUDABackend::saveProgramToCache(const Device::Pointer & device, const std::string & hash, void * program) const -> void
static auto
loadBinaryFromCache(const Device::Pointer & device, const std::string & hash, std::string & ptx) -> void
{
#if USE_CUDA
if (auto cuda_device = std::dynamic_pointer_cast<CUDADevice>(device))
cl_int err;
std::filesystem::path binary_path = CU_CACHE_FOLDER_PATH / std::filesystem::path(hash + ".ptx");
if (!std::filesystem::exists(binary_path))
{
cuda_device->getCache().emplace(hash, *reinterpret_cast<CUmodule *>(program));
return;
}
std::ifstream ptx_file(binary_path);
if (!ptx_file.is_open())
{
throw std::runtime_error("Error: Fail to open binary cache file.");
}
ptx_file.seekg(0, std::ios::end);
size_t size = ptx_file.tellg();
if (size == -1)
{
throw std::runtime_error("Error: Problem encountered while reading the file");
}

ptx_file.seekg(0, std::ios::beg);
ptx.resize(size);
ptx.assign((std::istreambuf_iterator<char>(ptx_file)), std::istreambuf_iterator<char>());
if (ptx_file.fail())
{
throw std::runtime_error("Error: Fail to read PTX file.");
}
#else
throw std::runtime_error("Error: CUDA is not enabled");
#endif
}
#endif


auto
Expand All @@ -753,11 +771,10 @@ CUDABackend::buildKernel(const Device::Pointer & device,
throw std::runtime_error("Error: Fail to get context from device.\nCUDA error : " + getErrorString(err) + " (" +
std::to_string(err) + ").");
}

CUmodule cuModule = nullptr;
std::string ptx;
std::string hash = std::to_string(std::hash<std::string>{}(kernel_source));
loadProgramFromCache(device, hash, &cuModule);
if (cuModule == nullptr)
loadBinaryFromCache(device, hash, ptx);
if (ptx.empty())
{
nvrtcProgram prog;
auto res = nvrtcCreateProgram(&prog, kernel_source.c_str(), nullptr, 0, nullptr, nullptr);
Expand All @@ -766,7 +783,6 @@ CUDABackend::buildKernel(const Device::Pointer & device,
throw std::runtime_error("Error: Fail to create kernel program from source.\nCUDA error : " +
getErrorString(res) + " (" + std::to_string(res) + ").");
}

const std::string arch_comp = "--gpu-architecture=compute_" + cuda_device->getArch();
const std::string woff = "--disable-warnings";
const std::array<const char *, 2> options = { arch_comp.c_str(), woff.c_str() };
Expand All @@ -781,26 +797,34 @@ CUDABackend::buildKernel(const Device::Pointer & device,
throw std::runtime_error("Error: Fail to build kernel program.\nCUDA error : " + getErrorString(res) + " (" +
std::to_string(res) + ").");
}
size_t ptxSize;
nvrtcGetPTXSize(prog, &ptxSize);
std::vector<char> ptx(ptxSize);
nvrtcGetPTX(prog, ptx.data());
res = nvrtcDestroyProgram(&prog);
if (res != NVRTC_SUCCESS)
size_t ptxSize;
nvrtcResult result = nvrtcGetPTXSize(prog, &ptxSize);
if (result != NVRTC_SUCCESS)
{
throw std::runtime_error("Error: Fail to destroy kernel program.\nCUDA error : " + getErrorString(res) + " (" +
std::to_string(res) + ").");
throw std::runtime_error("Error: Fail to get PTX size.\nCUDA error : " + getErrorString(result) + " (" +
std::to_string(result) + ").");
}

err = cuModuleLoadData(&cuModule, ptx.data());
if (err != CUDA_SUCCESS)
ptx.resize(ptxSize + 1); // +1 for null terminator
result = nvrtcGetPTX(prog, &ptx[0]);
if (result != NVRTC_SUCCESS)
{
throw std::runtime_error("Error: Fail to load module.\nCUDA error : " + getErrorString(err) + " (" +
std::to_string(err) + ").");
throw std::runtime_error("Error: Fail to get PTX.\nCUDA error : " + getErrorString(result) + " (" +
std::to_string(result) + ").");
}


saveProgramToCache(device, hash, &cuModule);
result = nvrtcDestroyProgram(&prog);
if (result != NVRTC_SUCCESS)
{
throw std::runtime_error("Error: Fail to destroy kernel program.\nCUDA error : " + getErrorString(result) + " (" +
std::to_string(result) + ").");
}
saveBinaryToCache(hash, ptx);
}
CUmodule cuModule;
err = cuModuleLoadData(&cuModule, ptx.c_str());
if (err != CUDA_SUCCESS)
{
throw std::runtime_error("Error: Fail to load module.\nCUDA error : " + getErrorString(err) + " (" +
std::to_string(err) + ").");
}
CUfunction cuFunction;
err = cuModuleGetFunction(&cuFunction, cuModule, kernel_name.c_str());
Expand Down
122 changes: 81 additions & 41 deletions clic/src/openclbackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -969,38 +969,89 @@ OpenCLBackend::setImage(const Device::Pointer & device,
#endif
}

auto
OpenCLBackend::loadProgramFromCache(const Device::Pointer & device, const std::string & hash, void * program) const
-> void
{
#if USE_OPENCL
if (auto opencl_device = std::dynamic_pointer_cast<OpenCLDevice>(device))
static auto
buildProgram(const Device::Pointer & device, cl_program program) -> void
{
auto opencl_device = std::dynamic_pointer_cast<const OpenCLDevice>(device);
cl_int buildStatus = clBuildProgram(program, 1, &opencl_device->getCLDevice(), "-w", nullptr, nullptr);
if (buildStatus != CL_SUCCESS)
{
const auto & cache = opencl_device->getCache();
auto ite = cache.find(hash);
if (ite != cache.end())
{
*static_cast<cl_program *>(program) = ite->second;
}
size_t len;
std::string buffer;
clGetProgramBuildInfo(program, opencl_device->getCLDevice(), CL_PROGRAM_BUILD_LOG, 0, nullptr, &len);
buffer.resize(len);
clGetProgramBuildInfo(program, opencl_device->getCLDevice(), CL_PROGRAM_BUILD_LOG, len, &buffer[0], &len);
std::cerr << "Build log: " << buffer << std::endl;
throw std::runtime_error("Error: Fail to build program.\nOpenCL error : " + getErrorString(buildStatus) + " (" +
std::to_string(buildStatus) + ").");
}
#else
throw std::runtime_error("Error: OpenCL is not enabled");
#endif
}

auto
OpenCLBackend::saveProgramToCache(const Device::Pointer & device, const std::string & hash, void * program) const
-> void
static auto
saveBinaryToCache(const std::string & hash, cl_program program) -> void
{
#if USE_OPENCL
if (auto opencl_device = std::dynamic_pointer_cast<OpenCLDevice>(device))
size_t binary_size;
auto err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_size, nullptr);
if (err != CL_SUCCESS)
{
opencl_device->getCache().emplace(hash, *static_cast<cl_program *>(program));
throw std::runtime_error("Error: Fail to fetch program binary size.\nOpenCL error : " + getErrorString(err) + " (" +
std::to_string(err) + ").");
}
#else
throw std::runtime_error("Error: OpenCL is not enabled");
#endif
std::unique_ptr<unsigned char[]> binary(new unsigned char[binary_size]);
err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *), &binary, nullptr);
if (err != CL_SUCCESS)
{
throw std::runtime_error("Error: Fail to fetch program binary.\nOpenCL error : " + getErrorString(err) + " (" +
std::to_string(err) + ").");
}
std::filesystem::path binary_path = OLC_CACHE_FOLDER_PATH / std::filesystem::path(hash + ".bin");
std::ofstream outfile(binary_path, std::ios::binary);
if (!outfile)
{
throw std::runtime_error("Error: Fail to open binary cache file.");
}
outfile.write(reinterpret_cast<char *>(binary.get()), binary_size);
if (!outfile.good())
{
throw std::runtime_error("Error: Fail to write binary cache file.");
}
}

/**
 * @brief Restore a built OpenCL program from the on-disk binary cache, if an entry exists.
 *
 * Looks for the file `<hash>.bin` under OLC_CACHE_FOLDER_PATH. If the file is missing or empty the call
 * is a cache miss: `program` is left untouched so the caller can fall back to compiling from source.
 * Otherwise the binary is read, turned into a program with clCreateProgramWithBinary and built for the
 * device.
 *
 * @param device  Device the program is created and built for; must hold an OpenCLDevice.
 * @param hash    Cache key, used as the stem of the cache file name.
 * @param program [out] Receives the built program on a cache hit. It is taken by reference because the
 *                caller tests it against nullptr after the call; a by-value handle would silently
 *                discard the loaded program (and leak it).
 * @throws std::runtime_error if the cache file cannot be opened or read, if the device is not an
 *         OpenCL device, or if creating or building the program from the binary fails.
 */
static auto
loadBinaryFromCache(const Device::Pointer & device, const std::string & hash, cl_program & program) -> void
{
  const std::filesystem::path binary_path = OLC_CACHE_FOLDER_PATH / std::filesystem::path(hash + ".bin");
  if (!std::filesystem::exists(binary_path))
  {
    return;
  }

  // Open positioned at the end so that tellg() reports the file size.
  std::ifstream binary_file(binary_path, std::ios::binary | std::ios::ate);
  if (!binary_file.is_open())
  {
    throw std::runtime_error("Error: Fail to open binary cache file.");
  }
  // tellg() reports failure as -1; keep it signed so the failure is not turned into a huge size_t.
  const std::streamoff file_size = binary_file.tellg();
  if (file_size < 0)
  {
    throw std::runtime_error("Error: Fail to read binary file.");
  }
  if (file_size == 0)
  {
    // An empty entry (e.g. left by an interrupted write) holds no binary: treat it as a cache miss.
    return;
  }

  std::string binary(static_cast<size_t>(file_size), '\0');
  binary_file.seekg(0, std::ios::beg);
  if (!binary_file.read(&binary[0], file_size))
  {
    throw std::runtime_error("Error: Fail to read binary file.");
  }
  binary_file.close();

  auto opencl_device = std::dynamic_pointer_cast<const OpenCLDevice>(device);
  if (!opencl_device)
  {
    throw std::runtime_error("Error: Fail to get OpenCL device.");
  }

  const size_t binary_size = binary.size();
  auto         binary_code_ptr = reinterpret_cast<const unsigned char *>(binary.data());
  cl_int       err = CL_SUCCESS;
  cl_program   cached_program = clCreateProgramWithBinary(
    opencl_device->getCLContext(), 1, &opencl_device->getCLDevice(), &binary_size, &binary_code_ptr, nullptr, &err);
  if (err != CL_SUCCESS)
  {
    throw std::runtime_error("Error: Fail to create program from binary.\nOpenCL error : " + getErrorString(err) +
                             " (" + std::to_string(err) + ").");
  }

  try
  {
    buildProgram(device, cached_program);
  }
  catch (...)
  {
    // Do not leak the program handle when the cached binary cannot be built.
    clReleaseProgram(cached_program);
    throw;
  }

  // Publish the handle only once the program is fully built.
  program = cached_program;
}
#endif

auto
OpenCLBackend::buildKernel(const Device::Pointer & device,
Expand All @@ -1011,33 +1062,22 @@ OpenCLBackend::buildKernel(const Device::Pointer & device,
#if USE_OPENCL
cl_int err;
auto opencl_device = std::dynamic_pointer_cast<const OpenCLDevice>(device);
cl_program prog = nullptr;
cl_program program = nullptr;
std::string hash = std::to_string(std::hash<std::string>{}(kernel_source));
loadProgramFromCache(device, hash, &prog);
if (prog == nullptr)
loadBinaryFromCache(device, hash, program);
if (program == nullptr)
{
const char * source = kernel_source.c_str();
prog = clCreateProgramWithSource(opencl_device->getCLContext(), 1, &source, nullptr, &err);
program = clCreateProgramWithSource(opencl_device->getCLContext(), 1, &source, nullptr, &err);
if (err != CL_SUCCESS)
{
throw std::runtime_error("Error: Fail to create program from source.\nOpenCL error : " + getErrorString(err) +
" (" + std::to_string(err) + ").");
}
cl_int buildStatus = clBuildProgram(prog, 1, &opencl_device->getCLDevice(), "-w", nullptr, nullptr);
if (buildStatus != CL_SUCCESS)
{
size_t len;
std::string buffer;
clGetProgramBuildInfo(prog, opencl_device->getCLDevice(), CL_PROGRAM_BUILD_LOG, 0, nullptr, &len);
buffer.resize(len);
clGetProgramBuildInfo(prog, opencl_device->getCLDevice(), CL_PROGRAM_BUILD_LOG, len, &buffer[0], &len);
std::cerr << "Build log: " << buffer << std::endl;
throw std::runtime_error("Error: Fail to build program " + kernel_name +
".\nOpenCL error : " + getErrorString(err) + " (" + std::to_string(err) + ").");
}
saveProgramToCache(device, hash, &prog);
buildProgram(device, program);
saveBinaryToCache(hash, program);
}
auto ocl_kernel = clCreateKernel(prog, kernel_name.c_str(), &err);
auto ocl_kernel = clCreateKernel(program, kernel_name.c_str(), &err);
if (err != CL_SUCCESS)
{
throw std::runtime_error("Error: Fail to create kernel.\nOpenCL error : " + getErrorString(err) + " (" +
Expand Down

0 comments on commit 4b19066

Please sign in to comment.