-
Notifications
You must be signed in to change notification settings - Fork 784
[SYCL] Implement loading SYCLBIN into kernel_bundle #18949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sycl
Are you sure you want to change the base?
Changes from all commits
7b3873c
ec21a10
7061d2c
9bb13aa
c9f17c2
258ecee
4700308
2130901
c49ddf4
af8e38e
9b632ba
fcebf1f
af1040c
c761fe8
a927f73
c3a7a09
a8b6ceb
f27d08b
36f9204
5a2f313
bffcbc8
76b07eb
d7ad2b7
f2b7fea
ab7a19a
8d96fb3
abc3a2b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
//==---- syclbin_kernel_bundle.hpp - SYCLBIN-based kernel_bundle tooling ---==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#pragma once | ||
|
||
#include <sycl/ext/oneapi/properties/properties.hpp> | ||
#include <sycl/kernel_bundle.hpp> | ||
|
||
#include <fstream> | ||
#include <string> | ||
|
||
#if __has_include(<filesystem>) | ||
#include <filesystem> | ||
#endif | ||
|
||
#if __has_include(<span>) | ||
#include <span> | ||
#endif | ||
|
||
namespace sycl { | ||
inline namespace _V1 { | ||
namespace ext::oneapi::experimental { | ||
|
||
template <bundle_state State, typename PropertyListT = empty_properties_t> | ||
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>> | ||
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs, | ||
const sycl::span<char> &Bytes, PropertyListT = {}) { | ||
std::vector<device> UniqueDevices = | ||
sycl::detail::removeDuplicateDevices(Devs); | ||
|
||
sycl::detail::KernelBundleImplPtr Impl = | ||
sycl::detail::get_kernel_bundle_impl(Ctxt, UniqueDevices, Bytes, State); | ||
return sycl::detail::createSyclObjFromImpl<kernel_bundle<State>>(Impl); | ||
} | ||
|
||
#if __cpp_lib_span | ||
template <bundle_state State, typename PropertyListT = empty_properties_t> | ||
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>> | ||
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs, | ||
const std::span<char> &Bytes, PropertyListT Props = {}) { | ||
return experimental::get_kernel_bundle( | ||
Ctxt, Devs, sycl::span<char>(Bytes.data(), Bytes.size()), Props); | ||
} | ||
#endif | ||
|
||
#if __cpp_lib_filesystem | ||
template <bundle_state State, typename PropertyListT = empty_properties_t> | ||
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>> | ||
get_kernel_bundle(const context &Ctxt, const std::vector<device> &Devs, | ||
const std::filesystem::path &Filename, | ||
PropertyListT Props = {}) { | ||
std::vector<char> RawSYCLBINData; | ||
{ | ||
std::ifstream FileStream{Filename, std::ios::binary}; | ||
if (!FileStream.is_open()) | ||
throw sycl::exception(make_error_code(errc::invalid), | ||
"Failed to open SYCLBIN file: " + | ||
Filename.string()); | ||
RawSYCLBINData = | ||
std::vector<char>{std::istreambuf_iterator<char>(FileStream), | ||
std::istreambuf_iterator<char>()}; | ||
} | ||
return experimental::get_kernel_bundle<State>( | ||
Ctxt, Devs, sycl::span<char>{RawSYCLBINData}, Props); | ||
} | ||
|
||
template <bundle_state State, typename PropertyListT = empty_properties_t> | ||
std::enable_if_t<State != bundle_state::ext_oneapi_source, kernel_bundle<State>> | ||
get_kernel_bundle(const context &Ctxt, const std::filesystem::path &Filename, | ||
PropertyListT Props = {}) { | ||
return experimental::get_kernel_bundle<State>(Ctxt, Ctxt.get_devices(), | ||
Filename, Props); | ||
} | ||
#endif | ||
|
||
} // namespace ext::oneapi::experimental | ||
} // namespace _V1 | ||
} // namespace sycl |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
//===--- Base64.h - Base64 Encoder/Decoder ----------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// Adjusted copy of llvm/include/llvm/Support/Base64.h. | ||
// TODO: Remove once we can consistently link the SYCL runtime library with | ||
// LLVMSupport. | ||
|
||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <memory> | ||
#include <string> | ||
#include <vector> | ||
|
||
namespace sycl { | ||
inline namespace _V1 { | ||
namespace detail { | ||
|
||
class Base64 { | ||
private: | ||
// Decode a single character. | ||
static inline int decode(char Ch) { | ||
if (Ch >= 'A' && Ch <= 'Z') // 0..25 | ||
return Ch - 'A'; | ||
else if (Ch >= 'a' && Ch <= 'z') // 26..51 | ||
return Ch - 'a' + 26; | ||
else if (Ch >= '0' && Ch <= '9') // 52..61 | ||
return Ch - '0' + 52; | ||
else if (Ch == '+') // 62 | ||
return 62; | ||
else if (Ch == '/') // 63 | ||
return 63; | ||
return -1; | ||
} | ||
|
||
// Decode a quadruple of characters. | ||
static inline void decode4(const char *Src, byte *Dst) { | ||
int BadCh = -1; | ||
|
||
for (auto I = 0; I < 4; ++I) { | ||
char Ch = Src[I]; | ||
int Byte = decode(Ch); | ||
|
||
if (Byte < 0) { | ||
BadCh = Ch; | ||
break; | ||
} | ||
Dst[I] = (byte)Byte; | ||
} | ||
if (BadCh != -1) | ||
throw sycl::exception(make_error_code(errc::invalid), | ||
"Invalid char in base 64 encoding."); | ||
} | ||
|
||
public: | ||
using byte = uint8_t; | ||
|
||
// Get the size of the encoded byte sequence of given size. | ||
static size_t getDecodedSize(size_t SrcSize) { return (SrcSize * 3 + 3) / 4; } | ||
|
||
// Decode a sequence of given size into a pre-allocated memory. | ||
// Returns the number of bytes in the decoded result or 0 in case of error. | ||
static size_t decode(const char *Src, byte *Dst, size_t SrcSize) { | ||
size_t SrcOff = 0; | ||
size_t DstOff = 0; | ||
|
||
// decode full quads | ||
for (size_t Qch = 0; Qch < SrcSize / 4; ++Qch, SrcOff += 4, DstOff += 3) { | ||
byte Ch[4] = {0, 0, 0, 0}; | ||
decode4(Src + SrcOff, Ch); | ||
|
||
// each quad of chars produces three bytes of output | ||
Dst[DstOff + 0] = Ch[0] | (Ch[1] << 6); | ||
Dst[DstOff + 1] = (Ch[1] >> 2) | (Ch[2] << 4); | ||
Dst[DstOff + 2] = (Ch[2] >> 4) | (Ch[3] << 2); | ||
} | ||
auto RemChars = SrcSize - SrcOff; | ||
|
||
if (RemChars == 0) | ||
return DstOff; | ||
// decode the remainder; variants: | ||
// 2 chars remain - produces single byte | ||
// 3 chars remain - produces two bytes | ||
|
||
if (RemChars != 2 && RemChars != 3) | ||
throw sycl::exception(make_error_code(errc::invalid), | ||
"Invalid encoded sequence length."); | ||
|
||
int Ch0 = decode(Src[SrcOff++]); | ||
int Ch1 = decode(Src[SrcOff++]); | ||
int Ch2 = RemChars == 3 ? decode(Src[SrcOff]) : 0; | ||
|
||
if (Ch0 < 0 || Ch1 < 0 || Ch2 < 0) | ||
throw sycl::exception( | ||
make_error_code(errc::invalid), | ||
"Invalid characters in the encoded sequence remainder."); | ||
Dst[DstOff++] = Ch0 | (Ch1 << 6); | ||
|
||
if (RemChars == 3) | ||
Dst[DstOff++] = (Ch1 >> 2) | (Ch2 << 4); | ||
return DstOff; | ||
} | ||
|
||
// Allocate minimum required amount of memory and decode a sequence of given | ||
// size into it. | ||
// Returns the decoded result. The size can be obtained via getDecodedSize. | ||
static std::unique_ptr<byte[]> decode(const char *Src, size_t SrcSize) { | ||
size_t DstSize = getDecodedSize(SrcSize); | ||
std::unique_ptr<byte[]> Dst(new byte[DstSize]); | ||
decode(Src, Dst.get(), SrcSize); | ||
return Dst; | ||
} | ||
}; | ||
|
||
} // namespace detail | ||
} // namespace _V1 | ||
} // namespace sycl |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,7 +162,7 @@ RTDeviceBinaryImage::getProperty(const char *PropName) const { | |
return *It; | ||
} | ||
|
||
void RTDeviceBinaryImage::init(sycl_device_binary Bin) { | ||
RTDeviceBinaryImage::RTDeviceBinaryImage(sycl_device_binary Bin) { | ||
ImageId = ImageCounter++; | ||
|
||
// If there was no binary, we let the owner handle initialization as they see | ||
|
@@ -199,6 +199,7 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { | |
ProgramMetadataUR.push_back( | ||
ur::mapDeviceBinaryPropertyToProgramMetadata(Prop)); | ||
} | ||
KernelNames.init(Bin, __SYCL_PROPERTY_SET_SYCL_KERNEL_NAMES); | ||
ExportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); | ||
ImportedSymbols.init(Bin, __SYCL_PROPERTY_SET_SYCL_IMPORTED_SYMBOLS); | ||
DeviceGlobals.init(Bin, __SYCL_PROPERTY_SET_SYCL_DEVICE_GLOBALS); | ||
|
@@ -211,7 +212,8 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { | |
|
||
std::atomic<uintptr_t> RTDeviceBinaryImage::ImageCounter = 1; | ||
|
||
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { | ||
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() | ||
: RTDeviceBinaryImage(nullptr) { | ||
Bin = new sycl_device_binary_struct(); | ||
Bin->Version = SYCL_DEVICE_BINARY_VERSION; | ||
Bin->Kind = SYCL_DEVICE_BINARY_OFFLOAD_KIND_SYCL; | ||
|
@@ -227,12 +229,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage() : RTDeviceBinaryImage() { | |
Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; | ||
} | ||
|
||
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | ||
std::unique_ptr<char[], std::function<void(void *)>> &&DataPtr, | ||
size_t DataSize) | ||
: DynRTDeviceBinaryImage() { | ||
Data = std::move(DataPtr); | ||
Bin->BinaryStart = reinterpret_cast<unsigned char *>(Data.get()); | ||
std::unique_ptr<sycl_device_binary_struct> CreateDefaultDynBinary( | ||
const std::unique_ptr<char[], std::function<void(void *)>> &DataPtr, | ||
size_t DataSize) { | ||
auto Bin = std::make_unique<sycl_device_binary_struct>(); | ||
Bin->BinaryStart = reinterpret_cast<unsigned char *>(DataPtr.get()); | ||
Bin->BinaryEnd = Bin->BinaryStart + DataSize; | ||
Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, DataSize); | ||
switch (Bin->Format) { | ||
|
@@ -242,9 +243,15 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | |
default: | ||
Bin->DeviceTargetSpec = __SYCL_DEVICE_BINARY_TARGET_UNKNOWN; | ||
} | ||
init(Bin); | ||
return Bin; | ||
} | ||
|
||
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | ||
std::unique_ptr<char[], std::function<void(void *)>> &&DataPtr, | ||
size_t DataSize) | ||
: RTDeviceBinaryImage(CreateDefaultDynBinary(DataPtr, DataSize).release()), | ||
Data{std::move(DataPtr)} {} | ||
|
||
DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { | ||
delete Bin; | ||
Bin = nullptr; | ||
|
@@ -479,8 +486,6 @@ static void copyProperty(sycl_device_binary_property &NextFreeProperty, | |
DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | ||
const std::vector<const RTDeviceBinaryImage *> &Imgs) | ||
: DynRTDeviceBinaryImage() { | ||
init(nullptr); | ||
|
||
// Naive merges. | ||
auto MergedSpecConstants = | ||
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { | ||
|
@@ -510,6 +515,10 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | |
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { | ||
return Img.getImplicitLocalArg(); | ||
}); | ||
auto MergedKernelNames = | ||
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { | ||
return Img.getKernelNames(); | ||
}); | ||
auto MergedExportedSymbols = | ||
naiveMergeBinaryProperties(Imgs, [](const RTDeviceBinaryImage &Img) { | ||
return Img.getExportedSymbols(); | ||
|
@@ -519,12 +528,13 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | |
return Img.getRegisteredKernels(); | ||
}); | ||
|
||
std::array<const std::vector<sycl_device_binary_property> *, 10> MergedVecs{ | ||
std::array<const std::vector<sycl_device_binary_property> *, 11> MergedVecs{ | ||
&MergedSpecConstants, &MergedSpecConstantsDefaultValues, | ||
&MergedKernelParamOptInfo, &MergedAssertUsed, | ||
&MergedDeviceGlobals, &MergedHostPipes, | ||
&MergedVirtualFunctions, &MergedImplicitLocalArg, | ||
&MergedExportedSymbols, &MergedRegisteredKernels}; | ||
&MergedKernelNames, &MergedExportedSymbols, | ||
&MergedRegisteredKernels}; | ||
|
||
// Exclusive merges. | ||
auto MergedDeviceLibReqMask = | ||
|
@@ -648,6 +658,7 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | |
CopyPropertiesVec(MergedHostPipes, HostPipes); | ||
CopyPropertiesVec(MergedVirtualFunctions, VirtualFunctions); | ||
CopyPropertiesVec(MergedImplicitLocalArg, ImplicitLocalArg); | ||
CopyPropertiesVec(MergedKernelNames, KernelNames); | ||
CopyPropertiesVec(MergedExportedSymbols, ExportedSymbols); | ||
CopyPropertiesVec(MergedRegisteredKernels, RegisteredKernels); | ||
|
||
|
@@ -675,18 +686,11 @@ DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( | |
#ifdef SYCL_RT_ZSTD_AVAILABLE | ||
CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( | ||
sycl_device_binary CompressedBin) | ||
: RTDeviceBinaryImage() { | ||
|
||
// 'CompressedBin' is part of the executable image loaded into memory | ||
// which can't be modified easily. So, we need to make a copy of it. | ||
Bin = new sycl_device_binary_struct(*CompressedBin); | ||
|
||
: RTDeviceBinaryImage(new sycl_device_binary_struct(*CompressedBin)) { | ||
// Get the decompressed size of the binary image. | ||
m_ImageSize = ZSTDCompressor::GetDecompressedSize( | ||
reinterpret_cast<const char *>(Bin->BinaryStart), | ||
static_cast<size_t>(Bin->BinaryEnd - Bin->BinaryStart)); | ||
|
||
init(Bin); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @uditagarwal97 - I am hoping you might be able to help me here. There are a few failures related to these changes and I suspect they are from the compression. One is in the unittests, which seem to be a failure (possibly a segfault) in a call to Only immediate changes I can see that could affect compression is here, where we would set the |
||
} | ||
|
||
void CompressedRTDeviceBinaryImage::Decompress() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't these need an `#else`? What will happen if the compiler doesn't have filesystem or span? IIRC, we are still trying to support GCC 7.5, whose support of the C++17 filesystem feature is not complete. Take a look at what is done in os_util.cpp, where the fallback includes experimental/filesystem.
Also, `std::span` is C++20, IIRC. But we have `sycl::span`, so maybe use that?
Or, if I'm operating on yesterday's information and this is no longer a problem, let me know.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The filesystem functionality is part of the public interface. Similar to how the `std::span` interface isn't available, we can't offer those interfaces if the user doesn't have the headers available.
Indeed! There is an overload of the new interfaces with `sycl::span`, but one of the interfaces takes a `std::span` directly, so we need to optionally include it to offer that interface.
Definitely still a problem, but in this case the `sycl::span` interface will always be available and the rest are cases of "too bad, you can't use those. Update your compiler."