-
Notifications
You must be signed in to change notification settings - Fork 798
Handler-less kernel submit API #19294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sycl
Are you sure you want to change the base?
Changes from 3 commits
3223842
fde19ca
13424de
fbc789d
591b3ec
d235b7c
6641601
0f41d5a
a6e711e
9c8040e
31cbdb9
c5cd091
998d592
4000c07
f8e9cd6
01af8bb
4469e59
ac1a5cf
5865f3a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -149,6 +149,37 @@ class __SYCL_EXPORT SubmissionInfo { | |
ext::oneapi::experimental::event_mode_enum::none; | ||
}; | ||
|
||
using KernelParamDescGetterFuncPtr = detail::kernel_param_desc_t (*)(int); | ||
|
||
class __SYCL_EXPORT ExtendedSubmissionInfo : public SubmissionInfo { | ||
public: | ||
ExtendedSubmissionInfo() {} | ||
|
||
std::string_view &KernelName() { return MKernelName; } | ||
std::unique_ptr<detail::HostKernelBase> &HostKernel() { return MHostKernel; } | ||
slawekptak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const std::unique_ptr<detail::HostKernelBase> &HostKernel() const { | ||
return MHostKernel; | ||
} | ||
int &KernelNumArgs() { return MKernelNumArgs; } | ||
KernelParamDescGetterFuncPtr &KernelParamDescGetter() { | ||
return MKernelParamDescGetter; | ||
} | ||
bool &KernelIsESIMD() { return MKernelIsESIMD; } | ||
bool &KernelHasSpecialCaptures() { return MKernelHasSpecialCaptures; } | ||
detail::KernelNameBasedCacheT *&KernelNameBasedCachePtr() { | ||
return MKernelNameBasedCachePtr; | ||
} | ||
|
||
private: | ||
std::string_view MKernelName; | ||
std::unique_ptr<detail::HostKernelBase> MHostKernel; | ||
int MKernelNumArgs = 0; | ||
KernelParamDescGetterFuncPtr MKernelParamDescGetter = nullptr; | ||
bool MKernelIsESIMD = false; | ||
bool MKernelHasSpecialCaptures = true; | ||
detail::KernelNameBasedCacheT *MKernelNameBasedCachePtr = nullptr; | ||
}; | ||
|
||
} // namespace v1 | ||
} // namespace detail | ||
|
||
|
@@ -3609,6 +3640,38 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> { | |
} | ||
} | ||
|
||
template <int Dims, typename LambdaArgType> struct TransformUserItemType { | ||
using type = std::conditional_t< | ||
std::is_convertible_v<nd_item<Dims>, LambdaArgType>, nd_item<Dims>, | ||
std::conditional_t<std::is_convertible_v<item<Dims>, LambdaArgType>, | ||
item<Dims>, LambdaArgType>>; | ||
}; | ||
|
||
template <typename PropertiesT, typename KernelName, typename KernelType, | ||
int Dims> | ||
void ProcessExtendedSubmitProperties( | ||
PropertiesT Props, const KernelType &KernelFunc, | ||
detail::v1::ExtendedSubmissionInfo &SI) const { | ||
ProcessSubmitProperties(Props, SI); | ||
|
||
using NameT = | ||
typename detail::get_kernel_name_t<KernelName, KernelType>::name; | ||
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>; | ||
using TransformedArgType = std::conditional_t< | ||
std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>, | ||
typename TransformUserItemType<Dims, LambdaArgType>::type>; | ||
|
||
SI.HostKernel().reset( | ||
new detail::HostKernel<KernelType, TransformedArgType, Dims>( | ||
std::forward<KernelType>(KernelFunc))); | ||
SI.KernelName() = detail::getKernelName<NameT>(); | ||
SI.KernelNumArgs() = detail::getKernelNumParams<NameT>(); | ||
SI.KernelParamDescGetter() = &(detail::getKernelParamDesc<NameT>); | ||
SI.KernelIsESIMD() = detail::isKernelESIMD<NameT>(); | ||
SI.KernelHasSpecialCaptures() = detail::hasSpecialCaptures<NameT>(); | ||
SI.KernelNameBasedCachePtr() = detail::getKernelNameBasedCache<NameT>(); | ||
} | ||
|
||
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES | ||
/// TODO: Unused. Remove these when ABI-break window is open. | ||
/// Not using `type_erased_cgfo_ty` on purpose. | ||
|
@@ -3680,6 +3743,21 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> { | |
const detail::code_location &CodeLoc, | ||
bool IsTopCodeLoc) const; | ||
|
||
event submit_with_event_impl( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about eventless? It is not done yet, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I think it would be similar, so I've skipped it for now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the disadvantage of returning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure if returning There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, good point. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sycl::detail::optional might work, good idea There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But still we need to care about the stable layout of the I think having two versions (that return There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't generally do that and we rely on backward compatibility guarantees of the C++ library we use (GNU libstdc++ on Linux/MSVC on Windows). The only exception is pre-C++11 ABI of GNU libstdc++ that pyTorch used to use (see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html). I don't see Another possible caveat is if some STL's implementation of it isn't Anyway, unless you have a known case when it doesn't work, the current approach in the rest of the project is to use |
||
const nd_range<1> Range, | ||
const detail::v1::ExtendedSubmissionInfo &ExtSubmitInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const; | ||
|
||
event submit_with_event_impl( | ||
const nd_range<2> Range, | ||
const detail::v1::ExtendedSubmissionInfo &ExtSubmitInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const; | ||
|
||
event submit_with_event_impl( | ||
const nd_range<3> Range, | ||
const detail::v1::ExtendedSubmissionInfo &ExtSubmitInfo, | ||
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const; | ||
|
||
/// A template-free version of submit_without_event as const member function. | ||
void submit_without_event_impl(const detail::type_erased_cgfo_ty &CGH, | ||
const detail::v1::SubmissionInfo &SubmitInfo, | ||
|
@@ -3763,6 +3841,23 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> { | |
TlsCodeLocCapture.isToplevel()); | ||
} | ||
|
||
template <bool UseFallbackAssert, typename PropertiesT, typename KernelName, | ||
typename KernelType, int Dims> | ||
event submit_with_event(PropertiesT Props, const nd_range<Dims> Range, | ||
const KernelType &KernelFunc, | ||
const detail::code_location &CodeLoc = | ||
detail::code_location::current()) const { | ||
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); | ||
detail::v1::ExtendedSubmissionInfo SI{}; | ||
ProcessExtendedSubmitProperties<KernelName, KernelType>(Props, KernelFunc, | ||
SI); | ||
|
||
// TODO UseFallbackAssert | ||
|
||
return submit_with_event_impl(Range, SI, TlsCodeLocCapture.query(), | ||
slawekptak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
TlsCodeLocCapture.isToplevel()); | ||
} | ||
|
||
/// Submits a command group function object to the queue, in order to be | ||
/// scheduled for execution on the device. | ||
/// | ||
|
Uh oh!
There was an error while loading. Please reload this page.