@@ -114,8 +114,26 @@ class kernel_impl {
114114 typename Param::return_type get_info (const device &Device,
115115 const range<3 > &WGSize) const ;
116116
117+ /// Query queue/launch-specific information about this kernel using an
118+ /// info::kernel_queue_specific descriptor (Param), for a specific queue.
119+ ///
120+ /// \param Queue is a valid SYCL queue; it is taken by value.
121+ /// \return the value of the queried descriptor, of type Param::return_type.
122+ template <typename Param>
123+ typename Param::return_type ext_oneapi_get_info (queue Queue) const ;
124+
125+ /// Query queue/launch-specific information about this kernel using an
126+ /// info::kernel_queue_specific descriptor (Param), for a specific queue and
127+ /// launch configuration. Specializations are provided for the
128+ /// max_num_work_groups and max_num_work_group_sync descriptors.
129+ /// \param Queue is a valid SYCL queue; it is taken by value.
130+ /// \param WorkGroupSize is the work-group size the number of work-groups is
131+ /// requested for.
/// \param DynamicLocalMemorySize is the amount of dynamic local memory
/// requested for the launch (presumably in bytes -- TODO confirm).
132+ /// \return the value of the queried descriptor, of type Param::return_type.
117133 template <typename Param>
118- typename Param::return_type ext_oneapi_get_info (const queue &q) const ;
134+ typename Param::return_type
135+ ext_oneapi_get_info (queue Queue, const range<3 > &WorkGroupSize,
136+ size_t DynamicLocalMemorySize) const ;
119137
120138 // / Get a constant reference to a raw kernel object.
121139 // /
@@ -171,6 +189,12 @@ class kernel_impl {
171189
172190 bool isBuiltInKernel (const device &Device) const ;
173191 void checkIfValidForNumArgsInfoQuery () const ;
192+
193+ /// Check whether the occupancy resource limits are exceeded for the given
194+ /// kernel launch configuration.
///
/// \param Device is the device the launch configuration is checked against.
/// \param WorkGroupSize is the work-group size of the launch.
/// \param DynamicLocalMemorySize is the dynamic local memory requested for the
/// launch (presumably in bytes -- TODO confirm).
/// \return true if the limits are exceeded; the max_num_work_groups fallback
/// path reports 0 work-groups in that case and 1 otherwise.
195+ bool exceedsOccupancyResourceLimits (const device &Device,
196+ const range<3 > &WorkGroupSize,
197+ size_t DynamicLocalMemorySize) const ;
174198};
175199
176200template <typename Param>
@@ -217,20 +241,66 @@ kernel_impl::get_info(const device &Device,
217241 getPlugin ());
218242}
219243
244+ namespace syclex = ext::oneapi::experimental; // Shorthand used by the ext_oneapi_get_info specializations below.
245+
/// Specialization of the (Queue, WorkGroupSize, DynamicLocalMemorySize) query
/// for info::kernel_queue_specific::max_num_work_groups.
///
/// Asks the backend, via urKernelSuggestMaxCooperativeGroupCountExp, for the
/// group count matching the total work-group size (WorkGroupSize.size()) and
/// the requested dynamic local memory. If the backend reports
/// UR_RESULT_ERROR_UNSUPPORTED_FEATURE, the result falls back to 0 or 1
/// depending on exceedsOccupancyResourceLimits().
///
/// \throws exception with errc::invalid if WorkGroupSize.size() is zero.
220246template <>
221- inline typename ext::oneapi::experimental:: info::kernel_queue_specific::
222- max_num_work_group_sync:: return_type
247+ inline typename syclex:: info::kernel_queue_specific::max_num_work_groups ::
248+ return_type
223249 kernel_impl::ext_oneapi_get_info<
224- ext::oneapi::experimental::info::kernel_queue_specific::
225- max_num_work_group_sync>(const queue &Queue) const {
250+ syclex::info::kernel_queue_specific::max_num_work_groups>(
251+ queue Queue, const range<3 > &WorkGroupSize,
252+ size_t DynamicLocalMemorySize) const {
// Reject an empty launch configuration before touching the backend.
253+ if (WorkGroupSize.size () == 0 )
254+ throw exception (sycl::make_error_code (errc::invalid),
255+ " The launch work-group size cannot be zero." );
256+
226257 const auto &Plugin = getPlugin ();
227258 const auto &Handle = getHandleRef ();
259+ auto Device = Queue.get_device (); // Only used by the fallback path at the end.
260+
261+ uint32_t GroupCount{0 };
// call_nocheck is used so that an "unsupported feature" result can be
// handled here instead of being turned into an error immediately.
262+ if (auto Result = Plugin->call_nocheck <
263+ UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
264+ Handle, WorkGroupSize.size (), DynamicLocalMemorySize, &GroupCount);
265+ Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
266+ // The feature is supported. Check for other errors and throw if any.
267+ Plugin->checkUrResult (Result);
268+ return GroupCount;
269+ }
270+
271+ // Fallback. If the backend API is unsupported, this query will return either
272+ // 0 or 1 based on the kernel resource usage and the user-requested resources.
273+ return exceedsOccupancyResourceLimits (Device, WorkGroupSize,
274+ DynamicLocalMemorySize)
275+ ? 0
276+ : 1 ;
277+ }
278+
/// Specialization of the (Queue, WorkGroupSize, DynamicLocalMemorySize) query
/// for info::kernel_queue_specific::max_num_work_group_sync.
///
/// Forwards all three arguments unchanged to the max_num_work_groups
/// specialization and returns its result.
279+ template <>
280+ inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
281+ return_type
282+ kernel_impl::ext_oneapi_get_info<
283+ syclex::info::kernel_queue_specific::max_num_work_group_sync>(
284+ queue Queue, const range<3 > &WorkGroupSize,
285+ size_t DynamicLocalMemorySize) const {
286+ return ext_oneapi_get_info<
287+ syclex::info::kernel_queue_specific::max_num_work_groups>(
288+ Queue, WorkGroupSize, DynamicLocalMemorySize);
289+ }
290+
/// Specialization of the queue-only query for
/// info::kernel_queue_specific::max_num_work_group_sync.
///
/// Builds a launch configuration from this kernel's
/// info::kernel_device_specific::work_group_size on the queue's device, used as
/// a one-dimensional work-group size {N, 1, 1} with zero dynamic local memory,
/// and forwards it to the three-argument max_num_work_group_sync overload.
291+ template <>
292+ inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
293+ return_type
294+ kernel_impl::ext_oneapi_get_info<
295+ syclex::info::kernel_queue_specific::max_num_work_group_sync>(
296+ queue Queue) const {
297+ auto Device = Queue.get_device ();
// The new code queries the kernel-specific work-group size for this device;
// the removed lines below used info::device::max_work_group_size instead.
228298 const auto MaxWorkGroupSize =
229- Queue. get_device (). get_info <info::device::max_work_group_size>( );
230- uint32_t GroupCount = 0 ;
231- Plugin-> call <UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
232- Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0 , &GroupCount);
233- return GroupCount ;
299+ get_info<info::kernel_device_specific::work_group_size>(Device );
300+ const sycl::range< 3 > WorkGroupSize{MaxWorkGroupSize, 1 , 1 } ;
301+ return ext_oneapi_get_info<
302+ syclex::info::kernel_queue_specific::max_num_work_group_sync>(
303+ Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0 ) ;
234304}
235305
236306} // namespace detail
0 commit comments