@@ -114,8 +114,26 @@ class kernel_impl {
114
114
typename Param::return_type get_info (const device &Device,
115
115
const range<3 > &WGSize) const ;
116
116
117
+ // / Query queue/launch-specific information from a kernel using the
118
+ // / info::kernel_queue_specific descriptor for a specific Queue.
119
+ // /
120
+ // / \param Queue is a valid SYCL queue.
121
+ // / \return depends on information being queried.
122
+ template <typename Param>
123
+ typename Param::return_type ext_oneapi_get_info (queue Queue) const ;
124
+
125
+ // / Query queue/launch-specific information from a kernel using the
126
+ // / info::kernel_queue_specific descriptor for a specific Queue and values.
127
+ // / max_num_work_groups is the only valid descriptor for this function.
128
+ // /
129
+ // / \param Queue is a valid SYCL queue.
130
+ // / \param WorkGroupSize is the work-group size the number of work-groups is
131
+ // / requested for.
132
+ // / \return depends on information being queried.
117
133
template <typename Param>
118
- typename Param::return_type ext_oneapi_get_info (const queue &q) const ;
134
+ typename Param::return_type
135
+ ext_oneapi_get_info (queue Queue, const range<3 > &MaxWorkGroupSize,
136
+ size_t DynamicLocalMemorySize) const ;
119
137
120
138
// / Get a constant reference to a raw kernel object.
121
139
// /
@@ -171,6 +189,12 @@ class kernel_impl {
171
189
172
190
bool isBuiltInKernel (const device &Device) const ;
173
191
void checkIfValidForNumArgsInfoQuery () const ;
192
+
193
+ // / Check if the occupancy limits are exceeded for the given kernel launch
194
+ // / configuration.
195
+ bool exceedsOccupancyResourceLimits (const device &Device,
196
+ const range<3 > &WorkGroupSize,
197
+ size_t DynamicLocalMemorySize) const ;
174
198
};
175
199
176
200
template <typename Param>
@@ -217,20 +241,66 @@ kernel_impl::get_info(const device &Device,
217
241
getPlugin ());
218
242
}
219
243
244
+ namespace syclex = ext::oneapi::experimental;
245
+
220
246
template <>
221
- inline typename ext::oneapi::experimental:: info::kernel_queue_specific::
222
- max_num_work_group_sync:: return_type
247
+ inline typename syclex:: info::kernel_queue_specific::max_num_work_groups ::
248
+ return_type
223
249
kernel_impl::ext_oneapi_get_info<
224
- ext::oneapi::experimental::info::kernel_queue_specific::
225
- max_num_work_group_sync>(const queue &Queue) const {
250
+ syclex::info::kernel_queue_specific::max_num_work_groups>(
251
+ queue Queue, const range<3 > &WorkGroupSize,
252
+ size_t DynamicLocalMemorySize) const {
253
+ if (WorkGroupSize.size () == 0 )
254
+ throw exception (sycl::make_error_code (errc::invalid),
255
+ " The launch work-group size cannot be zero." );
256
+
226
257
const auto &Plugin = getPlugin ();
227
258
const auto &Handle = getHandleRef ();
259
+ auto Device = Queue.get_device ();
260
+
261
+ uint32_t GroupCount{0 };
262
+ if (auto Result = Plugin->call_nocheck <
263
+ UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
264
+ Handle, WorkGroupSize.size (), DynamicLocalMemorySize, &GroupCount);
265
+ Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
266
+ // The feature is supported. Check for other errors and throw if any.
267
+ Plugin->checkUrResult (Result);
268
+ return GroupCount;
269
+ }
270
+
271
+ // Fallback. If the backend API is unsupported, this query will return either
272
+ // 0 or 1 based on the kernel resource usage and the user-requested resources.
273
+ return exceedsOccupancyResourceLimits (Device, WorkGroupSize,
274
+ DynamicLocalMemorySize)
275
+ ? 0
276
+ : 1 ;
277
+ }
278
+
279
+ template <>
280
+ inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
281
+ return_type
282
+ kernel_impl::ext_oneapi_get_info<
283
+ syclex::info::kernel_queue_specific::max_num_work_group_sync>(
284
+ queue Queue, const range<3 > &WorkGroupSize,
285
+ size_t DynamicLocalMemorySize) const {
286
+ return ext_oneapi_get_info<
287
+ syclex::info::kernel_queue_specific::max_num_work_groups>(
288
+ Queue, WorkGroupSize, DynamicLocalMemorySize);
289
+ }
290
+
291
+ template <>
292
+ inline typename syclex::info::kernel_queue_specific::max_num_work_group_sync::
293
+ return_type
294
+ kernel_impl::ext_oneapi_get_info<
295
+ syclex::info::kernel_queue_specific::max_num_work_group_sync>(
296
+ queue Queue) const {
297
+ auto Device = Queue.get_device ();
228
298
const auto MaxWorkGroupSize =
229
- Queue. get_device (). get_info <info::device::max_work_group_size>( );
230
- uint32_t GroupCount = 0 ;
231
- Plugin-> call <UrApiKind::urKernelSuggestMaxCooperativeGroupCountExp>(
232
- Handle, MaxWorkGroupSize, /* DynamicSharedMemorySize */ 0 , &GroupCount);
233
- return GroupCount ;
299
+ get_info<info::kernel_device_specific::work_group_size>(Device );
300
+ const sycl::range< 3 > WorkGroupSize{MaxWorkGroupSize, 1 , 1 } ;
301
+ return ext_oneapi_get_info<
302
+ syclex::info::kernel_queue_specific::max_num_work_group_sync>(
303
+ Queue, WorkGroupSize, /* DynamicLocalMemorySize */ 0 ) ;
234
304
}
235
305
236
306
} // namespace detail
0 commit comments