@@ -60,8 +60,7 @@ template <>
 uint32_t queue_impl::get_info<info::queue::reference_count>() const {
   ur_result_t result = UR_RESULT_SUCCESS;
   getAdapter()->call<UrApiKind::urQueueGetInfo>(
-      MQueues[0], UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result,
-      nullptr);
+      MQueue, UR_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, nullptr);
   return result;
 }
 
@@ -303,55 +302,14 @@ void queue_impl::addEvent(const event &Event) {
   const EventImplPtr &EImpl = getSyclObjImpl(Event);
   assert(EImpl && "Event implementation is missing");
   auto *Cmd = static_cast<Command *>(EImpl->getCommand());
-  if (!Cmd) {
-    // if there is no command on the event, we cannot track it with MEventsWeak
-    // as that will leave it with no owner. Track in MEventsShared only if we're
-    // unable to call urQueueFinish during wait.
-    if (MEmulateOOO)
-      addSharedEvent(Event);
-  }
-  // As long as the queue supports urQueueFinish we only need to store events
-  // for undiscarded, unenqueued commands and host tasks.
-  else if (MEmulateOOO ||
-           (EImpl->getHandle() == nullptr && !EImpl->isDiscarded())) {
+  if (Cmd != nullptr && EImpl->getHandle() == nullptr &&
+      !EImpl->isDiscarded()) {
     std::weak_ptr<event_impl> EventWeakPtr{EImpl};
     std::lock_guard<std::mutex> Lock{MMutex};
     MEventsWeak.push_back(std::move(EventWeakPtr));
   }
 }
 
-/// addSharedEvent - queue_impl tracks events with weak pointers
-/// but some events have no other owner. In this case,
-/// addSharedEvent will have the queue track the events via a shared pointer.
-void queue_impl::addSharedEvent(const event &Event) {
-  assert(MEmulateOOO);
-  std::lock_guard<std::mutex> Lock(MMutex);
-  // Events stored in MEventsShared are not released anywhere else aside from
-  // calls to queue::wait/wait_and_throw, which a user application might not
-  // make, and ~queue_impl(). If the number of events grows large enough,
-  // there's a good chance that most of them are already completed and ownership
-  // of them can be released.
-  const size_t EventThreshold = 128;
-  if (MEventsShared.size() >= EventThreshold) {
-    // Generally, the vector is ordered so that the oldest events are in the
-    // front and the newer events are in the end. So, search to find the first
-    // event that isn't yet complete. All the events prior to that can be
-    // erased. This could leave some few events further on that have completed
-    // not yet erased, but that is OK. This cleanup doesn't have to be perfect.
-    // This also keeps the algorithm linear rather than quadratic because it
-    // doesn't continually recheck things towards the back of the list that
-    // really haven't had time to complete.
-    MEventsShared.erase(
-        MEventsShared.begin(),
-        std::find_if(
-            MEventsShared.begin(), MEventsShared.end(), [](const event &E) {
-              return E.get_info<info::event::command_execution_status>() !=
-                     info::event_command_status::complete;
-            }));
-  }
-  MEventsShared.push_back(Event);
-}
-
 event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
                               const std::shared_ptr<queue_impl> &Self,
                               const std::shared_ptr<queue_impl> &PrimaryQueue,
@@ -490,9 +448,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
                                            : MExtGraphDeps.LastEventPtr;
       EventToStoreIn = EventImpl;
     }
-    // Track only if we won't be able to handle it with urQueueFinish.
-    if (MEmulateOOO)
-      addSharedEvent(ResEvent);
+
     return discard_or_return(ResEvent);
   }
 }
@@ -600,10 +556,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
 
   // Additionally, we can clean up the event lists that we would have
   // otherwise cleared.
-  if (!MEventsWeak.empty() || !MEventsShared.empty()) {
+  if (!MEventsWeak.empty()) {
     std::lock_guard<std::mutex> Lock(MMutex);
     MEventsWeak.clear();
-    MEventsShared.clear();
   }
   if (!MStreamsServiceEvents.empty()) {
     std::lock_guard<std::mutex> Lock(MStreamsServiceEventsMutex);
@@ -612,11 +567,9 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
   }
 
   std::vector<std::weak_ptr<event_impl>> WeakEvents;
-  std::vector<event> SharedEvents;
   {
     std::lock_guard<std::mutex> Lock(MMutex);
     WeakEvents.swap(MEventsWeak);
-    SharedEvents.swap(MEventsShared);
 
     MMissedCleanupRequests.unset(
         [&](MissedCleanupRequestsType &MissedCleanupRequests) {
@@ -630,27 +583,19 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
   // directly. Otherwise, only wait for unenqueued or host task events, starting
   // from the latest submitted task in order to minimize total amount of calls,
   // then handle the rest with urQueueFinish.
-  const bool SupportsPiFinish = !MEmulateOOO;
   for (auto EventImplWeakPtrIt = WeakEvents.rbegin();
        EventImplWeakPtrIt != WeakEvents.rend(); ++EventImplWeakPtrIt) {
     if (std::shared_ptr<event_impl> EventImplSharedPtr =
             EventImplWeakPtrIt->lock()) {
       // A nullptr UR event indicates that urQueueFinish will not cover it,
       // either because it's a host task event or an unenqueued one.
-      if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandle()) {
+      if (nullptr == EventImplSharedPtr->getHandle()) {
         EventImplSharedPtr->wait(EventImplSharedPtr);
       }
     }
   }
-  if (SupportsPiFinish) {
-    const AdapterPtr &Adapter = getAdapter();
-    Adapter->call<UrApiKind::urQueueFinish>(getHandleRef());
-    assert(SharedEvents.empty() && "Queues that support calling piQueueFinish "
-                                   "shouldn't have shared events");
-  } else {
-    for (event &Event : SharedEvents)
-      Event.wait();
-  }
+  const AdapterPtr &Adapter = getAdapter();
+  Adapter->call<UrApiKind::urQueueFinish>(getHandleRef());
 
   std::vector<EventImplPtr> StreamsServiceEvents;
   {
@@ -730,7 +675,7 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const {
                                       nullptr, nullptr};
   UrNativeDesc.pNativeData = &NativeHandleDesc;
 
-  Adapter->call<UrApiKind::urQueueGetNativeHandle>(MQueues[0], &UrNativeDesc,
+  Adapter->call<UrApiKind::urQueueGetNativeHandle>(MQueue, &UrNativeDesc,
                                                    &Handle);
   if (getContextImplPtr()->getBackend() == backend::opencl)
     __SYCL_OCL_CALL(clRetainCommandQueue, ur::cast<cl_command_queue>(Handle));
@@ -759,18 +704,13 @@ bool queue_impl::ext_oneapi_empty() const {
   // Check the status of the backend queue if this is not a host queue.
   ur_bool_t IsReady = false;
   getAdapter()->call<UrApiKind::urQueueGetInfo>(
-      MQueues[0], UR_QUEUE_INFO_EMPTY, sizeof(IsReady), &IsReady, nullptr);
+      MQueue, UR_QUEUE_INFO_EMPTY, sizeof(IsReady), &IsReady, nullptr);
   if (!IsReady)
     return false;
 
   // We may have events like host tasks which are not submitted to the backend
   // queue so we need to get their status separately.
   std::lock_guard<std::mutex> Lock(MMutex);
-  for (event Event : MEventsShared)
-    if (Event.get_info<info::event::command_execution_status>() !=
-        info::event_command_status::complete)
-      return false;
-
   for (auto EventImplWeakPtrIt = MEventsWeak.begin();
        EventImplWeakPtrIt != MEventsWeak.end(); ++EventImplWeakPtrIt)
     if (std::shared_ptr<event_impl> EventImplSharedPtr =