@@ -288,8 +288,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
288
288
eventsRequest,
289
289
eventBuilder,
290
290
taskLevel,
291
- printfHandler.get (),
292
- getBcsForAuxTranslation ());
291
+ printfHandler.get ());
293
292
} else if (enqueueProperties.isFlushWithoutKernelRequired ()) {
294
293
completionStamp = enqueueCommandWithoutKernel (
295
294
surfacesForResidency,
@@ -317,7 +316,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
317
316
}
318
317
}
319
318
320
- // inherit data from event_wait_list and previous packets
319
+ // inherit data from event_wait_list and previous packets
321
320
completionStamp.flushStamp = this ->flushStamp ->peekStamp ();
322
321
completionStamp.taskCount = maxTaskCountCurrentRootDevice;
323
322
completionStamp.taskLevel = taskLevel;
@@ -327,7 +326,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
327
326
eventBuilder.getEvent ()->setStartTimeStamp ();
328
327
}
329
328
330
- // check if we have BCS associated, if so we need to make sure it is completed as well
329
+ // check if we have BCS associated, if so we need to make sure it is completed as well
331
330
if (eventBuilder.getEvent () && this ->bcsEngineTypes .size () > 0u ) {
332
331
eventBuilder.getEvent ()->setupBcs (this ->getBcsCommandStreamReceiver (this ->bcsEngineTypes [0u ])->getOsContext ().getEngineType ());
333
332
}
@@ -619,22 +618,22 @@ void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &ta
619
618
template <typename GfxFamily>
620
619
bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
621
620
bool updateTaskLevel = true ;
622
- // if we are blocked by user event then no update
621
+ // if we are blocked by user event then no update
623
622
if (taskLevel == CompletionStamp::notReady) {
624
623
updateTaskLevel = false ;
625
624
}
626
- // if we are executing command without kernel then it will inherit state from
627
- // previous commands, barrier is exception
625
+ // if we are executing command without kernel then it will inherit state from
626
+ // previous commands, barrier is exception
628
627
if (isCommandWithoutKernel (commandType) && commandType != CL_COMMAND_BARRIER) {
629
628
updateTaskLevel = false ;
630
629
}
631
- // OOQ special cases start here
630
+ // OOQ special cases start here
632
631
if (this ->isOOQEnabled ()) {
633
- // if no wait list and not a barrier, do not update task level
632
+ // if no wait list and not a barrier, do not update task level
634
633
if (eventWaitList == nullptr && commandType != CL_COMMAND_BARRIER) {
635
634
updateTaskLevel = false ;
636
635
}
637
- // if we have a waitlist then deduce task level from the waitlist and check if it is higher than the current task level of the queue
636
+ // if we have a waitlist then deduce task level from the waitlist and check if it is higher than the current task level of the queue
638
637
if (eventWaitList != nullptr ) {
639
638
auto taskLevelFromEvents = getTaskLevelFromWaitList (0 , numEventsInWaitList, eventWaitList);
640
639
taskLevelFromEvents++;
@@ -661,8 +660,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
661
660
EventsRequest &eventsRequest,
662
661
EventBuilder &eventBuilder,
663
662
uint32_t taskLevel,
664
- PrintfHandler *printfHandler,
665
- CommandStreamReceiver *bcsCsr) {
663
+ PrintfHandler *printfHandler) {
666
664
667
665
UNRECOVERABLE_IF (multiDispatchInfo.empty ());
668
666
@@ -755,35 +753,35 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
755
753
auto memoryCompressionState = getGpgpuCommandStreamReceiver ().getMemoryCompressionState (auxTranslationRequired, device->getHardwareInfo ());
756
754
757
755
DispatchFlags dispatchFlags (
758
- {}, // csrDependencies
759
- &timestampPacketDependencies.barrierNodes , // barrierTimestampPacketNodes
760
- {}, // pipelineSelectArgs
761
- this ->flushStamp ->getStampReference (), // flushStampReference
762
- getThrottle (), // throttle
763
- ClPreemptionHelper::taskPreemptionMode (getDevice (), multiDispatchInfo), // preemptionMode
764
- numGrfRequired, // numGrfRequired
765
- L3CachingSettings::l3CacheOn, // l3CacheSettings
766
- kernel->getThreadArbitrationPolicy (), // threadArbitrationPolicy
767
- kernel->getAdditionalKernelExecInfo (), // additionalKernelExecInfo
768
- kernel->getExecutionType (), // kernelExecutionType
769
- memoryCompressionState, // memoryCompressionState
770
- getSliceCount (), // sliceCount
771
- blocking, // blocking
772
- shouldFlushDC (commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
773
- multiDispatchInfo.usesSlm (), // useSLM
774
- !getGpgpuCommandStreamReceiver ().isUpdateTagFromWaitEnabled (), // guardCommandBufferWithPipeControl
775
- commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
776
- requiresCoherency, // requiresCoherency
777
- (QueuePriority::LOW == priority), // lowPriority
778
- implicitFlush, // implicitFlush
779
- !eventBuilder.getEvent () || getGpgpuCommandStreamReceiver ().isNTo1SubmissionModelEnabled (), // outOfOrderExecutionAllowed
780
- false , // epilogueRequired
781
- false , // usePerDssBackedBuffer
782
- kernel->isSingleSubdevicePreferred (), // useSingleSubdevice
783
- useGlobalAtomics, // useGlobalAtomics
784
- kernel->areMultipleSubDevicesInContext (), // areMultipleSubDevicesInContext
785
- kernel->requiresMemoryMigration (), // memoryMigrationRequired
786
- isTextureCacheFlushNeeded (commandType)); // textureCacheFlush
756
+ {}, // csrDependencies
757
+ &timestampPacketDependencies.barrierNodes , // barrierTimestampPacketNodes
758
+ {}, // pipelineSelectArgs
759
+ this ->flushStamp ->getStampReference (), // flushStampReference
760
+ getThrottle (), // throttle
761
+ ClPreemptionHelper::taskPreemptionMode (getDevice (), multiDispatchInfo), // preemptionMode
762
+ numGrfRequired, // numGrfRequired
763
+ L3CachingSettings::l3CacheOn, // l3CacheSettings
764
+ kernel->getThreadArbitrationPolicy (), // threadArbitrationPolicy
765
+ kernel->getAdditionalKernelExecInfo (), // additionalKernelExecInfo
766
+ kernel->getExecutionType (), // kernelExecutionType
767
+ memoryCompressionState, // memoryCompressionState
768
+ getSliceCount (), // sliceCount
769
+ blocking, // blocking
770
+ shouldFlushDC (commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
771
+ multiDispatchInfo.usesSlm (), // useSLM
772
+ !getGpgpuCommandStreamReceiver ().isUpdateTagFromWaitEnabled (), // guardCommandBufferWithPipeControl
773
+ commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
774
+ requiresCoherency, // requiresCoherency
775
+ (QueuePriority::LOW == priority), // lowPriority
776
+ implicitFlush, // implicitFlush
777
+ !eventBuilder.getEvent () || getGpgpuCommandStreamReceiver ().isNTo1SubmissionModelEnabled (), // outOfOrderExecutionAllowed
778
+ false , // epilogueRequired
779
+ false , // usePerDssBackedBuffer
780
+ kernel->isSingleSubdevicePreferred (), // useSingleSubdevice
781
+ useGlobalAtomics, // useGlobalAtomics
782
+ kernel->areMultipleSubDevicesInContext (), // areMultipleSubDevicesInContext
783
+ kernel->requiresMemoryMigration (), // memoryMigrationRequired
784
+ isTextureCacheFlushNeeded (commandType)); // textureCacheFlush
787
785
788
786
dispatchFlags.pipelineSelectArgs .mediaSamplerRequired = mediaSamplerRequired;
789
787
dispatchFlags.pipelineSelectArgs .specialPipelineSelectMode = specialPipelineSelectMode;
@@ -818,6 +816,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
818
816
}
819
817
820
818
if (enqueueProperties.blitPropertiesContainer ->size () > 0 ) {
819
+ auto bcsCsr = getBcsForAuxTranslation ();
821
820
const auto newTaskCount = bcsCsr->flushBcsTask (*enqueueProperties.blitPropertiesContainer , false , this ->isProfilingEnabled (), getDevice ());
822
821
this ->updateBcsTaskCount (bcsCsr->getOsContext ().getEngineType (), newTaskCount);
823
822
dispatchFlags.implicitFlush = true ;
@@ -861,7 +860,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
861
860
862
861
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership (*this );
863
862
864
- // store previous virtual event as it will add dependencies to new virtual event
863
+ // store previous virtual event as it will add dependencies to new virtual event
865
864
if (this ->virtualEvent ) {
866
865
DBG_LOG (EventsDebugEnable, " enqueueBlocked" , " previousVirtualEvent" , this ->virtualEvent );
867
866
}
@@ -881,7 +880,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
881
880
}
882
881
auto outEvent = eventBuilder->getEvent ();
883
882
884
- // update queue taskCount
883
+ // update queue taskCount
885
884
taskCount = outEvent->getCompletionStamp ();
886
885
887
886
std::unique_ptr<Command> command;
@@ -900,7 +899,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
900
899
if (enqueueProperties.operation != EnqueueProperties::Operation::GpuKernel) {
901
900
command = std::make_unique<CommandWithoutKernel>(*this , blockedCommandsData);
902
901
} else {
903
- // store task data in event
902
+ // store task data in event
904
903
std::vector<Surface *> allSurfaces;
905
904
Kernel *kernel = nullptr ;
906
905
for (auto &dispatchInfo : multiDispatchInfo) {
@@ -992,35 +991,35 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
992
991
993
992
auto rootDeviceIndex = getDevice ().getRootDeviceIndex ();
994
993
DispatchFlags dispatchFlags (
995
- {}, // csrDependencies
996
- &timestampPacketDependencies.barrierNodes , // barrierTimestampPacketNodes
997
- {}, // pipelineSelectArgs
998
- flushStamp->getStampReference (), // flushStampReference
999
- getThrottle (), // throttle
1000
- device->getPreemptionMode (), // preemptionMode
1001
- GrfConfig::NotApplicable, // numGrfRequired
1002
- L3CachingSettings::NotApplicable, // l3CacheSettings
1003
- ThreadArbitrationPolicy::NotPresent, // threadArbitrationPolicy
1004
- AdditionalKernelExecInfo::NotApplicable, // additionalKernelExecInfo
1005
- KernelExecutionType::NotApplicable, // kernelExecutionType
1006
- MemoryCompressionState::NotApplicable, // memoryCompressionState
1007
- getSliceCount (), // sliceCount
1008
- blocking, // blocking
1009
- false , // dcFlush
1010
- false , // useSLM
1011
- !getGpgpuCommandStreamReceiver ().isUpdateTagFromWaitEnabled (), // guardCommandBufferWithPipeControl
1012
- false , // GSBA32BitRequired
1013
- false , // requiresCoherency
1014
- false , // lowPriority
1015
- (enqueueProperties.operation == EnqueueProperties::Operation::Blit), // implicitFlush
1016
- getGpgpuCommandStreamReceiver ().isNTo1SubmissionModelEnabled (), // outOfOrderExecutionAllowed
1017
- false , // epilogueRequired
1018
- false , // usePerDssBackedBuffer
1019
- false , // useSingleSubdevice
1020
- false , // useGlobalAtomics
1021
- context->containsMultipleSubDevices (rootDeviceIndex), // areMultipleSubDevicesInContext
1022
- false , // memoryMigrationRequired
1023
- false ); // textureCacheFlush
994
+ {}, // csrDependencies
995
+ &timestampPacketDependencies.barrierNodes , // barrierTimestampPacketNodes
996
+ {}, // pipelineSelectArgs
997
+ flushStamp->getStampReference (), // flushStampReference
998
+ getThrottle (), // throttle
999
+ device->getPreemptionMode (), // preemptionMode
1000
+ GrfConfig::NotApplicable, // numGrfRequired
1001
+ L3CachingSettings::NotApplicable, // l3CacheSettings
1002
+ ThreadArbitrationPolicy::NotPresent, // threadArbitrationPolicy
1003
+ AdditionalKernelExecInfo::NotApplicable, // additionalKernelExecInfo
1004
+ KernelExecutionType::NotApplicable, // kernelExecutionType
1005
+ MemoryCompressionState::NotApplicable, // memoryCompressionState
1006
+ getSliceCount (), // sliceCount
1007
+ blocking, // blocking
1008
+ false , // dcFlush
1009
+ false , // useSLM
1010
+ !getGpgpuCommandStreamReceiver ().isUpdateTagFromWaitEnabled (), // guardCommandBufferWithPipeControl
1011
+ false , // GSBA32BitRequired
1012
+ false , // requiresCoherency
1013
+ false , // lowPriority
1014
+ (enqueueProperties.operation == EnqueueProperties::Operation::Blit), // implicitFlush
1015
+ getGpgpuCommandStreamReceiver ().isNTo1SubmissionModelEnabled (), // outOfOrderExecutionAllowed
1016
+ false , // epilogueRequired
1017
+ false , // usePerDssBackedBuffer
1018
+ false , // useSingleSubdevice
1019
+ false , // useGlobalAtomics
1020
+ context->containsMultipleSubDevices (rootDeviceIndex), // areMultipleSubDevicesInContext
1021
+ false , // memoryMigrationRequired
1022
+ false ); // textureCacheFlush
1024
1023
1025
1024
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver ().isStallingCommandsOnNextFlushRequired ();
1026
1025
0 commit comments