Skip to content

Commit ba63e3c

Browse files
Obtain BCS for aux translation only if needed
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent 4896adc commit ba63e3c

File tree

2 files changed

+73
-75
lines changed

2 files changed

+73
-75
lines changed

opencl/source/command_queue/command_queue_hw.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,8 +371,7 @@ class CommandQueueHw : public CommandQueue {
371371
EventsRequest &eventsRequest,
372372
EventBuilder &eventBuilder,
373373
uint32_t taskLevel,
374-
PrintfHandler *printfHandler,
375-
CommandStreamReceiver *bcsCsr);
374+
PrintfHandler *printfHandler);
376375

377376
void enqueueBlocked(uint32_t commandType,
378377
Surface **surfacesForResidency,

opencl/source/command_queue/enqueue_common.h

Lines changed: 72 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
288288
eventsRequest,
289289
eventBuilder,
290290
taskLevel,
291-
printfHandler.get(),
292-
getBcsForAuxTranslation());
291+
printfHandler.get());
293292
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
294293
completionStamp = enqueueCommandWithoutKernel(
295294
surfacesForResidency,
@@ -317,7 +316,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
317316
}
318317
}
319318

320-
//inherit data from event_wait_list and previous packets
319+
// inherit data from event_wait_list and previous packets
321320
completionStamp.flushStamp = this->flushStamp->peekStamp();
322321
completionStamp.taskCount = maxTaskCountCurrentRootDevice;
323322
completionStamp.taskLevel = taskLevel;
@@ -327,7 +326,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
327326
eventBuilder.getEvent()->setStartTimeStamp();
328327
}
329328

330-
//check if we have BCS associated, if so we need to make sure it is completed as well
329+
// check if we have BCS associated, if so we need to make sure it is completed as well
331330
if (eventBuilder.getEvent() && this->bcsEngineTypes.size() > 0u) {
332331
eventBuilder.getEvent()->setupBcs(this->getBcsCommandStreamReceiver(this->bcsEngineTypes[0u])->getOsContext().getEngineType());
333332
}
@@ -619,22 +618,22 @@ void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &ta
619618
template <typename GfxFamily>
620619
bool CommandQueueHw<GfxFamily>::isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType) {
621620
bool updateTaskLevel = true;
622-
//if we are blocked by user event then no update
621+
// if we are blocked by user event then no update
623622
if (taskLevel == CompletionStamp::notReady) {
624623
updateTaskLevel = false;
625624
}
626-
//if we are executing command without kernel then it will inherit state from
627-
//previous commands, barrier is exception
625+
// if we are executing command without kernel then it will inherit state from
626+
// previous commands, barrier is exception
628627
if (isCommandWithoutKernel(commandType) && commandType != CL_COMMAND_BARRIER) {
629628
updateTaskLevel = false;
630629
}
631-
//ooq special cases starts here
630+
// ooq special cases starts here
632631
if (this->isOOQEnabled()) {
633-
//if no wait list and barrier , do not update task level
632+
// if there is no wait list and the command is not a barrier, do not update task level
634633
if (eventWaitList == nullptr && commandType != CL_COMMAND_BARRIER) {
635634
updateTaskLevel = false;
636635
}
637-
//if we have waitlist then deduce task level from waitlist and check if it is higher then current task level of queue
636+
// if we have a wait list then deduce the task level from it and check if it is higher than the current task level of the queue
638637
if (eventWaitList != nullptr) {
639638
auto taskLevelFromEvents = getTaskLevelFromWaitList(0, numEventsInWaitList, eventWaitList);
640639
taskLevelFromEvents++;
@@ -661,8 +660,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
661660
EventsRequest &eventsRequest,
662661
EventBuilder &eventBuilder,
663662
uint32_t taskLevel,
664-
PrintfHandler *printfHandler,
665-
CommandStreamReceiver *bcsCsr) {
663+
PrintfHandler *printfHandler) {
666664

667665
UNRECOVERABLE_IF(multiDispatchInfo.empty());
668666

@@ -755,35 +753,35 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
755753
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired, device->getHardwareInfo());
756754

757755
DispatchFlags dispatchFlags(
758-
{}, //csrDependencies
759-
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
760-
{}, //pipelineSelectArgs
761-
this->flushStamp->getStampReference(), //flushStampReference
762-
getThrottle(), //throttle
763-
ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), //preemptionMode
764-
numGrfRequired, //numGrfRequired
765-
L3CachingSettings::l3CacheOn, //l3CacheSettings
766-
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
767-
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo
768-
kernel->getExecutionType(), //kernelExecutionType
769-
memoryCompressionState, //memoryCompressionState
770-
getSliceCount(), //sliceCount
771-
blocking, //blocking
772-
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush
773-
multiDispatchInfo.usesSlm(), //useSLM
774-
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
775-
commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired
776-
requiresCoherency, //requiresCoherency
777-
(QueuePriority::LOW == priority), //lowPriority
778-
implicitFlush, //implicitFlush
779-
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
780-
false, //epilogueRequired
781-
false, //usePerDssBackedBuffer
782-
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
783-
useGlobalAtomics, //useGlobalAtomics
784-
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
785-
kernel->requiresMemoryMigration(), //memoryMigrationRequired
786-
isTextureCacheFlushNeeded(commandType)); //textureCacheFlush
756+
{}, // csrDependencies
757+
&timestampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
758+
{}, // pipelineSelectArgs
759+
this->flushStamp->getStampReference(), // flushStampReference
760+
getThrottle(), // throttle
761+
ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), // preemptionMode
762+
numGrfRequired, // numGrfRequired
763+
L3CachingSettings::l3CacheOn, // l3CacheSettings
764+
kernel->getThreadArbitrationPolicy(), // threadArbitrationPolicy
765+
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
766+
kernel->getExecutionType(), // kernelExecutionType
767+
memoryCompressionState, // memoryCompressionState
768+
getSliceCount(), // sliceCount
769+
blocking, // blocking
770+
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
771+
multiDispatchInfo.usesSlm(), // useSLM
772+
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
773+
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
774+
requiresCoherency, // requiresCoherency
775+
(QueuePriority::LOW == priority), // lowPriority
776+
implicitFlush, // implicitFlush
777+
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
778+
false, // epilogueRequired
779+
false, // usePerDssBackedBuffer
780+
kernel->isSingleSubdevicePreferred(), // useSingleSubdevice
781+
useGlobalAtomics, // useGlobalAtomics
782+
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
783+
kernel->requiresMemoryMigration(), // memoryMigrationRequired
784+
isTextureCacheFlushNeeded(commandType)); // textureCacheFlush
787785

788786
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
789787
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
@@ -818,6 +816,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
818816
}
819817

820818
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
819+
auto bcsCsr = getBcsForAuxTranslation();
821820
const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice());
822821
this->updateBcsTaskCount(bcsCsr->getOsContext().getEngineType(), newTaskCount);
823822
dispatchFlags.implicitFlush = true;
@@ -861,7 +860,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
861860

862861
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
863862

864-
//store previous virtual event as it will add dependecies to new virtual event
863+
// store previous virtual event as it will add dependencies to new virtual event
865864
if (this->virtualEvent) {
866865
DBG_LOG(EventsDebugEnable, "enqueueBlocked", "previousVirtualEvent", this->virtualEvent);
867866
}
@@ -881,7 +880,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
881880
}
882881
auto outEvent = eventBuilder->getEvent();
883882

884-
//update queue taskCount
883+
// update queue taskCount
885884
taskCount = outEvent->getCompletionStamp();
886885

887886
std::unique_ptr<Command> command;
@@ -900,7 +899,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
900899
if (enqueueProperties.operation != EnqueueProperties::Operation::GpuKernel) {
901900
command = std::make_unique<CommandWithoutKernel>(*this, blockedCommandsData);
902901
} else {
903-
//store task data in event
902+
// store task data in event
904903
std::vector<Surface *> allSurfaces;
905904
Kernel *kernel = nullptr;
906905
for (auto &dispatchInfo : multiDispatchInfo) {
@@ -992,35 +991,35 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
992991

993992
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
994993
DispatchFlags dispatchFlags(
995-
{}, //csrDependencies
996-
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
997-
{}, //pipelineSelectArgs
998-
flushStamp->getStampReference(), //flushStampReference
999-
getThrottle(), //throttle
1000-
device->getPreemptionMode(), //preemptionMode
1001-
GrfConfig::NotApplicable, //numGrfRequired
1002-
L3CachingSettings::NotApplicable, //l3CacheSettings
1003-
ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy
1004-
AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo
1005-
KernelExecutionType::NotApplicable, //kernelExecutionType
1006-
MemoryCompressionState::NotApplicable, //memoryCompressionState
1007-
getSliceCount(), //sliceCount
1008-
blocking, //blocking
1009-
false, //dcFlush
1010-
false, //useSLM
1011-
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
1012-
false, //GSBA32BitRequired
1013-
false, //requiresCoherency
1014-
false, //lowPriority
1015-
(enqueueProperties.operation == EnqueueProperties::Operation::Blit), //implicitFlush
1016-
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
1017-
false, //epilogueRequired
1018-
false, //usePerDssBackedBuffer
1019-
false, //useSingleSubdevice
1020-
false, //useGlobalAtomics
1021-
context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
1022-
false, //memoryMigrationRequired
1023-
false); //textureCacheFlush
994+
{}, // csrDependencies
995+
&timestampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
996+
{}, // pipelineSelectArgs
997+
flushStamp->getStampReference(), // flushStampReference
998+
getThrottle(), // throttle
999+
device->getPreemptionMode(), // preemptionMode
1000+
GrfConfig::NotApplicable, // numGrfRequired
1001+
L3CachingSettings::NotApplicable, // l3CacheSettings
1002+
ThreadArbitrationPolicy::NotPresent, // threadArbitrationPolicy
1003+
AdditionalKernelExecInfo::NotApplicable, // additionalKernelExecInfo
1004+
KernelExecutionType::NotApplicable, // kernelExecutionType
1005+
MemoryCompressionState::NotApplicable, // memoryCompressionState
1006+
getSliceCount(), // sliceCount
1007+
blocking, // blocking
1008+
false, // dcFlush
1009+
false, // useSLM
1010+
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
1011+
false, // GSBA32BitRequired
1012+
false, // requiresCoherency
1013+
false, // lowPriority
1014+
(enqueueProperties.operation == EnqueueProperties::Operation::Blit), // implicitFlush
1015+
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
1016+
false, // epilogueRequired
1017+
false, // usePerDssBackedBuffer
1018+
false, // useSingleSubdevice
1019+
false, // useGlobalAtomics
1020+
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
1021+
false, // memoryMigrationRequired
1022+
false); // textureCacheFlush
10241023

10251024
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
10261025

0 commit comments

Comments
 (0)