Skip to content

Commit 83f6f5b

Browse files
fix: Defer timestamp packet after failed submission
Related-To: NEO-7835 Signed-off-by: Maciej Plewka <[email protected]> Source: 464c677
1 parent b89badb commit 83f6f5b

File tree

4 files changed

+114
-20
lines changed

4 files changed

+114
-20
lines changed

opencl/source/command_queue/command_queue_hw.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -367,21 +367,21 @@ class CommandQueueHw : public CommandQueue {
367367
template <uint32_t cmdType>
368368
cl_int enqueueBlitSplit(MultiDispatchInfo &dispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
369369

370-
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
371-
size_t surfaceCount,
372-
LinearStream &commandStream,
373-
size_t commandStreamStart,
374-
bool &blocking,
375-
bool clearDependenciesForSubCapture,
376-
const MultiDispatchInfo &multiDispatchInfo,
377-
const EnqueueProperties &enqueueProperties,
378-
TimestampPacketDependencies &timestampPacketDependencies,
379-
EventsRequest &eventsRequest,
380-
EventBuilder &eventBuilder,
381-
TaskCountType taskLevel,
382-
PrintfHandler *printfHandler,
383-
bool relaxedOrderingEnabled,
384-
uint32_t commandType);
370+
MOCKABLE_VIRTUAL CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
371+
size_t surfaceCount,
372+
LinearStream &commandStream,
373+
size_t commandStreamStart,
374+
bool &blocking,
375+
bool clearDependenciesForSubCapture,
376+
const MultiDispatchInfo &multiDispatchInfo,
377+
const EnqueueProperties &enqueueProperties,
378+
TimestampPacketDependencies &timestampPacketDependencies,
379+
EventsRequest &eventsRequest,
380+
EventBuilder &eventBuilder,
381+
TaskCountType taskLevel,
382+
PrintfHandler *printfHandler,
383+
bool relaxedOrderingEnabled,
384+
uint32_t commandType);
385385

386386
void enqueueBlocked(uint32_t commandType,
387387
Surface **surfacesForResidency,

opencl/source/command_queue/enqueue_common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
410410
}
411411

412412
if (completionStamp.taskCount > CompletionStamp::notReady) {
413+
if (deferredTimestampPackets.get()) {
414+
timestampPacketContainer->moveNodesToNewContainer(*deferredTimestampPackets);
415+
}
413416
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
414417
}
415418

opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -716,14 +716,14 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
716716
}
717717

718718
struct EnqueueHandlerTestBasic : public ::testing::Test {
719-
template <typename FamilyType>
720-
std::unique_ptr<MockCommandQueueHw<FamilyType>> setupFixtureAndCreateMockCommandQueue() {
719+
template <typename MockCmdQueueType, typename FamilyType>
720+
std::unique_ptr<MockCmdQueueType> setupFixtureAndCreateMockCommandQueue() {
721721
auto executionEnvironment = platform()->peekExecutionEnvironment();
722722

723723
device = std::make_unique<MockClDevice>(MockDevice::createWithExecutionEnvironment<MockDevice>(nullptr, executionEnvironment, 0u));
724724
context = std::make_unique<MockContext>(device.get());
725725

726-
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
726+
auto mockCmdQ = std::make_unique<MockCmdQueueType>(context.get(), device.get(), nullptr);
727727

728728
auto &ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> &>(mockCmdQ->getGpgpuCommandStreamReceiver());
729729
ultCsr.taskCount = initialTaskCount;
@@ -741,7 +741,7 @@ struct EnqueueHandlerTestBasic : public ::testing::Test {
741741
};
742742

743743
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) {
744-
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<FamilyType>();
744+
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueHw<FamilyType>, FamilyType>();
745745
MockKernelWithInternals kernelInternals(*device, context.get());
746746
Kernel *kernel = kernelInternals.mockKernel;
747747
MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel);
@@ -757,7 +757,7 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
757757
}
758758

759759
HWTEST_F(EnqueueHandlerTestBasic, givenBlockedEnqueueHandlerWhenCommandIsBlokingThenCompletionStampTaskCountIsPassedToWaitForTaskCountAndCleanAllocationListAsRequiredTaskCount) {
760-
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<FamilyType>();
760+
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueHw<FamilyType>, FamilyType>();
761761

762762
MockKernelWithInternals kernelInternals(*device, context.get());
763763
Kernel *kernel = kernelInternals.mockKernel;
@@ -783,3 +783,93 @@ HWTEST_F(EnqueueHandlerTestBasic, givenBlockedEnqueueHandlerWhenCommandIsBloking
783783

784784
t0.join();
785785
}
786+
// Test double derived from MockCommandQueueHw whose enqueueNonBlocked() override
// does not use any of its arguments: it adds one tag node to
// timestampPacketContainer and returns a CompletionStamp carrying the
// test-controlled taskCountToReturn.
template <typename FamilyType>
787+
class MockCommandQueueFailEnqueue : public MockCommandQueueHw<FamilyType> {
788+
public:
789+
// Forwards the arguments to MockCommandQueueHw, then creates the private tag
// allocator and replaces both timestamp packet containers with fresh, empty ones.
MockCommandQueueFailEnqueue(Context *context,
790+
ClDevice *device,
791+
cl_queue_properties *properties) : MockCommandQueueHw<FamilyType>(context, device, properties) {
792+
mockTagAllocator = std::make_unique<MockTagAllocator<>>(0, device->getDevice().getMemoryManager());
793+
this->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
794+
this->deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
795+
}
796+
// Stubbed submission: every parameter is ignored. Each call adds exactly one
// node obtained from mockTagAllocator to timestampPacketContainer and reports
// taskCountToReturn as the resulting task count.
CompletionStamp enqueueNonBlocked(Surface **surfacesForResidency,
797+
size_t surfaceCount,
798+
LinearStream &commandStream,
799+
size_t commandStreamStart,
800+
bool &blocking,
801+
bool clearDependenciesForSubCapture,
802+
const MultiDispatchInfo &multiDispatchInfo,
803+
const EnqueueProperties &enqueueProperties,
804+
TimestampPacketDependencies &timestampPacketDependencies,
805+
EventsRequest &eventsRequest,
806+
EventBuilder &eventBuilder,
807+
TaskCountType taskLevel,
808+
PrintfHandler *printfHandler,
809+
bool relaxedOrderingEnabled,
810+
uint32_t commandType) override {
811+
this->timestampPacketContainer->add(mockTagAllocator->getTag());
812+
CompletionStamp stamp{};
813+
stamp.taskCount = taskCountToReturn;
814+
return stamp;
815+
}
816+
// Task count placed in the stamp returned by enqueueNonBlocked(); tests assign it directly.
TaskCountType taskCountToReturn = 0;
817+
// Source of the tag nodes added by enqueueNonBlocked().
std::unique_ptr<MockTagAllocator<>> mockTagAllocator;
818+
};
819+
// Makes the mocked enqueueNonBlocked() report CompletionStamp::gpuHang and checks
// that timestampPacketContainer holds no nodes once enqueueHandler returns.
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenEnqueueFailedThenTimestampPacketContainerIsEmpty) {
820+
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueFailEnqueue<FamilyType>, FamilyType>();
821+
822+
MockKernelWithInternals kernelInternals(*device, context.get());
823+
Kernel *kernel = kernelInternals.mockKernel;
824+
MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel);
825+
mockCmdQ->taskCountToReturn = CompletionStamp::gpuHang;
826+
// The return value of enqueueHandler is not inspected; only the container state is.
mockCmdQ->template enqueueHandler<CL_COMMAND_BARRIER>(nullptr,
827+
0,
828+
true,
829+
multiDispatchInfo,
830+
0,
831+
nullptr,
832+
nullptr);
833+
EXPECT_TRUE(mockCmdQ->timestampPacketContainer->peekNodes().empty());
834+
// NOTE(review): the deferred nodes are swapped into a local container, presumably
// so they are released at the end of the test body rather than during queue
// teardown - confirm.
TimestampPacketContainer release;
835+
mockCmdQ->deferredTimestampPackets->swapNodes(release);
836+
}
837+
838+
// Makes the mocked enqueueNonBlocked() report task count 100 and checks that the
// node it added is still in timestampPacketContainer after enqueueHandler returns.
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenEnqueueSucceedsThenTimestampPacketContainerIsNotEmpty) {
839+
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueFailEnqueue<FamilyType>, FamilyType>();
840+
841+
MockKernelWithInternals kernelInternals(*device, context.get());
842+
Kernel *kernel = kernelInternals.mockKernel;
843+
MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel);
844+
// NOTE(review): 100 is used as the non-failing task count for this scenario;
// presumably it does not exceed CompletionStamp::notReady - confirm.
mockCmdQ->taskCountToReturn = 100;
845+
mockCmdQ->template enqueueHandler<CL_COMMAND_BARRIER>(nullptr,
846+
0,
847+
true,
848+
multiDispatchInfo,
849+
0,
850+
nullptr,
851+
nullptr);
852+
EXPECT_FALSE(mockCmdQ->timestampPacketContainer->peekNodes().empty());
853+
// NOTE(review): the remaining nodes are swapped into a local container, presumably
// so they are released at the end of the test body rather than during queue
// teardown - confirm.
TimestampPacketContainer release;
854+
mockCmdQ->timestampPacketContainer->swapNodes(release);
855+
}
856+
857+
// Makes the mocked enqueueNonBlocked() report CompletionStamp::gpuHang after the
// deferred container has been destroyed, and checks that the node it added stays
// in timestampPacketContainer.
HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenEnqueueFailedButThereIsNoDeferredContainerThenTimestampPacketContainerIsNotEmpty) {
858+
auto mockCmdQ = setupFixtureAndCreateMockCommandQueue<MockCommandQueueFailEnqueue<FamilyType>, FamilyType>();
859+
860+
MockKernelWithInternals kernelInternals(*device, context.get());
861+
Kernel *kernel = kernelInternals.mockKernel;
862+
MockMultiDispatchInfo multiDispatchInfo(device.get(), kernel);
863+
mockCmdQ->taskCountToReturn = CompletionStamp::gpuHang;
864+
// Drop the deferred container created by the mock's constructor so the queue has
// nowhere to move the nodes on failure.
mockCmdQ->deferredTimestampPackets.reset();
865+
mockCmdQ->template enqueueHandler<CL_COMMAND_BARRIER>(nullptr,
866+
0,
867+
true,
868+
multiDispatchInfo,
869+
0,
870+
nullptr,
871+
nullptr);
872+
EXPECT_FALSE(mockCmdQ->timestampPacketContainer->peekNodes().empty());
873+
// NOTE(review): the remaining nodes are swapped into a local container, presumably
// so they are released at the end of the test body rather than during queue
// teardown - confirm.
TimestampPacketContainer release;
874+
mockCmdQ->timestampPacketContainer->swapNodes(release);
875+
}

opencl/test/unit_test/mocks/mock_command_queue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class MockCommandQueue : public CommandQueue {
3030
using CommandQueue::blitEnqueueAllowed;
3131
using CommandQueue::blitEnqueueImageAllowed;
3232
using CommandQueue::bufferCpuCopyAllowed;
33+
using CommandQueue::deferredTimestampPackets;
3334
using CommandQueue::device;
3435
using CommandQueue::gpgpuEngine;
3536
using CommandQueue::isCopyOnly;

0 commit comments

Comments
 (0)