Skip to content

Commit

Permalink
SWDEV-447405 - Reset the last SDMA engine after every few copies
Browse files Browse the repository at this point in the history
Copies can get blocked if the last-used SDMA engine is busy with another
copy, which can lead to a performance drop in some tests such as Gromacs.
Resetting the last-used engine after a few copies, by checking the engine
status and fetching a new mask, avoids this.

Change-Id: I8fe8ea678db508d291c6242f3741fa9215e99921
  • Loading branch information
satyanveshd authored and mangupta committed Mar 12, 2024
1 parent 3a9ce12 commit 25c3b47
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
23 changes: 19 additions & 4 deletions rocclr/device/rocm/rocblit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ DmaBlitManager::DmaBlitManager(VirtualGPU& gpu, Setup setup)
: HostBlitManager(gpu, setup),
MinSizeForPinnedTransfer(dev().settings().pinnedMinXferSize_),
completeOperation_(false),
context_(nullptr) {}
context_(nullptr),
sdmaEngineRetainCount_(0) {}

inline void DmaBlitManager::synchronize() const {
if (syncOperation_) {
Expand Down Expand Up @@ -684,6 +685,7 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
uint32_t copyMask = 0;
uint32_t freeEngineMask = 0;
bool kUseRegularCopyApi = 0;
constexpr size_t kRetainCountThreshold = 8;
bool forceSDMA = (copyMetadata.copyEnginePreference_ ==
amd::CopyMetadata::CopyEnginePreference::SDMA);
HwQueueEngine engine = HwQueueEngine::Unknown;
Expand All @@ -694,10 +696,21 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
(dstAgent.handle != dev().getCpuAgent().handle)) {
engine = HwQueueEngine::SdmaWrite;
copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
if (copyMask == 0) {
  // Track the HtoD copies and increment the count. The last used SDMA engine might be busy
  // and using it every time can cause contention. When the count exceeds the threshold,
  // reset it so as to check the engine status and fetch the new mask.
  //
  // The increment is a separate statement on purpose: assigning the result of a
  // post-increment back to the same variable stores the old value, which would leave
  // the counter stuck and the threshold unreachable.
  if (sdmaEngineRetainCount_ > kRetainCountThreshold) {
    sdmaEngineRetainCount_ = 0;
  } else {
    ++sdmaEngineRetainCount_;
  }
}
} else if ((srcAgent.handle != dev().getCpuAgent().handle) &&
(dstAgent.handle == dev().getCpuAgent().handle)) {
engine = HwQueueEngine::SdmaRead;
copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
if ((copyMask == 0) && (sdmaEngineRetainCount_ != 0)) {
  // A DtoH copy without a fetched mask lowers the retain count; the guard above keeps
  // the unsigned counter from wrapping below zero.
  --sdmaEngineRetainCount_;
}
}

if (engine == HwQueueEngine::Unknown && forceSDMA) {
Expand All @@ -714,9 +727,11 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,

if (!kUseRegularCopyApi && engine != HwQueueEngine::Unknown) {
if (copyMask == 0) {
// Check if there a recently used SDMA engine for the stream
copyMask = gpu().getLastUsedSdmaEngine();
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
if (sdmaEngineRetainCount_) {
// Check if there is a recently used SDMA engine for the stream. This lookup is skipped
// when the retain count is zero, so copyMask stays 0 and the engine status check
// below runs instead.
copyMask = gpu().getLastUsedSdmaEngine();
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
}
if (copyMask == 0) {
// Check SDMA engine status
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
Expand Down
2 changes: 2 additions & 0 deletions rocclr/device/rocm/rocblit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ class DmaBlitManager : public device::HostBlitManager {
const size_t MinSizeForPinnedTransfer;
bool completeOperation_; //!< DMA blit manager must complete operation
amd::Context* context_; //!< A dummy context
mutable size_t sdmaEngineRetainCount_; //!< Keeps track of memcopies to either get the last
//!< used SDMA engine or fetch the new mask

private:
//! Disable copy constructor
Expand Down

0 comments on commit 25c3b47

Please sign in to comment.