From a8dbc0c19a2ff81c5671839a2575321dc1173e94 Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras Date: Wed, 13 Mar 2024 22:50:58 +0000 Subject: [PATCH] SWDEV-451380 - Disable kernel args for non-XGMI if HDP flush register is invalid Change-Id: I227e046e2b9cb25476a50240f5d070adbd558f21 --- hipamd/src/hip_graph_internal.cpp | 14 +++++++++--- rocclr/device/rocm/rocdevice.cpp | 35 +++++++++++++++++++----------- rocclr/device/rocm/rocsettings.cpp | 5 +++-- rocclr/device/rocm/rocsettings.hpp | 3 ++- rocclr/device/rocm/rocvirtual.cpp | 4 +++- 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/hipamd/src/hip_graph_internal.cpp b/hipamd/src/hip_graph_internal.cpp index 3b3af5e7d..fd73989ec 100644 --- a/hipamd/src/hip_graph_internal.cpp +++ b/hipamd/src/hip_graph_internal.cpp @@ -397,9 +397,17 @@ hipError_t GraphExec::CaptureAQLPackets() { } if (device_kernarg_pool_ && !device->isXgmi()) { - *device->info().hdpMemFlushCntl = 1u; - if (*device->info().hdpMemFlushCntl != UINT32_MAX) { - LogError("Unexpected HDP Register readback value!"); + if (device->info().hdpMemFlushCntl != nullptr) { + *device->info().hdpMemFlushCntl = 1u; + if (*device->info().hdpMemFlushCntl != UINT32_MAX) { + LogError("Unexpected HDP Register readback value!"); + } + } else { + amd::Command* command = new amd::Marker(*stream, true); + if (command != nullptr) { + command->enqueue(); + command->release(); + } } } diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 3b6b4d4d5..7f1a7cca3 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -704,6 +704,27 @@ bool Device::create() { return false; } + setupCpuAgent(); + + // Get Agent HDP Flush Register Memory + hsa_amd_hdp_flush_t hdpInfo; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(bkendDevice_, + static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) { + LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name); + return false; + } + + 
info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL; + info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL; + + bool device_kernel_args = true; + if (!isXgmi_ && ((info_.hdpMemFlushCntl == nullptr) || (info_.hdpRegFlushCntl == nullptr))) { + LogWarning("Unable to determine HDP flush register address. " + "Device kernel arguments are not supported"); + device_kernel_args = false; + } + // Create HSA settings assert(!settings_); roc::Settings* hsaSettings = new roc::Settings(); @@ -712,7 +733,7 @@ bool Device::create() { !hsaSettings->create((agent_profile_ == HSA_PROFILE_FULL), isa->versionMajor(), isa->versionMinor(), isa->versionStepping(), isa->xnack() == amd::Isa::Feature::Enabled, - coop_groups)) { + coop_groups, device_kernel_args)) { LogPrintfError("Unable to create settings for HSA device %s (PCI ID %x)", agent_name, pciDeviceId_); return false; @@ -753,17 +774,6 @@ bool Device::create() { } info_.pciDomainID = pci_domain_id; - // Get Agent HDP Flush Register Memory - hsa_amd_hdp_flush_t hdpInfo; - if (HSA_STATUS_SUCCESS != - hsa_agent_get_info(bkendDevice_, - static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_HDP_FLUSH), &hdpInfo)) { - LogPrintfError("Unable to determine HDP flush info for HSA device %s", agent_name); - return false; - } - info_.hdpMemFlushCntl = hdpInfo.HDP_MEM_FLUSH_CNTL; - info_.hdpRegFlushCntl = hdpInfo.HDP_REG_FLUSH_CNTL; - if (populateOCLDeviceConstants() == false) { LogPrintfError("populateOCLDeviceConstants failed for HSA device %s (PCI ID %x)", agent_name, pciDeviceId_); @@ -1256,7 +1266,6 @@ bool Device::populateOCLDeviceConstants() { engineAssignMap_[1 << i] = 0; } - setupCpuAgent(); checkAtomicSupport(); diff --git a/rocclr/device/rocm/rocsettings.cpp b/rocclr/device/rocm/rocsettings.cpp index 7c09f43de..100cc3f9a 100644 --- a/rocclr/device/rocm/rocsettings.cpp +++ b/rocclr/device/rocm/rocsettings.cpp @@ -101,7 +101,8 @@ Settings::Settings() { // ================================================================================================ bool 
Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, - uint32_t gfxStepping, bool enableXNACK, bool coop_groups) { + uint32_t gfxStepping, bool enableXNACK, bool coop_groups, + bool device_kernel_args) { customHostAllocator_ = false; if (fullProfile) { @@ -167,7 +168,7 @@ bool Settings::create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor // Enable device kernel args for MI300* for now if (gfxipMajor == 9 && gfxipMinor == 4 && (gfxStepping == 0 || gfxStepping == 1 || gfxStepping == 2)) { - device_kernel_args_ = HIP_FORCE_DEV_KERNARG; + device_kernel_args_ = HIP_FORCE_DEV_KERNARG && device_kernel_args; } if (gfxipMajor >= 10) { diff --git a/rocclr/device/rocm/rocsettings.hpp b/rocclr/device/rocm/rocsettings.hpp index 5e2ac18b4..1c5d87041 100644 --- a/rocclr/device/rocm/rocsettings.hpp +++ b/rocclr/device/rocm/rocsettings.hpp @@ -83,7 +83,8 @@ class Settings : public device::Settings { //! Creates settings bool create(bool fullProfile, uint32_t gfxipMajor, uint32_t gfxipMinor, uint32_t gfxStepping, - bool enableXNACK, bool coop_groups = false); + bool enableXNACK, bool coop_groups = false, + bool device_kernel_args = true); private: //! Disable copy constructor diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 71c1bf324..9d2751c99 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -3211,7 +3211,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, } } - const auto pcieKernargs = !dev().isXgmi() && dev().settings().device_kernel_args_; + const auto pcieKernargs = !dev().isXgmi() && + dev().settings().device_kernel_args_ && + roc_device_.info().largeBar_; address argBuffer = hidden_arguments; bool isGraphCapture = vcmd != nullptr && vcmd->getCapturingState();