Skip to content

Commit 28b867f

Browse files
Move and extend local memory DRM allocations
Signed-off-by: Daniel Chabrowski [email protected] Related-To: NEO-6591
1 parent 60ed4c4 commit 28b867f

File tree

8 files changed

+201
-39
lines changed

8 files changed

+201
-39
lines changed

opencl/test/unit_test/test_files/igdrcl.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,4 +402,5 @@ DisableScratchPages = 0
402402
ForceAllResourcesUncached = 0
403403
ForcePreParserEnabledForMiArbCheck = -1
404404
BatchBufferStartPrepatchingWaEnabled = -1
405+
SetVmAdviseAtomicAttribute = -1
405406
DirectSubmissionForceLocalMemoryStorageMode = -1

shared/source/debug_settings/debug_variables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseTileMemoryBankInVirtualMemoryCreation, -1, "-
196196
DECLARE_DEBUG_VARIABLE(int32_t, OverrideTimestampEvents, -1, "-1: default (based on user settings), 0: Force disable timestamp events (no timestamps will be reported), 1: Force enable timestamp events")
197197
DECLARE_DEBUG_VARIABLE(int32_t, ForcePreParserEnabledForMiArbCheck, -1, "-1: default , 0: PreParser disabled, 1: PreParser enabled")
198198
DECLARE_DEBUG_VARIABLE(int32_t, BatchBufferStartPrepatchingWaEnabled, -1, "-1: default , 0: disabled, 1: enabled. WA applies valid VA pointing to 'self' instead of 0x0. This mitigates incorrect VA preparsing.")
199+
DECLARE_DEBUG_VARIABLE(int32_t, SetVmAdviseAtomicAttribute, -1, "-1: default - atomic system, 0: atomic none, 1: atomic device, 2: atomic system")
199200
DECLARE_DEBUG_VARIABLE(bool, DisableScratchPages, false, "Disable scratch pages during VM creations")
200201
/*LOGGING FLAGS*/
201202
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

shared/source/os_interface/linux/CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,10 @@ set(NEO_CORE_OS_INTERFACE_LINUX
3838
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler.h
3939
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_bind.cpp
4040
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_bind.h
41+
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_create.cpp
4142
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_default.cpp
4243
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_default.h
4344
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_create_multi_host_allocation.cpp
44-
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_local_memory.cpp
45-
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_create.cpp
4645
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_query.cpp
4746
${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_drm.cpp
4847
${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id.h

shared/source/os_interface/linux/drm_memory_manager.cpp

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include "shared/source/helpers/string.h"
2222
#include "shared/source/helpers/surface_format_info.h"
2323
#include "shared/source/memory_manager/host_ptr_manager.h"
24+
#include "shared/source/memory_manager/memory_banks.h"
25+
#include "shared/source/memory_manager/memory_pool.h"
2426
#include "shared/source/memory_manager/residency.h"
2527
#include "shared/source/os_interface/linux/allocator_helper.h"
2628
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
@@ -510,6 +512,7 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &
510512
allocation->setDefaultGmm(gmm.release());
511513

512514
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuRange), bufferSize);
515+
513516
bo.release();
514517
return allocation;
515518
}
@@ -1628,4 +1631,181 @@ void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) {
16281631
return bo->peekLockedAddress();
16291632
}
16301633

1634+
void createMemoryRegionsForSharedAllocation(const HardwareInfo &hwInfo, MemoryInfo &memoryInfo, const AllocationData &allocationData, MemRegionsVec &memRegions) {
1635+
auto memoryBanks = allocationData.storageInfo.memoryBanks;
1636+
1637+
if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::CPU) {
1638+
//System memory region
1639+
auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(0u, hwInfo);
1640+
memRegions.push_back(regionClassAndInstance);
1641+
}
1642+
1643+
//All local memory regions
1644+
size_t currentBank = 0;
1645+
size_t i = 0;
1646+
1647+
while (i < memoryBanks.count()) {
1648+
if (memoryBanks.test(currentBank)) {
1649+
auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(1u << currentBank, hwInfo);
1650+
memRegions.push_back(regionClassAndInstance);
1651+
i++;
1652+
}
1653+
currentBank++;
1654+
}
1655+
1656+
if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::GPU) {
1657+
//System memory region
1658+
auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(0u, hwInfo);
1659+
memRegions.push_back(regionClassAndInstance);
1660+
}
1661+
}
1662+
1663+
/**
 * Creates a shared USM allocation: a GEM object spanning the regions chosen by
 * createMemoryRegionsForSharedAllocation(), advised with the VM atomic attribute and
 * mapped into the CPU address space at an address aligned to allocationData.alignment.
 *
 * The CPU mapping is built in two steps: an anonymous PROT_NONE reservation of
 * size + alignment bytes, then a MAP_FIXED mapping of the buffer object at the aligned
 * address inside that reservation.
 *
 * @param allocationData  size, alignment, root device index, placement and cache region
 * @return a new allocation released to the caller, or nullptr when
 *         - the ioctl helper reports a VM advise attribute of 0,
 *         - memory info is unavailable or allocationData.useMmapObject is not set,
 *         - GEM creation, VM advise, mmap offset retrieval or either mmap fails,
 *         - the cache region cannot be set.
 */
GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const AllocationData &allocationData) {
    auto &drm = this->getDrm(allocationData.rootDeviceIndex);

    const auto vmAdviseAttribute = drm.getIoctlHelper()->getVmAdviseAtomicAttribute();
    if (vmAdviseAttribute == 0) {
        return nullptr;
    }

    auto memoryInfo = drm.getMemoryInfo();
    const bool useBooMmap = memoryInfo && allocationData.useMmapObject;

    if (!useBooMmap) {
        return nullptr;
    }

    auto size = allocationData.size;
    auto alignment = allocationData.alignment;

    auto pHwInfo = drm.getRootDeviceEnvironment().getHardwareInfo();

    MemRegionsVec memRegions;
    createMemoryRegionsForSharedAllocation(*pHwInfo, *memoryInfo, allocationData, memRegions);

    uint32_t handle = 0;
    auto ret = memoryInfo->createGemExt(&drm, memRegions, size, handle);

    if (ret) {
        return nullptr;
    }

    // From here on the buffer object is disposed of by the unique_ptr on every early return
    std::unique_ptr<BufferObject, BufferObject::Deleter> bo(new BufferObject(&drm, handle, size, maxOsContextCount));

    if (!drm.getIoctlHelper()->setVmBoAdvise(&drm, bo->peekHandle(), vmAdviseAttribute, nullptr)) {
        return nullptr;
    }

    uint64_t offset = 0;
    if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) {
        return nullptr;
    }

    // Over-reserve by `alignment` so an aligned address always fits inside the reservation
    auto totalSizeToAlloc = size + alignment;
    auto cpuBasePointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    if (cpuBasePointer == MAP_FAILED) {
        return nullptr;
    }

    auto cpuPointer = alignUp(cpuBasePointer, alignment);

    // Replace the aligned part of the reservation with the actual buffer object mapping.
    // A failure here would otherwise hand out an inaccessible (PROT_NONE) range.
    auto mappedPointer = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
    if (mappedPointer == MAP_FAILED) {
        this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
        return nullptr;
    }

    bo->setAddress(reinterpret_cast<uintptr_t>(cpuPointer));

    auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, bo.get(), cpuPointer, bo->peekAddress(), size, MemoryPool::System4KBPages);
    allocation->setMmapPtr(cpuBasePointer);
    allocation->setMmapSize(totalSizeToAlloc);
    if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
        // Unmap the whole reservation from its base; the aligned pointer may lie past the
        // start, so unmapping from it would leak the head and reach beyond the reservation
        this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
        return nullptr;
    }

    // Ownership of the buffer object now belongs to the allocation
    bo.release();

    return allocation.release();
}
1730+
1731+
/**
 * Imports a USM host allocation from a shared (prime) file descriptor.
 *
 * The fd is first converted to a GEM handle. Then one of three paths is taken:
 *  - hasMappedPtr is set, memory info is unavailable or properties.useMmapObject is not
 *    set: a new buffer object at properties.gpuAddress is wrapped in a
 *    SystemCpuInaccessible allocation;
 *  - a shared buffer object for the handle already exists: it is referenced and wrapped
 *    in a SystemCpuInaccessible allocation;
 *  - otherwise: a new buffer object is created, mapped into the CPU address space
 *    (PROT_NONE reservation replaced by a MAP_FIXED mapping of the object), registered
 *    as a shared buffer object and returned as a System4KBPages allocation.
 *
 * @param handle        prime file descriptor of the exported object
 * @param properties    root device index, allocation type, size, gpu address, cache region
 * @param hasMappedPtr  when true no CPU mapping is created
 * @return a new allocation owned by the caller, or nullptr when the prime ioctl, the size
 *         query (lseek), either mmap or the mmap offset retrieval fails
 */
DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool hasMappedPtr) {
    auto &drm = this->getDrm(properties.rootDeviceIndex);

    drm_prime_handle openFd = {0, 0, 0};
    openFd.fd = handle;

    auto ret = drm.ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd);
    if (ret != 0) {
        int err = drm.getErrno();
        PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRIME_FD_TO_HANDLE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
        DEBUG_BREAK_IF(ret != 0);
        return nullptr;
    }

    // Shared by every path that hands out an allocation without a CPU mapping
    const auto createCpuInaccessibleAllocation = [&](BufferObject *bufferObject) {
        return new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bufferObject, reinterpret_cast<void *>(bufferObject->peekAddress()), bufferObject->peekSize(),
                                 handle, MemoryPool::SystemCpuInaccessible);
    };

    // Memory info is only queried when the caller did not provide a mapped pointer
    const bool useBooMmap = !hasMappedPtr && drm.getMemoryInfo() && properties.useMmapObject;
    if (!useBooMmap) {
        auto bo = new BufferObject(&drm, openFd.handle, properties.size, maxOsContextCount);
        bo->setAddress(properties.gpuAddress);

        return createCpuInaccessibleAllocation(bo);
    }

    auto boHandle = openFd.handle;
    auto bo = findAndReferenceSharedBufferObject(boHandle, properties.rootDeviceIndex);

    if (bo != nullptr) {
        return createCpuInaccessibleAllocation(bo);
    }

    // The object size is taken from the fd; lseek reports failure with a negative value,
    // which must not be converted into a huge unsigned size
    const off_t seekResult = lseekFunction(handle, 0, SEEK_END);
    if (seekResult < 0) {
        return nullptr;
    }
    const auto size = static_cast<size_t>(seekResult);

    bo = new BufferObject(&drm, boHandle, size, maxOsContextCount);
    void *cpuPointer = this->mmapFunction(0, size, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    if (cpuPointer == MAP_FAILED) {
        delete bo;
        return nullptr;
    }

    bo->setAddress(reinterpret_cast<uintptr_t>(cpuPointer));

    uint64_t offset = 0;
    if (!retrieveMmapOffsetForBufferObject(properties.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) {
        this->munmapFunction(cpuPointer, size);
        delete bo;
        return nullptr;
    }

    // Replace the PROT_NONE reservation with the actual buffer object mapping
    auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast<off_t>(offset));
    if (retPtr == MAP_FAILED) {
        this->munmapFunction(cpuPointer, size);
        delete bo;
        return nullptr;
    }
    DEBUG_BREAK_IF(retPtr != cpuPointer);

    AllocationData allocationData = {};
    allocationData.rootDeviceIndex = properties.rootDeviceIndex;
    allocationData.size = size;
    emitPinningRequest(bo, allocationData);

    bo->setUnmapSize(size);
    bo->setRootDeviceIndex(properties.rootDeviceIndex);

    pushSharedBufferObject(bo);

    auto drmAllocation = new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, cpuPointer, bo->peekAddress(), bo->peekSize(), MemoryPool::System4KBPages);
    drmAllocation->setMmapPtr(cpuPointer);
    drmAllocation->setMmapSize(size);
    drmAllocation->setReservedAddressRange(cpuPointer, size);
    // NOTE(review): the result of setCacheRegion is not checked here, unlike in
    // createSharedUnifiedMemoryAllocation - confirm that ignoring it is intended
    drmAllocation->setCacheRegion(&drm, static_cast<CacheRegion>(properties.cacheRegion));

    return drmAllocation;
}
1810+
16311811
} // namespace NEO

shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp

Lines changed: 0 additions & 37 deletions
This file was deleted.

shared/source/os_interface/linux/ioctl_helper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class IoctlHelper {
116116
virtual void fillVmBindExtSyncFence(const std::unique_ptr<uint8_t[]> &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) = 0;
117117
virtual std::optional<uint64_t> getCopyClassSaturatePCIECapability() = 0;
118118
virtual std::optional<uint64_t> getCopyClassSaturateLinkCapability() = 0;
119+
virtual uint32_t getVmAdviseAtomicAttribute() = 0;
119120
virtual int vmBind(Drm *drm, const VmBindParams &vmBindParams) = 0;
120121
virtual int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) = 0;
121122
virtual bool getEuStallProperties(std::array<uint64_t, 10u> &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) = 0;
@@ -167,6 +168,7 @@ class IoctlHelperUpstream : public IoctlHelper {
167168
void fillVmBindExtSyncFence(const std::unique_ptr<uint8_t[]> &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override;
168169
std::optional<uint64_t> getCopyClassSaturatePCIECapability() override;
169170
std::optional<uint64_t> getCopyClassSaturateLinkCapability() override;
171+
uint32_t getVmAdviseAtomicAttribute() override;
170172
int vmBind(Drm *drm, const VmBindParams &vmBindParams) override;
171173
int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) override;
172174
bool getEuStallProperties(std::array<uint64_t, 10u> &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override;
@@ -231,6 +233,7 @@ class IoctlHelperPrelim20 : public IoctlHelper {
231233
void fillVmBindExtSyncFence(const std::unique_ptr<uint8_t[]> &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override;
232234
std::optional<uint64_t> getCopyClassSaturatePCIECapability() override;
233235
std::optional<uint64_t> getCopyClassSaturateLinkCapability() override;
236+
uint32_t getVmAdviseAtomicAttribute() override;
234237
int vmBind(Drm *drm, const VmBindParams &vmBindParams) override;
235238
int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) override;
236239
bool getEuStallProperties(std::array<uint64_t, 10u> &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override;

shared/source/os_interface/linux/ioctl_helper_prelim.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,17 @@ std::optional<uint64_t> IoctlHelperPrelim20::getCopyClassSaturateLinkCapability(
470470
return PRELIM_I915_COPY_CLASS_CAP_SATURATE_LINK;
471471
}
472472

473+
uint32_t IoctlHelperPrelim20::getVmAdviseAtomicAttribute() {
474+
switch (NEO::DebugManager.flags.SetVmAdviseAtomicAttribute.get()) {
475+
case 0:
476+
return PRELIM_I915_VM_ADVISE_ATOMIC_NONE;
477+
case 1:
478+
return PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE;
479+
default:
480+
return PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM;
481+
}
482+
}
483+
473484
prelim_drm_i915_gem_vm_bind translateVmBindParamsToPrelimStruct(const VmBindParams &vmBindParams) {
474485
prelim_drm_i915_gem_vm_bind vmBind{};
475486
vmBind.vm_id = vmBindParams.vmId;

shared/source/os_interface/linux/ioctl_helper_upstream.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,10 @@ std::optional<uint64_t> IoctlHelperUpstream::getCopyClassSaturateLinkCapability(
214214
return std::nullopt;
215215
}
216216

217+
// The upstream helper always reports 0 as the VM advise atomic attribute.
// DrmMemoryManager::createSharedUnifiedMemoryAllocation returns nullptr for this value.
uint32_t IoctlHelperUpstream::getVmAdviseAtomicAttribute() {
    constexpr uint32_t vmAdviseAttribute = 0u;
    return vmAdviseAttribute;
}
220+
217221
// The upstream helper does nothing for a VM bind request:
// both parameters are unused and 0 is returned unconditionally.
int IoctlHelperUpstream::vmBind(Drm *drm, const VmBindParams &vmBindParams) {
    return 0;
}

0 commit comments

Comments
 (0)