Skip to content

Commit 99eab71

Browse files
fix(sysman): handle memory info creation failure
Related-To: NEO-8452
Signed-off-by: Joshua Santosh Ranjan <[email protected]>
Source: 994fb11
1 parent 4f36437 commit 99eab71

File tree

4 files changed, with 143 additions (+143) and 17 deletions (-17).

level_zero/sysman/source/memory/linux/sysman_os_memory_imp_prelim.cpp

+16-6
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
336336
}
337337

338338
ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) {
339+
ze_result_t status = ZE_RESULT_SUCCESS;
339340
pState->health = ZES_MEM_HEALTH_UNKNOWN;
340341
FirmwareUtil *pFwInterface = pLinuxSysmanImp->getFwUtilInterface();
341342
if (pFwInterface != nullptr) {
@@ -347,12 +348,21 @@ ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) {
347348
auto memoryInfo = pDrm->getIoctlHelper()->createMemoryInfo();
348349
hwDeviceId->closeFileDescriptor();
349350

350-
auto region = memoryInfo->getMemoryRegion(MemoryBanks::getBankForLocalMemory(subdeviceId));
351-
352-
pState->free = region.unallocatedSize;
353-
pState->size = region.probedSize;
354-
355-
return ZE_RESULT_SUCCESS;
351+
if (memoryInfo != nullptr) {
352+
auto region = memoryInfo->getMemoryRegion(MemoryBanks::getBankForLocalMemory(subdeviceId));
353+
pState->free = region.unallocatedSize;
354+
pState->size = region.probedSize;
355+
} else {
356+
pState->free = 0;
357+
pState->size = 0;
358+
status = ZE_RESULT_ERROR_UNKNOWN;
359+
if (errno == ENODEV) {
360+
status = ZE_RESULT_ERROR_DEVICE_LOST;
361+
}
362+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
363+
"Error@ %s():createMemoryInfo failed errno:%d \n", __FUNCTION__, errno);
364+
}
365+
return status;
356366
}
357367

358368
std::unique_ptr<OsMemory> OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) {

level_zero/sysman/test/unit_tests/sources/memory/linux/test_sysman_memory_prelim.cpp

+54-2
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,27 @@
1919
namespace L0 {
2020
namespace Sysman {
2121
namespace ult {
22-
2322
constexpr int32_t memoryBusWidth = 128; // bus width in bits
2423
constexpr int32_t numMemoryChannels = 8;
2524
constexpr uint32_t memoryHandleComponentCount = 1u;
2625
const std::string sampleGuid1 = "0xb15a0edc";
26+
27+
class SysmanMemoryMockIoctlHelper : public NEO::MockIoctlHelper {
28+
29+
public:
30+
using NEO::MockIoctlHelper::MockIoctlHelper;
31+
bool returnEmptyMemoryInfo = false;
32+
int32_t mockErrorNumber = 0;
33+
34+
std::unique_ptr<MemoryInfo> createMemoryInfo() override {
35+
if (returnEmptyMemoryInfo) {
36+
errno = mockErrorNumber;
37+
return {};
38+
}
39+
return NEO::MockIoctlHelper::createMemoryInfo();
40+
}
41+
};
42+
2743
class SysmanDeviceMemoryFixture : public SysmanDeviceFixture {
2844
protected:
2945
std::unique_ptr<MockMemorySysfsAccess> pSysfsAccess;
@@ -54,7 +70,7 @@ class SysmanDeviceMemoryFixture : public SysmanDeviceFixture {
5470
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
5571
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
5672
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
57-
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<NEO::MockIoctlHelper>(*pDrm));
73+
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<SysmanMemoryMockIoctlHelper>(*pDrm));
5874

5975
pSysmanDeviceImp->pMemoryHandleContext->handleList.clear();
6076
pmtMapOriginal = pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject;
@@ -351,6 +367,42 @@ TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemo
351367
}
352368
}
353369

370+
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetStateAndIoctlReturnedErrorThenApiReturnsError) {
371+
setLocalSupportedAndReinit(true);
372+
373+
auto ioctlHelper = static_cast<SysmanMemoryMockIoctlHelper *>(pDrm->ioctlHelper.get());
374+
ioctlHelper->returnEmptyMemoryInfo = true;
375+
auto handles = getMemoryHandles(memoryHandleComponentCount);
376+
for (auto handle : handles) {
377+
zes_mem_state_t state;
378+
379+
ze_result_t result = zesMemoryGetState(handle, &state);
380+
381+
EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);
382+
EXPECT_EQ(state.size, 0u);
383+
EXPECT_EQ(state.free, 0u);
384+
}
385+
}
386+
387+
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetStateAndDeviceIsNotAvailableThenDeviceLostErrorIsReturned) {
388+
setLocalSupportedAndReinit(true);
389+
390+
auto ioctlHelper = static_cast<SysmanMemoryMockIoctlHelper *>(pDrm->ioctlHelper.get());
391+
ioctlHelper->returnEmptyMemoryInfo = true;
392+
ioctlHelper->mockErrorNumber = ENODEV;
393+
auto handles = getMemoryHandles(memoryHandleComponentCount);
394+
for (auto handle : handles) {
395+
zes_mem_state_t state;
396+
397+
ze_result_t result = zesMemoryGetState(handle, &state);
398+
399+
EXPECT_EQ(result, ZE_RESULT_ERROR_DEVICE_LOST);
400+
EXPECT_EQ(state.size, 0u);
401+
EXPECT_EQ(state.free, 0u);
402+
errno = 0;
403+
}
404+
}
405+
354406
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthWhenPmtObjectIsNullThenFailureRetuned) {
355407
for (auto &subDeviceIdToPmtEntry : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) {
356408
if (subDeviceIdToPmtEntry.second != nullptr) {

level_zero/tools/source/sysman/memory/linux/os_memory_imp_prelim.cpp

+16-7
Original file line numberDiff line numberDiff line change
@@ -346,19 +346,28 @@ ze_result_t LinuxMemoryImp::getBandwidth(zes_mem_bandwidth_t *pBandwidth) {
346346
}
347347

348348
ze_result_t LinuxMemoryImp::getState(zes_mem_state_t *pState) {
349+
ze_result_t status = ZE_RESULT_SUCCESS;
349350
pState->health = ZES_MEM_HEALTH_UNKNOWN;
350351
FirmwareUtil *pFwInterface = pLinuxSysmanImp->getFwUtilInterface();
351352
if (pFwInterface != nullptr) {
352353
pFwInterface->fwGetMemoryHealthIndicator(&pState->health);
353354
}
354-
355355
auto memoryInfo = pDrm->getIoctlHelper()->createMemoryInfo();
356-
auto region = memoryInfo->getMemoryRegion(MemoryBanks::getBankForLocalMemory(subdeviceId));
357-
358-
pState->free = region.unallocatedSize;
359-
pState->size = region.probedSize;
360-
361-
return ZE_RESULT_SUCCESS;
356+
if (memoryInfo != nullptr) {
357+
auto region = memoryInfo->getMemoryRegion(MemoryBanks::getBankForLocalMemory(subdeviceId));
358+
pState->free = region.unallocatedSize;
359+
pState->size = region.probedSize;
360+
} else {
361+
pState->free = 0;
362+
pState->size = 0;
363+
status = ZE_RESULT_ERROR_UNKNOWN;
364+
if (errno == ENODEV) {
365+
status = ZE_RESULT_ERROR_DEVICE_LOST;
366+
}
367+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
368+
"Error@ %s():createMemoryInfo failed errno:%d \n", __FUNCTION__, errno);
369+
}
370+
return status;
362371
}
363372

364373
std::unique_ptr<OsMemory> OsMemory::create(OsSysman *pOsSysman, ze_bool_t onSubdevice, uint32_t subdeviceId) {

level_zero/tools/test/unit_tests/sources/sysman/memory/linux/test_sysman_memory_prelim.cpp

+57-2
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,32 @@ constexpr int32_t memoryBusWidth = 128; // bus width in bytes
2323
constexpr int32_t numMemoryChannels = 8;
2424
constexpr uint32_t memoryHandleComponentCount = 1u;
2525
const std::string sampleGuid1 = "0xb15a0edc";
26+
27+
class SysmanMemoryMockIoctlHelper : public NEO::MockIoctlHelper {
28+
29+
public:
30+
using NEO::MockIoctlHelper::MockIoctlHelper;
31+
bool returnEmptyMemoryInfo = false;
32+
int32_t mockErrorNumber = 0;
33+
34+
std::unique_ptr<MemoryInfo> createMemoryInfo() override {
35+
if (returnEmptyMemoryInfo) {
36+
errno = mockErrorNumber;
37+
return {};
38+
}
39+
return NEO::MockIoctlHelper::createMemoryInfo();
40+
}
41+
};
42+
2643
class SysmanDeviceMemoryFixture : public SysmanDeviceFixture {
44+
public:
45+
MockMemoryNeoDrm *pDrm = nullptr;
46+
2747
protected:
2848
std::unique_ptr<MockMemorySysfsAccess> pSysfsAccess;
2949
std::unique_ptr<MockMemoryFsAccess> pFsAccess;
3050
SysfsAccess *pSysfsAccessOld = nullptr;
3151
FsAccess *pFsAccessOriginal = nullptr;
32-
MockMemoryNeoDrm *pDrm = nullptr;
3352
Drm *pOriginalDrm = nullptr;
3453
std::vector<ze_device_handle_t> deviceHandles;
3554
PRODUCT_FAMILY productFamily;
@@ -62,7 +81,7 @@ class SysmanDeviceMemoryFixture : public SysmanDeviceFixture {
6281
pFsAccessOriginal = pLinuxSysmanImp->pFsAccess;
6382
pLinuxSysmanImp->pFsAccess = pFsAccess.get();
6483
pDrm->setMemoryType(INTEL_HWCONFIG_MEMORY_TYPE_HBM2e);
65-
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<NEO::MockIoctlHelper>(*pDrm));
84+
pDrm->ioctlHelper = static_cast<std::unique_ptr<NEO::IoctlHelper>>(std::make_unique<SysmanMemoryMockIoctlHelper>(*pDrm));
6685

6786
pSysmanDeviceImp->pMemoryHandleContext->handleList.clear();
6887
uint32_t subDeviceCount = 0;
@@ -383,6 +402,42 @@ TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemo
383402
}
384403
}
385404

405+
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetStateAndIoctlReturnedErrorThenApiReturnsError) {
406+
setLocalSupportedAndReinit(true);
407+
408+
auto ioctlHelper = static_cast<SysmanMemoryMockIoctlHelper *>(pDrm->ioctlHelper.get());
409+
ioctlHelper->returnEmptyMemoryInfo = true;
410+
auto handles = getMemoryHandles(memoryHandleComponentCount);
411+
for (auto handle : handles) {
412+
zes_mem_state_t state;
413+
414+
ze_result_t result = zesMemoryGetState(handle, &state);
415+
416+
EXPECT_EQ(result, ZE_RESULT_ERROR_UNKNOWN);
417+
EXPECT_EQ(state.size, 0u);
418+
EXPECT_EQ(state.free, 0u);
419+
}
420+
}
421+
422+
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingZetSysmanMemoryGetStateAndDeviceIsNotAvailableThenDeviceLostErrorIsReturned) {
423+
setLocalSupportedAndReinit(true);
424+
425+
auto ioctlHelper = static_cast<SysmanMemoryMockIoctlHelper *>(pDrm->ioctlHelper.get());
426+
ioctlHelper->returnEmptyMemoryInfo = true;
427+
ioctlHelper->mockErrorNumber = ENODEV;
428+
auto handles = getMemoryHandles(memoryHandleComponentCount);
429+
for (auto handle : handles) {
430+
zes_mem_state_t state;
431+
432+
ze_result_t result = zesMemoryGetState(handle, &state);
433+
434+
EXPECT_EQ(result, ZE_RESULT_ERROR_DEVICE_LOST);
435+
EXPECT_EQ(state.size, 0u);
436+
EXPECT_EQ(state.free, 0u);
437+
errno = 0;
438+
}
439+
}
440+
386441
TEST_F(SysmanDeviceMemoryFixture, GivenValidMemoryHandleWhenCallingzesSysmanMemoryGetBandwidthWhenPmtObjectIsNullThenFailureRetuned) {
387442
for (auto &subDeviceIdToPmtEntry : pLinuxSysmanImp->mapOfSubDeviceIdToPmtObject) {
388443
if (subDeviceIdToPmtEntry.second != nullptr) {

0 commit comments

Comments
 (0)