Skip to content

Commit d8311f9

Browse files
fix: WA for VF bar resource allocation post Warm reset
On warm reset, with the default BAR size set by the BIOS, VF BAR allocation fails because of a bug in the PCI driver, which impacts SR-IOV functionality. Resize the VF BAR so that VF BAR allocation succeeds after a warm reset. Source: 3c072a6 Related-To: LOCI-4481 Signed-off-by: Bellekallu Rajkiran <[email protected]>
1 parent d8052d4 commit d8311f9

File tree

10 files changed

+265
-13
lines changed

10 files changed

+265
-13
lines changed

level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,6 @@ TEST_F(SysmanGlobalOperationsFixture, GivenGemCreateIoctlFailsWithEINVALWhenCall
729729
}
730730

731731
// Calls zesDeviceReset on the fixture's device with the second argument set to true
// (the "force" flag, going by the test name) and expects ZE_RESULT_SUCCESS.
TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueWhenCallingResetThenSuccessIsReturned) {
732-
733732
ze_result_t result = zesDeviceReset(device, true);
734733
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
735734
}

level_zero/tools/source/sysman/linux/os_sysman_imp.cpp

+76
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "level_zero/core/source/driver/driver_handle_imp.h"
1919
#include "level_zero/tools/source/sysman/firmware_util/firmware_util.h"
2020
#include "level_zero/tools/source/sysman/linux/fs_access.h"
21+
#include "level_zero/tools/source/sysman/pci/linux/os_pci_imp.h"
2122

2223
namespace L0 {
2324

@@ -377,6 +378,56 @@ void LinuxSysmanImp::clearHPIE(int fd) {
377378
NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds just to make sure the change is propagated.
378379
}
379380

381+
// Function to adjust VF BAR size i.e Modify VF BAR Control register.
382+
// size param is an encoded value described as follows:
383+
// 0 - 1 MB (2^20 bytes)
384+
// 1 - 2 MB (2^21 bytes)
385+
// 2 - 4 MB (2^22 bytes)
386+
// 3 - 8 MB (2^23 bytes)
387+
// .
388+
// .
389+
// .
390+
// b - 2 GB (2^31 bytes)
391+
// 43 - 8 EB (2^63 bytes)
392+
ze_result_t LinuxSysmanImp::resizeVfBar(uint8_t size) {
393+
std::string pciConfigNode;
394+
pciConfigNode = gtDevicePath + "/config";
395+
396+
int fdConfig = -1;
397+
fdConfig = this->openFunction(pciConfigNode.c_str(), O_RDWR);
398+
if (fdConfig < 0) {
399+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
400+
"Config node open failed\n");
401+
return ZE_RESULT_ERROR_UNKNOWN;
402+
}
403+
std::unique_ptr<uint8_t[]> configMemory = std::make_unique<uint8_t[]>(PCI_CFG_SPACE_EXP_SIZE);
404+
memset(configMemory.get(), 0, PCI_CFG_SPACE_EXP_SIZE);
405+
if (this->preadFunction(fdConfig, configMemory.get(), PCI_CFG_SPACE_EXP_SIZE, 0) < 0) {
406+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
407+
"Read to get config space failed\n");
408+
return ZE_RESULT_ERROR_UNKNOWN;
409+
}
410+
auto reBarCapPos = L0::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), true);
411+
if (!reBarCapPos) {
412+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
413+
"VF BAR capability not found\n");
414+
return ZE_RESULT_ERROR_UNKNOWN;
415+
}
416+
417+
auto barSizePos = reBarCapPos + PCI_REBAR_CTRL + 1; // position of VF(0) BAR SIZE.
418+
if (this->pwriteFunction(fdConfig, &size, 0x01, barSizePos) < 0) {
419+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
420+
"Write to change VF bar size failed\n");
421+
return ZE_RESULT_ERROR_UNKNOWN;
422+
}
423+
if (this->closeFunction(fdConfig) < 0) {
424+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
425+
"Config node close failed\n");
426+
return ZE_RESULT_ERROR_UNKNOWN;
427+
}
428+
return ZE_RESULT_SUCCESS;
429+
}
430+
380431
// A 'warm reset' is a conventional reset that is triggered across a PCI express link.
381432
// A warm reset is triggered either when a link is forced into electrical idle or
382433
// by sending TS1 and TS2 ordered sets with the hot reset bit set.
@@ -432,6 +483,31 @@ ze_result_t LinuxSysmanImp::osWarmReset() {
432483
return ZE_RESULT_ERROR_UNKNOWN;
433484
}
434485

486+
// PCIe port driver uses the BIOS allocated VF bars on bootup. A known bug exists in pcie port driver
487+
// and is causing VF bar allocation failure in PCIe port driver after an SBR - https://bugzilla.kernel.org/show_bug.cgi?id=216795
488+
489+
// WA to adjust VF bar size to 2GB. The default VF bar size is 8GB and for 63VFs, 504GB need to be allocated which is failing on SBR.
490+
// When configured VF bar size to 2GB, an allocation of 126GB is successful. This WA resizes VF0 bar to 2GB. Once pcie port driver
491+
// issue is resolved, this WA may not be necessary. The meaning of 0xb is explained at the definition of resizeVfBar.
492+
if (NEO::DebugManager.flags.VfBarResourceAllocationWa.get()) {
493+
if (ZE_RESULT_SUCCESS != (result = resizeVfBar(0xb))) {
494+
return result;
495+
}
496+
497+
result = pFsAccess->write(cardBusPath + '/' + "remove", "1");
498+
if (ZE_RESULT_SUCCESS != result) {
499+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
500+
"Card Bus remove after resizing VF bar failed\n");
501+
return result;
502+
}
503+
504+
result = pFsAccess->write(rootPortPath + '/' + "rescan", "1");
505+
if (ZE_RESULT_SUCCESS != result) {
506+
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
507+
"Rescanning root port failed after resizing VF bar failed\n");
508+
return result;
509+
}
510+
}
435511
return result;
436512
}
437513

level_zero/tools/source/sysman/linux/os_sysman_imp.h

+1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass {
105105
SysmanDeviceImp *pParentSysmanDeviceImp = nullptr;
106106
static const std::string deviceDir;
107107
void clearHPIE(int fd);
108+
ze_result_t resizeVfBar(uint8_t size);
108109
std::mutex fwLock;
109110
};
110111

level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp

+12-9
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ ze_result_t LinuxPciImp::initializeBarProperties(std::vector<zes_pci_bar_propert
115115
return result;
116116
}
117117

118-
uint32_t LinuxPciImp::getRebarCapabilityPos() {
118+
uint32_t LinuxPciImp::getRebarCapabilityPos(uint8_t *configMemory, bool isVfBar) {
119119
uint32_t pos = PCI_CFG_SPACE_SIZE;
120120
uint32_t header = 0;
121121

@@ -127,20 +127,23 @@ uint32_t LinuxPciImp::getRebarCapabilityPos() {
127127
// could be present in PCI extended configuration space are
128128
// represented by loopCount.
129129
auto loopCount = (PCI_CFG_SPACE_EXP_SIZE - PCI_CFG_SPACE_SIZE) / 8;
130-
header = getDwordFromConfig(pos);
130+
header = getDwordFromConfig(pos, configMemory);
131131
if (!header) {
132132
return 0;
133133
}
134134

135+
const uint32_t vfRebarCapId = 0x24;
136+
uint32_t capId = isVfBar ? vfRebarCapId : PCI_EXT_CAP_ID_REBAR;
137+
135138
while (loopCount-- > 0) {
136-
if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_REBAR) {
139+
if (PCI_EXT_CAP_ID(header) == capId) {
137140
return pos;
138141
}
139142
pos = PCI_EXT_CAP_NEXT(header);
140143
if (pos < PCI_CFG_SPACE_SIZE) {
141144
return 0;
142145
}
143-
header = getDwordFromConfig(pos);
146+
header = getDwordFromConfig(pos, configMemory);
144147
}
145148
return 0;
146149
}
@@ -187,14 +190,14 @@ uint16_t LinuxPciImp::getLinkCapabilityPos() {
187190

188191
// Parse PCIe configuration space to see if resizable Bar is supported
189192
bool LinuxPciImp::resizableBarSupported() {
190-
return (getRebarCapabilityPos() > 0);
193+
return (L0::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), false) > 0);
191194
}
192195

193196
bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) {
194197
bool isBarResizable = false;
195198
uint32_t capabilityRegister = 0, controlRegister = 0;
196199
uint32_t nBars = 1;
197-
auto rebarCapabilityPos = getRebarCapabilityPos();
200+
auto rebarCapabilityPos = L0::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), false);
198201

199202
// If resizable Bar is not supported then return false.
200203
if (!rebarCapabilityPos) {
@@ -219,11 +222,11 @@ bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) {
219222
// -------------------------------------------------------------|
220223

221224
// Only first Control register(at offset 008h, as shown above), could tell about number of resizable Bars
222-
controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL);
225+
controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL, configMemory.get());
223226
nBars = BITS(controlRegister, 5, 3); // control register's bits 5,6 and 7 contain number of resizable bars information
224227
for (auto barNumber = 0u; barNumber < nBars; barNumber++) {
225228
uint32_t barId = 0;
226-
controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL);
229+
controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL, configMemory.get());
227230
barId = BITS(controlRegister, 0, 3); // Control register's bit 0,1,2 tells the index of bar
228231
if (barId == barIndex) {
229232
isBarResizable = true;
@@ -236,7 +239,7 @@ bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) {
236239
return false;
237240
}
238241

239-
capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP);
242+
capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP, configMemory.get());
240243
// Capability register's bit 4 to 31 indicates supported Bar sizes.
241244
// In possibleBarSizes, position of each set bit indicates supported bar size. Example, if set bit
242245
// position of possibleBarSizes is from 0 to n, then this indicates BAR size from 2^0 MB to 2^n MB

level_zero/tools/source/sysman/pci/linux/os_pci_imp.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass {
2626
bool resizableBarSupported() override;
2727
bool resizableBarEnabled(uint32_t barIndex) override;
2828
ze_result_t initializeBarProperties(std::vector<zes_pci_bar_properties_t *> &pBarProperties) override;
29+
static uint32_t getRebarCapabilityPos(uint8_t *configMemory, bool isVfBar);
2930
LinuxPciImp() = default;
3031
LinuxPciImp(OsSysman *pOsSysman);
3132
~LinuxPciImp() override = default;
@@ -47,7 +48,7 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass {
4748
static const std::string maxLinkSpeedFile;
4849
static const std::string maxLinkWidthFile;
4950
bool isLmemSupported = false;
50-
uint32_t getDwordFromConfig(uint32_t pos) {
51+
// Assembles a 32-bit value from the four consecutive bytes configMemory[pos..pos + 3],
// least-significant byte first. Each byte is widened to uint32_t before shifting so a
// top byte >= 0x80 is never shifted as a signed int.
// The caller must ensure pos + 3 is within the buffer; no bounds check is done here.
static inline uint32_t getDwordFromConfig(uint32_t pos, uint8_t *configMemory) {
    return static_cast<uint32_t>(configMemory[pos]) |
           (static_cast<uint32_t>(configMemory[pos + 1]) << 8) |
           (static_cast<uint32_t>(configMemory[pos + 2]) << 16) |
           (static_cast<uint32_t>(configMemory[pos + 3]) << 24);
}
@@ -57,7 +58,6 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass {
5758
uint8_t getByteFromConfig(uint32_t pos, uint8_t *configMem) {
5859
return configMem[pos];
5960
}
60-
uint32_t getRebarCapabilityPos();
6161
uint16_t getLinkCapabilityPos();
6262
};
6363

level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/mock_zes_sysman_diagnostics.h

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ const std::vector<std::string> mockSupportedDiagTypes = {"MOCKSUITE1", "MOCKSUIT
1919
const std::string deviceDirDiag("device");
2020
const std::string mockRealPathConfig("/sys/devices/pci0000:89/0000:89:02.0/config");
2121
const std::string mockdeviceDirDiag("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0");
22+
const std::string mockdeviceDirConfig("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0/config");
2223
const std::string mockDeviceName("/MOCK_DEVICE_NAME");
2324
const std::string mockRemove("remove");
2425
const std::string mockRescan("rescan");

0 commit comments

Comments
 (0)