Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RDMA Executor support #147

Closed
wants to merge 70 commits into from
Closed
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
208c708
Merge IbVerbsUtils.hpp functions
mustafabar Nov 22, 2024
31b6e3c
Add RDMA configs
mustafabar Nov 26, 2024
acf6d71
Add RDMA resources to TransferResources
mustafabar Nov 26, 2024
a3550e3
Add RDMA resource init functions
mustafabar Nov 26, 2024
11f16c4
Add RDMA transfer and mem reg calls
mustafabar Nov 26, 2024
2866588
Add teardown
mustafabar Nov 26, 2024
2e82f8b
Add minor changes
mustafabar Nov 26, 2024
2656ce2
Add first working version
mustafabar Nov 27, 2024
ac1a036
Add topo detection code
mustafabar Nov 27, 2024
8ecd926
Reformat all files with clang-format
mustafabar Nov 27, 2024
1773313
Revert "Reformat all files with clang-format"
mustafabar Nov 27, 2024
3eb880d
Add device topo printing
mustafabar Nov 27, 2024
56fa289
Support nearest IBV and topo printing
mustafabar Nov 27, 2024
c6455cc
Minor change
mustafabar Nov 27, 2024
be33f84
Add GetClosestNicToGpu API function
mustafabar Nov 27, 2024
6bc254b
Add minor changes
mustafabar Nov 27, 2024
319ca40
Move topo printing to client side
mustafabar Dec 2, 2024
438353e
use NO_IBV_EXEC flag
mustafabar Dec 2, 2024
e66b57b
Fix output formatting
mustafabar Dec 2, 2024
a4af4ba
Minor reformatting
mustafabar Dec 2, 2024
3204df7
Init once changes
mustafabar Dec 2, 2024
aa6e3e8
Init once changes
mustafabar Dec 2, 2024
08938c0
Add better input validation error text
mustafabar Dec 2, 2024
45609ff
Remove unneeded var
mustafabar Dec 2, 2024
1bc7d22
Obtain CLOSEST_NIC in TB envs
mustafabar Dec 2, 2024
4b47a10
Fix spacing of environment
mustafabar Dec 2, 2024
ea1493d
Merge branch 'ROCm:develop' into rdma_exec_integration
mustafabar Dec 3, 2024
2e50a21
Minor formatting and notes on API changes
mustafabar Dec 3, 2024
08afd10
Restore comment
mustafabar Dec 3, 2024
fba8c5b
Unify executor results str
mustafabar Dec 4, 2024
d9a32cd
Fix spacing
mustafabar Dec 4, 2024
2731423
Fix IB_GID_INDEX usage comment
mustafabar Dec 4, 2024
6a5366c
Fix spelling
mustafabar Dec 4, 2024
e1a27cb
Check NIC index out-of-range
mustafabar Dec 4, 2024
3ec3797
Fix brackets
mustafabar Dec 4, 2024
481b0ba
Trim trailing spaces
mustafabar Dec 4, 2024
5d9d33c
Fix formatting
mustafabar Dec 4, 2024
8d4b55e
Simplify GetBusIdDistance function
mustafabar Dec 4, 2024
9edc1fb
Add more defensive check in GetBusIdDistance function
mustafabar Dec 4, 2024
2bf2d5e
Simplify NIC topo printing function
mustafabar Dec 4, 2024
0a0735f
Minor reformat
mustafabar Dec 4, 2024
80eb15a
IBV->NIC rename
mustafabar Dec 4, 2024
139ac8a
Indicate workspace
mustafabar Dec 4, 2024
b002f99
Add neater condition for skipping comma
mustafabar Dec 4, 2024
47c0d49
Redesign closest NIC capturing design
mustafabar Dec 5, 2024
72695c9
Fix spacs
mustafabar Dec 5, 2024
a862d78
Remove SetClosestNics API
mustafabar Dec 5, 2024
df0ff69
Compress error message
mustafabar Dec 5, 2024
767a204
Modify error message
mustafabar Dec 5, 2024
ce80e42
Separate debug and release ibv macros
mustafabar Dec 5, 2024
4496aa8
Separate debug and release ibv macros
mustafabar Dec 5, 2024
e2a7ec0
Reformat function calls
mustafabar Dec 5, 2024
772b5bd
Reorder args
mustafabar Dec 5, 2024
888e764
Minor edits
mustafabar Dec 5, 2024
1c37f3e
Resuse device list
mustafabar Dec 5, 2024
994f679
Remove gpuCount global
mustafabar Dec 5, 2024
aca8755
use underscore for globals
mustafabar Dec 5, 2024
0e368f5
Remove unneeded comment
mustafabar Dec 5, 2024
f9489c3
Apply minor reformatting
mustafabar Dec 6, 2024
7789220
Apply minor edits
mustafabar Dec 6, 2024
0cc628a
Relocate NIC exec code
mustafabar Dec 6, 2024
e415823
Merge branch 'ROCm:develop' into rdma_exec_integration
mustafabar Dec 6, 2024
741eaa5
Use NIC_EXEC_ENABLED macro
mustafabar Dec 6, 2024
cd2d33d
Remove whitespaces
mustafabar Dec 6, 2024
28d633d
Remove unneeded functions
mustafabar Dec 9, 2024
d51916a
Use ErrResult for error propagation to API callers
mustafabar Dec 9, 2024
a62b012
Revert "Use ErrResult for error propagation to API callers"
mustafabar Dec 13, 2024
0810906
Revert "Remove unneeded functions"
mustafabar Dec 13, 2024
607387c
Refactoring parts of the NIC executor code, detection (#4)
gilbertlee-amd Dec 16, 2024
0088f74
V1.59 candidate (#6)
mustafabar Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ set( CMAKE_CXX_FLAGS "${flags_str}${CMAKE_CXX_FLAGS}")

set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/lib")
include_directories(${ROCM_PATH}/include)
find_library(IBVERBS_LIBRARY ibverbs)
if (IBVERBS_LIBRARY)
message(STATUS "Found ibverbs: ${IBVERBS_LIBRARY}")
link_libraries(ibverbs)
else()
add_definitions(-DNO_IBV_EXEC)
mustafabar marked this conversation as resolved.
Show resolved Hide resolved
message(WARNING "ibverbs not found")
endif()
link_libraries(numa hsa-runtime64 pthread)
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY .)
add_executable(TransferBench src/client/Client.cpp)
Expand Down
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@ NVFLAGS = -x cu -lnuma -arch=native
COMMON_FLAGS = -O3 -I./src/header -I./src/client -I./src/client/Presets
LDFLAGS += -lpthread

# Compile RDMA executor if IBVerbs is found in the Dynamic Linker cache
ifneq ("$(shell ldconfig -p | grep -c ibverbs)", "0")
LDFLAGS += -libverbs
NVFLAGS += -libverbs
else
CXXFLAGS += -DNO_IBV_EXEC
NVFLAGS += -DNO_IBV_EXEC
endif

all: $(EXE)

TransferBench: ./src/client/Client.cpp $(shell find -regex ".*\.\hpp")
Expand Down
26 changes: 19 additions & 7 deletions src/client/Client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,25 @@ void PrintResults(EnvVars const& ev, int const testNum,
if (t.exeSubIndex != -1)
sprintf(exeSubIndexStr, ".%d", t.exeSubIndex);

printf(" Transfer %02d %c %7.3f GB/s %c %8.3f ms %c %12lu bytes %c %s -> %s%02d%s:%03d -> %s\n",
idx, sep,
r.avgBandwidthGbPerSec, sep,
r.avgDurationMsec, sep,
r.numBytes, sep,
MemDevicesToStr(t.srcs).c_str(), ExeTypeName[exeType], exeIndex,
exeSubIndexStr, t.numSubExecs, MemDevicesToStr(t.dsts).c_str());
if(exeType == EXE_IBV || exeType == EXE_IBV_NEAREST) {
mustafabar marked this conversation as resolved.
Show resolved Hide resolved
printf(" Transfer %02d %c %7.3f GB/s %c %8.3f ms %c %12lu bytes %c %s -> %s%02d:%s%02d -> %s\n",
idx, sep,
r.avgBandwidthGbPerSec, sep,
r.avgDurationMsec, sep,
r.numBytes, sep,
MemDevicesToStr(t.srcs).c_str(),
ExeTypeName[exeType], exeIndex,
ExeTypeName[exeType], t.exeDstIndex,
MemDevicesToStr(t.dsts).c_str());
} else {
printf(" Transfer %02d %c %7.3f GB/s %c %8.3f ms %c %12lu bytes %c %s -> %s%02d%s:%03d -> %s\n",
idx, sep,
r.avgBandwidthGbPerSec, sep,
r.avgDurationMsec, sep,
r.numBytes, sep,
MemDevicesToStr(t.srcs).c_str(), ExeTypeName[exeType], exeIndex,
exeSubIndexStr, t.numSubExecs, MemDevicesToStr(t.dsts).c_str());
}

// Show per-iteration timing information
if (ev.showIterations) {
Expand Down
2 changes: 1 addition & 1 deletion src/client/Client.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ THE SOFTWARE.

size_t const DEFAULT_BYTES_PER_TRANSFER = (1<<26);

char const ExeTypeName[4][4] = {"CPU", "GPU", "DMA", "IBV"};
char const ExeTypeName[5][4] = {"CPU", "GPU", "DMA", "IBV", "IBV"};

// Display detected hardware
void DisplayTopology(bool outputToCsv);
Expand Down
52 changes: 52 additions & 0 deletions src/client/EnvVars.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ class EnvVars
int outputToCsv; // Output in CSV format
int samplingFactor; // Affects how many different values of N are generated (when N set to 0)

// RDMA (NIC executor) options, each populated from an environment variable
int ibGidIndex; // GID index for RoCE NICs (IB_GID_INDEX; default -1, negative values are reported as "auto")
int roceVersion; // RoCE version number (ROCE_VERSION; default 2)
int ipAddressFamily; // IP address family (IP_ADDRESS_FAMILY; 4 = IPv4, 6 = IPv6; default 4)
uint8_t ibPort; // NIC port number to be used (IB_PORT_NUMBER; default 1)
std::string closestNicStr; // User-specified comma-separated list of per-GPU closest NICs (CLOSEST_NIC; empty = auto)
mustafabar marked this conversation as resolved.
Show resolved Hide resolved

// Developer features
int gpuMaxHwQueues; // Tracks GPU_MAX_HW_QUEUES environment variable

Expand Down Expand Up @@ -146,8 +153,15 @@ class EnvVars
useSingleStream = GetEnvVar("USE_SINGLE_STREAM" , 1);
validateDirect = GetEnvVar("VALIDATE_DIRECT" , 0);
validateSource = GetEnvVar("VALIDATE_SOURCE" , 0);

ibGidIndex = GetEnvVar("IB_GID_INDEX" ,-1);
ibPort = GetEnvVar("IB_PORT_NUMBER" , 1);
roceVersion = GetEnvVar("ROCE_VERSION" , 2);
ipAddressFamily = GetEnvVar("IP_ADDRESS_FAMILY" , 4);
closestNicStr = GetEnvVar("CLOSEST_NIC" , "");

gpuMaxHwQueues = GetEnvVar("GPU_MAX_HW_QUEUES" , 4);


// Check for fill pattern
char* pattern = getenv("FILL_PATTERN");
Expand Down Expand Up @@ -299,6 +313,11 @@ class EnvVars
printf(" USE_SINGLE_STREAM - Use a single stream per GPU GFX executor instead of stream per Transfer\n");
printf(" VALIDATE_DIRECT - Validate GPU destination memory directly instead of staging GPU memory on host\n");
printf(" VALIDATE_SOURCE - Validate GPU src memory immediately after preparation\n");
printf(" IB_GID_INDEX - Required for RoCE NICs (default=3)\n");
mustafabar marked this conversation as resolved.
Show resolved Hide resolved
printf(" IB_PORT_NUMBER - RDMA port count for RDMA NIC (default=1)\n");
printf(" IP_ADDRESS_FAMILY - IP address family (4=v4, 6=v6, default=v4)\n");
printf(" ROCE_VERSION - RoCE version (default=2)\n");
printf(" CLOSEST_NIC - Comma seperated list of per-GPU closest NIC (default=auto)\n");
mustafabar marked this conversation as resolved.
Show resolved Hide resolved
}

void Print(std::string const& name, int32_t const value, const char* format, ...) const
Expand Down Expand Up @@ -381,6 +400,16 @@ class EnvVars
"Running in %s mode", useInteractive ? "interactive" : "non-interactive");
Print("USE_SINGLE_STREAM", useSingleStream,
"Using single stream per GFX %s", useSingleStream ? "device" : "Transfer");
Print("IB_GID_INDEX", ibGidIndex,
"RoCE GID index is set to %s", (ibGidIndex < 0 ? "auto" : std::to_string(ibGidIndex).c_str()));
Print("IB_PORT_NUMBER", ibPort,
"IB port number is set to %d", ibPort);
Print("ROCE_VERSION", roceVersion,
"RoCE version is set to %d", roceVersion);
Print("IP_ADDRESS_FAMILY", ipAddressFamily,
"IP address family is set to IPv%d", ipAddressFamily);
Print("CLOSEST_NIC", (closestNicStr == "" ? "auto" : "user-input"),
"Per-GPU closest NIC is set as %s", (closestNicStr == "" ? "auto" : closestNicStr.c_str()));

if (getenv("XCC_PREF_TABLE")) {
printf("%36s: Preferred XCC Table (XCC_PREF_TABLE)\n", "");
Expand Down Expand Up @@ -479,6 +508,29 @@ class EnvVars
cfg.gfx.useSingleTeam = gfxSingleTeam;
cfg.gfx.waveOrder = gfxWaveOrder;

cfg.rdma.ibGidIndex = ibGidIndex;
cfg.rdma.ibPort = ibPort;
cfg.rdma.ipAddressFamily = ipAddressFamily;
cfg.rdma.roceVersion = roceVersion;
std::vector<int> closestNics;
if(closestNicStr != "") {
std::stringstream ss(closestNicStr);
std::string item;
while (std::getline(ss, item, ',')) {
try {
int nic = std::stoi(item);
if (nic < 0) {
mustafabar marked this conversation as resolved.
Show resolved Hide resolved
printf("[ERROR]: NIC index cannot be negative\n");
exit(1);
}
closestNics.push_back(nic);
} catch (const std::invalid_argument& e) {
printf("[ERROR] Invalid NIC index: %s\n", item.c_str());
exit(1);
}
}
TransferBench::SetClosestNics(closestNics);
mustafabar marked this conversation as resolved.
Show resolved Hide resolved
}
return cfg;
}
};
Expand Down
99 changes: 96 additions & 3 deletions src/client/Topology.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ THE SOFTWARE.

#include "TransferBench.hpp"

#ifndef NO_IBV_EXEC
#include <infiniband/verbs.h>
#include <filesystem>
#endif

static int RemappedCpuIndex(int origIdx)
{
static std::vector<int> remappingCpu;
Expand All @@ -38,21 +43,107 @@ static int RemappedCpuIndex(int origIdx)
return remappingCpu[origIdx];
}

// Prints a table with one row per IB Verbs device reported by libibverbs:
// device index, device name, whether any physical port is active, the GPUs
// reported by GetClosestGpusToNic() for that index, and the PCIe bus ID
// resolved from the device's sysfs entry.
//
// outputToCsv - emit comma-separated rows instead of the '|' aligned table
//
// Compiles to a no-op when NO_IBV_EXEC is defined.
static void PrintNicToGPUTopo(bool outputToCsv)
{
#ifndef NO_IBV_EXEC
  if (outputToCsv) {
    printf("Device Index,Device Name,Port Active,Closest GPU(s),PCIe Bus ID\n");
  } else {
    printf("Device Index | Device Name | Port Active | Closest GPU(s) | PCIe Bus ID\n");
    printf("-------------+-------------+-------------+----------------+------------\n");
  }

  std::vector<std::string> devBusIds;
  std::vector<std::string> devNames;
  std::vector<bool>        devPortsActive;

  int devCount = 0;
  struct ibv_device** deviceList = ibv_get_device_list(&devCount);
  if (deviceList) {
    if (devCount > 0) {
      devBusIds.resize(devCount, "");
      devNames.resize(devCount, "");
      devPortsActive.resize(devCount, false);
    }

    for (int i = 0; i < devCount; ++i) {
      struct ibv_context* ctx = ibv_open_device(deviceList[i]);
      if (!ctx) continue;

      struct ibv_device_attr deviceAttr;
      if (ibv_query_device(ctx, &deviceAttr) == 0) {
        devNames[i] = deviceList[i]->name;
        // A device counts as active as soon as one of its ports is active
        for (int port = 1; port <= deviceAttr.phys_port_cnt; ++port) {
          struct ibv_port_attr portAttr;
          if (ibv_query_port(ctx, port, &portAttr) == 0 && portAttr.state == IBV_PORT_ACTIVE) {
            devPortsActive[i] = true;
            break;
          }
        }
      }

      // The last component of the resolved "<dev_path>/device" link is the
      // PCIe bus ID.  The error_code overload is used so that a missing or
      // unreadable sysfs entry leaves the bus ID empty instead of throwing.
      std::error_code ec;
      std::filesystem::path const pciPath =
        std::filesystem::canonical(std::filesystem::path(deviceList[i]->dev_path) / "device", ec);
      if (!ec) {
        devBusIds[i] = pciPath.filename().string();
      }

      ibv_close_device(ctx);
    }

    // Release the list whenever one was returned, even if it held no devices
    ibv_free_device_list(deviceList);
  }

  int const numDevices = static_cast<int>(devBusIds.size());
  for (int i = 0; i < numDevices; ++i) {
    auto const closestGpus = GetClosestGpusToNic(i);

    // Comma-join the GPU indices
    std::string closestGpusStr;
    for (auto const& gpu : closestGpus) {
      if (!closestGpusStr.empty()) closestGpusStr += ",";
      closestGpusStr += std::to_string(gpu);
    }

    char const* const portActiveStr = devPortsActive[i] ? "Yes" : "No";

    if (outputToCsv) {
      printf("%d,%s,%s,%s,%s\n",
             i,
             devNames[i].c_str(),
             portActiveStr,
             closestGpusStr.c_str(),
             devBusIds[i].c_str());
    } else {
      // Column widths match the header ("Closest GPU(s)" is 14 characters wide)
      printf("%-12d | %-11s | %-11s | %-14s | %-11s\n",
             i,
             devNames[i].c_str(),
             portActiveStr,
             closestGpusStr.c_str(),
             devBusIds[i].c_str());
    }
  }
  printf("\n");
#endif
}

void DisplayTopology(bool outputToCsv)
{
int numCpus = TransferBench::GetNumExecutors(EXE_CPU);
int numGpus = TransferBench::GetNumExecutors(EXE_GPU_GFX);

int numNics = TransferBench::GetNumExecutors(EXE_IBV);
char sep = (outputToCsv ? ',' : '|');

if (outputToCsv) {
printf("NumCpus,%d\n", numCpus);
printf("NumGpus,%d\n", numGpus);
printf("NumNics,%d\n", numNics);
} else {
printf("\nDetected Topology:\n");
printf("==================\n");
printf(" %d configured CPU NUMA node(s) [%d total]\n", numCpus, numa_max_node() + 1);
printf(" %d GPU device(s)\n", numGpus);
printf(" %d Supported NIC device(s)\n", numNics);
}

// Print out detected CPU topology
Expand Down Expand Up @@ -91,8 +182,10 @@ void DisplayTopology(bool outputToCsv)
}
printf("\n");

// Print out detected GPU topology
// Print out detected NIC topology
PrintNicToGPUTopo(outputToCsv);

// Print out detected GPU topology
#if defined(__NVCC__)
for (int i = 0; i < numGpus; i++) {
hipDeviceProp_t prop;
Expand Down Expand Up @@ -157,4 +250,4 @@ void DisplayTopology(bool outputToCsv)
TransferBench::GetNumExecutorSubIndices({EXE_GPU_GFX, i}));
}
#endif
}
}
Loading