Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 11 additions & 76 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,7 @@ include(CMakePushCheckState)
# Build options
#==================================================================================================
option(BUILD_LOCAL_GPU_TARGET_ONLY "Build only for GPUs detected on this machine" OFF)
option(ENABLE_NIC_EXEC "Enable RDMA NIC Executor in TransferBench" OFF)
option(ENABLE_MPI_COMM "Enable MPI Communicator support" OFF)
option(ENABLE_DMA_BUF "Enable DMA-BUF support for GPU Direct RDMA" OFF)
option(ENABLE_AMD_SMI "Enable AMD-SMI pod membership queries" OFF)
option(ENABLE_POD_COMM "Enable pod communication" OFF)
option(BUILD_RELOCATABLE_PACKAGE "Build with RVS-style relocatable RPATH and amdrocm<MAJOR>-transferbench package naming" OFF)
Expand Down Expand Up @@ -313,71 +311,13 @@ else()
message(FATAL_ERROR "HSA library or headers not found under ${ROCM_PATH}; TransferBench requires libhsa-runtime64")
endif()

## Check for infiniband verbs support
if(DEFINED ENV{DISABLE_NIC_EXEC} AND "$ENV{DISABLE_NIC_EXEC}" STREQUAL "1")
message(STATUS "Disabling NIC Executor support as env. flag DISABLE_NIC_EXEC was enabled")
elseif(NOT ENABLE_NIC_EXEC)
message(STATUS "For CMake builds, NIC Executor support requires explicit opt-in by setting CMake flag -DENABLE_NIC_EXEC=ON")
message(STATUS "- Disabling NIC Executor support")
else()
message(STATUS "Attempting to build with NIC executor support")

find_library(IBVERBS_LIBRARY ibverbs)
find_path(IBVERBS_INCLUDE_DIR infiniband/verbs.h)
if(IBVERBS_LIBRARY AND IBVERBS_INCLUDE_DIR)
add_library(ibverbs SHARED IMPORTED)
set_target_properties(ibverbs PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIR}" IMPORTED_LOCATION "${IBVERBS_LIBRARY}" INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIR}")
set(IBVERBS_FOUND 1)
message(STATUS "- Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable")
else()
if(NOT IBVERBS_LIBRARY)
message(WARNING "- IBVerbs library not found")
elseif(NOT IBVERBS_INCLUDE_DIR)
message(WARNING "- infiniband/verbs.h not found")
endif()
message(WARNING "- Building without NIC executor support. To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed")
endif()
endif()

## Check for DMA-BUF support (requires IBVERBS)
if(IBVERBS_FOUND)
if(DEFINED ENV{DISABLE_DMA_BUF} AND "$ENV{DISABLE_DMA_BUF}" STREQUAL "1")
message(STATUS "Disabling DMA-BUF support as env. flag DISABLE_DMA_BUF was enabled")
elseif(NOT ENABLE_DMA_BUF)
message(STATUS "For CMake builds, DMA-BUF support requires explicit opt-in by setting CMake flags -DENABLE_DMA_BUF=ON")
message(STATUS "- Disabling DMA-BUF support")
else()
message(STATUS "Attempting to build with DMA-BUF support")

# Check for ibv_reg_dmabuf_mr
cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES ${IBVERBS_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES ${IBVERBS_LIBRARY})
check_symbol_exists(ibv_reg_dmabuf_mr "infiniband/verbs.h" HAVE_IBV_DMABUF)
cmake_pop_check_state()

# Check for hsa_amd_portable_export_dmabuf
cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES ${HSA_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES ${HSA_LIBRARY})
check_symbol_exists(hsa_amd_portable_export_dmabuf "hsa/hsa_ext_amd.h" HAVE_ROCM_DMABUF)
cmake_pop_check_state()

# Enable DMA-BUF only if both APIs are available
if(HAVE_IBV_DMABUF AND HAVE_ROCM_DMABUF)
set(DMABUF_SUPPORT_FOUND 1)
message(STATUS "- Building with DMA-BUF support")
else()
if(NOT HAVE_IBV_DMABUF AND NOT HAVE_ROCM_DMABUF)
message(WARNING "- Building without DMA-BUF support: missing both ibv_reg_dmabuf_mr and ROCm DMA-BUF export")
elseif(NOT HAVE_IBV_DMABUF)
message(WARNING "- Building without DMA-BUF support: missing ibv_reg_dmabuf_mr")
else()
message(WARNING "- Building without DMA-BUF support: missing ROCm DMA-BUF export")
endif()
endif()
endif()
endif()
## NIC / RDMA executor
##
## libibverbs is loaded dynamically at runtime via dlopen/dlsym
## (see third-party/ibverbs/IbvDynLoad.hpp). The build neither links against
## -libverbs nor requires libibverbs-dev to be installed on the build host.
## Only libdl needs to be linked so that dlopen/dlsym resolve.
message(STATUS "NIC executor: libibverbs is loaded dynamically at runtime (no -libverbs link, no build-host dependency)")

## Check for MPI support
set(MPI_PATH "" CACHE PATH "Path to MPI installation (takes priority over system MPI)")
Expand Down Expand Up @@ -510,13 +450,11 @@ add_executable(TransferBench src/client/Client.cpp)
target_include_directories(TransferBench PRIVATE
src/header
src/client
src/client/Presets)
src/client/Presets
third-party/ibverbs)

if(IBVERBS_FOUND)
target_include_directories(TransferBench PRIVATE ${IBVERBS_INCLUDE_DIR})
target_link_libraries(TransferBench PRIVATE ${IBVERBS_LIBRARY})
target_compile_definitions(TransferBench PRIVATE NIC_EXEC_ENABLED)
endif()
# libdl supplies dlopen/dlsym used by third-party/ibverbs/IbvDynLoad.hpp.
target_link_libraries(TransferBench PRIVATE ${CMAKE_DL_LIBS})
if(MPI_COMM_FOUND)
if(TARGET MPI::MPI_CXX)
target_link_libraries(TransferBench PRIVATE MPI::MPI_CXX)
Expand All @@ -526,9 +464,6 @@ if(MPI_COMM_FOUND)
endif()
target_compile_definitions(TransferBench PRIVATE MPI_COMM_ENABLED)
endif()
if(DMABUF_SUPPORT_FOUND)
target_compile_definitions(TransferBench PRIVATE HAVE_DMABUF_SUPPORT)
endif()
if(AMD_SMI_FOUND)
target_include_directories(TransferBench PRIVATE ${AMD_SMI_INCLUDE_DIR})
target_link_libraries(TransferBench PRIVATE ${AMD_SMI_LIBRARY})
Expand Down
56 changes: 6 additions & 50 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ NVCC ?= $(CUDA_PATH)/bin/nvcc
DEBUG ?= 0

# Optional features (set to 0 to disable, 1 to enable)
# DISABLE_NIC_EXEC: Disable RDMA/NIC executor support (default: 0)
# DISABLE_MPI_COMM: Disable MPI communicator support (default: 0)
# DISABLE_DMA_BUF: Disable DMA-BUF support for GPU Direct RDMA (default: 1)
# DISABLE_AMD_SMI: Disable AMD-SMI pod membership checking support (default: 0)
# DISABLE_NVML: Disable NVML pod membership detection for CUDA builds (default: 0)
# DISABLE_POD_COMM: Disable pod communication support (default: 0)
Expand Down Expand Up @@ -83,55 +81,13 @@ ifeq ($(filter clean,$(MAKECMDGOALS)),)
else
COMMON_FLAGS += -O0 -g -ggdb3
endif
COMMON_FLAGS += -I./src/header -I./src/client -I./src/client/Presets
COMMON_FLAGS += -I./src/header -I./src/client -I./src/client/Presets -I./third-party/ibverbs

LDFLAGS += -lpthread

NIC_ENABLED = 0
# Compile RDMA executor if
# 1) DISABLE_NIC_EXEC is not set to 1
# 2) IBVerbs is found in the Dynamic Linker cache
# 3) infiniband/verbs.h is found in the default include path
DISABLE_NIC_EXEC ?= 0
ifneq ($(DISABLE_NIC_EXEC),1)
$(info Attempting to build with NIC executor support)
ifeq ("$(shell ldconfig -p | grep -c ibverbs)", "0")
$(info - ibverbs library not found)
else ifeq ("$(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'infiniband/verbs.h')", "0")
$(info - infiniband/verbs.h not found)
else
COMMON_FLAGS += -DNIC_EXEC_ENABLED
LDFLAGS += -libverbs
NIC_ENABLED = 1

# Disable DMA-BUF support by default (set DISABLE_DMA_BUF=0 to enable)
DISABLE_DMA_BUF ?= 1
ifeq ($(DISABLE_DMA_BUF), 0)
# Check for both ibv_reg_dmabuf_mr and ROCm DMA-BUF export support
HAVE_IBV_DMABUF := $(shell echo '#include <infiniband/verbs.h>' | $(CXX) -E - 2>/dev/null | grep -c 'ibv_reg_dmabuf_mr')
HAVE_ROCM_DMABUF := $(shell echo '#include <hsa/hsa_ext_amd.h>' | $(CXX) -I$(ROCM_PATH)/include -E - 2>/dev/null | grep -c 'hsa_amd_portable_export_dmabuf')

ifeq ($(HAVE_IBV_DMABUF):$(HAVE_ROCM_DMABUF), 0:0)
$(info Building without DMA-BUF support: missing both ibv_reg_dmabuf_mr and ROCm DMA-BUF export)
else ifeq ($(HAVE_IBV_DMABUF), 0)
$(info Building without DMA-BUF support: missing ibv_reg_dmabuf_mr)
else ifeq ($(HAVE_ROCM_DMABUF), 0)
$(info Building without DMA-BUF support: missing ROCm DMA-BUF export)
else
COMMON_FLAGS += -DHAVE_DMABUF_SUPPORT
$(info Building with DMA-BUF support)
endif
else
$(info Building with DMA-BUF support disabled (DISABLE_DMA_BUF=1))
endif
endif
ifeq ($(NIC_ENABLED), 0)
$(info - Building without NIC executor support)
$(info - To use the TransferBench RDMA executor, check if your system has NICs, the NIC drivers are installed, and libibverbs-dev is installed)
else
$(info - Building with NIC executor support. Can set DISABLE_NIC_EXEC=1 to disable)
endif
endif
# libibverbs is loaded dynamically at runtime via dlopen/dlsym (see
# third-party/ibverbs/IbvDynLoad.hpp), so the build never links against -libverbs
# and does not require libibverbs-dev to be installed. We only need -ldl so
# the dynamic loader API is resolvable.
LDFLAGS += -lpthread -ldl

MPI_ENABLED = 0
# Compile with MPI communicator support if
Expand Down
2 changes: 0 additions & 2 deletions build_packages_local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,7 @@ CMAKE_ARGS=(
-DCMAKE_VERBOSE_MAKEFILE=ON
-DBUILD_RELOCATABLE_PACKAGE=ON
-DBUILD_LOCAL_GPU_TARGET_ONLY=OFF
-DENABLE_NIC_EXEC=OFF
-DENABLE_MPI_COMM=OFF
-DDISABLE_DMABUF=OFF
-DGPU_TARGETS="${GPU_TARGETS}"
-DTRANSFERBENCH_PACKAGE_RELEASE="${PKG_RELEASE}"
)
Expand Down
4 changes: 1 addition & 3 deletions src/client/Client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,7 @@ int main(int argc, char **argv)
void DisplayVersion()
{
bool nicSupport = false, mpiSupport = false, podSupport = false;
#if NIC_EXEC_ENABLED
nicSupport = true;
#endif
nicSupport = IsIbvSymbolsReady();
#if MPI_COMM_ENABLED
mpiSupport = true;
#endif
Expand Down
77 changes: 34 additions & 43 deletions src/client/EnvVars.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -369,27 +369,23 @@ class EnvVars
printf(" GFX_WORD_SIZE - GFX kernel packed data size (4=DWORDx4, 2=DWORDx2, 1=DWORDx1)\n");
printf(" GPU_MAX_HW_QUEUES - Max hardware queues per GPU device (default = 4)\n");
printf(" HIDE_ENV - Hide environment variable value listing\n");
#if NIC_EXEC_ENABLED
printf(" IB_GID_INDEX - Required for RoCE NICs (default=-1/auto)\n");
printf(" IB_PORT_NUMBER - RDMA port count for RDMA NIC (default=1)\n");
printf(" IP_ADDRESS_FAMILY - IP address family (4=v4, 6=v6, default=v4)\n");
#endif
if (IsIbvSymbolsReady()) {
printf(" IB_GID_INDEX - Required for RoCE NICs (default=-1/auto)\n");
printf(" IB_PORT_NUMBER - RDMA port count for RDMA NIC (default=1)\n");
printf(" IP_ADDRESS_FAMILY - IP address family (4=v4, 6=v6, default=v4)\n");
printf(" NIC_CHUNK_BYTES - Number of bytes to send at a time using NIC (default = 1GB)\n");
printf(" NIC_CQ_POLL_BATCH - Number of CQ entries to poll per ibv_poll_cq call (default = 4)\n");
printf(" NIC_RELAX_ORDER - Set to non-zero to use relaxed ordering\n");
printf(" NIC_SERVICE_LEVEL - IB service level (sl) for InfiniBand QPs (default=0)\n");
printf(" NIC_TRAFFIC_CLASS - DSCP/traffic class byte for RoCE GRH (default=0)\n");
printf(" ROCE_VERSION - RoCE version (default=2)\n");
}
printf(" MIN_VAR_SUBEXEC - Minimum # of subexecutors to use for variable subExec Transfers\n");
printf(" MAX_VAR_SUBEXEC - Maximum # of subexecutors to use for variable subExec Transfers (0 for device limits)\n");
#if NIC_EXEC_ENABLED
printf(" NIC_CHUNK_BYTES - Number of bytes to send at a time using NIC (default = 1GB)\n");
printf(" NIC_CQ_POLL_BATCH - Number of CQ entries to poll per ibv_poll_cq call (default = 4)\n");
printf(" NIC_RELAX_ORDER - Set to non-zero to use relaxed ordering\n");
printf(" NIC_SERVICE_LEVEL - IB service level (sl) for InfiniBand QPs (default=0)\n");
printf(" NIC_TRAFFIC_CLASS - DSCP/traffic class byte for RoCE GRH (default=0)\n");
#endif
printf(" NUM_ITERATIONS - # of timed iterations per test. If negative, run for this many seconds instead\n");
printf(" NUM_SUBITERATIONS - # of sub-iterations to run per iteration. Must be non-negative\n");
printf(" NUM_WARMUPS - # of untimed warmup iterations per test\n");
printf(" OUTPUT_TO_CSV - Outputs to CSV format if set\n");
#if NIC_EXEC_ENABLED
printf(" ROCE_VERSION - RoCE version (default=2)\n");
#endif
printf(" SAMPLING_FACTOR - Add this many samples (when possible) between powers of 2 when auto-generating data sizes\n");
printf(" SHOW_BORDERS - Show ASCII box-drawing characters in tables\n");
printf(" SHOW_ITERATIONS - Show per-iteration timing info\n");
Expand Down Expand Up @@ -443,9 +439,9 @@ class EnvVars
{
int numGpuDevices = TransferBench::GetNumExecutors(EXE_GPU_GFX);
std::string nicSupport = "";
#if NIC_EXEC_ENABLED
nicSupport = " (with NIC support)";
#endif
if (IsIbvSymbolsReady()) {
nicSupport = " (with NIC support)";
}
if (!outputToCsv) {
if (!hideEnv) printf("[Common] (Suppress by setting HIDE_ENV=1)\n");
}
Expand Down Expand Up @@ -499,43 +495,38 @@ class EnvVars
Print("GPU_MAX_HW_QUEUES", gpuMaxHwQueues,
"Max %d hardware queues per GPU device", gpuMaxHwQueues);

#if NIC_EXEC_ENABLED
Print("IP_ADDRESS_FAMILY", ipAddressFamily,
"IP address family is set to IPv%d", ipAddressFamily);

Print("IB_GID_INDEX", ibGidIndex,
"RoCE GID index is set to %s", (ibGidIndex < 0 ? "auto" : std::to_string(ibGidIndex).c_str()));
Print("IB_PORT_NUMBER", ibPort,
"IB port number is set to %d", ibPort);
#endif
if (IsIbvSymbolsReady()) {
Print("IP_ADDRESS_FAMILY", ipAddressFamily,
"IP address family is set to IPv%d", ipAddressFamily);
Print("IB_GID_INDEX", ibGidIndex,
"RoCE GID index is set to %s", (ibGidIndex < 0 ? "auto" : std::to_string(ibGidIndex).c_str()));
Print("IB_PORT_NUMBER", ibPort,
"IB port number is set to %d", ibPort);
Print("NIC_CHUNK_BYTES", nicChunkBytes,
"Sending %lu bytes at a time for NIC RDMA", nicChunkBytes);
Print("NIC_CQ_POLL_BATCH", nicCqPollBatch,
"Polling %d CQ entries per ibv_poll_cq call", nicCqPollBatch);
Print("NIC_RELAX_ORDER", nicRelaxedOrder,
"Using %s ordering for NIC RDMA", nicRelaxedOrder ? "relaxed" : "strict");
Print("NIC_SERVICE_LEVEL", nicServiceLevel,
"IB service level (sl) set to %d", nicServiceLevel);
Print("NIC_TRAFFIC_CLASS", nicTrafficClass,
"RoCE traffic class (DSCP) set to %d", nicTrafficClass);
Print("ROCE_VERSION", roceVersion,
"RoCE version is set to %d", roceVersion);
}
Print("MIN_VAR_SUBEXEC", minNumVarSubExec,
"Using at least %d subexecutor(s) for variable subExec tranfers", minNumVarSubExec);
Print("MAX_VAR_SUBEXEC", maxNumVarSubExec,
"Using up to %s subexecutors for variable subExec transfers",
maxNumVarSubExec ? std::to_string(maxNumVarSubExec).c_str() : "all available");
#if NIC_EXEC_ENABLED
Print("NIC_CHUNK_BYTES", nicChunkBytes,
"Sending %lu bytes at a time for NIC RDMA", nicChunkBytes);
Print("NIC_CQ_POLL_BATCH", nicCqPollBatch,
"Polling %d CQ entries per ibv_poll_cq call", nicCqPollBatch);
Print("NIC_RELAX_ORDER", nicRelaxedOrder,
"Using %s ordering for NIC RDMA", nicRelaxedOrder ? "relaxed" : "strict");
Print("NIC_SERVICE_LEVEL", nicServiceLevel,
"IB service level (sl) set to %d", nicServiceLevel);
Print("NIC_TRAFFIC_CLASS", nicTrafficClass,
"RoCE traffic class (DSCP) set to %d", nicTrafficClass);
#endif
Print("NUM_ITERATIONS", numIterations,
(numIterations == 0) ? "Running infinitely" :
"Running %d %s", abs(numIterations), (numIterations > 0 ? " timed iteration(s)" : "seconds(s) per Test"));
Print("NUM_SUBITERATIONS", numSubIterations,
"Running %s subiterations", (numSubIterations == 0 ? "infinite" : std::to_string(numSubIterations)).c_str());
Print("NUM_WARMUPS", numWarmups,
"Running %d warmup iteration(s) per Test", numWarmups);
#if NIC_EXEC_ENABLED
Print("ROCE_VERSION", roceVersion,
"RoCE version is set to %d", roceVersion);
#endif
Print("SHOW_BORDERS", showBorders, "%s ASCII box-drawing characaters in tables", showBorders ? "Showing" : "Hiding");
Print("SHOW_ITERATIONS", showIterations,
"%s per-iteration timing", showIterations ? "Showing" : "Hiding");
Expand Down
3 changes: 1 addition & 2 deletions src/client/Topology.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ static int RemappedCpuIndex(int origIdx)

static void PrintNicToGPUTopo(bool outputToCsv)
{
#ifdef NIC_EXEC_ENABLED
if (!IsIbvSymbolsReady()) return;
printf(" NIC | Device Name | Active | PCIe Bus ID | NUMA | Closest GPU(s) | GID Index | GID Descriptor\n");
if(!outputToCsv)
printf("-----+-------------+--------+--------------+------+----------------+-----------+-------------------\n");
Expand Down Expand Up @@ -73,7 +73,6 @@ static void PrintNicToGPUTopo(bool outputToCsv)
);
}
printf("\n");
#endif
}

void DisplaySingleRankTopology(bool outputToCsv)
Expand Down
Loading