From d380693772a83b377bb8e38f264255d2c2839b63 Mon Sep 17 00:00:00 2001 From: akolliasAMD <99202231+akolliasAMD@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:36:55 -0600 Subject: [PATCH] Cherry-pick for Fix DMABUF support (#1218) (#1237) * Fix DMABUF support (#1218) * Fix DMABUF support * Reduce log output by moving dmabuf allocation details to TRACE * Enable peer memory GDR support if ib_umem_get_peer is in kernel * Checking kernel header files only when missing sysfs entry (#1239) * Update CHANGELOG.md * Update NOTICES.txt for NCCL 2.20.5 --------- Co-authored-by: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com> --- CHANGELOG.md | 1 + src/init.cc | 6 +++++- src/transport/net.cc | 4 ++-- src/transport/net_ib.cc | 25 +++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 82d9ae831..381798b6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Full documentation for RCCL is available at [https://rccl.readthedocs.io](https: - Bug when configuring RCCL for only LL128 protocol - Scratch memory allocation after API change for MSCCL - Incorrect minNchannels in multi-node +- GDR support flag now set with DMABUF ## RCCL 2.18.6 for ROCm 6.1.0 ### Changed diff --git a/src/init.cc b/src/init.cc index 73c3f017d..63725bda9 100644 --- a/src/init.cc +++ b/src/init.cc @@ -722,7 +722,11 @@ static ncclResult_t fillInfo(struct ncclComm* comm, struct ncclPeerInfo* info, u #endif CUDACHECK(hipFree(ptr)); info->hasFineGrain = true; - NCCLCHECK(ncclGpuGdrSupport(comm, &info->gdrSupport)); + // GPU supports GDR if DMABUF is supported + if (dmaBufSupported(comm) == ncclSuccess) + info->gdrSupport = 1; + else + NCCLCHECK(ncclGpuGdrSupport(comm, &info->gdrSupport)); } else { info->hasFineGrain = false; diff --git a/src/transport/net.cc b/src/transport/net.cc index 3fdc1dda7..be284c13e 100644 --- 
a/src/transport/net.cc +++ b/src/transport/net.cc @@ -837,7 +837,7 @@ static ncclResult_t sendProxyConnect(struct ncclProxyConnection* connection, str CUCHECK(hsa_amd_portable_export_dmabuf((const void*)resources->buffers[p], resources->buffSizes[p], &dmabuf_fd, &offset)); NCCLCHECK(proxyState->ncclNet->regMrDmaBuf(resources->netSendComm, resources->buffers[p], resources->buffSizes[p], type, offset, dmabuf_fd, &resources->mhandles[p])); (void)close(dmabuf_fd); - INFO(NCCL_INIT|NCCL_NET, "hsa_amd_portable_export_dmabuf buffer %p size %d handle %x offset %ld", + TRACE(NCCL_INIT|NCCL_NET, "hsa_amd_portable_export_dmabuf buffer %p size %d handle %x offset %ld", (const void*)resources->buffers[p], resources->buffSizes[p], dmabuf_fd, offset); } else // FALL-THROUGH to nv_peermem GDR path #endif @@ -1003,7 +1003,7 @@ static ncclResult_t recvProxyConnect(struct ncclProxyConnection* connection, str CUCHECK(hsa_amd_portable_export_dmabuf((const void*)resources->buffers[p], resources->buffSizes[p], &dmabuf_fd, &offset)); NCCLCHECK(proxyState->ncclNet->regMrDmaBuf(resources->netRecvComm, resources->buffers[p], resources->buffSizes[p], type, offset, dmabuf_fd, &resources->mhandles[p])); (void)close(dmabuf_fd); - INFO(NCCL_INIT|NCCL_NET, "hsa_amd_portable_export_dmabuf buffer %p size %d handle %x offset %ld", + TRACE(NCCL_INIT|NCCL_NET, "hsa_amd_portable_export_dmabuf buffer %p size %d handle %x offset %ld", (const void*)resources->buffers[p], resources->buffSizes[p], dmabuf_fd, offset); } else // FALL-THROUGH to nv_peermem GDR path #endif diff --git a/src/transport/net_ib.cc b/src/transport/net_ib.cc index f463e6712..5691f77c6 100644 --- a/src/transport/net_ib.cc +++ b/src/transport/net_ib.cc @@ -23,6 +23,7 @@ #include #define ENABLE_TIMER 0 #include "timer.h" +#include <sys/utsname.h> #include "ibvwrap.h" #include "graph/xml.h" @@ -361,6 +362,30 @@ ncclResult_t ncclIbGdrSupport() { NCCLCHECK(ncclTopoGetStrFromSys("/proc/sys/kernel", "numa_balancing", strValue)); if (strcmp(strValue, "1") == 0
&& roMode == 0) moduleLoaded = 0; + } else if (moduleLoaded == 0) { + char kernel_header_file[256]; + struct utsname utsname; + char buf[256]; + FILE *fp = NULL; + //query the running kernel's release string via uname() + if (uname(&utsname) == -1) { + INFO(NCCL_NET,"Could not get kernel name"); + } else { + //build the path to this kernel's rdma/ib_umem.h header + snprintf(kernel_header_file, sizeof(kernel_header_file), "/lib/modules/%s/build/include/rdma/ib_umem.h", utsname.release); + fp = fopen(kernel_header_file, "r"); + if (fp == NULL) { + INFO(NCCL_INIT,"Could not open kernel header file %s", kernel_header_file); + } else { + //scan the header line by line; any mention of ib_umem_get_peer sets moduleLoaded + while (fgets(buf, sizeof(buf), fp) != NULL) { + if (strstr(buf, "ib_umem_get_peer") != NULL) { + moduleLoaded = 1; + INFO(NCCL_INIT,"Found ib_umem_get_peer in %s", kernel_header_file); + } + } + } + } } #else // Check for the nv_peer_mem module being loaded