diff --git a/src/graph/connect.cc b/src/graph/connect.cc
index 6c681c397..db34f1df6 100644
--- a/src/graph/connect.cc
+++ b/src/graph/connect.cc
@@ -624,9 +624,10 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
   NCCLCHECK(connectRings(comm, ringRecv, ringSend, ringPrev, ringNext));
   NCCLCHECK(connectTrees(comm, treeToParent, treeToChild0, treeToChild1, treePatterns));
 
   // Only use full MAXCHANNELS for gfx94x
   int maxChannels = IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ?
-    ((comm->topo->nodes[GPU].nodes[0].gpu.cu == 80 || comm->topo->nodes[GPU].nodes[0].gpu.cu == 20)
+    ((comm->topo->nodes[GPU].nodes[0].gpu.cu == 80 || comm->topo->nodes[GPU].nodes[0].gpu.cu == 20 ||
+      comm->topo->nodes[GPU].nodes[0].gpu.cu == 96 || comm->topo->nodes[GPU].nodes[0].gpu.cu == 24)
       ? comm->topo->nodes[GPU].nodes[0].gpu.cu : MAXCHANNELS) : 2*CHANNEL_LIMIT;
 
   if (graphs[NCCL_ALGO_RING]->nIntraChannels > 0 || comm->nNodes > 1) {