From 359f3eb0e4e06fa05e40ac5df7f019462270f7d4 Mon Sep 17 00:00:00 2001 From: sstamenk Date: Thu, 30 Jan 2025 23:13:03 +0100 Subject: [PATCH] Add gfx1101 support --- .../optimizers/auto_mixed_precision.cc | 5 +++-- .../optimizers/generic_layout_optimizer.cc | 16 +++++++------- tensorflow/core/util/gpu_device_functions.h | 6 +++--- tensorflow/tools/ci_build/Dockerfile.rocm | 2 +- .../Dockerfile.rocm.manylinux2014 | 2 +- .../Dockerfile.rocm.manylinux_2_28 | 2 +- .../Dockerfile.rocm.ub20 | 2 +- .../Dockerfile.rocm.ub22 | 2 +- .../Dockerfile.rocm.ub24 | 2 +- .../tf_sig_build_dockerfiles/setup.rocm.sh | 2 +- .../gpu/llvm_gpu_backend/gpu_backend_lib.cc | 3 ++- .../xla/stream_executor/device_description.h | 21 ++++++++++--------- 12 files changed, 35 insertions(+), 30 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index 089b68d42f924b..7b75f3cbe9584b 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -116,8 +116,9 @@ bool HasFastFP16Support(const DeviceProperties& props) { #elif TENSORFLOW_USE_ROCM absl::flat_hash_set<std::string> FP16SupportedDevices = { {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"}, - {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"}, - {"gfx1200"},{"gfx1201"}, {"gfx1102"} + {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, + {"gfx1100"}, {"gfx1101"}, {"gfx1102"}, + {"gfx1200"}, {"gfx1201"} }; std::string gcnArchName = props.environment().at("architecture"); std::vector<std::string> gpu_arch = absl::StrSplit(gcnArchName, ":"); diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc index 075f8e9e437596..a724a8085b4eb4 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc @@ 
-70,13 +70,15 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) { bool is_enabled = se::gpu::UseNhwcLayoutForRocm(); if ((compute_capability_it->second == "gfx908" || compute_capability_it->second == "gfx90a" || - compute_capability_it->second == "gfx940" || - compute_capability_it->second == "gfx941" || - compute_capability_it->second == "gfx942" || - compute_capability_it->second == "gfx1200" || - compute_capability_it->second == "gfx1201" || - compute_capability_it->second == "gfx1102") && is_enabled) { - gpu_stats.num_voltas++; + compute_capability_it->second == "gfx940" || + compute_capability_it->second == "gfx941" || + compute_capability_it->second == "gfx942" || + compute_capability_it->second == "gfx1101" || + compute_capability_it->second == "gfx1102" || + compute_capability_it->second == "gfx1200" || + compute_capability_it->second == "gfx1201") && + is_enabled) { + gpu_stats.num_voltas++; } #endif if (compute_capability_it == device.second.environment().end()) { diff --git a/tensorflow/core/util/gpu_device_functions.h b/tensorflow/core/util/gpu_device_functions.h index 7d35df67d6b986..39c9d9d0bfb432 100644 --- a/tensorflow/core/util/gpu_device_functions.h +++ b/tensorflow/core/util/gpu_device_functions.h @@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ #define ADDRSP1 __attribute__((address_space(1))) __device__ float @@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ 
// Low level instructions don't return. For now, assume that return value // is always unused. __device__ float GpuAtomicAdd(float* dst, float val) { @@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) { return GpuAtomicAdd(ptr, value); } -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__ __device__ float GpuAtomicAddShared(float* dst, float val) { atomicAdd(dst, val); return val; diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm index 9e416399c8f206..e27fdf75b6f978 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rocm +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -7,7 +7,7 @@ ARG ROCM_DEB_REPO=https://repo.radeon.com/rocm/apt/6.2/ ARG ROCM_BUILD_NAME=ubuntu ARG ROCM_BUILD_NUM=main ARG ROCM_PATH=/opt/rocm/ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ARG DEBIAN_FRONTEND=noninteractive ENV TF_NEED_ROCM 1 diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 index 08db953ce95727..bb754568d66c02 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 @@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 
gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install ROCM diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 index 5abf3782f27f64..2ce772270d375f 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 @@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install ROCM diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 index c99e425228d36b..6dddb73d1b49a6 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 @@ -2,7 +2,7 @@ FROM ubuntu:20.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 index 1249aef65406c7..a3a8edddd0f909 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 @@ -2,7 +2,7 @@ FROM 
ubuntu:22.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 index 0285ba7a56601c..45c6bbe00030fe 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub24 @@ -2,7 +2,7 @@ FROM ubuntu:24.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh index 266d0399e003c3..668ce5f5ccb777 100755 --- a/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh +++ b/tensorflow/tools/tf_sig_build_dockerfiles/setup.rocm.sh @@ -145,7 +145,7 @@ then echo "build:rocm_base --copt=-fclang-abi-compat=17" >> /etc/bazel.bazelrc fi -GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201"} +GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS:-"gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"} echo $ROCM_VERSION echo $ROCM_REPO diff --git a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 0480d6fb77745b..432bd35a731dee 100644 --- 
a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -974,7 +974,8 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token, return "+sramecc"; } else if (token == "sramecc-") { if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" || - gfx == "gfx942" || gfx == "gfx1200" || gfx == "gfx1201" || gfx == "gfx1102") + gfx == "gfx942" || gfx == "gfx1101" || gfx == "gfx1102" || + gfx == "gfx1200" || gfx == "gfx1201") return ""; return "-sramecc"; } else if (token == "xnack+") { diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h index 85282acb8cedc8..c36ed5e990ab43 100644 --- a/third_party/xla/xla/stream_executor/device_description.h +++ b/third_party/xla/xla/stream_executor/device_description.h @@ -188,8 +188,9 @@ class RocmComputeCapability { bool gfx10_rx69xx() const { return gfx_version() == "gfx1030"; } - bool gfx11_rx7900() const { return (gfx_version() == "gfx1100") || - (gfx_version() == "gfx1102"); } + bool gfx11_rx7900() const { return (gfx_version() == "gfx1100" || + gfx_version() == "gfx1101" || + gfx_version() == "gfx1102"); } bool gfx12_rx8900() const { return ((gfx_version() == "gfx1200") || (gfx_version() == "gfx1201")); } @@ -237,14 +238,14 @@ class RocmComputeCapability { std::string gcn_arch_name_ = "gfx000"; // default to invalid arch. static constexpr absl::string_view kSupportedGfxVersions[]{ - "gfx900", // MI25 - "gfx906", // MI50 / MI60 - "gfx908", // MI100 - "gfx90a", // MI200 - "gfx940", "gfx941", "gfx942", // MI300 - "gfx1030", // RX68xx / RX69xx - "gfx1100", "gfx1102", // RX7900 - "gfx1200", "gfx1201", // RX8900 + "gfx900", // MI25 + "gfx906", // MI50 / MI60 + "gfx908", // MI100 + "gfx90a", // MI200 + "gfx940", "gfx941", "gfx942", // MI300 + "gfx1030", // RX68xx / RX69xx + "gfx1100", "gfx1101", "gfx1102", // RX7900 + "gfx1200", "gfx1201", // RX8900 }; };