From e15a44a726c56c54cc00677d2f275308af073172 Mon Sep 17 00:00:00 2001 From: "Chao Chen[AMD]" Date: Sat, 18 Jan 2025 03:17:42 +0800 Subject: [PATCH] fixed gfx1202 error as gfx1102 --- .../core/grappler/optimizers/auto_mixed_precision.cc | 2 +- .../grappler/optimizers/generic_layout_optimizer.cc | 2 +- tensorflow/core/util/gpu_device_functions.h | 6 +++--- tensorflow/tools/ci_build/Dockerfile.rocm | 2 +- .../Dockerfile.rocm.manylinux2014 | 2 +- .../Dockerfile.rocm.manylinux_2_28 | 2 +- .../tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 | 2 +- .../tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 | 2 +- .../service/gpu/llvm_gpu_backend/gpu_backend_lib.cc | 2 +- .../xla/xla/stream_executor/device_description.h | 12 ++++++------ 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index d58a0acacb9815..7a8ca30dd509d4 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -116,7 +116,7 @@ bool HasFastFP16Support(const DeviceProperties& props) { absl::flat_hash_set FP16SupportedDevices = { {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"}, {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"}, - {"gfx1200"},{"gfx1201"}, {"gfx1202"} + {"gfx1200"},{"gfx1201"}, {"gfx1102"} }; std::string gcnArchName = props.environment().at("architecture"); std::vector gpu_arch = absl::StrSplit(gcnArchName, ":"); diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc index 2b6c52ff033e13..03b5c3370b6d94 100644 --- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc @@ -75,7 +75,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) { compute_capability_it->second == "gfx942" || compute_capability_it->second == "gfx1200" || compute_capability_it->second == "gfx1201" || - compute_capability_it->second == "gfx1202") && is_enabled) { + compute_capability_it->second == "gfx1102") && is_enabled) { gpu_stats.num_voltas++; } #endif diff --git a/tensorflow/core/util/gpu_device_functions.h b/tensorflow/core/util/gpu_device_functions.h index 78f072696dee87..8137c7eccf21f5 100644 --- a/tensorflow/core/util/gpu_device_functions.h +++ b/tensorflow/core/util/gpu_device_functions.h @@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1202__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__ #define ADDRSP1 __attribute__((address_space(1))) __device__ float @@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) { } #endif -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1202__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__ // Low level instructions don't return. For now, assume that return value // is always unused. __device__ float GpuAtomicAdd(float* dst, float val) { @@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) { return GpuAtomicAdd(ptr, value); } -#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1202__ +#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__ __device__ float GpuAtomicAddShared(float* dst, float val) { atomicAdd(dst, val); return val; diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm index 3a638c85d13223..dbacfc722b38cc 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rocm +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -7,7 +7,7 @@ ARG ROCM_DEB_REPO=https://repo.radeon.com/rocm/apt/6.1.2/ ARG ROCM_BUILD_NAME=ubuntu ARG ROCM_BUILD_NUM=main ARG ROCM_PATH=/opt/rocm/ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" ARG DEBIAN_FRONTEND=noninteractive ENV TF_NEED_ROCM 1 diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 index cb31965028a758..19d1a8449bcfef 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 @@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install ROCM diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 index 32fac0cf74ac93..cb976aedd36e49 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 @@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install ROCM diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 index 7a2f89416163a5..c99e425228d36b 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 @@ -2,7 +2,7 @@ FROM ubuntu:20.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 index d62b6a2fe8d745..1249aef65406c7 100644 --- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 +++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 @@ -2,7 +2,7 @@ FROM ubuntu:22.04 ################################################################################ -ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202" +ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102" ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS} # Install build dependencies diff --git a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index dfc0e469b77743..4a71bbcc6ce1fd 100644 --- a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -877,7 +877,7 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token, return "+sramecc"; } else if (token == "sramecc-") { if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" || - gfx == "gfx942" || gfx == "gfx1200" || gfx == "gfx1201" || gfx == "gfx1202") + gfx == "gfx942" || gfx == "gfx1200" || gfx == "gfx1201" || gfx == "gfx1102") return ""; return "-sramecc"; } else if (token == "xnack+") { diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h index e75c2dd4bae003..37a11619b268d4 100644 --- a/third_party/xla/xla/stream_executor/device_description.h +++ b/third_party/xla/xla/stream_executor/device_description.h @@ -184,11 +184,11 @@ class RocmComputeCapability { bool gfx10_rx69xx() const { return gfx_version() == "gfx1030"; } - bool gfx11_rx7900() const { return gfx_version() == "gfx1100"; } + bool gfx11_rx7900() const { return (gfx_version() == "gfx1100" || + gfx_version() == "gfx1102"); } - bool gfx12_rx8900() const { return ((gfx_version() == "gfx1200") || - (gfx_version() == "gfx1201") || - (gfx_version() == "gfx1202")); } + bool gfx12_rx8900() const { return (gfx_version() == "gfx1200" || + gfx_version() == "gfx1201"); } bool has_nhwc_layout_support() const { return gfx9_mi100_or_later(); } @@ -232,8 +232,8 @@ class RocmComputeCapability { "gfx90a", // MI200 "gfx940", "gfx941", "gfx942", // MI300 "gfx1030", // RX68xx / RX69xx - "gfx1100", // RX7900 - "gfx1200", "gfx1201", "gfx1202", // RX8900 + "gfx1100", "gfx1102", // RX7900 + "gfx1200", "gfx1201", // RX8900 }; };