From e15a44a726c56c54cc00677d2f275308af073172 Mon Sep 17 00:00:00 2001
From: "Chao Chen[AMD]" <cchen104@amd.com>
Date: Sat, 18 Jan 2025 03:17:42 +0800
Subject: [PATCH] Fix erroneous gfx1202 target: replace it with gfx1102

---
 .../core/grappler/optimizers/auto_mixed_precision.cc |  2 +-
 .../grappler/optimizers/generic_layout_optimizer.cc  |  2 +-
 tensorflow/core/util/gpu_device_functions.h          |  6 +++---
 tensorflow/tools/ci_build/Dockerfile.rocm            |  2 +-
 .../Dockerfile.rocm.manylinux2014                    |  2 +-
 .../Dockerfile.rocm.manylinux_2_28                   |  2 +-
 .../tf_sig_build_dockerfiles/Dockerfile.rocm.ub20    |  2 +-
 .../tf_sig_build_dockerfiles/Dockerfile.rocm.ub22    |  2 +-
 .../service/gpu/llvm_gpu_backend/gpu_backend_lib.cc  |  2 +-
 .../xla/xla/stream_executor/device_description.h     | 12 ++++++------
 10 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
index d58a0acacb9815..7a8ca30dd509d4 100644
--- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
+++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
@@ -116,7 +116,7 @@ bool HasFastFP16Support(const DeviceProperties& props) {
   absl::flat_hash_set<std::string> FP16SupportedDevices = {
       {"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"},
       {"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"},
-      {"gfx1200"},{"gfx1201"}, {"gfx1202"}
+      {"gfx1200"},{"gfx1201"}, {"gfx1102"}
   };
   std::string gcnArchName = props.environment().at("architecture");
   std::vector<std::string> gpu_arch = absl::StrSplit(gcnArchName, ":");
diff --git a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
index 2b6c52ff033e13..03b5c3370b6d94 100644
--- a/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
@@ -75,7 +75,7 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) {
 	 compute_capability_it->second == "gfx942" ||
    compute_capability_it->second == "gfx1200" ||
    compute_capability_it->second == "gfx1201" ||
-   compute_capability_it->second == "gfx1202") && is_enabled) {
+   compute_capability_it->second == "gfx1102") && is_enabled) {
        gpu_stats.num_voltas++;
     }
 #endif
diff --git a/tensorflow/core/util/gpu_device_functions.h b/tensorflow/core/util/gpu_device_functions.h
index 78f072696dee87..8137c7eccf21f5 100644
--- a/tensorflow/core/util/gpu_device_functions.h
+++ b/tensorflow/core/util/gpu_device_functions.h
@@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) {
 }
 #endif
 
-#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1202__
+#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__
 
 #define ADDRSP1 __attribute__((address_space(1)))
 __device__ float
@@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) {
 }
 #endif
 
-#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1202__
+#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__
 // Low level instructions don't return. For now, assume that return value
 // is always unused.
 __device__ float GpuAtomicAdd(float* dst, float val) {
@@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) {
   return GpuAtomicAdd(ptr, value);
 }
 
-#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1202__
+#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__
 __device__ float GpuAtomicAddShared(float* dst, float val) {
   atomicAdd(dst, val);
   return val;
diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm
index 3a638c85d13223..dbacfc722b38cc 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rocm
+++ b/tensorflow/tools/ci_build/Dockerfile.rocm
@@ -7,7 +7,7 @@ ARG ROCM_DEB_REPO=https://repo.radeon.com/rocm/apt/6.1.2/
 ARG ROCM_BUILD_NAME=ubuntu
 ARG ROCM_BUILD_NUM=main
 ARG ROCM_PATH=/opt/rocm/
-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102"
 
 ARG DEBIAN_FRONTEND=noninteractive
 ENV TF_NEED_ROCM 1
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014
index cb31965028a758..19d1a8449bcfef 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux2014
@@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
 COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
 RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt
 
-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
 
 # Install ROCM
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
index 32fac0cf74ac93..cb976aedd36e49 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.manylinux_2_28
@@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
 COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
 RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt
 
-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
 
 # Install ROCM
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20
index 7a2f89416163a5..c99e425228d36b 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub20
@@ -2,7 +2,7 @@
 FROM ubuntu:20.04
 ################################################################################
 
-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
 
 # Install build dependencies
diff --git a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22 b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
index d62b6a2fe8d745..1249aef65406c7 100644
--- a/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
+++ b/tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile.rocm.ub22
@@ -2,7 +2,7 @@
 FROM ubuntu:22.04
 ################################################################################
 
-ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1202"
+ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1200 gfx1201 gfx1102"
 ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}
 
 # Install build dependencies
diff --git a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index dfc0e469b77743..4a71bbcc6ce1fd 100644
--- a/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/third_party/xla/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -877,7 +877,7 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token,
     return "+sramecc";
   } else if (token == "sramecc-") {
     if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" ||
-        gfx == "gfx942" ||  gfx == "gfx1200" || gfx == "gfx1201" || gfx == "gfx1202")
+        gfx == "gfx942" ||  gfx == "gfx1200" || gfx == "gfx1201" || gfx == "gfx1102")
       return "";
     return "-sramecc";
   } else if (token == "xnack+") {
diff --git a/third_party/xla/xla/stream_executor/device_description.h b/third_party/xla/xla/stream_executor/device_description.h
index e75c2dd4bae003..37a11619b268d4 100644
--- a/third_party/xla/xla/stream_executor/device_description.h
+++ b/third_party/xla/xla/stream_executor/device_description.h
@@ -184,11 +184,11 @@ class RocmComputeCapability {
 
   bool gfx10_rx69xx() const { return gfx_version() == "gfx1030"; }
 
-  bool gfx11_rx7900() const { return gfx_version() == "gfx1100"; }
+  bool gfx11_rx7900() const { return (gfx_version() == "gfx1100" ||
+                                      gfx_version() == "gfx1102"); }
 
-  bool gfx12_rx8900() const { return ((gfx_version() == "gfx1200") || 
-                                      (gfx_version() == "gfx1201") || 
-                                      (gfx_version() == "gfx1202")); }
+  bool gfx12_rx8900() const { return (gfx_version() == "gfx1200" || 
+                                      gfx_version() == "gfx1201"); }
 
   bool has_nhwc_layout_support() const { return gfx9_mi100_or_later(); }
 
@@ -232,8 +232,8 @@ class RocmComputeCapability {
       "gfx90a",                       // MI200
       "gfx940",  "gfx941", "gfx942",  // MI300
       "gfx1030",                      // RX68xx / RX69xx
-      "gfx1100",                       // RX7900
-      "gfx1200", "gfx1201", "gfx1202", // RX8900
+      "gfx1100", "gfx1102",            // RX7900
+      "gfx1200", "gfx1201",            // RX8900
   };
 };