Skip to content

Commit

Permalink
Merge pull request #2831 from ROCm/add_gfx1101_support
Browse files Browse the repository at this point in the history
Add gfx1101 support
  • Loading branch information
i-chaochen authored Jan 31, 2025
2 parents 656add2 + 5063b21 commit 21b23e3
Show file tree
Hide file tree
Showing 11 changed files with 32 additions and 28 deletions.
5 changes: 3 additions & 2 deletions tensorflow/core/grappler/optimizers/auto_mixed_precision.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,9 @@ bool HasFastFP16Support(const DeviceProperties& props) {
#elif TENSORFLOW_USE_ROCM
absl::flat_hash_set<std::string> FP16SupportedDevices = {
{"gfx906"}, {"gfx908"}, {"gfx90a"}, {"gfx910"}, {"gfx940"}, {"gfx941"},
{"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"}, {"gfx1100"},
{"gfx1200"}, {"gfx1201"}, {"gfx1102"}
{"gfx942"}, {"gfx1010"}, {"gfx1012"}, {"gfx1030"},
{"gfx1100"}, {"gfx1101"}, {"gfx1102"},
{"gfx1200"}, {"gfx1201"}
};
std::string gcnArchName = props.environment().at("architecture");
std::vector<std::string> gpu_arch = absl::StrSplit(gcnArchName, ":");
Expand Down
16 changes: 9 additions & 7 deletions tensorflow/core/grappler/optimizers/generic_layout_optimizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,15 @@ inline GpuStats GetNumGPUs(const Cluster& cluster) {
bool is_enabled = se::gpu::UseNhwcLayoutForRocm();
if ((compute_capability_it->second == "gfx908" ||
compute_capability_it->second == "gfx90a" ||
compute_capability_it->second == "gfx940" ||
compute_capability_it->second == "gfx941" ||
compute_capability_it->second == "gfx942" ||
compute_capability_it->second == "gfx1200" ||
compute_capability_it->second == "gfx1201" ||
compute_capability_it->second == "gfx1102") && is_enabled) {
gpu_stats.num_voltas++;
compute_capability_it->second == "gfx940" ||
compute_capability_it->second == "gfx941" ||
compute_capability_it->second == "gfx942" ||
compute_capability_it->second == "gfx1101" ||
compute_capability_it->second == "gfx1102" ||
compute_capability_it->second == "gfx1200" ||
compute_capability_it->second == "gfx1201") &&
is_enabled) {
gpu_stats.num_voltas++;
}
#endif
if (compute_capability_it == device.second.environment().end()) {
Expand Down
6 changes: 3 additions & 3 deletions tensorflow/core/util/gpu_device_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ __device__ inline double GpuAtomicAdd(double* ptr, double value) {
}
#endif

#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1200__ || __gfx1201__ || __gfx1102__
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__

#define ADDRSP1 __attribute__((address_space(1)))
__device__ float
Expand Down Expand Up @@ -963,7 +963,7 @@ __device__ inline int64_t GpuAtomicMin(int64_t* ptr, int64_t value) {
}
#endif

#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__|| __gfx1200__ || __gfx1201__ || __gfx1102__
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
// Low level instructions don't return. For now, assume that return value
// is always unused.
__device__ float GpuAtomicAdd(float* dst, float val) {
Expand All @@ -978,7 +978,7 @@ __device__ inline T GpuAtomicAddShared(T* ptr, T value) {
return GpuAtomicAdd(ptr, value);
}

#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__|| __gfx1200__ || __gfx1201__ || __gfx1102__
#if __gfx908__ || __gfx90a__ || __gfx940__ || __gfx941__ || __gfx942__ || __gfx1101__ || __gfx1102__ || __gfx1200__ || __gfx1201__
__device__ float GpuAtomicAddShared(float* dst, float val) {
atomicAdd(dst, val);
return val;
Expand Down
2 changes: 1 addition & 1 deletion tensorflow/tools/ci_build/Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ ARG ROCM_DEB_REPO=https://repo.radeon.com/rocm/apt/6.2/
ARG ROCM_BUILD_NAME=ubuntu
ARG ROCM_BUILD_NUM=main
ARG ROCM_PATH=/opt/rocm/
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"

ARG DEBIAN_FRONTEND=noninteractive
ENV TF_NEED_ROCM 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ COPY setup.packages.rocm.cs7.sh setup.packages.rocm.cs7.sh
COPY builder.packages.rocm.cs7.txt builder.packages.rocm.cs7.txt
RUN /setup.packages.rocm.cs7.sh /builder.packages.rocm.cs7.txt

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install ROCM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ COPY setup.packages.rocm.el8.sh setup.packages.rocm.el8.sh
COPY builder.packages.rocm.el8.txt builder.packages.rocm.el8.txt
RUN /setup.packages.rocm.el8.sh /builder.packages.rocm.el8.txt

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install ROCM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:20.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:22.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
FROM ubuntu:24.04
################################################################################

ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1102 gfx1200 gfx1201"
ARG GPU_DEVICE_TARGETS="gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102 gfx1200 gfx1201"
ENV GPU_DEVICE_TARGETS=${GPU_DEVICE_TARGETS}

# Install build dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@ std::string MapGCNArchNameTokenToFeatureStr(const std::string& token,
return "+sramecc";
} else if (token == "sramecc-") {
if (gfx == "gfx90a" || gfx == "gfx940" || gfx == "gfx941" ||
gfx == "gfx942" || gfx == "gfx1200" || gfx == "gfx1201" ||
gfx == "gfx1102")
gfx == "gfx942" || gfx == "gfx1101" || gfx == "gfx1102" ||
gfx == "gfx1200" || gfx == "gfx1201")
return "";
return "-sramecc";
} else if (token == "xnack+") {
Expand Down
17 changes: 9 additions & 8 deletions third_party/xla/xla/stream_executor/device_description.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ class RocmComputeCapability {
bool gfx10_rx69xx() const { return gfx_version() == "gfx1030"; }

bool gfx11_rx7900() const { return (gfx_version() == "gfx1100" ||
gfx_version() == "gfx1101" ||
gfx_version() == "gfx1102"); }

bool gfx12_rx8900() const { return ((gfx_version() == "gfx1200") ||
Expand Down Expand Up @@ -151,14 +152,14 @@ class RocmComputeCapability {
std::string gcn_arch_name_ = "gfx000"; // default to invalid arch.

static constexpr absl::string_view kSupportedGfxVersions[]{
"gfx900", // MI25
"gfx906", // MI50 / MI60
"gfx908", // MI100
"gfx90a", // MI200
"gfx940", "gfx941", "gfx942", // MI300
"gfx1030", // RX68xx / RX69xx
"gfx1100", "gfx1102", // RX7900
"gfx1200", "gfx1201", // RX8900
"gfx900", // MI25
"gfx906", // MI50 / MI60
"gfx908", // MI100
"gfx90a", // MI200
"gfx940", "gfx941", "gfx942", // MI300
"gfx1030", // RX68xx / RX69xx
"gfx1100", "gfx1101", "gfx1102", // RX7900
"gfx1200", "gfx1201", // RX8900
};
};

Expand Down

0 comments on commit 21b23e3

Please sign in to comment.