R2.13 rocm enhanced gfx942 cleanup #2767

Open · wants to merge 9 commits into base: r2.13-rocm-enhanced-upate-llvm
12 changes: 6 additions & 6 deletions build_rocm_python3
@@ -47,15 +47,15 @@ if [ -f /usertools/rocm.bazelrc ]; then
   if [[ -n $nightly ]]; then
     # Remove any previous builds and build nightly
     rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl
-    python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version &&
-    bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
+    #python3 tensorflow/tools/ci_build/update_version.py --nightly --rocm_version &&
+    bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --cxxopt="-DTENSORFLOW_HSACO_USE_ROCM_LLVM" --config=v1 --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     ./bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --nightly_flag &&
     pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
   else
     # Remove any previous builds and build release
     rm -f $TF_PKG_LOC/tensorflow*.whl
-    python3 tensorflow/tools/ci_build/update_version.py --rocm_version &&
-    bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
+    #python3 tensorflow/tools/ci_build/update_version.py --rocm_version &&
+    bazel --bazelrc=/usertools/rocm.bazelrc build $RESOURCE_OPTION --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --cxxopt="-DTENSORFLOW_HSACO_USE_ROCM_LLVM" --config=v1 --config=rocm --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     ./bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --project_name tensorflow_rocm &&
     pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
   fi
@@ -66,13 +66,13 @@ else
   if [[ -n $nightly ]]; then
     # Remove any previous builds and build nightly
     rm -f $TF_PKG_LOC/tf_nightly_rocm*.whl
-    bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
+    bazel build $RESOURCE_OPTION --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --cxxopt="-DTENSORFLOW_HSACO_USE_ROCM_LLVM" --config=v1 --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm --nightly_flag &&
     pip3 install --upgrade $TF_PKG_LOC/tf_nightly_rocm*.whl
   else
     # Remove any previous builds and build release
     rm -f $TF_PKG_LOC/tensorflow*.whl
-    bazel build $RESOURCE_OPTION --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
+    bazel build $RESOURCE_OPTION --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" --cxxopt="-DTENSORFLOW_HSACO_USE_ROCM_LLVM" --config=v1 --config=opt --config=rocm //tensorflow/tools/pip_package:build_pip_package --verbose_failures &&
     bazel-bin/tensorflow/tools/pip_package/build_pip_package $TF_PKG_LOC --rocm &&
     pip3 install --upgrade $TF_PKG_LOC/tensorflow*.whl
   fi
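
Note on the new build flags: --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" pins libstdc++'s pre-C++11 string/list ABI so the wheel links consistently with libraries built the same way, -DTENSORFLOW_HSACO_USE_ROCM_LLVM (per the macro name and the new BUILD dependency below) routes HSACO code-object generation through ROCm's LLVM, and --config=v1 picks the v1 config block from TensorFlow's bazelrc. A minimal standalone check of the ABI macro, as a generic libstdc++ illustration rather than part of this PR:

#include <iostream>

int main() {
  // _GLIBCXX_USE_CXX11_ABI selects between libstdc++'s old copy-on-write
  // std::string/std::list ABI (0) and the C++11-conforming ABI (1); mixing the
  // two across a shared-library boundary causes unresolved-symbol errors,
  // which is why the build pins it explicitly.
#if defined(_GLIBCXX_USE_CXX11_ABI)
  std::cout << "_GLIBCXX_USE_CXX11_ABI=" << _GLIBCXX_USE_CXX11_ABI << "\n";
#else
  std::cout << "not building against libstdc++\n";
#endif
  return 0;
}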
15 changes: 9 additions & 6 deletions tensorflow/compiler/xla/service/gpu/autotuner_util.cc
@@ -56,16 +56,19 @@ static auto& autotune_cache ABSL_GUARDED_BY(autotune_cache_mu) =
 
 namespace {
 
-void CSVLegend(std::ostream& os) {
+void CSVLegend(std::ostream& os, bool full_string=false) {
 
   os << kCsvComment << " m" << kCsvSep << "n" << kCsvSep << "k" << kCsvSep
      << "batch_count" << kCsvSep << "trans_a" << kCsvSep
-     << "trans_b" << kCsvSep
-     << "type_a" << kCsvSep << "type_b" << kCsvSep
+     << "trans_b" << kCsvSep << "type_a" << kCsvSep << "type_b" << kCsvSep
      << "type_c" << kCsvSep << "lda" << kCsvSep << "ldb" << kCsvSep
      << "ldc" << kCsvSep << "stride_a" << kCsvSep
-     << "stride_b" << kCsvSep << "stride_c" << kCsvSep
-     << "alg_index" << std::endl;
+     << "stride_b" << kCsvSep << "stride_c";
+  if (full_string) {
+    os << kCsvSep << "alpha_re" << kCsvSep << "alpha_im" << kCsvSep
+       << "beta" << kCsvSep << "epilogue";
+  }
+  os << kCsvSep << "alg_index" << std::endl;
 }
 
 }  // namespace
@@ -89,7 +92,7 @@ void CSVLegend(std::ostream& os) {
     if (!s_dump_fs->is_open()) {
       LOG(WARNING) << "Unable to open: " << dump_path << " for writing!";
     }
-    CSVLegend(*s_dump_fs);
+    CSVLegend(*s_dump_fs, true);
   }
   *s_dump_fs << key.Get() << kCsvSep << it->second << std::endl;
 }
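
For reference, a small sketch of the legend line CSVLegend() now emits with and without the new optional columns; it assumes kCsvComment is "#" and kCsvSep is ',' (stand-ins for the real constants defined in autotuner_util.cc, which may differ):

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Assumed stand-ins for kCsvComment / kCsvSep.
  const std::string comment = "#";
  const char sep = ',';
  const std::vector<std::string> cols = {
      "m", "n", "k", "batch_count", "trans_a", "trans_b", "type_a", "type_b",
      "type_c", "lda", "ldb", "ldc", "stride_a", "stride_b", "stride_c"};
  for (bool full_string : {false, true}) {
    std::string line = comment + " ";
    for (const auto& c : cols) line += c + sep;
    if (full_string) {
      // Extra columns written only when the full legend is requested.
      for (const std::string c : {"alpha_re", "alpha_im", "beta", "epilogue"})
        line += c + sep;
    }
    line += "alg_index";
    std::cout << line << "\n";
  }
  return 0;
}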
4 changes: 2 additions & 2 deletions tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
@@ -367,7 +367,7 @@ StatusOr<bool> RunOnInstruction(HloInstruction* gemm,
 
   GemmAutotuner autotuner(config);
   TF_ASSIGN_OR_RETURN(auto new_algorithm,
-      AutotunerUtil::Autotune(se::gpu::ToCSVString(gemm_config, false), config,
+      AutotunerUtil::Autotune(se::gpu::ToCSVString(gemm_config, true), config,
         [&]() -> StatusOr<AutotunerUtil::CacheValue> {
           TF_ASSIGN_OR_RETURN(auto algo, autotuner(gemm, gemm_config));
           return algo.has_gemm() ? algo.gemm().algorithm() : se::blas::kDefaultAlgorithm;
@@ -410,7 +410,7 @@ StatusOr<AutotunerUtil::CacheValue> GemmAlgorithmPicker::RunStandalone(
   GemmAutotuner autotuner(config_);
   GemmConfig gemm_config{cfg};
 
-  return AutotunerUtil::Autotune(se::gpu::ToCSVString(gemm_config, false), config_,
+  return AutotunerUtil::Autotune(se::gpu::ToCSVString(gemm_config, true), config_,
     [&]() -> StatusOr<AutotunerUtil::CacheValue> {
       TF_ASSIGN_OR_RETURN(auto algo, autotuner(gemm_config, std::move(input_shapes),
                                                output_shape, debug_options));
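
Passing true to ToCSVString makes the autotune cache key carry the full gemm description, matching the extended CSV legend above, so gemms that differ only in the extra fields (for example the epilogue) no longer collapse onto one cache entry. A toy illustration of why the fuller key matters; GemmDesc and ToCsv are made-up stand-ins, not XLA's types:

#include <iostream>
#include <sstream>
#include <string>

// Hypothetical gemm description; XLA's GemmConfig holds far more fields.
struct GemmDesc {
  int m, n, k;
  double beta;
  std::string epilogue;
};

std::string ToCsv(const GemmDesc& d, bool full) {
  std::ostringstream os;
  os << d.m << ',' << d.n << ',' << d.k;
  if (full) os << ',' << d.beta << ',' << d.epilogue;  // extra fields disambiguate
  return os.str();
}

int main() {
  GemmDesc with_bias{1024, 1024, 1024, 1.0, "BIAS"};
  GemmDesc plain{1024, 1024, 1024, 0.0, "DEFAULT"};
  // Short key: the two gemms collide and would share one autotuned algorithm.
  std::cout << (ToCsv(with_bias, false) == ToCsv(plain, false)) << "\n";  // 1
  // Full key (as after this change): each gets its own cache entry.
  std::cout << (ToCsv(with_bias, true) == ToCsv(plain, true)) << "\n";    // 0
  return 0;
}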
@@ -173,7 +173,9 @@ auto CublasLtMatmulThunk::GetCachedMatmulPlan(
       return std::move(plan);
     }
   }
-  return InternalError("Wrong algorithm ID: %d", algorithm_id);
+  TF_RETURN_IF_ERROR(plan->SetAlgorithm(algorithms[0]));
+  LOG(WARNING) << "Wrong algorithm ID: " << algorithm_id << " use default instead.";
+  return std::move(plan);
  };
  return cache.GetOrCreate(canonical_hlo_, create);
 }
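
The thunk now degrades gracefully: if the algorithm ID recorded in the autotune cache is not offered by the current blas-lt plan, it warns and falls back to the first supported algorithm instead of failing with InternalError. A self-contained sketch of that pattern; Algo and PickAlgorithm are illustrative stand-ins, not the XLA types:

#include <cstdint>
#include <iostream>
#include <vector>

struct Algo {
  int64_t id;
};

// Mirrors the new behaviour: prefer the cached ID, otherwise warn and use the
// first algorithm the library reports instead of returning an error.
Algo PickAlgorithm(const std::vector<Algo>& algorithms, int64_t wanted_id) {
  for (const Algo& a : algorithms) {
    if (a.id == wanted_id) return a;
  }
  std::cerr << "Wrong algorithm ID: " << wanted_id << " use default instead.\n";
  return algorithms.front();
}

int main() {
  const std::vector<Algo> algorithms = {{7}, {12}, {23}};
  std::cout << PickAlgorithm(algorithms, 12).id << "\n";  // 12 (cache hit)
  std::cout << PickAlgorithm(algorithms, 99).id << "\n";  // 7 (fallback + warning)
  return 0;
}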
1 change: 1 addition & 0 deletions tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/BUILD
@@ -65,6 +65,7 @@ cc_library(
         "@llvm-project//llvm:Target",
     ] + if_rocm_is_configured([
         "@local_config_rocm//rocm:rocm_headers",
+        "//tensorflow/tsl/platform:rocm_rocdl_path",
         "@llvm-project//llvm:AMDGPUCodeGen",
     ]),
 )
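
The new dependency presumably lets the LLVM GPU backend resolve the ROCm device-library (ROCDL) bitcode path needed by the ROCm-LLVM HSACO route enabled above. A usage sketch, assuming the target exposes tsl::RocdlRoot() from tensorflow/tsl/platform/rocm_rocdl_path.h as in upstream TSL; the diff itself does not show how the backend consumes it:

#include <iostream>

#include "tensorflow/tsl/platform/rocm_rocdl_path.h"  // assumed header for this target

int main() {
  // Assumed API: RocdlRoot() returns the directory holding ROCm's device
  // bitcode libraries (ocml.bc, ockl.bc, ...) that device code links against.
  std::cout << tsl::RocdlRoot() << std::endl;
  return 0;
}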