From 9839139a1e8c535d1b036748a9b3c5bf62ba629b Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Thu, 30 Jan 2025 17:32:39 -0800 Subject: [PATCH] add support for Debian-based LMDE6 GCC 12.2.0 on LMDE6 either has a bug or is even stricter about treating warnings as errors than gcc13/14 on Fedora. Earlier I only had the CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized but for LMDE6, I need to add another flag also: $CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized -Wno-error=uninitialized These warnings are fixed in newer fbgemm, which is one sub-project that pytorch uses, but for the current pytorch stable version it is better to fix this simply by not treating these gcc warnings as errors. fixes: https://github.com/lamikr/rocm_sdk_builder/issues/175 Signed-off-by: Mika Laitio --- ...-preconfig-build-and-install-scripts.patch | 6 +-- ...ssage-if-ROCM_SOURCE_DIR-not-defined.patch | 6 +-- ...HIP-force-ROCM-detection-and-patches.patch | 6 +-- .../pytorch/0004-add-HSA_RUNTIME_64_LIB.patch | 6 +-- ...parameter-is-null-optimization-error.patch | 6 +-- ...with-min-and-max-for-fedora-40-issue.patch | 6 +-- ...erbose-output-on-dumpversion-command.patch | 6 +-- ...due-to-ubuntu-22.04-linking-failures.patch | 6 +-- ...09-enable-ucc-on-rocm-pytorch-builds.patch | 6 +-- ...X-linking-error-by-using-dynamic-lib.patch | 6 +-- ...011-allow-to-specify-target-gpu-list.patch | 6 +-- ...eError-must-be-called-with-a-datacla.patch | 6 +-- .../0013-fix-the-build-on-LMDE-6.patch | 43 +++++++++++++++++++ 13 files changed, 79 insertions(+), 36 deletions(-) create mode 100644 patches/rocm-6.1.2/pytorch/0013-fix-the-build-on-LMDE-6.patch diff --git a/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch b/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch index 6b77c8a..3f00031 100644 --- a/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch +++ 
b/patches/rocm-6.1.2/pytorch/0001-pytorch_rocm-preconfig-build-and-install-scripts.patch @@ -1,7 +1,7 @@ -From 38451c19cfaf82deb0775e087229982c3badd7e7 Mon Sep 17 00:00:00 2001 +From cf2b399adb9b031c9669f5b183a0a8df80d92dae Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 11 Dec 2023 09:20:07 -0800 -Subject: [PATCH 01/12] pytorch_rocm preconfig, build and install scripts +Subject: [PATCH 01/13] pytorch_rocm preconfig, build and install scripts - clean previous build, build wheel and install wheel scripts "-Wno-error=maybe-uninitialized" is needed during @@ -109,5 +109,5 @@ index 00000000000..7ad2528e9fe +fi +USE_FLASH_ATTENTION=ON AOTRITON_INSTALLED_PREFIX=${install_dir_prefix_rocm} CMAKE_PREFIX_PATH="${install_dir_prefix_rocm};${install_dir_prefix_rocm}/lib64 python" tools/amd_build/build_amd.py -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch b/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch index 3f87e40..5365534 100644 --- a/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch +++ b/patches/rocm-6.1.2/pytorch/0002-show-error-message-if-ROCM_SOURCE_DIR-not-defined.patch @@ -1,7 +1,7 @@ -From 2d18cb6ea9ff90313777154d6e035c27a500ebd8 Mon Sep 17 00:00:00 2001 +From f20c909dd0c1de15734a8df48256b60cf85b9841 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 10:16:19 -0700 -Subject: [PATCH 02/12] show error message if ROCM_SOURCE_DIR not defined +Subject: [PATCH 02/13] show error message if ROCM_SOURCE_DIR not defined ROCM_SOURCE_DIR is required by by third_party/kineto module and if it is not set, kineto will not find the @@ -31,5 +31,5 @@ index c4661e39e18..6e7c87631f8 100644 endif() -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch b/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch index b389706..9b907c1 100644 
--- a/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch +++ b/patches/rocm-6.1.2/pytorch/0003-LoadHIP-force-ROCM-detection-and-patches.patch @@ -1,7 +1,7 @@ -From 246975a0a4785fbb915e7066158f12ef1a009e3c Mon Sep 17 00:00:00 2001 +From fb5157871cfea6430d9f650c23a8ad74b6a6ada7 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 10:32:33 -0700 -Subject: [PATCH 03/12] LoadHIP force ROCM detection and patches +Subject: [PATCH 03/13] LoadHIP force ROCM detection and patches - set HIP_ROOT_DIR to ROCM_PATH which is set by the build scripts @@ -174,5 +174,5 @@ index fa39156031f..70fe47a9e64 100644 # check whether HIP declares new types set(file "${PROJECT_BINARY_DIR}/hip_new_types.cc") -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0004-add-HSA_RUNTIME_64_LIB.patch b/patches/rocm-6.1.2/pytorch/0004-add-HSA_RUNTIME_64_LIB.patch index 5c87a22..d8f67a4 100644 --- a/patches/rocm-6.1.2/pytorch/0004-add-HSA_RUNTIME_64_LIB.patch +++ b/patches/rocm-6.1.2/pytorch/0004-add-HSA_RUNTIME_64_LIB.patch @@ -1,7 +1,7 @@ -From 00e4efbe7a8fe419c7457aabe0438f9aaa5b90bf Mon Sep 17 00:00:00 2001 +From 40ecf0f9f591402ec5bde0000693847ea85e9c51 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 31 Aug 2024 23:28:55 -0700 -Subject: [PATCH 04/12] add HSA_RUNTIME_64_LIB +Subject: [PATCH 04/13] add HSA_RUNTIME_64_LIB - cmake library search works differently on ubuntu 24.04 compared to other distros.and fails to @@ -66,5 +66,5 @@ index 8a7329ddab7..fba1921bbce 100644 set_target_properties(torch_shm_manager PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../lib") -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch b/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch index 06b0bed..4e0ee94 100644 --- a/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch +++ 
b/patches/rocm-6.1.2/pytorch/0005-fix-gcc-parameter-is-null-optimization-error.patch @@ -1,7 +1,7 @@ -From 5eaa2afdc091972febc90c6d24f7c2a6171a0301 Mon Sep 17 00:00:00 2001 +From 8f84b4c978bf4e7e49dae357a08adaba4ec9f7a1 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 10 May 2024 19:25:50 -0700 -Subject: [PATCH 05/12] fix gcc parameter is null optimization error +Subject: [PATCH 05/13] fix gcc parameter is null optimization error https://github.com/pytorch/pytorch/issues/112089 and @@ -29,5 +29,5 @@ index ceeb607d52a..93f3d66aef0 100644 if(INSTALL_TEST) -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch b/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch index 7042273..a3fecb2 100644 --- a/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch +++ b/patches/rocm-6.1.2/pytorch/0006-replace-clamp-with-min-and-max-for-fedora-40-issue.patch @@ -1,7 +1,7 @@ -From 68e3860ce38f9fd060b0c7d96531e0dfd11ae2fa Mon Sep 17 00:00:00 2001 +From f340c58db9b4ce5638295748158943902087b653 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 31 May 2024 18:35:12 -0700 -Subject: [PATCH 06/12] replace clamp with min and max for fedora 40 issue +Subject: [PATCH 06/13] replace clamp with min and max for fedora 40 issue Fedora 40/gcc 14 throws following error during build time for clamp function usage during pytorch build time. 
@@ -40,5 +40,5 @@ index 5682ba27573..862bcb9614d 100644 }); }); -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch b/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch index 40814e3..9ee7cec 100644 --- a/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch +++ b/patches/rocm-6.1.2/pytorch/0007-handle-hipcc-verbose-output-on-dumpversion-command.patch @@ -1,7 +1,7 @@ -From d8b07816e1c9c0c8efdc8f9b1a7f82c20762f7e9 Mon Sep 17 00:00:00 2001 +From 824216fed9e1abf4257e0ed12d3882b6f0a574df Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 15 Jul 2024 00:57:55 -0400 -Subject: [PATCH 07/12] handle hipcc verbose output on dumpversion command +Subject: [PATCH 07/13] handle hipcc verbose output on dumpversion command Signed-off-by: Mika Laitio --- @@ -31,5 +31,5 @@ index bc1a9d8e6c0..665f83f8a9d 100644 minimum_required_version = MINIMUM_MSVC_VERSION compiler_info = subprocess.check_output(compiler, stderr=subprocess.STDOUT) -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0008-disable-mpitest-due-to-ubuntu-22.04-linking-failures.patch b/patches/rocm-6.1.2/pytorch/0008-disable-mpitest-due-to-ubuntu-22.04-linking-failures.patch index 182a1ab..7bc50e7 100644 --- a/patches/rocm-6.1.2/pytorch/0008-disable-mpitest-due-to-ubuntu-22.04-linking-failures.patch +++ b/patches/rocm-6.1.2/pytorch/0008-disable-mpitest-due-to-ubuntu-22.04-linking-failures.patch @@ -1,7 +1,7 @@ -From ad76d338537ff206f111d05ce5455418bf9a9a6a Mon Sep 17 00:00:00 2001 +From df61e58851af495396bdf9a82296cd42c7f40942 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Fri, 30 Aug 2024 14:07:36 -0700 -Subject: [PATCH 08/12] disable mpitest due to ubuntu 22.04 linking failures +Subject: [PATCH 08/13] disable mpitest due to ubuntu 22.04 linking failures - rocm-openmpi that is tried to link contains dependency to libpmix.so and on ubuntu 22.04 that is not linked @@ 
-44,5 +44,5 @@ index 5c8974836de..5f920230086 100644 install(TARGETS ProcessGroupMPITest DESTINATION bin) endif() -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0009-enable-ucc-on-rocm-pytorch-builds.patch b/patches/rocm-6.1.2/pytorch/0009-enable-ucc-on-rocm-pytorch-builds.patch index 9b03692..8401960 100644 --- a/patches/rocm-6.1.2/pytorch/0009-enable-ucc-on-rocm-pytorch-builds.patch +++ b/patches/rocm-6.1.2/pytorch/0009-enable-ucc-on-rocm-pytorch-builds.patch @@ -1,7 +1,7 @@ -From cc425877bf112546a7e52df018aff6bfaaf8e9b3 Mon Sep 17 00:00:00 2001 +From 528edf1b1906d84e158065b4bf1ecb3eefec435e Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Thu, 17 Oct 2024 22:51:26 -0700 -Subject: [PATCH 09/12] enable ucc on rocm pytorch builds +Subject: [PATCH 09/13] enable ucc on rocm pytorch builds - enable UCC and UCX by using environment variables - add 2 missing method calls used by ROCM to cuda version of @@ -45,5 +45,5 @@ index 678d576e4bf..18c9cfaa046 100755 +export UCX_HOME=$install_dir_prefix_rocm +USE_SYSTEM_UCC=1 USE_UCC=1 BUILD_TEST=0 USE_FLASH_ATTENTION=ON AOTRITON_INSTALLED_PREFIX=${install_dir_prefix_rocm} ROCM_PATH=${install_dir_prefix_rocm} ROCM_SOURCE_DIR=${install_dir_prefix_rocm} CMAKE_CXX_FLAGS="$CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized" CMAKE_PREFIX_PATH="${install_dir_prefix_rocm};${install_dir_prefix_rocm}/lib64/cmake;${install_dir_prefix_rocm}/lib/cmake;${install_dir_prefix_rocm}/lib64;${install_dir_prefix_rocm}/lib" ROCM_VERSION=${rocm_version_str} HIP_ROOT_DIR=${install_dir_prefix_rocm} USE_ROCM=1 PYTORCH_BUILD_VERSION="$(git describe --tags --abbrev=0 | sed 's/^v//')" PYTORCH_BUILD_NUMBER=1 python setup.py bdist_wheel -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0010-fix-GOTPCRELX-linking-error-by-using-dynamic-lib.patch b/patches/rocm-6.1.2/pytorch/0010-fix-GOTPCRELX-linking-error-by-using-dynamic-lib.patch index 99ee3bb..985b450 100644 --- 
a/patches/rocm-6.1.2/pytorch/0010-fix-GOTPCRELX-linking-error-by-using-dynamic-lib.patch +++ b/patches/rocm-6.1.2/pytorch/0010-fix-GOTPCRELX-linking-error-by-using-dynamic-lib.patch @@ -1,7 +1,7 @@ -From a64c4f19e14df9edc37b943679fc91efabdd79d3 Mon Sep 17 00:00:00 2001 +From f3639887d7c211eaa3fdf33474a79c4707a4827f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Mon, 28 Oct 2024 19:23:26 -0700 -Subject: [PATCH 10/12] fix GOTPCRELX linking error by using dynamic lib +Subject: [PATCH 10/13] fix GOTPCRELX linking error by using dynamic lib Signed-off-by: Mika Laitio --- @@ -35,5 +35,5 @@ index ec6f09b6053..cb6080f5f7a 100644 set(AOTRITON_FOUND TRUE) endif() # __AOTRITON_INCLUDED -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0011-allow-to-specify-target-gpu-list.patch b/patches/rocm-6.1.2/pytorch/0011-allow-to-specify-target-gpu-list.patch index ed900df..7e9dba6 100644 --- a/patches/rocm-6.1.2/pytorch/0011-allow-to-specify-target-gpu-list.patch +++ b/patches/rocm-6.1.2/pytorch/0011-allow-to-specify-target-gpu-list.patch @@ -1,7 +1,7 @@ -From f1b12565107005f62c5ef0f47cd18a23cbcc0b1e Mon Sep 17 00:00:00 2001 +From 1d34f727313fa0e0614cb4c9b274642e3f298e8f Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Thu, 7 Nov 2024 22:24:08 -0800 -Subject: [PATCH 11/12] allow to specify target gpu list +Subject: [PATCH 11/13] allow to specify target gpu list Signed-off-by: Mika Laitio --- @@ -26,5 +26,5 @@ index 18c9cfaa046..86ac0cb005e 100755 export CFLAGS="-Wno-error=maybe-uninitialized" unset CPPFLAGS -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0012-bitsandbytes-TypeError-must-be-called-with-a-datacla.patch b/patches/rocm-6.1.2/pytorch/0012-bitsandbytes-TypeError-must-be-called-with-a-datacla.patch index c3da142..3f88616 100644 --- a/patches/rocm-6.1.2/pytorch/0012-bitsandbytes-TypeError-must-be-called-with-a-datacla.patch +++ b/patches/rocm-6.1.2/pytorch/0012-bitsandbytes-TypeError-must-be-called-with-a-datacla.patch @@ -1,7 +1,7 @@ -From 
c640a84b387fdc8effbddabe12ebd3b19e9fb89e Mon Sep 17 00:00:00 2001 +From 1ef5608571aaa4301554420982d3cce91f965ff6 Mon Sep 17 00:00:00 2001 From: Mika Laitio Date: Sat, 25 Jan 2025 16:01:30 -0800 -Subject: [PATCH 12/12] bitsandbytes TypeError: must be called with a dataclass +Subject: [PATCH 12/13] bitsandbytes TypeError: must be called with a dataclass type or instance bitsandbyte test in @@ -75,5 +75,5 @@ index ba36f40a226..a64a519e2ee 100644 # Define `instance_descriptor` function with clear conditional handling if attrs_descriptor_available: -- -2.48.1 +2.39.5 diff --git a/patches/rocm-6.1.2/pytorch/0013-fix-the-build-on-LMDE-6.patch b/patches/rocm-6.1.2/pytorch/0013-fix-the-build-on-LMDE-6.patch new file mode 100644 index 0000000..ded0efb --- /dev/null +++ b/patches/rocm-6.1.2/pytorch/0013-fix-the-build-on-LMDE-6.patch @@ -0,0 +1,43 @@ +From 6de324a5c613664b7ec3162fc399c1fc4f4b7665 Mon Sep 17 00:00:00 2001 +From: Mika Laitio +Date: Thu, 30 Jan 2025 17:06:12 -0800 +Subject: [PATCH 13/13] fix the build on LMDE 6 + +GCC 12.2.0 on LMDE6 either has a bug or is even stricter +about treating warnings as errors than gcc13/14 +on Fedora. Earlier I only had the + +CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized +but for LMDE6, I need to add another flag also: +$CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized -Wno-error=uninitialized + +I think these warnings are fixed in newer fbgemm, which is one +sub-project that pytorch uses. For the stable pytorch version the easiest way to fix +this now is to stop gcc from treating these warnings as errors. + +Signed-off-by: Mika Laitio +--- + build_rocm.sh | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/build_rocm.sh b/build_rocm.sh +index 86ac0cb005e..77c69772565 100755 +--- a/build_rocm.sh ++++ b/build_rocm.sh +@@ -23,9 +23,11 @@ else + echo "Building Pytorch for GPU's used on rocm sdk build." 
+ fi + unset LDFLAGS +-export CFLAGS="-Wno-error=maybe-uninitialized" ++#export CFLAGS="-Wno-error=maybe-uninitialized -Wno-error=uninitialized" ++#export CPPFLAGS="-Wno-error=maybe-uninitialized -Wno-error=uninitialized" ++unset CFLAGS + unset CPPFLAGS + unset PKG_CONFIG_PATH + export UCC_HOME=$install_dir_prefix_rocm + export UCX_HOME=$install_dir_prefix_rocm +-USE_SYSTEM_UCC=1 USE_UCC=1 BUILD_TEST=0 USE_FLASH_ATTENTION=ON AOTRITON_INSTALLED_PREFIX=${install_dir_prefix_rocm} ROCM_PATH=${install_dir_prefix_rocm} ROCM_SOURCE_DIR=${install_dir_prefix_rocm} CMAKE_CXX_FLAGS="$CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized" CMAKE_PREFIX_PATH="${install_dir_prefix_rocm};${install_dir_prefix_rocm}/lib64/cmake;${install_dir_prefix_rocm}/lib/cmake;${install_dir_prefix_rocm}/lib64;${install_dir_prefix_rocm}/lib" ROCM_VERSION=${rocm_version_str} HIP_ROOT_DIR=${install_dir_prefix_rocm} USE_ROCM=1 PYTORCH_BUILD_VERSION="$(git describe --tags --abbrev=0 | sed 's/^v//')" PYTORCH_BUILD_NUMBER=1 python setup.py bdist_wheel ++USE_SYSTEM_UCC=1 USE_UCC=1 BUILD_TEST=0 USE_FLASH_ATTENTION=ON AOTRITON_INSTALLED_PREFIX=${install_dir_prefix_rocm} ROCM_PATH=${install_dir_prefix_rocm} ROCM_SOURCE_DIR=${install_dir_prefix_rocm} CMAKE_C_FLAGS="$CMAKE_C_FLAGS -Wno-error=maybe-uninitialized -Wno-error=uninitialized" CMAKE_CXX_FLAGS="$CMAKE_CXX_FLAGS -Wno-error=maybe-uninitialized -Wno-error=uninitialized" CMAKE_PREFIX_PATH="${install_dir_prefix_rocm};${install_dir_prefix_rocm}/lib64/cmake;${install_dir_prefix_rocm}/lib/cmake;${install_dir_prefix_rocm}/lib64;${install_dir_prefix_rocm}/lib" ROCM_VERSION=${rocm_version_str} HIP_ROOT_DIR=${install_dir_prefix_rocm} USE_ROCM=1 PYTORCH_BUILD_VERSION="$(git describe --tags --abbrev=0 | sed 's/^v//')" PYTORCH_BUILD_NUMBER=1 python setup.py bdist_wheel +-- +2.39.5 +