Skip to content

Commit

Permalink
Merge amd-staging into amd-mainline 20241221
Browse files Browse the repository at this point in the history
Signed-off-by: Maisam Arif <[email protected]>
  • Loading branch information
Arif, Maisam authored Dec 21, 2024
2 parents 0ccb473 + e47add5 commit 6114678
Show file tree
Hide file tree
Showing 19 changed files with 463 additions and 168 deletions.
167 changes: 167 additions & 0 deletions .github/workflows/amdsmi-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Workflow identity and triggers: runs for pull requests and pushes that
# target the amd-staging branch.
name: Build and Install AMDSMI

on:
  pull_request:
    branches: [amd-staging]
  push:
    branches: [amd-staging]

jobs:
# Builds the AMD-SMI .deb packages inside a per-distro Docker container and
# installs them in that same container, once per entry in the OS matrix.
build-and-install-deb:
  name: Build and Install on Debian-based Systems
  runs-on:
    - self-hosted
    - ${{ vars.RUNNER_TYPE }}
  strategy:
    matrix:
      os:
        - Ubuntu20
        - Ubuntu22

  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        # Do not run git clean/reset before fetching; the build folder is
        # wiped inside the container below.
        clean: false

    # Exactly one of the following steps runs per matrix entry and exports
    # DOCKER_IMAGE to the later steps through GITHUB_ENV.
    - name: Set Docker Image for Ubuntu20
      if: matrix.os == 'Ubuntu20'
      run: echo "DOCKER_IMAGE=${{ secrets.UBUNTU20_DOCKER_IMAGE }}" >> "$GITHUB_ENV"

    - name: Set Docker Image for Ubuntu22
      if: matrix.os == 'Ubuntu22'
      run: echo "DOCKER_IMAGE=${{ secrets.UBUNTU22_DOCKER_IMAGE }}" >> "$GITHUB_ENV"

    - name: Build and Install on ${{ matrix.os }}
      run: |
        # Fail early with a clear message when no image was resolved (for
        # example when the secret is not available to this run); otherwise
        # docker would treat /bin/bash as the image name.
        : "${DOCKER_IMAGE:?DOCKER_IMAGE is empty - check the image secret for this OS}"
        # --rm removes the container on exit so stopped containers do not
        # accumulate on the self-hosted runner. Inside the double-quoted
        # script, escaped \$ is expanded in the container, not on the host.
        docker run --rm --privileged -v "${{ github.workspace }}:/src:rw" $DOCKER_IMAGE /bin/bash -c "
          set -e
          cd /src
          echo 'Inside Docker Container on ${{ matrix.os }}'
          # Build AMD-SMI
          BUILD_FOLDER=/src/build
          DEB_BUILD='amd-smi-lib*99999-local_amd64.deb'
          sudo rm -rf \$BUILD_FOLDER
          mkdir -p \$BUILD_FOLDER
          cd \$BUILD_FOLDER
          cmake .. -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON
          make -j \$(nproc)
          make package
          # Install AMD-SMI
          sudo apt install -y \$BUILD_FOLDER/\$DEB_BUILD
          # -f keeps this step from failing if the link already exists
          sudo ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
          # Check If Installed
          echo 'Listing installed packages:'
          python3 -m pip list | grep amd
          python3 -m pip list | grep pip
          python3 -m pip list | grep setuptools
          echo 'Completed build and installation on ${{ matrix.os }}'
        "
# Builds the AMD-SMI .rpm packages inside a per-distro Docker container and
# installs them in that same container with the distro's package manager,
# once per entry in the OS matrix.
build-and-install-rpm:
  name: Build and Install on RPM-based Systems
  runs-on:
    - self-hosted
    - ${{ vars.RUNNER_TYPE }}
  strategy:
    matrix:
      os:
        - SLES
        - RHEL8
        - RHEL9
        - Mariner2
        - AzureLinux3
        - AlmaLinux8

  steps:
    - name: Checkout Repository
      uses: actions/checkout@v4
      with:
        # Do not run git clean/reset before fetching; the build folder is
        # wiped inside the container below.
        clean: false

    # Exactly one of the following steps runs per matrix entry and exports
    # DOCKER_IMAGE and PACKAGE_MANAGER to the later steps through GITHUB_ENV.
    - name: Set Docker Image and Package Manager for SLES
      if: matrix.os == 'SLES'
      run: |
        echo "DOCKER_IMAGE=${{ secrets.SLES_DOCKER_IMAGE }}" >> "$GITHUB_ENV"
        echo "PACKAGE_MANAGER=zypper" >> "$GITHUB_ENV"

    - name: Set Docker Image and Package Manager for RHEL8
      if: matrix.os == 'RHEL8'
      run: |
        echo "DOCKER_IMAGE=${{ secrets.RHEL8_DOCKER_IMAGE }}" >> "$GITHUB_ENV"
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"

    - name: Set Docker Image and Package Manager for RHEL9
      if: matrix.os == 'RHEL9'
      run: |
        echo "DOCKER_IMAGE=${{ secrets.RHEL9_DOCKER_IMAGE }}" >> "$GITHUB_ENV"
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"

    - name: Set Docker Image and Package Manager for Mariner2
      if: matrix.os == 'Mariner2'
      run: |
        echo "DOCKER_IMAGE=${{ secrets.MARINER2_DOCKER_IMAGE }}" >> "$GITHUB_ENV"
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"

    - name: Set Docker Image and Package Manager for AzureLinux3
      if: matrix.os == 'AzureLinux3'
      run: |
        echo "DOCKER_IMAGE=${{ secrets.AZURELINUX3_DOCKER_IMAGE }}" >> "$GITHUB_ENV"
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"

    - name: Set Docker Image and Package Manager for AlmaLinux8
      if: matrix.os == 'AlmaLinux8'
      run: |
        echo "DOCKER_IMAGE=${{ secrets.ALMALINUX8_DOCKER_IMAGE }}" >> "$GITHUB_ENV"
        echo "PACKAGE_MANAGER=dnf" >> "$GITHUB_ENV"

    - name: Build and Install on ${{ matrix.os }}
      run: |
        # Fail early with a clear message when no image was resolved (for
        # example when the secret is not available to this run); otherwise
        # docker would treat /bin/bash as the image name.
        : "${DOCKER_IMAGE:?DOCKER_IMAGE is empty - check the image secret for this OS}"
        # --rm removes the container on exit so stopped containers do not
        # accumulate on the self-hosted runner. Inside the double-quoted
        # script, escaped \$ is expanded in the container, while the
        # unescaped $PACKAGE_MANAGER is substituted by the host shell before
        # the container starts.
        docker run --rm --privileged -v "${{ github.workspace }}:/src:rw" $DOCKER_IMAGE /bin/bash -c "
          set -ex
          cd /src
          echo 'Inside Docker Container on ${{ matrix.os }}'
          # Build AMD-SMI
          BUILD_FOLDER=/src/build
          RPM_BUILD='amd-smi-lib-*99999-local*.rpm'
          rm -rf \$BUILD_FOLDER
          mkdir -p \$BUILD_FOLDER
          cd \$BUILD_FOLDER
          cmake .. -DCPACK_RPM_PACKAGE_RELOCATABLE=ON -DBUILD_TESTS=ON -DENABLE_ESMI_LIB=ON
          make -j \$(nproc)
          make package
          # Adjust permissions of the build directory
          chmod -R a+rw \$BUILD_FOLDER
          # Install AMD-SMI
          case '$PACKAGE_MANAGER' in
            zypper)
              timeout 10m sudo zypper --no-refresh --no-gpg-checks install -y \$BUILD_FOLDER/\$RPM_BUILD
              ;;
            dnf)
              timeout 10m dnf install -y --skip-broken \$BUILD_FOLDER/\$RPM_BUILD
              ;;
            *)
              echo 'Unsupported package manager: $PACKAGE_MANAGER'
              exit 1
              ;;
          esac
          # -f keeps this step from failing if the link already exists
          ln -sf /opt/rocm/bin/amd-smi /usr/local/bin
          # Check If Installed
          echo 'Listing installed packages:'
          python3 -m pip list | grep amd
          python3 -m pip list | grep pip
          python3 -m pip list | grep setuptools
          echo 'Completed build and installation on ${{ matrix.os }}'
        "
8 changes: 4 additions & 4 deletions .github/workflows/rocm_ci_caller.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
name: ROCm CI Caller
on:
# Commenting below to avoid re-runs of amd smi for trivial rebases
# pull_request:
# branches: [amd-staging]
# types: [opened, reopened, synchronize]
push:
pull_request:
branches: [amd-staging, amd-mainline]
types: [opened, reopened, synchronize]
push:
branches: [amd-mainline]
workflow_dispatch:

jobs:
Expand Down
74 changes: 19 additions & 55 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,15 @@ GPU: 1

### Optimized

- **Added additional help information to `amd-smi set --help` command**.
- Subcommands now detail which values are acceptable as input. These include:
- `amd-smi set --perf-level` with performance levels
- `amd-smi set --profile` with power profiles
- `amd-smi set --perf-determinism` with preset GPU frequency limits
- `amd-smi set --power-cap` with valid power cap values
- `amd-smi set --soc-pstate` with soc pstate policy ids
- `amd-smi set --xgmi-plpd` with xgmi per link power down policy ids

- **Modified `amd-smi` CLI to allow case-insensitive arguments if the argument does not begin with a single dash**.
- With this change `amd-smi version` and `amd-smi VERSION` will now yield the same output.
- `amd-smi static --bus` and `amd-smi STATIC --BUS` will produce identical results.
Expand All @@ -212,14 +221,14 @@ $ amd-smi xgmi
LINK METRIC TABLE:
bdf bit_rate max_bandwidth link_type 0000:05:00.0 0000:26:00.0 0000:46:00.0 0000:65:00.0 0000:85:00.0 0000:a6:00.0 0000:c6:00.0 0000:e5:00.0
GPU0 0000:05:00.0 32 Gb/s 512 Gb/s XGMI
Read N/A 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB
Write N/A 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB
Read N/A 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB
Write N/A 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB
GPU1 0000:26:00.0 32 Gb/s 512 Gb/s XGMI
Read 1.123 PB N/A 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB
Write 229.1 MB N/A 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB
Read 1.123 PB N/A 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB
Write 229.1 MB N/A 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB
GPU2 0000:46:00.0 32 Gb/s 512 Gb/s XGMI
Read 1.123 PB 1.123 PB N/A 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB
Write 229.1 MB 229.1 MB N/A 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB
Read 1.123 PB 1.123 PB N/A 1.123 PB 1.123 PB 1.123 PB 1.123 PB 1.123 PB
Write 229.1 MB 229.1 MB N/A 229.1 MB 229.1 MB 229.1 MB 229.1 MB 229.1 MB
...
```

Expand All @@ -231,11 +240,13 @@ GPU2 0000:46:00.0 32 Gb/s 512 Gb/s XGMI

- **AMD SMI only reports 63 GPU devices when setting CPX on all 8 GPUs**
When setting CPX as a partition mode, there is a DRM node limitation of 64.

This is a known limitation of the Linux kernel, not the driver. Other drivers, such as those using PCIe space (e.g., ast), may be occupying the necessary DRM nodes.

The number of DRM nodes used can be checked via `ls /sys/class/drm`

- References to kernel changes:
- [Updates to number of nodes](https://cgit.freedesktop.org/drm/libdrm/commit/?id=7130cb163eb860d4a965c6708b64fe87cee881d6)
- [Identification of node type](https://cgit.freedesktop.org/drm/libdrm/commit/?id=3bc3cca230c5a064b2f554f26fdec27db0f5ead8)

Options are as follows:
1) ***Workaround - removing other devices using DRM nodes***

Expand All @@ -246,54 +257,7 @@ GPU2 0000:46:00.0 32 Gb/s 512 Gb/s XGMI
d. Confirm `amd-smi list` reports all nodes (this can vary per MI ASIC)

2) ***Update your OS' kernel***
Typically you can find examples online by searching "`Update kernel <your OS version> command line`"

Ex. "Update kernel Ubuntu 22.04 command line" should provide some good examples.
https://phoenixnap.com/kb/how-to-update-kernel-ubuntu

3) ***Building and installing your own kernel***
*This option is helpful for users on OS distributions that have not yet merged the necessary changes.*
https://phoenixnap.com/kb/build-linux-kernel

All changes are in the mainline kernel if users need to build their own.

References to kernel changes:
```text
for libdrm :
Author: James Zhu <[email protected]>
Date: Mon Aug 7 10:14:18 2023 -0400
xf86drm: use drm device name to identify drm node type
Currently drm node's minor range is used to identify node's type.
Since kernel drm uses node type name and minor to generate drm
device name, It will be more general to use drm device name to
identify drm node type.
Signed-off-by: James Zhu <[email protected]>
Reviewed-by: Simon Ser <[email protected]>
commit 1080273c2b31db6f031a7f889f3104f53ab4502c
Author: James Zhu <[email protected]>
Date: Mon Aug 7 10:06:32 2023 -0400
xf86drm: update DRM_NODE_NAME_MAX supporting more nodes
Current DRM_NODE_NAME_MAX only can support up to 999 nodes,
Update to support up to 2^MINORBITS nodes.
Signed-off-by: James Zhu <[email protected]>
Reviewed-by: Simon Ser <[email protected]>
```

## amd_smi_lib for ROCm 6.3.1

Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ find_program(GIT NAMES git)

## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "amdsmi_pkg_ver")
get_package_version_number("24.7.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
get_package_version_number("24.7.2" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
message("Package version: ${PKG_VERSION_STR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}")
Expand Down
Loading

0 comments on commit 6114678

Please sign in to comment.