Skip to content

Always calculate rotary embedding caches in model builder #3976

Always calculate rotary embedding caches in model builder

Always calculate rotary embedding caches in model builder #3976

name: "Windows CPU arm64 Build"
on:
workflow_dispatch:
push:
branches:
- main
- rel-*
pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
env:
binaryDir: 'build/cpu/win-arm64'
ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1"
ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime"
jobs:
windows-cpu-arm64-build:
runs-on: [ "self-hosted", "1ES.Pool=onnxruntime-genai-win11-arm64-cpu" ]
steps:
- name: Checkout OnnxRuntime GenAI repo
uses: actions/checkout@v4
with:
submodules: true
- name: Setup Visual Studio 2022
uses: microsoft/[email protected]
with:
vs-version: '17.4'
msbuild-architecture: arm64
- uses: nuget/setup-nuget@v2
with:
nuget-version: '5.x'
- name: Setup Java 21
uses: actions/setup-java@v4
with:
java-version: '21'
distribution: 'temurin'
cache: 'gradle'
- name: Setup Gradle
uses: gradle/actions/setup-gradle@v3
with:
gradle-version: '8.6'
- name: Download OnnxRuntime Nightly
shell: powershell
run: |
$resp = Invoke-RestMethod "${{ env.ORT_NIGHTLY_REST_API }}"
$ORT_NIGHTLY_VERSION = $resp.value[0].versions[0].normalizedVersion
Write-Host "$ORT_NIGHTLY_VERSION"
"ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -x -NonInteractive
- run: Get-ChildItem ${{ env.ORT_PACKAGE_NAME }} -Recurse
continue-on-error: true
- name: Extract OnnxRuntime library and header files
run: |
mkdir ort/lib
move ${{ env.ORT_PACKAGE_NAME }}/build/native/include ort/
move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-arm64/native/* ort/lib/
- name: Configure CMake
run: |
python -m pip install wheel requests
cmake --preset windows_arm64_cpu_release
- name: Build with CMake
run: |
cmake --build --preset windows_arm64_cpu_release --parallel
- name: Install the Python Wheel and Test Dependencies
run: |
python -m pip install "numpy<2" coloredlogs flatbuffers packaging protobuf sympy pytest
python -m pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ ort-nightly-qnn
python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps
- name: Run the Python Tests
run: |
python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"
- name: Build the C# API and Run the C# Tests
run: |
cd test\csharp
dotnet test /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release" /p:OrtLibDir="$env:GITHUB_WORKSPACE\ort\lib"
- name: Build the Java API and Run the Java Tests
run: |
python build.py --config=Release --build_dir $env:binaryDir --build_java --parallel
- name: Verify Build Artifacts
if: always()
continue-on-error: true
run: |
Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse
Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir\test -Recurse
- name: Run tests
run: |
copy $env:GITHUB_WORKSPACE\ort\lib\* .\$env:binaryDir\Release
& .\$env:binaryDir\Release\unit_tests.exe