Docker update trt onnx (#432)

* add trt and onnx * add trt and onnx 2
michaelfeil · Oct 19, 2024 · 6a9f8cd · 6a9f8cd
1 parent 6df448f
commit 6a9f8cd
Show file tree

Hide file tree

Showing 8 changed files with 259 additions and 71 deletions.
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
@@ -32,6 +32,17 @@ jobs:
       platforms: "linux/amd64"
     secrets: inherit
 
+  docker-container-push-onnx-trt:
+    uses:
+      ./.github/workflows/release_docker_container.yaml
+    with:
+      # working-directory: libs/infinity_emb
+      dockerfile: libs/infinity_emb/Dockerfile.trt_onnx_auto
+      image: michaelf34/infinity
+      appendix_tag: "-trt-onnx"
+      platforms: "linux/amd64"
+    secrets: inherit
+
   # docker-container-push-amd:
   #   uses:
   #     ./.github/workflows/release_docker_container.yaml

diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml
@@ -1,20 +1,35 @@
+# run all commands here via: `make template_docker`
+
 # 1. Guide: pip install jinja2 jinja2-cli
 nvidia:
   # 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto
   base_image: 'nvidia/cuda:12.1.1-base-ubuntu22.04'
-  main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test  
-
 cpu:
   # 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto
   base_image: 'ubuntu:22.04' 
-  main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test 
   pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock 
 
 amd:
   # 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
   base_image: 'rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0'
-  main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test 
-  pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock 
+  pyproject_sed: |
+    RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml 
+    RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml 
+    RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock 
   poetry_virtualenvs_create: "false"
   poetry_virtualenvs_in_project: "false"
+  poetry_extras: "all onnxruntime-gpu"
   python_version: python3.10
+
+trt:
+  base_image: nvidia/cuda:12.1.1-devel-ubuntu22.04
+  poetry_extras: "all onnxruntime-gpu"
+  extra_installs_main: |
+    # Install utils for tensorrt; the apt index is refreshed in this same layer so a cached layer never installs from stale package lists
+    RUN apt-get update && apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip
+    RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation
+    RUN poetry run $PYTHON -m pip install --no-cache-dir "tensorrt==10.2.0" "tensorrt_lean==10.2.0" "tensorrt_dispatch==10.2.0"
+    ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/$PYTHON/dist-packages/tensorrt/
+    # ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
+    # ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH}
+  python_version: python3.10
diff --git a/libs/infinity_emb/Dockerfile.amd_auto b/libs/infinity_emb/Dockerfile.amd_auto
@@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_VIRTUALENVS_IN_PROJECT="false" \
     # do not ask any interactive question
     POETRY_NO_INTERACTION=1 \
-    EXTRAS="all" \
+    EXTRAS="all onnxruntime-gpu" \
     PYTHON="python3.10"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -36,29 +37,44 @@ RUN echo "Poetry version:" && poetry --version
 # Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
 COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # Install dependencies only
-RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock
+RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml 
+RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml 
+RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock 
+
 RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+#
+
 
 FROM builder as testing
 # install lint and test dependencies
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on any other platform run tests/end_to_end without tests marked "performance".
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +116,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto
@@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_NO_INTERACTION=1 \
     EXTRAS="all" \
     PYTHON="python3.11"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+#
+
 
 FROM builder as testing
 # install lint and test dependencies
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on any other platform run tests/end_to_end without tests marked "performance".
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +113,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.jinja2 b/libs/infinity_emb/Dockerfile.jinja2
@@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \
     # do not ask any interactive question
     POETRY_NO_INTERACTION=1 \
-    EXTRAS="all" \
+    EXTRAS="{{poetry_extras | default('all')}}" \
     PYTHON="{{python_version | default('python3.11')}}"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+{% set main_install2 = "poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test" %}
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -37,28 +38,40 @@ RUN echo "Poetry version:" && poetry --version
 COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # Install dependencies only
 {{pyproject_sed | default('#')}}
-RUN {{main_install}} && poetry cache clear pypi --all
+RUN {{main_install2}} && poetry cache clear pypi --all
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
-RUN {{main_install|replace("--no-root","")}} && poetry cache clear pypi --all
+RUN {{main_install2|replace("--no-root","")}} && poetry cache clear pypi --all
+{{extra_installs_main | default('#')}}
+
 
 FROM builder as testing
 # install lint and test dependencies
-RUN {{main_install|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all
+RUN {{main_install2|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on any other platform run tests/end_to_end without tests marked "performance".
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +113,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
diff --git a/libs/infinity_emb/Dockerfile.nvidia_auto b/libs/infinity_emb/Dockerfile.nvidia_auto
@@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_NO_INTERACTION=1 \
     EXTRAS="all" \
     PYTHON="python3.11"
-RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y 
+RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 WORKDIR /app
 
 FROM base as builder
+
 # Set the working directory for the app
 # Define the version of Poetry to install (default is 1.7.1)
 # Define the directory to install Poetry to (default is /opt/poetry)
@@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
+#
+
 
 FROM builder as testing
 # install lint and test dependencies
 RUN poetry install --no-interaction --no-ansi  --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
 # lint 
-RUN poetry run ruff .
-RUN poetry run black --check .
+RUN poetry run ruff check .
 RUN poetry run mypy .
 # pytest
 COPY tests tests
 # run end to end tests because of duration of build in github ci.
 # On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on any other platform run tests/end_to_end without tests marked "performance".
 # poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
-RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
-poetry run python -m pytest tests/end_to_end -x ; \
-else \
-poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
-fi
+RUN if [ -z "$TARGETPLATFORM" ]; then \
+      ARCH=$(uname -m); \
+      if [ "$ARCH" = "x86_64" ]; then \
+          TARGETPLATFORM="linux/amd64"; \
+      elif [ "$ARCH" = "aarch64" ] || [ "$ARCH" = "arm64" ]; then \
+          TARGETPLATFORM="linux/arm64"; \
+      else \
+          echo "Unsupported architecture: $ARCH"; exit 1; \
+      fi; \
+    fi; \
+    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then \
+        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
+    else \
+        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
+    fi
 RUN echo "all tests passed" > "test_results.txt"
 
 
@@ -100,17 +113,11 @@ ARG MODEL_NAME
 RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
 ARG ENGINE
 RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
-ARG EXTRA_PACKAGES
-RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi
+
 # will exit with 3 if model is downloaded # TODO: better exit code
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
-# flash attention fa2
-FROM tested-builder AS production-with-fa2
-RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
-ENTRYPOINT ["infinity_emb"]
-
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]