Skip to content

Commit

Permalink
Docker update trt onnx (#432)
Browse files Browse the repository at this point in the history
* add trt and onnx

* add trt and onnx 2
  • Loading branch information
michaelfeil authored Oct 19, 2024
1 parent 6df448f commit 6a9f8cd
Show file tree
Hide file tree
Showing 8 changed files with 259 additions and 71 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ jobs:
platforms: "linux/amd64"
secrets: inherit

# Release job for the TensorRT/ONNX image: calls the reusable
# release_docker_container workflow with Dockerfile.trt_onnx_auto, restricted to linux/amd64.
# NOTE(review): appendix_tag is presumably appended to the published image tag -
# confirm in release_docker_container.yaml.
docker-container-push-onnx-trt:
uses:
./.github/workflows/release_docker_container.yaml
with:
# working-directory: libs/infinity_emb
dockerfile: libs/infinity_emb/Dockerfile.trt_onnx_auto
image: michaelf34/infinity
appendix_tag: "-trt-onnx"
platforms: "linux/amd64"
secrets: inherit

# docker-container-push-amd:
# uses:
# ./.github/workflows/release_docker_container.yaml
Expand Down
25 changes: 20 additions & 5 deletions libs/infinity_emb/Docker.template.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,35 @@
# run all commands here via: `make template_docker`

# 1. Guide: pip install jinja2 jinja2-cli
nvidia:
# 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s nvidia > Dockerfile.nvidia_auto
base_image: 'nvidia/cuda:12.1.1-base-ubuntu22.04'
main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test

cpu:
# 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s cpu > Dockerfile.cpu_auto
base_image: 'ubuntu:22.04'
main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test
pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml && rm poetry.lock

amd:
# 2. command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
base_image: 'rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0'
main_install: poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test
pyproject_sed: RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock
pyproject_sed: |
RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml
RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml
RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock
poetry_virtualenvs_create: "false"
poetry_virtualenvs_in_project: "false"
poetry_extras: "all onnxruntime-gpu"
python_version: python3.10

trt:
  # Template values for the TensorRT + ONNX (GPU) image.
  # 2. command (presumed, following the sections above): jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s trt > Dockerfile.trt_onnx_auto
  base_image: nvidia/cuda:12.1.1-devel-ubuntu22.04
  poetry_extras: "all onnxruntime-gpu"
  # Rendered verbatim into the builder stage after the main poetry install.
  extra_installs_main: |
    # Install utils for tensorrt
    # Refresh the apt index in the same layer as the install, so a stale cached index from an earlier layer cannot break it.
    RUN apt-get update && apt-get install -y --no-install-recommends openmpi-bin libopenmpi-dev git git-lfs python3-pip
    RUN poetry run $PYTHON -m pip install --no-cache-dir flash-attn --no-build-isolation
    RUN poetry run $PYTHON -m pip install --no-cache-dir "tensorrt==10.2.0" "tensorrt_lean==10.2.0" "tensorrt_dispatch==10.2.0"
    # NOTE(review): pip runs through `poetry run`; if Poetry installs into a virtualenv, the tensorrt libraries
    # would live under that venv's site-packages rather than the dist-packages path below - confirm.
    ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/$PYTHON/dist-packages/tensorrt/
    # ENV LD_LIBRARY_PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt:/usr/lib/x86_64-linux-gnu:/app/.venv/lib/$(PYTHON)/site-packages/tensorrt_libs:${LD_LIBRARY_PATH}
    # ENV PATH /app/.venv/lib/$(PYTHON)/site-packages/tensorrt/bin:${PATH}
  python_version: python3.10
44 changes: 27 additions & 17 deletions libs/infinity_emb/Dockerfile.amd_auto
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
POETRY_VIRTUALENVS_IN_PROJECT="false" \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
EXTRAS="all onnxruntime-gpu" \
PYTHON="python3.10"
RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y
RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
WORKDIR /app

FROM base as builder

# Set the working directory for the app
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
Expand All @@ -36,29 +37,44 @@ RUN echo "Poetry version:" && poetry --version
# Copy the rest of the app source code (this layer will be invalidated and rebuilt whenever the source code changes)
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Install dependencies only
RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml && sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml && sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock
RUN sed -i 's|"pypi"|"pytorch_rocm"|' pyproject.toml
RUN sed -i 's|torch = "2.4.1"|torch = "2.4.1"|' pyproject.toml
RUN sed -i 's|torchvision = {version = "\*"|torchvision = {version = "0.19.1"|' pyproject.toml && rm poetry.lock

RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
#


FROM builder as testing
# install lint and test dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run ruff check .
RUN poetry run mypy .
# pytest
COPY tests tests
# run end to end tests because of duration of build in github ci.
# On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on other platforms run tests/end_to_end without the tests marked "performance".
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
poetry run python -m pytest tests/end_to_end -x ; \
else \
poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
fi
# TARGETPLATFORM may be empty in this stage; in that case derive it from `uname -m`
# and abort the build on an architecture that is neither x86_64 nor arm64/aarch64.
RUN if [ -z "$TARGETPLATFORM" ]; then \
        ARCH=$(uname -m); \
        case "$ARCH" in \
            x86_64) TARGETPLATFORM="linux/amd64" ;; \
            aarch64|arm64) TARGETPLATFORM="linux/arm64" ;; \
            *) echo "Unsupported architecture: $ARCH"; exit 1 ;; \
        esac; \
    fi; \
    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
    if [ "$TARGETPLATFORM" != "linux/arm64" ] ; then \
        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
RUN echo "all tests passed" > "test_results.txt"


Expand Down Expand Up @@ -100,17 +116,11 @@ ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi

# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
37 changes: 22 additions & 15 deletions libs/infinity_emb/Dockerfile.cpu_auto
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
PYTHON="python3.11"
RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y
RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
WORKDIR /app

FROM base as builder

# Set the working directory for the app
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
Expand All @@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
#


FROM builder as testing
# install lint and test dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run ruff check .
RUN poetry run mypy .
# pytest
COPY tests tests
# run end to end tests because of duration of build in github ci.
# On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on other platforms run tests/end_to_end without the tests marked "performance".
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
poetry run python -m pytest tests/end_to_end -x ; \
else \
poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
fi
# TARGETPLATFORM may be empty in this stage; in that case derive it from `uname -m`
# and abort the build on an architecture that is neither x86_64 nor arm64/aarch64.
RUN if [ -z "$TARGETPLATFORM" ]; then \
        ARCH=$(uname -m); \
        case "$ARCH" in \
            x86_64) TARGETPLATFORM="linux/amd64" ;; \
            aarch64|arm64) TARGETPLATFORM="linux/arm64" ;; \
            *) echo "Unsupported architecture: $ARCH"; exit 1 ;; \
        esac; \
    fi; \
    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
    if [ "$TARGETPLATFORM" != "linux/arm64" ] ; then \
        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
RUN echo "all tests passed" > "test_results.txt"


Expand Down Expand Up @@ -100,17 +113,11 @@ ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi

# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
45 changes: 26 additions & 19 deletions libs/infinity_emb/Dockerfile.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ ENV PYTHONUNBUFFERED=1 \
POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
EXTRAS="{{poetry_extras | default('all')}}" \
PYTHON="{{python_version | default('python3.11')}}"
RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y
RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
WORKDIR /app

FROM base as builder
{% set main_install2 = "poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test" %}
# Set the working directory for the app
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
Expand All @@ -37,28 +38,40 @@ RUN echo "Poetry version:" && poetry --version
COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# Install dependencies only
{{pyproject_sed | default('#')}}
RUN {{main_install}} && poetry cache clear pypi --all
RUN {{main_install2}} && poetry cache clear pypi --all
COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
RUN {{main_install|replace("--no-root","")}} && poetry cache clear pypi --all
RUN {{main_install2|replace("--no-root","")}} && poetry cache clear pypi --all
{{extra_installs_main | default('#')}}


FROM builder as testing
# install lint and test dependencies
RUN {{main_install|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all
RUN {{main_install2|replace("--without", "--with")|replace("--no-root","")}} && poetry cache clear pypi --all
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run ruff check .
RUN poetry run mypy .
# pytest
COPY tests tests
# run end to end tests because of duration of build in github ci.
# On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on other platforms run tests/end_to_end without the tests marked "performance".
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
poetry run python -m pytest tests/end_to_end -x ; \
else \
poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
fi
# TARGETPLATFORM may be empty in this stage; in that case derive it from `uname -m`
# and abort the build on an architecture that is neither x86_64 nor arm64/aarch64.
RUN if [ -z "$TARGETPLATFORM" ]; then \
        ARCH=$(uname -m); \
        case "$ARCH" in \
            x86_64) TARGETPLATFORM="linux/amd64" ;; \
            aarch64|arm64) TARGETPLATFORM="linux/arm64" ;; \
            *) echo "Unsupported architecture: $ARCH"; exit 1 ;; \
        esac; \
    fi; \
    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
    if [ "$TARGETPLATFORM" != "linux/arm64" ] ; then \
        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
RUN echo "all tests passed" > "test_results.txt"


Expand Down Expand Up @@ -100,17 +113,11 @@ ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi

# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
37 changes: 22 additions & 15 deletions libs/infinity_emb/Dockerfile.nvidia_auto
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ ENV PYTHONUNBUFFERED=1 \
POETRY_NO_INTERACTION=1 \
EXTRAS="all" \
PYTHON="python3.11"
RUN apt-get update && apt-get install build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl -y
RUN apt-get update && apt-get install -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
WORKDIR /app

FROM base as builder

# Set the working directory for the app
# Define the version of Poetry to install (default is 1.7.1)
# Define the directory to install Poetry to (default is /opt/poetry)
Expand All @@ -41,24 +42,36 @@ RUN poetry install --no-interaction --no-ansi --no-root --extras "${EXTRAS}" --w
COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --without lint,test && poetry cache clear pypi --all
#


FROM builder as testing
# install lint and test dependencies
RUN poetry install --no-interaction --no-ansi --extras "${EXTRAS}" --with lint,test && poetry cache clear pypi --all
# lint
RUN poetry run ruff .
RUN poetry run black --check .
RUN poetry run ruff check .
RUN poetry run mypy .
# pytest
COPY tests tests
# run end to end tests because of duration of build in github ci.
# On linux/arm64 run only tests/end_to_end/test_api_with_dummymodel.py; on other platforms run tests/end_to_end without the tests marked "performance".
# poetry run python -m pytest tests/end_to_end -x # TODO: does not work.
RUN if [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
poetry run python -m pytest tests/end_to_end -x ; \
else \
poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
fi
# TARGETPLATFORM may be empty in this stage; in that case derive it from `uname -m`
# and abort the build on an architecture that is neither x86_64 nor arm64/aarch64.
RUN if [ -z "$TARGETPLATFORM" ]; then \
        ARCH=$(uname -m); \
        case "$ARCH" in \
            x86_64) TARGETPLATFORM="linux/amd64" ;; \
            aarch64|arm64) TARGETPLATFORM="linux/arm64" ;; \
            *) echo "Unsupported architecture: $ARCH"; exit 1 ;; \
        esac; \
    fi; \
    echo "Running tests on TARGETPLATFORM=$TARGETPLATFORM"; \
    if [ "$TARGETPLATFORM" != "linux/arm64" ] ; then \
        poetry run python -m pytest tests/end_to_end -m "not performance" -x ; \
    else \
        poetry run python -m pytest tests/end_to_end/test_api_with_dummymodel.py -x ; \
    fi
RUN echo "all tests passed" > "test_results.txt"


Expand Down Expand Up @@ -100,17 +113,11 @@ ARG MODEL_NAME
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
ARG ENGINE
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi
ARG EXTRA_PACKAGES
RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES} ; fi

# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# flash attention fa2
FROM tested-builder AS production-with-fa2
RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
FROM tested-builder AS production
ENTRYPOINT ["infinity_emb"]
Loading

0 comments on commit 6a9f8cd

Please sign in to comment.