From 8771d2d97bc700701ab5a1c7bf79d8ed79bd9972 Mon Sep 17 00:00:00 2001
From: Michael Feil <63565275+michaelfeil@users.noreply.github.com>
Date: Sun, 21 Jul 2024 14:56:05 -0700
Subject: [PATCH] Add flash_attn support (#306) (#313)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add flash_attn support (#306)

* add dockerfile for flash_attn setup

* remove test.py

* parametrize model name and engine

* Update Dockerfile

---------

Co-authored-by: Michael Feil <63565275+michaelfeil@users.noreply.github.com>

* Delete libs/infinity_emb/Dockerfile.flash

---------

Co-authored-by: Göktürk <87906890+gokturkDev@users.noreply.github.com>
---
 libs/infinity_emb/Dockerfile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libs/infinity_emb/Dockerfile b/libs/infinity_emb/Dockerfile
index 7b94957a..a324e022 100644
--- a/libs/infinity_emb/Dockerfile
+++ b/libs/infinity_emb/Dockerfile
@@ -103,6 +103,11 @@ RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${E
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
+# Production variant with the prebuilt FlashAttention-2 wheel (v2.6.1; built for CUDA 12.3, torch 2.3, CPython 3.10, linux x86_64).
+FROM tested-builder AS production-with-fa2
+RUN python -m pip install --no-cache-dir https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
+ENTRYPOINT ["infinity_emb"]
+
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]