Add flash_attn support (#306) (#313)
* Add flash_attn support  (#306)

* add dockerfile for flash_attn setup

* remove test.py

* parametrize model name and engine

* Update Dockerfile

---------

Co-authored-by: Michael Feil <[email protected]>

* Delete libs/infinity_emb/Dockerfile.flash

---------

Co-authored-by: Göktürk <[email protected]>
michaelfeil and gokturkDev authored Jul 21, 2024
1 parent eb185ff commit 8771d2d
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions libs/infinity_emb/Dockerfile
@@ -103,6 +103,11 @@ RUN if [ -n "${EXTRA_PACKAGES}" ]; then python -m pip install --no-cache-dir ${EXTRA_PACKAGES}; fi
 RUN infinity_emb v2 --model-id $MODEL_NAME --engine $ENGINE --preload-only || [ $? -eq 3 ]
 ENTRYPOINT ["infinity_emb"]
 
+# flash attention fa2
+FROM tested-builder AS production-with-fa2
+RUN python -m pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.3cxx11abiFalse-cp310-cp310-linux_x86_64.whl
+ENTRYPOINT ["infinity_emb"]
+
 # Use a multi-stage build -> production version
 FROM tested-builder AS production
 ENTRYPOINT ["infinity_emb"]
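For context, a multi-stage Dockerfile only builds the stages leading to the requested target, so the new production-with-fa2 stage is opt-in; the pinned wheel filename indicates it assumes Python 3.10, CUDA 12.x, and torch 2.3 in the base image. A minimal build sketch (the image tag and MODEL_NAME/ENGINE values are illustrative; the build args are assumed from the parametrization mentioned in the commit message):

docker build \
  --target production-with-fa2 \
  --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 \
  --build-arg ENGINE=torch \
  -t infinity-emb:fa2 \
  -f libs/infinity_emb/Dockerfile libs/infinity_emb

Because the production stage remains the last stage in the file, a plain docker build without --target still produces the image without the flash-attn wheel.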
