-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdocker-compose.yml
76 lines (70 loc) · 2.1 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Compose project name for this stack.
name: "tritonsprongs"
# Shared definition for the Triton Inference Server services. The `x-` prefix
# marks this as a Compose extension field; the inference services in this file
# pull it in through the `<<: *base_inference_service` merge key.
x-base_inference_service: &base_inference_service
  image: ${TRITON_INFERENCE_SERVER_IMAGE:-nvcr.io/nvidia/tritonserver}:${TRITON_INFERENCE_SERVER_TAG:-24.07-py3}
  # Run through `sh -c` so the multi-line invocation is a single shell command.
  # Every ${...} below is substituted by Compose before the shell sees it.
  command:
    - sh
    - -c
    - |
      tritonserver \
        --model-repository=${TRITON_INFERENCE_SERVER_MODEL_REPO:-${MODEL_REPOSITORY_TARGET:-/model-repository}} \
        --model-control-mode=${TRITON_INFERENCE_SERVER_MODEL_CONTROL_MODE:-${TRITON_INERENCE_SERVER_MODEL_CONTROL_MODE:-explicit}} \
        --model-config-name=${TRITON_INFERENCE_SERVER_MODEL_CONFIG_NAME:-${TRITON_INERENCE_SERVER_MODEL_CONFIG_NAME:-"config"}} \
        --load-model=embed_image \
        --load-model=siglip_vision \
        --load-model=embed_text \
        --load-model=multilingual_e5_large \
        --load-model=siglip_text \
        --load-model=translate \
        --load-model=fasttext_language_identification \
        --load-model=sentencex \
        --load-model=seamlessm4t_text2text \
        --log-verbose=1 \
        --log-format=ISO8601 \
        --log-info=true \
        --http-header-forward-pattern=".*" \
        --exit-on-error=false
  # Notes on the command above:
  #   * The model repository defaults to the bind-mount target used under
  #     `volumes`, so overriding MODEL_REPOSITORY_TARGET alone keeps the server
  #     and the mount in agreement.
  #   * The correctly spelled TRITON_INFERENCE_SERVER_MODEL_CONTROL_MODE and
  #     TRITON_INFERENCE_SERVER_MODEL_CONFIG_NAME take precedence; the legacy
  #     misspelled TRITON_INERENCE_SERVER_* names are still honoured as a
  #     fallback so existing .env files keep working.
  #   * With the default "explicit" control mode only the models named by
  #     --load-model are loaded at startup.
  environment:
    - PYTHONNOUSERSITE=True
    - PYTHONDONTWRITEBYTECODE=1
    - LOG_LEVEL=DEBUG
    # Both cache locations follow the cache bind-mount target so that
    # overriding HF_HUB_CACHE_TARGET does not leave them pointing at an
    # unmounted path inside the container.
    - MODEL_CACHE_DIR=${HF_HUB_CACHE_TARGET:-/models}
    - HF_HUB_CACHE=${HF_HUB_CACHE_TARGET:-/models}
  # host:container mappings, quoted so YAML always reads them as strings.
  # NOTE(review): 8000/8001/8002 are presumably Triton's default HTTP, gRPC and
  # metrics ports - confirm against the Triton documentation.
  ports:
    - "8000:8000"
    - "8001:8001"
    - "8002:8002"
  restart: "no"
  shm_size: "1gb"
  # Bind mounts for the model repository and the model/Hugging Face cache.
  # The `:z` option requests a shared SELinux label on the mounted content.
  volumes:
    - ${MODEL_REPOSITORY_SOURCE:-./model-repository}:${MODEL_REPOSITORY_TARGET:-/model-repository}:z
    - ${HF_HUB_CACHE_SOURCE:-./models}:${HF_HUB_CACHE_TARGET:-/models}:z
# Service definitions. Every service declares a profile, so it is only
# started when that profile is enabled.
services:
  # Triton server built from the shared base definition, no device reservation.
  inference_cpu:
    <<: *base_inference_service
    profiles: ["cpu"]
    container_name: "triton-inference-server-cpu"

  # Same base definition, plus a reservation for one NVIDIA GPU.
  inference_gpu:
    <<: *base_inference_service
    profiles: ["gpu"]
    container_name: "triton-inference-server-gpu"
    deploy:
      resources:
        reservations:
          devices:
            - driver: "nvidia"
              count: 1
              capabilities:
                - "gpu"

  # Builds the local conda-pack-builder image from the Dockerfile in this
  # directory; the base image is overridable via COMPOSE_CONDA_PACK_BASE_IMAGE.
  conda-pack-builder:
    profiles: ["build"]
    image: "local/conda-pack-builder:latest"
    build:
      context: "."
      dockerfile: "Dockerfile"
      args:
        - "BASE_IMAGE=${COMPOSE_CONDA_PACK_BASE_IMAGE:-continuumio/miniconda3:latest}"
    # The running container uses the host's network stack.
    network_mode: "host"
    # The model repository is bind-mounted into the container.
    volumes:
      - "./model-repository:/model-repository:z"