-
Notifications
You must be signed in to change notification settings - Fork 122
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* commit diff to neuron * update: neuron * update: docker image * update: readme * update
- Loading branch information
1 parent
dd72f23
commit 1bc513b
Showing
7 changed files
with
565 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
**/.venv | ||
**/*_cache | ||
**/__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# Adapted from: https://github.com/huggingface/optimum-neuron/blob/main/text-generation-inference/Dockerfile | ||
# Stage "base": Ubuntu 22.04 with Python 3, pip and setuptools. | ||
FROM ubuntu:22.04 AS base | ||
|
||
# Install the Python tooling in a single layer; the apt cache and the package | ||
# lists are removed in the same RUN so they do not persist in the image. | ||
RUN apt-get update -y \ | ||
&& apt-get install -y --no-install-recommends \ | ||
python-is-python3 \ | ||
python3-pip \ | ||
python3-setuptools \ | ||
&& apt-get clean \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
# Upgrade pip itself without keeping a download cache. | ||
RUN pip3 install --no-cache-dir --upgrade pip | ||
|
||
# Neuron base image (used for deployment) | ||
FROM base AS neuron | ||
# Install system prerequisites. wget and gnupg2 are needed below to download | ||
# and register the Neuron repository signing key. | ||
RUN apt-get update -y \ | ||
&& apt-get install -y --no-install-recommends \ | ||
gnupg2 \ | ||
wget \ | ||
python3-dev \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& apt-get clean | ||
|
||
# Register the AWS Neuron apt repository for Ubuntu jammy. | ||
RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list | ||
# The key is downloaded to a file first instead of being piped into apt-key: | ||
# with the default /bin/sh a pipeline only reports the status of its last | ||
# command, so a failed download would otherwise not fail the build. | ||
# NOTE(review): apt-key is deprecated; consider a keyring referenced via signed-by. | ||
RUN wget -qO /tmp/neuron-repo-key.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \ | ||
&& apt-key add /tmp/neuron-repo-key.pub \ | ||
&& rm -f /tmp/neuron-repo-key.pub | ||
|
||
# Install neuronx packages (versions pinned for reproducible builds) | ||
RUN apt-get update -y \ | ||
&& apt-get install -y --no-install-recommends \ | ||
aws-neuronx-dkms=2.18.20.0 \ | ||
aws-neuronx-collectives=2.22.33.0-d2128d1aa \ | ||
aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \ | ||
aws-neuronx-tools=2.19.0.0 \ | ||
libxml2 \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& apt-get clean | ||
|
||
|
||
# Put /opt/bin and the Neuron tools directory first on PATH. | ||
ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}" | ||
|
||
# Stage "infinity": adds the Python dependencies on top of the Neuron base stage. | ||
FROM neuron AS infinity | ||
# nano is installed as an in-container editor. The package lists are removed in | ||
# the same layer so they do not persist in the image. | ||
RUN apt-get update -y \ | ||
&& apt-get install -y --no-install-recommends nano \ | ||
&& rm -rf /var/lib/apt/lists/* \ | ||
&& apt-get clean | ||
WORKDIR /app | ||
|
||
COPY requirements_no_gpu.txt requirements_no_gpu.txt | ||
|
||
# RUN pip3 install \ | ||
# neuronx-cc==2.15.143.0 \ | ||
# torch-neuronx==2.1.2.2.3.2 \ | ||
# transformers-neuronx==0.12.313 \ | ||
# libneuronxla==2.0.5347.0 \ | ||
# --extra-index-url=https://pip.repos.neuron.amazonaws.com | ||
# Make the Neuron package index available to the pip3 calls below. | ||
RUN pip3 config set global.extra-index-url https://pip.repos.neuron.amazonaws.com | ||
# --no-cache-dir keeps pip's download cache out of the image layers. | ||
RUN pip3 install --no-cache-dir -r requirements_no_gpu.txt | ||
# Requirement specifiers are quoted so the shell cannot glob-expand [ ] or *. | ||
RUN pip3 install --no-cache-dir --upgrade-strategy eager "optimum[neuronx]" | ||
RUN pip3 install --no-cache-dir --upgrade \ | ||
"neuronx-cc==2.*" \ | ||
"libneuronxla==2.0.5347.0" \ | ||
"torch-neuronx==2.1.2.2.2.0" \ | ||
"transformers-neuronx==0.12.313" \ | ||
"torch==2.1.2.*" \ | ||
"torchvision==0.16.*" \ | ||
neuronx_distributed \ | ||
--extra-index-url=https://pip.repos.neuron.amazonaws.com | ||
|
||
|
||
# COPY reqs_frozen.txt reqs_frozen.txt | ||
# RUN pip3 install -r reqs_frozen.txt | ||
# Install optimum-neuron | ||
#14 19.70 Successfully installed aiohappyeyeballs-2.4.4 aiohttp-3.11.9 aiosignal-1.3.1 async-timeout-5.0.1 attrs-24.2.0 coloredlogs-15.0.1 datasets-3.1.0 dill-0.3.8 frozenlist-1.5.0 fsspec-2024.9.0 humanfriendly-10.0 multidict-6.1.0 multiprocess-0.70.16 optimum-1.18.0 optimum-neuron-0.0.1 pandas-2.2.3 propcache-0.2.1 pyarrow-18.1.0 pytz-2024.2 requests-2.32.3 sentencepiece-0.2.0 tokenizers-0.15.2 transformers-4.39.3 tzdata-2024.2 xxhash-3.5.0 yarl-1.18.3 | ||
# RUN pip3 install optimum[neuronx] --extra-index-url=https://pip.repos.neuron.amazonaws.com | ||
# | ||
# TGI base env | ||
# NOTE(review): presumably requires the hf_transfer package to be installed - confirm it is provided by the requirements above. | ||
ENV HF_HUB_ENABLE_HF_TRANSFER=1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Is a mirror of | ||
# 763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference-neuronx:2.1.2-transformers4.43.2-neuronx-py310-sdk2.20.0-ubuntu20.04 | ||
FROM michaelf34/aws-neuron-base-img:0.0.25-inference AS base | ||
|
||
WORKDIR /app | ||
|
||
COPY ./infra/aws_neuron/requirements_no_gpu.txt requirements_no_gpu.txt | ||
# --no-cache-dir keeps pip's download cache out of the image layers. | ||
# This install runs before the Neuron extra index is configured below; the order is kept as is. | ||
RUN pip3 install --no-cache-dir -r requirements_no_gpu.txt | ||
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com | ||
# req | ||
# RUN pip3 install --no-deps --upgrade optimum[neuronx]==1.20.0 | ||
RUN pip3 install --no-cache-dir --no-deps sentence_transformers==3.3.1 | ||
# -y makes the install non-interactive; the package lists are removed in the | ||
# same layer so they do not persist in the image. | ||
RUN apt-get update \ | ||
&& apt-get install -y --no-install-recommends nano \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
# RUN pip3 install --upgrade neuronx-cc==2.15.* torch-neuronx torchvision transformers-neuronx libneuronxla protobuf | ||
# libneuronxla-2.0.5347.0 ml-dtypes-0.2.0 neuronx-cc-2.15.143.0+e39249ad setuptools-69.5.1 torch-neuronx-2.1.2.2.3.2 torch-xla-2.1.5 transformers-neuronx-0.12.313 | ||
# The wildcard specifier is quoted so the shell cannot glob-expand it. | ||
RUN pip3 install --no-cache-dir --upgrade "neuronx-cc==2.15.*" torch-neuronx torchvision transformers-neuronx libneuronxla protobuf optimum-neuron==0.0.20 | ||
|
||
# base is also checkpointed to | ||
# docker pull michaelf34/aws-neuron-base-img:neuroncc2-15--optimum-1-17--transformers-4-36 | ||
FROM base AS infinity_latest | ||
# Copy the infinity_emb sources into the working directory (/app, inherited | ||
# from the base stage) and install them in editable mode. | ||
COPY ./libs/infinity_emb . | ||
# --no-cache-dir keeps pip's download cache out of the image layer. | ||
RUN pip3 install --no-cache-dir -e . | ||
# Environment defaults baked into the image: batch size 8 and the neuron engine. | ||
ENV INFINITY_BATCH_SIZE=8 | ||
ENV INFINITY_ENGINE=neuron | ||
# Exec-form entrypoint: arguments given to `docker run` are appended to infinity_emb. | ||
ENTRYPOINT [ "infinity_emb" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Launch an EC2 Instance on AWS: | ||
|
||
### Start an EC2 Instance with the Hugging Face AMI (free AMI image with Neuron Tools/Docker installed) | ||
- https://aws.amazon.com/marketplace/pp/prodview-gr3e6yiscria2 | ||
- View Purchase Options -> Configure | ||
- Use `64-Bit AMI`, `20241115 (Nov 18, 2024)` | ||
- Region, e.g. `us-west-2` | ||
- Set Instance type `inf2.xlarge` (has one Inferentia2 accelerator with two NeuronCores) | ||
- Login with username `ubuntu` (using your standard EC2 setup e.g. `ssh ubuntu@<instance-public-ip>`) | ||
|
||
### Optional: build docker image from scratch | ||
```bash | ||
git clone https://github.com/michaelfeil/infinity | ||
cd infinity | ||
docker buildx build -t michaelf34/infinity:0.0.x-neuron -f ./infra/aws_neuron/Dockerfile.neuron . | ||
``` | ||
|
||
### Run the image on EC2 | ||
|
||
```bash | ||
docker run -it --rm --device=/dev/neuron0 michaelf34/infinity:0.0.71-neuron v2 --model-id BAAI/bge-small-en-v1.5 --batch-size 8 --log-level debug | ||
``` | ||
|
||
### Run task on ECS (Work in progress) | ||
|
||
1. Create an AWS ECS Cluster with EC2: | ||
- Amazon Machine Image (AMI): Amazon Linux 2 - *Neuron* | ||
- inf2.xlarge as machine type. | ||
|
||
2. Create a Task: | ||
```json | ||
{ | ||
"family": "ecs-infinity-neuron", | ||
"requiresCompatibilities": ["EC2"], | ||
"placementConstraints": [ | ||
{ | ||
"type": "memberOf", | ||
"expression": "attribute:ecs.os-type == linux" | ||
}, | ||
{ | ||
"type": "memberOf", | ||
"expression": "attribute:ecs.instance-type == inf2.xlarge" | ||
} | ||
], | ||
"executionRoleArn": "${YOUR_EXECUTION_ROLE}", | ||
"containerDefinitions": [ | ||
{ | ||
"entryPoint": [ | ||
"infinity_emb", | ||
"v2" | ||
], | ||
"portMappings": [ | ||
{ | ||
"hostPort": 7997, | ||
"protocol": "tcp", | ||
"containerPort": 7997 | ||
} | ||
], | ||
"linuxParameters": { | ||
"devices": [ | ||
{ | ||
"containerPath": "/dev/neuron0", | ||
"hostPath": "/dev/neuron0", | ||
"permissions": [ | ||
"read", | ||
"write" | ||
] | ||
} | ||
], | ||
"capabilities": { | ||
"add": [ | ||
"IPC_LOCK" | ||
] | ||
} | ||
}, | ||
"cpu": 0, | ||
"memoryReservation": 1000, | ||
"image": "michaelf34/infinity:0.0.71-neuron", | ||
"essential": true, | ||
"name": "infinity-neuron" | ||
} | ||
] | ||
} | ||
``` | ||
|
||
You can also add logging: | ||
``` | ||
// same indent as "linuxParameters" | ||
"logConfiguration": { | ||
"logDriver": "awslogs", | ||
"options": { | ||
"awslogs-group": "/ecs/ecs-infinity-neuron", | ||
"mode": "non-blocking", | ||
"awslogs-create-group": "true", | ||
"max-buffer-size": "25m", | ||
"awslogs-region": "us-west-2", // set correct location. | ||
"awslogs-stream-prefix": "ecs" | ||
}, | ||
"secretOptions": [] | ||
} | ||
``` |
Oops, something went wrong.