Skip to content

Commit

Permalink
Adds neuron support (#486)
Browse files Browse the repository at this point in the history
* commit diff to neuron

* update: neuron

* update: docker image

* update: readme

* update
  • Loading branch information
michaelfeil authored Dec 3, 2024
1 parent dd72f23 commit 1bc513b
Show file tree
Hide file tree
Showing 7 changed files with 565 additions and 1 deletion.
3 changes: 3 additions & 0 deletions infra/aws_neuron/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Keep local, machine-specific artifacts out of the Docker build context.
# A leading "**/" makes each pattern match at any directory depth.

# Entries named ".venv" (presumably local Python virtual environments).
**/.venv
# Any entry whose name ends in "_cache" (presumably tool caches such as
# .mypy_cache / .pytest_cache / .ruff_cache — confirm against the repo).
**/*_cache
# Python bytecode cache directories.
**/__pycache__
76 changes: 76 additions & 0 deletions infra/aws_neuron/Dockerfile.base
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Adapted from: https://github.com/huggingface/optimum-neuron/blob/main/text-generation-inference/Dockerfile
# Stage "base": Ubuntu 22.04 plus the distribution's Python 3 tooling.
FROM ubuntu:22.04 AS base

# pip, setuptools and the `python` -> `python3` alias. The apt package cache
# and index lists are removed in the same layer that created them.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        python-is-python3 \
        python3-pip \
        python3-setuptools \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Upgrade pip itself; no download cache is kept in the layer.
RUN pip3 install --no-cache-dir --upgrade pip

# Neuron base image (used for deployment)
FROM base AS neuron
# Install system prerequisites (gnupg2 and wget are needed below to fetch and
# convert the Neuron repository signing key).
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        gnupg2 \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Register the AWS Neuron apt repository.
# - The key is downloaded to a file instead of being piped, so a failed
#   download fails the build (the default /bin/sh has no pipefail).
# - The key is stored in a dedicated keyring referenced through `signed-by`
#   rather than added with the deprecated `apt-key`, so it is only trusted
#   for this one repository.
# NOTE(review): assumes the published key is ASCII-armored (required by
# `gpg --dearmor`) — confirm against the Neuron repository.
RUN install -d -m 0755 /etc/apt/keyrings \
    && wget -qO /tmp/aws-neuron.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
    && gpg --batch --dearmor -o /etc/apt/keyrings/aws-neuron.gpg /tmp/aws-neuron.pub \
    && chmod a+r /etc/apt/keyrings/aws-neuron.gpg \
    && rm -f /tmp/aws-neuron.pub \
    && echo "deb [signed-by=/etc/apt/keyrings/aws-neuron.gpg] https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list

# Install neuronx packages (versions pinned for reproducible builds)
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        aws-neuronx-dkms=2.18.20.0 \
        aws-neuronx-collectives=2.22.33.0-d2128d1aa \
        aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \
        aws-neuronx-tools=2.19.0.0 \
        libxml2 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean


# Put /opt/bin and the Neuron tools directory ahead of the default PATH.
ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"

# Stage "infinity": Python dependencies for infinity on top of the Neuron image.
FROM neuron AS infinity
# nano is installed as an in-container editor. The apt lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends nano \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean
WORKDIR /app

COPY requirements_no_gpu.txt requirements_no_gpu.txt

# RUN pip3 install \
# neuronx-cc==2.15.143.0 \
# torch-neuronx==2.1.2.2.3.2 \
# transformers-neuronx==0.12.313 \
# libneuronxla==2.0.5347.0 \
# --extra-index-url=https://pip.repos.neuron.amazonaws.com
# Make the Neuron package index available to every following pip invocation.
RUN pip3 config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir -r requirements_no_gpu.txt
# Requirement specifiers containing [] or * are quoted so the shell can never
# glob-expand them.
RUN pip3 install --no-cache-dir --upgrade-strategy eager "optimum[neuronx]"
RUN pip3 install --no-cache-dir --upgrade \
    "neuronx-cc==2.*" \
    "libneuronxla==2.0.5347.0" \
    "torch-neuronx==2.1.2.2.2.0" \
    "transformers-neuronx==0.12.313" \
    "torch==2.1.2.*" \
    "torchvision==0.16.*" \
    neuronx_distributed \
    --extra-index-url=https://pip.repos.neuron.amazonaws.com


# COPY reqs_frozen.txt reqs_frozen.txt
# RUN pip3 install -r reqs_frozen.txt
# Install optimum-neuron
#14 19.70 Successfully installed aiohappyeyeballs-2.4.4 aiohttp-3.11.9 aiosignal-1.3.1 async-timeout-5.0.1 attrs-24.2.0 coloredlogs-15.0.1 datasets-3.1.0 dill-0.3.8 frozenlist-1.5.0 fsspec-2024.9.0 humanfriendly-10.0 multidict-6.1.0 multiprocess-0.70.16 optimum-1.18.0 optimum-neuron-0.0.1 pandas-2.2.3 propcache-0.2.1 pyarrow-18.1.0 pytz-2024.2 requests-2.32.3 sentencepiece-0.2.0 tokenizers-0.15.2 transformers-4.39.3 tzdata-2024.2 xxhash-3.5.0 yarl-1.18.3
# RUN pip3 install optimum[neuronx] --extra-index-url=https://pip.repos.neuron.amazonaws.com
#
# TGI base env
ENV HF_HUB_ENABLE_HF_TRANSFER=1

25 changes: 25 additions & 0 deletions infra/aws_neuron/Dockerfile.neuron
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Is a mirror of
# 763104351884.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference-neuronx:2.1.2-transformers4.43.2-neuronx-py310-sdk2.20.0-ubuntu20.04
FROM michaelf34/aws-neuron-base-img:0.0.25-inference AS base

WORKDIR /app

# The COPY source path is relative to the repository root, so the image must be
# built with the repository root as the build context.
COPY ./infra/aws_neuron/requirements_no_gpu.txt requirements_no_gpu.txt
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip3 install --no-cache-dir -r requirements_no_gpu.txt
# Make the Neuron package index available to the pip invocations below.
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
# req
# RUN pip3 install --no-deps --upgrade optimum[neuronx]==1.20.0
RUN pip3 install --no-cache-dir --no-deps sentence_transformers==3.3.1
# -y prevents apt-get from aborting on a confirmation prompt in the
# non-interactive build; the apt lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends nano \
    && rm -rf /var/lib/apt/lists/*
# RUN pip3 install --upgrade neuronx-cc==2.15.* torch-neuronx torchvision transformers-neuronx libneuronxla protobuf
# libneuronxla-2.0.5347.0 ml-dtypes-0.2.0 neuronx-cc-2.15.143.0+e39249ad setuptools-69.5.1 torch-neuronx-2.1.2.2.3.2 torch-xla-2.1.5 transformers-neuronx-0.12.313
# The version glob is quoted so the shell can never expand it.
RUN pip3 install --no-cache-dir --upgrade "neuronx-cc==2.15.*" torch-neuronx torchvision transformers-neuronx libneuronxla protobuf optimum-neuron==0.0.20

# base is also checkpointed to
# docker pull michaelf34/aws-neuron-base-img:neuroncc2-15--optimum-1-17--transformers-4-36
FROM base AS infinity_latest
# Copy the infinity_emb package sources into the working directory and install
# them in editable mode; --no-cache-dir keeps pip's cache out of the layer.
COPY ./libs/infinity_emb .
RUN pip3 install --no-cache-dir -e .
# Runtime defaults baked into the image environment.
ENV INFINITY_BATCH_SIZE=8 \
    INFINITY_ENGINE=neuron
# Exec form: arguments given to `docker run` are appended to `infinity_emb`.
ENTRYPOINT [ "infinity_emb" ]
101 changes: 101 additions & 0 deletions infra/aws_neuron/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Launch an EC2 Instance on AWS:

### Start an EC2 instance with the Hugging Face AMI (free AMI image with Neuron tools/Docker installed)
- https://aws.amazon.com/marketplace/pp/prodview-gr3e6yiscria2
- View Purchase Options -> Configure
- Use `64-Bit AMI`, `20241115 (Nov 18, 2024)`
- Region, e.g. `us-west-2`
- Set instance type `inf2.xlarge` (one Inferentia2 accelerator with two NeuronCores)
- Log in with username `ubuntu` (using your standard EC2 setup, e.g. `ssh ubuntu@<instance-public-ip>`)

### Optional: build docker image from scratch
```bash
git clone https://github.com/michaelfeil/infinity
cd infinity
docker buildx build -t michaelf34/infinity:0.0.x-neuron -f ./infra/aws_neuron/Dockerfile.neuron .
```

### Run the image on EC2

```bash
docker run -it --rm --device=/dev/neuron0 michaelf34/infinity:0.0.71-neuron v2 --model-id BAAI/bge-small-en-v1.5 --batch-size 8 --log-level debug
```

### Run task on ECS (Work in progress)

1. Create an AWS ECS cluster with EC2:
- Amazon Machine Image (AMI): Amazon Linux 2 - *Neuron*
- inf2.xlarge as machine type.

2. Create a Task:
```json
{
"family": "ecs-infinity-neuron",
"requiresCompatibilities": ["EC2"],
"placementConstraints": [
{
"type": "memberOf",
"expression": "attribute:ecs.os-type == linux"
},
{
"type": "memberOf",
"expression": "attribute:ecs.instance-type == inf2.xlarge"
}
],
"executionRoleArn": "${YOUR_EXECUTION_ROLE}",
"containerDefinitions": [
{
"entryPoint": [
"infinity_emb",
"v2"
],
"portMappings": [
{
"hostPort": 7997,
"protocol": "tcp",
"containerPort": 7997
}
],
"linuxParameters": {
"devices": [
{
"containerPath": "/dev/neuron0",
"hostPath": "/dev/neuron0",
"permissions": [
"read",
"write"
]
}
],
"capabilities": {
"add": [
"IPC_LOCK"
]
}
},
"cpu": 0,
"memoryReservation": 1000,
"image": "michaelf34/infinity:0.0.71-neuron",
"essential": true,
"name": "infinity-neuron"
}
]
}
```

You can also add logging:
```
// same indent as "linuxParameters"
"logConfiguration": {
"logDriver": "awslogs",
"options": {
"awslogs-group": "/ecs/ecs-infinity-neuron",
"mode": "non-blocking",
"awslogs-create-group": "true",
"max-buffer-size": "25m",
"awslogs-region": "us-west-2", // set correct location.
"awslogs-stream-prefix": "ecs"
},
"secretOptions": []
}
```
Loading

0 comments on commit 1bc513b

Please sign in to comment.