-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
35 lines (26 loc) · 1.03 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Image for the topic embedding job: this file both builds the image and
# defines how the job is run.
# For more information, please refer to https://aka.ms/vscode-docker-python
FROM python:3.10-slim-buster

# Runtime environment:
#   PYTHONDONTWRITEBYTECODE=1  keeps Python from generating .pyc files in the container
#   PYTHONUNBUFFERED=1         turns off buffering for easier container logging
#   COGTEXT_DATA_FRACTION=1.0  preprocess all the articles in the dataset
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    COGTEXT_DATA_FRACTION=1.0
# Build toolchain required to compile hdbscan.
# update, upgrade and install run in a single layer so the package index used
# by `install` can never come from a stale cached `update` layer, and the apt
# lists are removed in that same layer so they do not persist in the image.
# NOTE(review): on buster `python-dev` presumably provides Python 2 headers,
# while the interpreter comes from the python:3.10 base image - confirm
# whether this package is still needed.
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        gcc \
        python-dev \
    && rm -rf /var/lib/apt/lists/*
# Install pip requirements
# FIXME use mamba/micromamba to manage dependencies
# --no-cache-dir is passed to both installs so pip's download cache is not
# written into the image layers.
RUN pip install --no-cache-dir --upgrade pip
# Only the requirements file is copied before installing, so edits to the
# application sources alone do not invalidate the dependency layer.
COPY requirements_hpc.txt .
RUN pip install --no-cache-dir -r requirements_hpc.txt
# Creates a non-root user with an explicit UID and an /app directory it owns.
# The user is created before the sources are copied so that COPY --chown can
# assign ownership directly; a recursive chown after the copy would rewrite
# every file into an additional layer.
RUN adduser -u 5678 --disabled-password --gecos "" appuser \
    && mkdir -p /app \
    && chown appuser:appuser /app
WORKDIR /app
COPY --chown=appuser:appuser . /app
# Everything from here on, including the job itself, runs as the non-root user.
USER appuser
# COGTEXT_DATA_FRACTION is already set to 1.0 earlier in this file, so it is
# not declared a second time here.
# Entry point: run the topic embedding job (exec form, relative to WORKDIR).
CMD ["python", "jobs/topic_embedding.py"]