-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
156 lines (124 loc) · 4.71 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Global build arguments (supply each one with --build-arg).
# They are declared before the first FROM, so they are only visible to FROM
# instructions; every stage that needs one re-declares it with its own ARG.
# None of them has a default value.
ARG ALMOND_VERSION
ARG HADOOP_VERSION
ARG JAVA_VERSION
ARG PYTHON_VERSION
ARG SCALA_VERSION
ARG SPARK_VERSION
############################### stage ###############################
# base: Maven / Amazon Corretto image (Debian bullseye) selected through
# JAVA_VERSION, extended with an unprivileged user and wget.
FROM maven:3.9-amazoncorretto-${JAVA_VERSION}-debian-bullseye AS base

# Identity of the unprivileged user; each value can be overridden at build time.
ARG USERNAME=user
ARG USER_UID=1000
ARG USER_GID=1000

# Export the workspace location and the user identity as environment variables
# so that they remain available after this stage (ARG values do not persist).
ENV WORKSPACE=/app \
    USERNAME=${USERNAME} \
    USER_UID=${USER_UID} \
    USER_GID=${USER_GID}

# Create the group first, then a user with a home directory and bash as its
# login shell that belongs to that group.
RUN groupadd --gid ${USER_GID} ${USERNAME} \
    && useradd --create-home --shell /bin/bash --uid ${USER_UID} --gid ${USER_GID} ${USERNAME}

# Install wget and drop the apt package lists in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        wget \
    && rm -rf /var/lib/apt/lists/*
############################### stage ###############################
# base-spark: downloads an Apache Spark binary distribution and unpacks it
# into SPARK_HOME.
FROM base AS base-spark

# Versions used to build the download URL (re-declared here because ARGs
# declared before the first FROM are not visible inside a stage).
ARG SPARK_VERSION
ARG SCALA_VERSION
ARG HADOOP_VERSION

ENV SPARK_HOME=/usr/spark
ENV SPARK_CONF_DIR=/usr/spark/conf
ENV PATH=$SPARK_HOME/bin:$PATH

# The archive name differs per Scala version: the 2.12 URL carries no Scala
# suffix, while the 2.13 URL ends in "-scala${SCALA_VERSION}".
ARG SPARK_URL_2_12=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
ARG SPARK_URL_2_13=https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}-scala${SCALA_VERSION}.tgz

# Install Spark:
#   1. pick the download URL for the requested Scala version and abort with a
#      clear message for anything else (instead of calling wget with an empty URL);
#   2. unpack the archive straight into SPARK_HOME, stripping the versioned
#      top-level directory;
#   3. create the Spark log directory inside the workspace;
#   4. bundle all Spark jars, uncompressed, into spark-libs.jar.
# NOTE(review): the workspace is made world-writable (chmod -R 777). This is
# kept as-is because other users of the image may rely on it; consider a
# targeted chown to ${USERNAME} instead.
RUN case "${SCALA_VERSION}" in \
        2.12) SPARK_URL="${SPARK_URL_2_12}" ;; \
        2.13) SPARK_URL="${SPARK_URL_2_13}" ;; \
        *) echo "Unsupported SCALA_VERSION '${SCALA_VERSION}' (expected 2.12 or 2.13)" >&2; exit 1 ;; \
    esac \
    && wget -O /tmp/spark_bin.tgz "${SPARK_URL}" \
    && mkdir -p "${SPARK_HOME}" \
    && tar -xzf /tmp/spark_bin.tgz -C "${SPARK_HOME}" --strip-components=1 \
    && rm /tmp/spark_bin.tgz \
    && mkdir -p "${WORKSPACE}/tmp/spark/logs" \
    && chmod -R 777 "${WORKSPACE}" \
    && jar cv0f "${SPARK_HOME}/spark-libs.jar" -C "${SPARK_HOME}/jars/" .

# Point the Spark history server at the log directory created above.
# SPARK_HISTORY_OPTS is not defined earlier in this file, so it is set
# outright rather than appended to an undefined variable.
ENV SPARK_HISTORY_OPTS="-Dspark.history.fs.logDirectory=file://${WORKSPACE}/tmp/spark/logs"
############################### stage ###############################
# base-python: builds CPython ${PYTHON_VERSION} from source on top of the
# Spark image and installs the Python packages listed in requirements.txt.
FROM base-spark AS base-python

ARG PYTHON_VERSION

# Set the installation directory for Python
ENV PYTHON_INSTALL_DIR=/opt/python

# Toolchain and development headers used to compile Python.
# `apt-get update` and `apt-get install` share one layer so that a cached,
# stale package index can never be combined with a fresh install, and the
# package lists are removed in that same layer to keep it small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        gcc \
        libbz2-dev \
        libc6-dev \
        libffi-dev \
        libgdbm-dev \
        libncursesw5-dev \
        libsqlite3-dev \
        libssl-dev \
        tk-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Download and install Python from source; the source tree and the tarball are
# deleted in the same layer so that they do not end up in the image.
RUN wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tar.xz \
    && tar -xf Python-${PYTHON_VERSION}.tar.xz \
    && cd Python-${PYTHON_VERSION} \
    && ./configure --prefix=${PYTHON_INSTALL_DIR} --enable-optimizations \
    && make -j$(nproc) \
    && make install \
    && cd .. \
    && rm -rf Python-${PYTHON_VERSION} Python-${PYTHON_VERSION}.tar.xz

# Put the freshly built interpreter (and pip3) ahead of everything else on PATH
ENV PATH=${PYTHON_INSTALL_DIR}/bin:$PATH

# Install the Python dependencies; --no-cache-dir keeps pip's download cache
# out of the image layer.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Drop root privileges for anything that runs on top of this stage
USER ${USERNAME}
############################### stage ###############################
# This stage is intended to be used for development purposes. It
# allows to develop the application within the container, e.g. using
# vscode remote containers plugin.
FROM base-python AS dev

# TARGETPLATFORM is one of Docker's automatic platform ARGs (populated by
# BuildKit); it is used below to select the coursier launcher.
ARG TARGETPLATFORM
ARG ALMOND_VERSION
ARG SCALA_VERSION

# Package installation and the sudoers entry require root
USER root

# Install sudo plus the packages for local development in a single layer, and
# remove the apt package lists in that same layer so they do not persist in
# the image. Then grant the default user passwordless sudo as root.
RUN apt-get update \
    && apt-get install --no-install-recommends --no-install-suggests -y \
        build-essential \
        ca-certificates \
        curl \
        git \
        ssh \
        sudo \
        unzip \
        vim \
        zip \
    && rm -rf /var/lib/apt/lists/* \
    && echo "${USERNAME} ALL=(root) NOPASSWD:ALL" > /etc/sudoers.d/${USERNAME} \
    && chmod 0440 /etc/sudoers.d/${USERNAME}

# Change default user
USER ${USERNAME}

# Change workdir to install coursier and almond for the default user
WORKDIR /home/${USERNAME}

# Install coursier. The URL changes depending on the architecture, so this little
# hack will set the correct URL. Only tested with ARM (Apple Silicon) and AMD64.
# Any platform other than linux/arm64 (including an unset TARGETPLATFORM) uses
# the AMD64 launcher.
ARG COURSIER_URL_AMD64=https://github.com/coursier/launchers/raw/master/cs-x86_64-pc-linux.gz
ARG COURSIER_URL_M1=https://github.com/VirtusLab/coursier-m1/releases/latest/download/cs-aarch64-pc-linux.gz

# The launcher is downloaded to a file before it is decompressed: piping curl
# straight into gzip would hide a failed download, because the default shell
# only reports the exit status of the last command of a pipeline.
RUN case "${TARGETPLATFORM}" in \
        linux/arm64) COURSIER_URL="${COURSIER_URL_M1}" ;; \
        *) COURSIER_URL="${COURSIER_URL_AMD64}" ;; \
    esac \
    && curl -fL -o cs.gz "${COURSIER_URL}" \
    && gzip -d cs.gz \
    && chmod +x cs

# Install almond
RUN ./cs launch --fork almond:${ALMOND_VERSION} --scala ${SCALA_VERSION} -- --install

WORKDIR ${WORKSPACE}

# Keep the container running so that an editor or shell can attach to it
CMD [ "sleep", "infinity" ]