Install java for spark and R-datascience images (for packages like RJDemetra) (#181)

* moved java install into a separate .sh, added libbz2-dev for the jdk compiler

* fixed typos since jupyter-spark-r does not exist

* make use of JAVA_HOME for Renviron, added java test for spark images

* dummy changes for matrix to build from base

* dev

* debug

* no spaces

* debug

* debug more

* tweaked main-workflow to copy scripts into /opt for all images

* typo in README + debug mode

* debug more

* typo

* also use the install-java script for r-datascience

* forgot java version variables for r-datascience

* apt-get update in r-datascience before install-java.sh

* took out debug code

* added ENV in dockerfiles so vars are passed to containers

* move libbz2 into install-java.sh for
odysseu authored Feb 9, 2024
1 parent 0731bbc commit b563e68
Showing 8 changed files with 44 additions and 15 deletions.
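The commit's stated motivation is Java-dependent R packages such as RJDemetra. A quick hedged sanity check that rJava can find the freshly installed JDK inside an image (a sketch, assuming the rJava package is installed):

    # Initializes the JVM via rJava and prints the JDK version R actually sees;
    # a "17.x" result confirms JAVA_HOME and javareconf are wired correctly.
    Rscript -e 'library(rJava); .jinit(); cat(.jcall("java/lang/System", "S", "getProperty", "java.version"))'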
5 changes: 2 additions & 3 deletions README.md
@@ -21,7 +21,7 @@ They can be used alone but are designed to work with the [Onyxia](https://github
PM-->PYTENSORFLOW[python-tensorflow]:::package;
PM-->PYTORCH[python-pytorch]:::package;
RM-->RDS[r-datascience]:::package;
- RM-->RSPARK[r-sparkr]:::package;
+ RM-->RSPARK[sparkr]:::package;
RM-->RPYJU[r-python-julia]:::package;
PYSPARK--> JPYSPARK[jupyter-pyspark]:::ide;
PYDS--> JPYDS[jupyter-python]:::ide;
@@ -31,7 +31,6 @@ They can be used alone but are designed to work with the [Onyxia](https://github
PYDS--> VSCODEPYDS[vscode-python]:::ide;
PYTENSORFLOW--> VSCODEPYTENSORFLOW[vscode-tensorflow]:::ide;
PYTORCH--> VSCODEPYTORCH[vscode-pytorch]:::ide;
- RSPARK -->JSPARKR[jupyter-sparkr]:::ide;
RDS--> JRDS[jupyter-r]:::ide;
RSPARK -->RSTUDIOSPARKR[rstudio-sparkr]:::ide;
RDS--> RSTUDIORDS[rstudio-r]:::ide;
@@ -65,7 +64,7 @@ If Onyxia support is checked, it means that the Onyxia product could inject auto
There are multiple recipes:

**Your user has non root capabilities:**
- - use an init script : https://github.com/InseeFrLab/images-datascience/blob/main/base/common-scripts/onyxia-init.sh#L7
+ - use an init script : https://github.com/InseeFrLab/images-datascience/blob/main/scripts/onyxia-init.sh#L7
- you can use an init region script location injected by Onyxia, with a curl to an endpoint serving your certificates, and put them in a path, let's say /tmp/ca-certificates:
- put this path in the env variable PATH_TO_CABUNDLE; the onyxia-init.sh script will then configure git, pip and conda to use these certificates.

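The CA-bundle recipe above boils down to pointing each tool at the custom bundle. A minimal sketch of the idea, with illustrative paths (the actual logic lives in scripts/onyxia-init.sh):

    # Hypothetical bundle location, as in the recipe above
    export PATH_TO_CABUNDLE=/tmp/ca-certificates
    # Point git, pip and conda at the custom CA bundle
    git config --global http.sslCAInfo "$PATH_TO_CABUNDLE/ca-bundle.crt"
    pip config set global.cert "$PATH_TO_CABUNDLE/ca-bundle.crt"
    conda config --set ssl_verify "$PATH_TO_CABUNDLE/ca-bundle.crt"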
10 changes: 9 additions & 1 deletion r-datascience/Dockerfile
@@ -3,10 +3,18 @@ FROM $BASE_IMAGE

LABEL maintainer="InseeFrLab <[email protected]>"

+ ARG JAVA_VERSION="17"
+ ENV JAVA_VERSION=${JAVA_VERSION}
+ ENV JAVA_HOME="/usr/lib/jvm/java-$JAVA_VERSION-openjdk-amd64"
+ ENV PATH="${JAVA_HOME}/bin:${PATH}"

USER root

# Install additional libraries and R packages for datascience
- RUN /opt/install-quarto.sh && \
+ RUN apt-get update && \
+     # Install JDK
+     /opt/install-java.sh && \
+     /opt/install-quarto.sh && \
    # Install Shiny Server
    /rocker_scripts/install_shiny_server.sh && \
    # Install packages bundles from rocker
4 changes: 4 additions & 0 deletions r-datascience/tests.yaml
@@ -44,3 +44,7 @@ commandTests:
command: "which"
args: ["quarto"]
expectedOutput: ["/usr/local/bin/quarto"]
- name: "Does the binary exists?"
command: "which"
args: ["java"]
expectedOutput: ["/usr/lib/jvm/java-17-openjdk-amd64/bin/java"]
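These checks are in container-structure-test format; a hedged usage sketch (the image reference is illustrative, adjust it to your build):

    # Run the image's test suite locally against a built image
    container-structure-test test \
      --image inseefrlab/onyxia-r-datascience:latest \
      --config r-datascience/tests.yaml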
12 changes: 12 additions & 0 deletions scripts/install-java.sh
@@ -0,0 +1,12 @@
+ #!/bin/bash
+ set -e
+
+ apt-get install -y --no-install-recommends \
+     ca-certificates-java \
+     openjdk-${JAVA_VERSION}-jre-headless \
+     openjdk-${JAVA_VERSION}-jdk-headless \
+     libbz2-dev # for jdk
+
+ if command -v R; then
+     R CMD javareconf
+ fi
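Note that the script does not hard-code a version: it relies on JAVA_VERSION being exported by the calling Dockerfile (hence the ARG/ENV lines added above), and it expects an apt-get update to have run beforehand. A hedged invocation sketch outside the images:

    # As root, after apt-get update, as the Dockerfiles do
    JAVA_VERSION=17 bash /opt/install-java.sh
    java -version   # should report an OpenJDK 17 runtime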
1 change: 0 additions & 1 deletion scripts/install-spark-hadoop-hive.sh
@@ -4,7 +4,6 @@ set -e
HADOOP_VERSION="3.3.6"
HIVE_VERSION="2.3.9"
HIVE_LISTENER_VERSION="0.0.3"
- JAVA_VERSION="17"

export SPARK_BUILD_S3_BUCKET="https://minio.lab.sspcloud.fr/projet-onyxia/build"
export SPARK_BUILD_NAME="spark-${SPARK_VERSION}-bin-hadoop-${HADOOP_VERSION}-hive-${HIVE_VERSION}-java-${JAVA_VERSION}"
6 changes: 3 additions & 3 deletions scripts/onyxia-init.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

echo "start of onyxia-init.sh script en tant que :"
echo "start of onyxia-init.sh script as user :"
whoami

sudo true -nv 2>&1
@@ -141,8 +141,8 @@ if command -v R; then
echo -e "SPARK_HOME=$SPARK_HOME" >> ${R_HOME}/etc/Renviron.site
echo -e "HADOOP_HOME=$HADOOP_HOME" >> ${R_HOME}/etc/Renviron.site
echo -e "HADOOP_OPTIONAL_TOOLS=$HADOOP_OPTIONAL_TOOLS" >> ${R_HOME}/etc/Renviron.site
if [[ -e "/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64" ]]; then
echo -e "JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64" >> ${R_HOME}/etc/Renviron.site
if [[ -e "$JAVA_HOME" ]]; then
echo -e "JAVA_HOME=$JAVA_HOME" >> ${R_HOME}/etc/Renviron.site
fi
env | grep "KUBERNETES" >> ${R_HOME}/etc/Renviron.site
env | grep "IMAGE_NAME" >> ${R_HOME}/etc/Renviron.site
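With JAVA_HOME now taken from the environment instead of a hard-coded adoptopenjdk-8 path, R sessions inherit whatever JDK the image installed. A quick hedged check from inside a running container:

    # Renviron.site should now expose the Java/Spark variables to R
    Rscript -e 'Sys.getenv(c("JAVA_HOME", "SPARK_HOME", "HADOOP_HOME"))'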
15 changes: 9 additions & 6 deletions spark/Dockerfile
@@ -6,27 +6,30 @@ LABEL maintainer="InseeFrLab <[email protected]>"
ARG BASE_IMAGE

ARG SPARK_VERSION="3.5.0"

ENV SPARK_VERSION=${SPARK_VERSION}
+ ENV JAVA_VERSION="17"
+
+ ENV JAVA_HOME="/usr/lib/jvm/java-$JAVA_VERSION-openjdk-amd64"
+ ENV PATH="${JAVA_HOME}/bin:${PATH}"

ENV HADOOP_HOME="/opt/hadoop"
ENV SPARK_HOME="/opt/spark"
ENV HIVE_HOME="/opt/hive"
ENV PYTHONPATH="$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip"
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M"
ENV JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64"

ENV HADOOP_OPTIONAL_TOOLS="hadoop-aws"

ENV PATH="${JAVA_HOME}/bin:${SPARK_HOME}/bin:${HADOOP_HOME}/bin:${PATH}"
ENV PATH="${SPARK_HOME}/bin:${HADOOP_HOME}/bin:${PATH}"

USER root

RUN --mount=type=secret,id=github_token \
    apt-get update && \
    # Install JDK
-   apt-get install -y --no-install-recommends \
-       ca-certificates-java \
-       openjdk-17-jre-headless && \
-   # Install Spark/Hadoop/Hive
+   /opt/install-java.sh && \
+   # Install Spark/Hadoop/Hive
    /opt/install-spark-hadoop-hive.sh && \
    # Put Spark config in the right place
    cp /opt/spark-env.sh $SPARK_HOME/conf && \
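Since JAVA_VERSION and JAVA_HOME are now declared with ENV rather than only ARG, they survive into running containers (the point of the "added ENV in dockerfiles" commit bullet). A hedged check, with an illustrative image tag:

    # Both lines should succeed if the ENV variables reached the container
    docker run --rm inseefrlab/onyxia-spark:latest sh -c 'echo $JAVA_HOME && java -version'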
6 changes: 5 additions & 1 deletion spark/tests.yaml
@@ -53,4 +53,8 @@ commandTests:
  - name: "Does the binary exist?"
    command: "which"
    args: ["argo"]
-   expectedOutput: ["/usr/local/bin/argo"]
\ No newline at end of file
+   expectedOutput: ["/usr/local/bin/argo"]
+  - name: "Does the binary exist?"
+    command: "which"
+    args: ["java"]
+    expectedOutput: ["/usr/lib/jvm/java-17-openjdk-amd64/bin/java"]
