diff --git a/README.md b/README.md
index 1d829e30..8c918565 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ They can be used alone but are designed to work with the [Onyxia](https://github
     PM-->PYTENSORFLOW[python-tensorflow]:::package;
     PM-->PYTORCH[python-pytorch]:::package;
     RM-->RDS[r-datascience]:::package;
-    RM-->RSPARK[r-sparkr]:::package;
+    RM-->RSPARK[sparkr]:::package;
     RM-->RPYJU[r-python-julia]:::package;
     PYSPARK--> JPYSPARK[jupyter-pyspark]:::ide;
     PYDS--> JPYDS[jupyter-python]:::ide;
@@ -31,7 +31,6 @@ They can be used alone but are designed to work with the [Onyxia](https://github
     PYDS--> VSCODEPYDS[vscode-python]:::ide;
     PYTENSORFLOW--> VSCODEPYTENSORFLOW[vscode-tensorflow]:::ide;
     PYTORCH--> VSCODEPYTORCH[vscode-pytorch]:::ide;
-    RSPARK -->JSPARKR[jupyter-sparkr]:::ide;
     RDS--> JRDS[jupyter-r]:::ide;
     RSPARK -->RSTUDIOSPARKR[rstudio-sparkr]:::ide;
     RDS--> RSTUDIORDS[rstudio-r]:::ide;
@@ -65,7 +64,7 @@ If Onyxia support is checked, it means that the Onyxia product could inject auto
 There is multiple recipes:
 
 **Your user has non root capabilities:**
-- use an init script : https://github.com/InseeFrLab/images-datascience/blob/main/base/common-scripts/onyxia-init.sh#L7
+- use an init script : https://github.com/InseeFrLab/images-datascience/blob/main/scripts/onyxia-init.sh#L7
 - you can use an init region script location injected by Onyxia with a curl to an endpoint with your certiticates and put it in a path let's say /tmp/ca-certificates:
 - put this path in env variable PATH_TO_CABUNDLE then onyxia-init.sh script will configure git, pip and conda to user this certificates.
 
diff --git a/r-datascience/Dockerfile b/r-datascience/Dockerfile
index c8936b94..41c0153f 100644
--- a/r-datascience/Dockerfile
+++ b/r-datascience/Dockerfile
@@ -3,10 +3,18 @@ FROM $BASE_IMAGE
 
 LABEL maintainer="InseeFrLab "
 
+ARG JAVA_VERSION="17"
+ENV JAVA_VERSION=${JAVA_VERSION}
+ENV JAVA_HOME="/usr/lib/jvm/java-$JAVA_VERSION-openjdk-amd64"
+ENV PATH="${JAVA_HOME}/bin:${PATH}"
+
 USER root
 
 # Install additional libraries and R packages for datascience
-RUN /opt/install-quarto.sh && \
+RUN apt-get update && \
+    # Install JDK
+    /opt/install-java.sh && \
+    /opt/install-quarto.sh && \
     # Install Shiny Server
     /rocker_scripts/install_shiny_server.sh && \
     # Install packages bundles from rocker
diff --git a/r-datascience/tests.yaml b/r-datascience/tests.yaml
index c4628ad1..8b21f111 100644
--- a/r-datascience/tests.yaml
+++ b/r-datascience/tests.yaml
@@ -44,3 +44,7 @@ commandTests:
     command: "which"
     args: ["quarto"]
     expectedOutput: ["/usr/local/bin/quarto"]
+  - name: "Does the binary exists?"
+    command: "which"
+    args: ["java"]
+    expectedOutput: ["/usr/lib/jvm/java-17-openjdk-amd64/bin/java"]
diff --git a/scripts/install-java.sh b/scripts/install-java.sh
new file mode 100644
index 00000000..86727f91
--- /dev/null
+++ b/scripts/install-java.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -e
+
+apt-get install -y --no-install-recommends \
+    ca-certificates-java \
+    openjdk-${JAVA_VERSION}-jre-headless \
+    openjdk-${JAVA_VERSION}-jdk-headless \
+    libbz2-dev # for jdk
+
+if command -v R; then
+    R CMD javareconf
+fi
diff --git a/scripts/install-spark-hadoop-hive.sh b/scripts/install-spark-hadoop-hive.sh
index b6e749c0..7656bed1 100644
--- a/scripts/install-spark-hadoop-hive.sh
+++ b/scripts/install-spark-hadoop-hive.sh
@@ -4,7 +4,6 @@ set -e
 HADOOP_VERSION="3.3.6"
 HIVE_VERSION="2.3.9"
 HIVE_LISTENER_VERSION="0.0.3"
-JAVA_VERSION="17"
 
 export SPARK_BUILD_S3_BUCKET="https://minio.lab.sspcloud.fr/projet-onyxia/build"
 export SPARK_BUILD_NAME="spark-${SPARK_VERSION}-bin-hadoop-${HADOOP_VERSION}-hive-${HIVE_VERSION}-java-${JAVA_VERSION}"
diff --git a/scripts/onyxia-init.sh b/scripts/onyxia-init.sh
index bd03f3fd..c694d584 100644
--- a/scripts/onyxia-init.sh
+++ b/scripts/onyxia-init.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 
-echo "start of onyxia-init.sh script en tant que :"
+echo "start of onyxia-init.sh script as user :"
 whoami
 
 sudo true -nv 2>&1
@@ -141,8 +141,8 @@ if command -v R; then
     echo -e "SPARK_HOME=$SPARK_HOME" >> ${R_HOME}/etc/Renviron.site
     echo -e "HADOOP_HOME=$HADOOP_HOME" >> ${R_HOME}/etc/Renviron.site
     echo -e "HADOOP_OPTIONAL_TOOLS=$HADOOP_OPTIONAL_TOOLS" >> ${R_HOME}/etc/Renviron.site
-    if [[ -e "/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64" ]]; then
-        echo -e "JAVA_HOME=/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64" >> ${R_HOME}/etc/Renviron.site
+    if [[ -e "$JAVA_HOME" ]]; then
+        echo -e "JAVA_HOME=$JAVA_HOME" >> ${R_HOME}/etc/Renviron.site
     fi
     env | grep "KUBERNETES" >> ${R_HOME}/etc/Renviron.site
     env | grep "IMAGE_NAME" >> ${R_HOME}/etc/Renviron.site
diff --git a/spark/Dockerfile b/spark/Dockerfile
index faac9cca..9fb0edd6 100644
--- a/spark/Dockerfile
+++ b/spark/Dockerfile
@@ -6,27 +6,30 @@ LABEL maintainer="InseeFrLab "
 
 ARG BASE_IMAGE
 ARG SPARK_VERSION="3.5.0"
+
 ENV SPARK_VERSION=${SPARK_VERSION}
+ENV JAVA_VERSION="17"
+
+ENV JAVA_HOME="/usr/lib/jvm/java-$JAVA_VERSION-openjdk-amd64"
+ENV PATH="${JAVA_HOME}/bin:${PATH}"
 ENV HADOOP_HOME="/opt/hadoop"
 ENV SPARK_HOME="/opt/spark"
 ENV HIVE_HOME="/opt/hive"
 ENV PYTHONPATH="$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip"
 ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M"
-ENV JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64"
+
 ENV HADOOP_OPTIONAL_TOOLS="hadoop-aws"
-ENV PATH="${JAVA_HOME}/bin:${SPARK_HOME}/bin:${HADOOP_HOME}/bin:${PATH}"
+ENV PATH="${SPARK_HOME}/bin:${HADOOP_HOME}/bin:${PATH}"
 
 USER root
 
 RUN --mount=type=secret,id=github_token \
     apt-get update && \
     # Install JDK
-    apt-get install -y --no-install-recommends \
-        ca-certificates-java \
-        openjdk-17-jre-headless && \
-    # Install Spark/Hadoop/Hive
+    /opt/install-java.sh && \
+    # Install Spark/Hadoop/Hive
     /opt/install-spark-hadoop-hive.sh && \
     # Put Spark config in the right place
     cp /opt/spark-env.sh $SPARK_HOME/conf && \
diff --git a/spark/tests.yaml b/spark/tests.yaml
index 8ae34e63..1b1d12f9 100644
--- a/spark/tests.yaml
+++ b/spark/tests.yaml
@@ -53,4 +53,8 @@ commandTests:
   - name: "Does the binary exists?"
     command: "which"
     args: ["argo"]
-    expectedOutput: ["/usr/local/bin/argo"]
\ No newline at end of file
+    expectedOutput: ["/usr/local/bin/argo"]
+  - name: "Does the binary exists?"
+    command: "which"
+    args: ["java"]
+    expectedOutput: ["/usr/lib/jvm/java-17-openjdk-amd64/bin/java"]
\ No newline at end of file