Skip to content
This repository has been archived by the owner on Mar 7, 2024. It is now read-only.

fix Dockerfile and utf8 reading #107

Merged
merged 2 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 31 additions & 30 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,58 +1,59 @@
# NOTE(review): this span reads like a rendered diff of the Dockerfile, with
# the Presto-era lines and their Trino replacements listed back to back.
# Presumably only one line of each pair belongs in the real file; confirm
# against the repository before building from this text.
# Base image: two FROM instructions appear here (openjdk:8, then ubuntu:20.04).
FROM openjdk:8
FROM ubuntu:20.04
# MAINTAINER is deprecated in favor of LABEL metadata. The address below looks
# like a scraping placeholder rather than a real value -- TODO confirm.
MAINTAINER [email protected]

# Server version and install locations. Each *_CONF_DIR is the etc/ directory
# under the matching *_HOME.
ENV PRESTO_VERSION=315
ENV PRESTO_HOME=/opt/presto
ENV PRESTO_CONF_DIR=${PRESTO_HOME}/etc
ENV TRINO_VERSION=433
ENV TRINO_HOME=/opt/trino
ENV TRINO_CONF_DIR=${TRINO_HOME}/etc

# Add less for pagination; the apt lists directory is removed in the same
# layer so it does not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
less && \
rm -rf /var/lib/apt/lists/
# Install curl, ca-certificates, wget, python-is-python3 and openjdk-17-jdk,
# then clear the downloaded apt package lists in the same layer.
RUN apt-get update && \
apt-get install -y --no-install-recommends curl ca-certificates wget python-is-python3 openjdk-17-jdk && \
rm -rf /var/lib/apt/lists/*

# NOTE(review): this span reads like a rendered diff, so each Presto step is
# listed next to its Trino counterpart -- presumably only the Trino steps are
# in the real file; confirm against the repository.

# Download the Presto server tarball (following redirects), unpack it under
# /opt, symlink the versioned directory to PRESTO_HOME, create the data
# directory, and delete the tarball in the same layer.
RUN curl -L https://repo1.maven.org/maven2/io/trino/presto-server/${PRESTO_VERSION}/presto-server-${PRESTO_VERSION}.tar.gz -o /tmp/presto-server.tgz && \
tar -xzf /tmp/presto-server.tgz -C /opt && \
ln -s /opt/presto-server-${PRESTO_VERSION} ${PRESTO_HOME} && \
mkdir -p ${PRESTO_HOME}/data && \
rm -f /tmp/presto-server.tgz

# Fetch the Presto CLI executable jar from Maven Central into PRESTO_HOME/bin
ADD https://repo1.maven.org/maven2/io/trino/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar ${PRESTO_HOME}/bin/
# Download the Trino server tarball (following redirects), unpack it under
# /opt, symlink the versioned directory to TRINO_HOME, create the data
# directory, and delete the tarball in the same layer.
RUN curl -L https://repo1.maven.org/maven2/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz -o /tmp/trino-server.tgz && \
tar -xzf /tmp/trino-server.tgz -C /opt && \
ln -s /opt/trino-server-${TRINO_VERSION} ${TRINO_HOME} && \
mkdir -p ${TRINO_HOME}/data && \
rm -f /tmp/trino-server.tgz

# Mark the downloaded Presto CLI jar as executable
RUN chmod +x ${PRESTO_HOME}/bin/presto-cli-${PRESTO_VERSION}-executable.jar
# Fetch the Trino CLI executable jar from Maven Central into TRINO_HOME/bin
ADD https://repo1.maven.org/maven2/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar ${TRINO_HOME}/bin/

# Build argument selecting the plugin release for the Presto step (default: latest)
ARG PRESTO_TILEDB_VERSION=latest
# Mark the downloaded Trino CLI jar as executable
RUN chmod +x ${TRINO_HOME}/bin/trino-cli-${TRINO_VERSION}-executable.jar

# Download the plugin release selected by PRESTO_TILEDB_VERSION into
# PRESTO_HOME/plugin/tiledb.
# NOTE(review): this command is cut off after the curl line in this text; its
# remaining pipeline stages are presumably the grep/cut/tr/wget lines further
# down -- confirm.
RUN mkdir ${PRESTO_HOME}/plugin/tiledb && \
cd ${PRESTO_HOME}/plugin/tiledb && \
curl -s https://api.github.com/repos/TileDB-Inc/TileDB-Trino/releases/${PRESTO_TILEDB_VERSION} \
ARG TRINO_TILEDB_VERSION=1.17.2

# Download the jar asset(s) of the TileDB-Trino release tagged
# TRINO_TILEDB_VERSION into TRINO_HOME/plugin/tiledb: query the GitHub
# releases API, keep the browser_download_url lines that mention a jar, take
# the text after the first colon (the URL), strip the double quotes, and let
# wget read the resulting URL list from stdin.
# NOTE(review): the pipeline runs without pipefail, so a failing curl would
# not by itself fail this step.
RUN mkdir ${TRINO_HOME}/plugin/tiledb && \
cd ${TRINO_HOME}/plugin/tiledb && \
curl -s https://api.github.com/repos/TileDB-Inc/TileDB-Trino/releases/tags/${TRINO_TILEDB_VERSION} \
| grep "browser_download_url.*jar" \
| cut -d : -f 2,3 \
| tr -d \" \
| wget -i -

# NOTE(review): this span reads like a rendered diff, so each Presto line is
# listed next to its Trino counterpart -- presumably only the Trino lines are
# in the real file; confirm against the repository.

# Add entry script to start presto server and cli
ADD docker/entrypoint.sh ${PRESTO_HOME}/bin/
# Add entry script to start trino server and cli
ADD docker/entrypoint.sh ${TRINO_HOME}/bin/

# Make the copied entry script executable
RUN chmod +x ${PRESTO_HOME}/bin/entrypoint.sh
RUN chmod +x ${TRINO_HOME}/bin/entrypoint.sh

# Add example arrays from the test resources under /opt/tiledb_example_arrays
ADD src/test/resources/tiledb_arrays /opt/tiledb_example_arrays

# Work from the server home; the relative CMD path at the end resolves
# against this directory.
WORKDIR ${PRESTO_HOME}
WORKDIR ${TRINO_HOME}

# Add configuration parameters: copy docker/etc into the server's etc/ directory
COPY docker/etc ${PRESTO_HOME}/etc
COPY docker/etc ${TRINO_HOME}/etc

# Expose port for presto ui
# Expose port for trino ui
EXPOSE 8080

# Append the server's bin/ directory to PATH
ENV PATH=${PATH}:"${PRESTO_HOME}/bin"
ENV PATH=${PATH}:"${TRINO_HOME}/bin"

# Volumes for config and data (used for stats); declared after etc/ has been
# populated by the COPY above.
VOLUME ["${PRESTO_HOME}/etc", "${PRESTO_HOME}/data"]
VOLUME ["${TRINO_HOME}/etc", "${TRINO_HOME}/data"]

# Set default command to entry point script (exec form, relative to WORKDIR)
CMD ["./bin/entrypoint.sh"]
Expand Down
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,32 @@ see the [official TileDB documentation](https://docs.tiledb.io/en/latest/introdu
This connector allows running SQL on TileDB arrays via Trino. The TileDB-Trino interface supports column subselection on attributes and predicate pushdown on dimension fields, leading to superb performance for
projection and range queries.

## Docker

A quickstart Docker image is available. The Docker image starts a single-node
Trino cluster and opens the Trino CLI, where SQL can be run.
The image includes two example TileDB arrays:
`/opt/tiledb_example_arrays/dense_global` and `/opt/tiledb_example_arrays/sparse_global`.
Simply build and run:

```
docker build -t tiledb-trino .

docker run -it --rm tiledb-trino

```

or mount a local array into the Docker container with the `-v` option:

```
docker run -it --rm -v /local/array/path:/data/local_array tiledb-trino
```

In the above example, replace `/local/array/path` with the path to the
array folder on your local machine. The `/data/local_array` path is the
path you will use within the Docker container to access `/local/array/path`
(you can replace it with another path of your choice).


## Installation

Expand Down
10 changes: 5 additions & 5 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#!/bin/bash
# Container entry script: makes sure the server launcher is running, waits
# until the CLI can run a query against the server, then runs the CLI with the
# tiledb catalog and schema selected.
# NOTE(review): this text reads like a rendered diff, so each Presto line is
# followed by its Trino counterpart and a diff hunk marker sits inside the
# wait loop. Presumably only the Trino lines are in the real script, and some
# loop lines are hidden -- confirm against the repository.

# Start the server only when `launcher status` reports failure, then give it
# two seconds before polling.
if ! ${PRESTO_HOME}/bin/launcher status; then
${PRESTO_HOME}/bin/launcher start;
if ! ${TRINO_HOME}/bin/launcher status; then
${TRINO_HOME}/bin/launcher start;
sleep 2;
fi

# Poll once per second by running a trivial query through the CLI (output
# discarded), printing a dot for every failed attempt.
printf "Waiting for presto to initialize.."
until ${PRESTO_HOME}/bin/presto-cli-${PRESTO_VERSION}-executable.jar --execute 'SELECT * FROM system.runtime.nodes' &> /dev/null ;
printf "Waiting for trino to initialize.."
until ${TRINO_HOME}/bin/trino-cli-${TRINO_VERSION}-executable.jar --execute 'SELECT * FROM system.runtime.nodes' &> /dev/null ;
do
printf ".";
sleep 1;
Expand All @@ -15,4 +15,4 @@ do
done
printf "\n"

# Run the CLI against catalog/schema "tiledb", forwarding any arguments that
# were passed to this script.
${PRESTO_HOME}/bin/presto-cli-${PRESTO_VERSION}-executable.jar --schema tiledb --catalog tiledb "$@"
${TRINO_HOME}/bin/trino-cli-${TRINO_VERSION}-executable.jar --schema tiledb --catalog tiledb "$@"
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
<dependency>
<groupId>io.tiledb</groupId>
<artifactId>tiledb-java</artifactId>
<version>0.19.5</version>
<version>0.19.7</version>
</dependency>

<dependency>
Expand Down
1 change: 1 addition & 0 deletions src/main/java/io/trino/plugin/tiledb/TileDBModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public static Type prestoTypeFromTileDBType(Datatype type)
return BIGINT;
case TILEDB_STRING_ASCII:
case TILEDB_CHAR:
case TILEDB_STRING_UTF8:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs a follow-up to add a test for this.

return VARCHAR;
case TILEDB_FLOAT32:
return REAL;
Expand Down