diff --git a/.gitignore b/.gitignore
index a9364500c..4b7d1d6a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,7 @@ hillview.redo
*.log.lck
*.log.lck.*
*.pid
+*.pyc
out
apache-tomcat-*
web/classes
@@ -70,6 +71,7 @@ web/src/main/webapp/dist
hs_err_pid*
tmp
+repository/*.jar

# data which is too big to put into git
data/ontime/On_Time_On_Time*
diff --git a/README.md b/README.md
index f48fce9cd..3a51a3100 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,8 @@
![Hillview project logo](hillview-logo.png)

Hillview: a big data spreadsheet. Hillview is a cloud-based
-service for visualizing interactively large datasets.
-The hillview user interface executes in a browser.
+service for interactively visualizing large datasets.
+The Hillview user interface executes in a browser.

Contents:

@@ -115,7 +115,9 @@ Hillview uses `ssh` to deploy code on the cluster. Prior to deployment
you must set up `ssh` on the cluster to use password-less access to the
cluster machines, as described here: https://www.ssh.com/ssh/copy-id. You must also install Java on all
-machines in the cluster.
+machines in the cluster. Each machine in the cluster must allow
+connections on the network ports described in the [configuration
+file](#service-configuration).

*Please note that Hillview allows arbitrary access to files on the
worker nodes from the client application running with the privileges
@@ -123,8 +125,8 @@ of the user specified in the configuration file.*

## 3.1 Service configuration

-The configuration of the Hillview service is described in a Json file
-(enhanced with comments); two sample files are `bin/config.json`and
+The configuration of the Hillview service is described in a JSON file
+(enhanced with comments); two sample files are `bin/config.json` and
`bin/config-local.json`. The file `config-local.json` treats the local
machine as a one-machine cluster.

@@ -228,7 +230,7 @@ They are described [here](bin/README.md).

# 4. Developing Hillview

We only provide development instructions for Linux or MacOS, but there is
-no reason Hillview could not be developed on Windows.
+no reason Hillview could not be developed on Windows.

## 4.1. Software Dependencies

@@ -307,7 +309,7 @@ Subsequent builds can just run

```
$ bin/rebuild.sh
```

-Hillview is currently split into two separate Maven projects. One can
+Hillview is currently split into two separate Maven projects. One can
also build the two projects separately, as follows:

* platform: pure Java, includes the entire back-end. This produces a
@@ -342,7 +344,7 @@ standard.

Download and install Intellij IDEA: https://www.jetbrains.com/idea/.
The web project typescript requires the (paid) Ultimate version of Intellij.

-First run maven to generate the Java code automatically generated for gRPC:
+First run Maven to generate the Java code for gRPC:

```
$ cd platform
@@ -355,7 +357,7 @@ add three modules: web/pom.xml, platform/pom.xml, and the root folder hillview i

## 4.5. Setup VS Code

-Download and install Visual Studio Code: https://code.visualstudio.com/download.
+Download and install Visual Studio Code: https://code.visualstudio.com/download.
Here is a step-by-step guide to add the necessary extensions, run Maven commands, and attach a debugger:

1. Install these extensions and then restart VS Code.
@@ -364,16 +366,16 @@ Here is a step-by-step guide to add the necessary extensions, run Maven commands,
 - `Language Support for Java(TM) by Red Hat
redhat.java`: recognize projects with Maven or Gradle build in the directory hierarchy.
 - `Maven for Java`: provides a project explorer and shortcuts to execute Maven commands.
-2. Select `Add workspace folder...` at the Welcome page, then choose `hillview/platform/` directory. The platform module should be displayed in the `Explorer` view.
-3. Add `web` module to the workspace by clicking `File`->`Add Folder to Workspace...` and then choose `hillview/web/` directory.
+2. Select `Add workspace folder...` at the Welcome page, then choose the `hillview/platform/` directory. The platform module should be displayed in the `Explorer` view.
+3. Add the `web` module to the workspace by clicking `File`->`Add Folder to Workspace...` and then choose the `hillview/web/` directory.
4. Save the workspace by clicking `File`->`Save Workspace As...` and store it in your personal folder outside the `hillview/` root directory.
-5. Next, about executing Maven commands; in the `Explorer` view, click `MAVEN PROJECTS`. There are two Maven folders correspond to `web` and `platform` modules;
+5. Next, to execute Maven commands: in the `Explorer` view, click `MAVEN PROJECTS`. There are two Maven folders corresponding to the `web` and `platform` modules;
click those folders to expand and display the Maven pom files. The Maven commands will be displayed by right-clicking the pom files.
6. Finally, about attaching a debugger:
 - Bring up the `Run` view, select the `Run` icon in the `Activity Bar` on the left side of VS Code.
- - From the `Run` view, click `create a launch.json file`, you will see the `platform` and `web` modules listed. We will create two `launch.json` files, one for `platform` module and the other for `web` module.
- - When configuring the `launch.json` for `platform` module, you must select `Java` option. Otherwise, choose `Chrome (preview)` option when configuring the `web` module. Then, delete the auto generated `configurations`
- and specify the correct configuration to attach the debugger. The important fields are `url`, `hostname`, `port`, and `request`. More about this is here
+ - From the `Run` view, click `create a launch.json file`; you will see the `platform` and `web` modules listed. We will create two `launch.json` files, one for the `platform` module and the other for the `web` module.
+ - When configuring the `launch.json` for the `platform` module, you must select the `Java` option; choose the `Chrome (preview)` option when configuring the `web` module. Then, delete the auto-generated `configurations`
+ and specify the correct configuration to attach the debugger. The important fields are `url`, `hostname`, `port`, and `request`. More about this is described in
[VS Code Debugging#launch-configuration](https://code.visualstudio.com/docs/editor/debugging#_launch-configurations) and [VS Code#Java-Debugging](https://code.visualstudio.com/docs/java/java-debugging#_attach).

## 4.6 Debugging

@@ -381,7 +383,7 @@ redhat.java`: recognize projects with Maven or Gradle build in the directory hie
Debugging on a single machine can be done as follows:
- you can start the back-end service under the debugger, by starting
the HillviewBackend binary with command-line arguments 127.0.0.1:3569
-- you can start the front-end service by attaching
+- you can start the front-end service by attaching
to the Java process created by Java Tomcat.
The frontend-start.sh script has a line that sets up the environment
variables to enable this.

@@ -426,6 +428,6 @@ Here is a step-by-step guide to submitting contributions:
`@Nullable` annotation (from javax.annotation) for all pointers which
can be null.
Use `Converters.checkNull` to cast a @Nullable pointer to a non-null pointer.
-
+
* Some code executes on multiple machines or in multiple threads. In
particular, all classes that derive from `IMap` or `ISketch` should
be immutable.
diff --git a/bin/README.md b/bin/README.md
index 6200c9974..95b144de5 100644
--- a/bin/README.md
+++ b/bin/README.md
@@ -18,6 +18,9 @@ deploys it, and restarts the service
* `upload-file.sh`: Given a CSV file it will guess a schema for it and
upload it to a remote cluster chopped into small pieces.

+* `dump-greenplum.sh`: This script is used to connect Hillview
+  with [Greenplum](https://greenplum.org/) distributed databases.
+  It should be installed on each Greenplum worker machine.

The following are templates that are used to generate actual shell
scripts on a remote cluster when Hillview is installed
diff --git a/bin/delete-data.py b/bin/delete-data.py
index 2051552ea..7fa35bac0 100755
--- a/bin/delete-data.py
+++ b/bin/delete-data.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This script deletes a specific folder on all the machines in a Hillview cluster."""
# pylint: disable=invalid-name
diff --git a/bin/deploy-greenplum.py b/bin/deploy-greenplum.py
new file mode 100755
index 000000000..b48b2a5b5
--- /dev/null
+++ b/bin/deploy-greenplum.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# This script installs Hillview next to a Greenplum database.
+# It needs a config-greenplum.json file that has a description of the
+# Greenplum database installation. See also the section
+# on Greenplum installation from https://github.com/vmware/hillview/blob/master/docs/userManual.md
+
+# Copyright (c) 2020 VMware Inc. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# pylint: disable=invalid-name
+from argparse import ArgumentParser
+from jproperties import Properties
+import os
+from hillviewCommon import ClusterConfiguration, get_config, get_logger, execute_command
+
+def main():
+    parser = ArgumentParser()
+    parser.add_argument("config", help="json cluster configuration file")
+    args = parser.parse_args()
+    config = get_config(parser, args)
+
+    execute_command("./package-binaries.sh")
+    web = config.get_webserver()
+    web.copy_file_to_remote("../hillview-bin.zip", ".", "")
+    web.copy_file_to_remote("config-greenplum.json", ".", "")
+    web.run_remote_shell_command("unzip -o hillview-bin.zip")
+    web.run_remote_shell_command("cd bin; ./upload-data.py -d . -s dump-greenplum.sh config-greenplum.json")
+    web.run_remote_shell_command("cd bin; ./redeploy.sh -s config-greenplum.json")
+    web.copy_file_to_remote("../repository/PROGRESS_DATADIRECT_JDBC_DRIVER_PIVOTAL_GREENPLUM_5.1.4.000275.jar",
+                            config.service_folder + "/" + config.tomcat + "/lib", "")
+    # Generate properties file
+    with open("greenplum.properties", "rb") as f:
+        p = Properties()
+        p.load(f, "utf-8")
+    p["greenplumDumpScript"] = config.service_folder + "/dump-greenplum.sh"
+    with open("hillview.properties", "wb") as f:
+        p.store(f, encoding="utf-8")
+    web.copy_file_to_remote("hillview.properties", config.service_folder, "")
+    os.remove("hillview.properties")
+
+if __name__ == "__main__":
+    main()
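The script above is driven by a single positional argument, the JSON cluster-configuration file, and it runs `./package-binaries.sh` relative to the current directory, so it is meant to be started from `bin/`. A minimal sketch of an invocation, assuming a `config-greenplum.json` has been prepared next to the script:

```
$ cd bin
$ ./deploy-greenplum.py config-greenplum.json
```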
diff --git a/bin/deploy.py b/bin/deploy.py
index 4cfcacd65..4a89637c4 100755
--- a/bin/deploy.py
+++ b/bin/deploy.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This Python program deploys the files needed by the Hillview service
on the machines specified in the configuration file."""
diff --git a/bin/download-data.py b/bin/download-data.py
index 021da0c59..d8ba6c661 100755
--- a/bin/download-data.py
+++ b/bin/download-data.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This script takes a cluster configuration and a file pattern.
It downloads the files that match from all machines in the cluster."""
diff --git a/bin/dump-greenplum.sh b/bin/dump-greenplum.sh
new file mode 100644
index 000000000..b7af229b4
--- /dev/null
+++ b/bin/dump-greenplum.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Copyright (c) 2020 VMware Inc. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This script is used when connecting to a Greenplum database
+# to dump data in an external web table. See
+# https://gpdb.docs.pivotal.io/6-10/admin_guide/load/topics/g-defining-a-command-based-writable-external-web-table.html
+# and https://gpdb.docs.pivotal.io/6-10/ref_guide/sql_commands/CREATE_EXTERNAL_TABLE.html
+# The script receives the data to dump on stdin.
+
+# The single argument is the directory where the data is to be dumped
+DIR=$1
+PREFIX="file"
+mkdir -p ${DIR} || exit 1
+echo "$(cat)" >${DIR}/${PREFIX}${GP_SEGMENT_ID}
\ No newline at end of file
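Because the script simply copies its standard input to `${DIR}/${PREFIX}${GP_SEGMENT_ID}`, it can be smoke-tested outside Greenplum by supplying the `GP_SEGMENT_ID` environment variable that a segment normally provides; the sample data below is hypothetical:

```
$ echo "1,alice" | GP_SEGMENT_ID=0 ./dump-greenplum.sh /tmp/hillview-dump
$ cat /tmp/hillview-dump/file0
1,alice
```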
diff --git a/bin/greenplum.properties b/bin/greenplum.properties
new file mode 100644
index 000000000..15671b927
--- /dev/null
+++ b/bin/greenplum.properties
@@ -0,0 +1,11 @@
+# This properties file is a blueprint for a hillview.properties file
+# used with a Greenplum installation.
+
+###########################################################
+# Parameters interfacing Hillview with a Greenplum database
+
+# This script is invoked when data is dumped from an external web table
+greenplumDumpScript = /home/gpadmin/hillview/dump-greenplum.sh
+# This directory is used to store the data dumped from Greenplum before it's parsed by Hillview.
+# The directory must be writable by the segment hosts.
+greenplumDumpDirectory = /tmp
diff --git a/bin/hillviewCommon.py b/bin/hillviewCommon.py
index 657a5d6cc..6505e9797 100644
--- a/bin/hillviewCommon.py
+++ b/bin/hillviewCommon.py
@@ -1,6 +1,7 @@
"""Common functions used by the Hillview deployment scripts"""
# pylint: disable=invalid-name,too-few-public-methods, bare-except

+from __future__ import print_function
import os.path
import os
import subprocess
@@ -8,8 +9,12 @@ import json
import getpass
import logging
+import sys
from argparse import ArgumentParser

+is3 = sys.version_info[0] == 3
+print("Python version is", 3 if is3 else 2)
+
def get_logger(module_name):
    """ Returns the logger object """
    logger = logging.getLogger(module_name)
@@ -37,8 +42,10 @@ class RemoteHost:
    """Abstraction for a remote host"""
    def __init__(self, user, host, parent, heapsize="200M"):
        """Create a remote host"""
-        assert isinstance(user, str)
-        assert isinstance(host, str)
+        global is3
+        if is3:
+            assert isinstance(user, str)
+            assert isinstance(host, str)
        assert parent is None or isinstance(parent, RemoteHost)
        self.host = host
        self.user = user
diff --git a/bin/install-dependencies.sh b/bin/install-dependencies.sh
index de95526b9..1d5266cce 100755
--- a/bin/install-dependencies.sh
+++ b/bin/install-dependencies.sh
@@ -32,6 +32,7 @@ esac
${SUDO} ${INSTALL} install wget maven ${NODEJS} ${NPM} ${LIBFORTRAN} unzip gzip python3
echo "Installing typescript compiler"
${SUDO} npm install -g typescript@3.9.7
+pip install jproperties

# Download apache if not there.
pushd ..
@@ -75,6 +76,6 @@ popd

# Install Cassandra and populate a test database
if [ ${INSTALL_CASSANDRA} -eq 1 ]; then
-    ./${mydir}/install-cassandra.sh
-    sudo apt install mysql-server
+    ./${mydir}/install-cassandra.sh
+    sudo apt install mysql-server
fi
diff --git a/bin/package-binaries.sh b/bin/package-binaries.sh
index 5a317d198..e22249d7f 100755
--- a/bin/package-binaries.sh
+++ b/bin/package-binaries.sh
@@ -4,12 +4,21 @@
# Should be run after the binaries have been built.
# This archive has to be unpacked in the toplevel Hillview folder.

-set -e
+set -ex
ARCHIVE=hillview-bin.zip
+#TARARCHIVE=hillview.tar.gz
+#echo "Creating ${ARCHIVE} and ${TARARCHIVE} in toplevel directory."
echo "Creating ${ARCHIVE} in toplevel directory."
cd ..
+
+FILES="platform/target/hillview-server-jar-with-dependencies.jar web/target/web-1.0-SNAPSHOT.war platform/target/DataUpload-jar-with-dependencies.jar bin/*.py bin/*.sh bin/*.bat bin/config.json bin/config-local.json"
+
rm -f ${ARCHIVE}
-zip ${ARCHIVE} platform/target/hillview-server-jar-with-dependencies.jar web/target/web-1.0-SNAPSHOT.war platform/target/DataUpload-jar-with-dependencies.jar bin/*.py bin/*.sh bin/*.bat bin/config.json bin/config-local.json
+zip ${ARCHIVE} ${FILES}
+
+#rm -f ${TARARCHIVE}
+#tar cvfz ${TARARCHIVE} ${FILES}
+
cd bin
diff --git a/bin/rebuild.sh b/bin/rebuild.sh
index b51f906aa..5c8683a62 100755
--- a/bin/rebuild.sh
+++ b/bin/rebuild.sh
@@ -44,8 +44,8 @@ if [ "x${TOOLSARGS}" != "x" ]; then
fi
export MAVEN_OPTS="-Xmx2048M"
pushd ${mydir}/../platform
-mvn ${TOOLSARGS} ${TESTARGS} clean install
+mvn ${TOOLSARGS} ${TESTARGS} install
popd
pushd ${mydir}/../web
-mvn ${TESTARGS} clean package
+mvn ${TESTARGS} package
popd
diff --git a/bin/run-on-all.py b/bin/run-on-all.py
index cf210dcc0..890642ce9 100755
--- a/bin/run-on-all.py
+++ b/bin/run-on-all.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
# -*-python-*-
"""This script runs a command on all worker hosts of a Hillview cluster."""
diff --git a/bin/start.py b/bin/start.py
index 3603df997..c663f7fd7 100755
--- a/bin/start.py
+++ b/bin/start.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This Python program starts the Hillview service on the machines
specified in the configuration file."""
diff --git a/bin/status.py b/bin/status.py
index 0dc2def2e..e91fce27b 100755
--- a/bin/status.py
+++ b/bin/status.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This program checks if the Hillview service is running on all machines
specified in the configuration file."""
diff --git a/bin/stop.py b/bin/stop.py
index fb4a1e6f3..855b21810 100755
--- a/bin/stop.py
+++ b/bin/stop.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This Python program stops the Hillview service on the machines
specified in the configuration file."""
diff --git a/bin/upload-data.py b/bin/upload-data.py
index 6c48ab193..297dbf3cb 100755
--- a/bin/upload-data.py
+++ b/bin/upload-data.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+# We attempted to make this program work with both python2 and python3
"""This script takes a set of files and a cluster configuration describing a set of machines.
It uploads the files to the given machines in round-robin fashion.
@@ -78,7 +79,7 @@ def main():
    if args.files:
        copy_files(config, folder, args.files, copyOptions)
    else:
-        logger.error("No files to upload to the machines provided in a Hillview configuration")
+        logger.info("No files to upload to the machines provided in a Hillview configuration")
    logger.info("Done.")

if __name__ == "__main__":
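The shebang changes above make every script run under whichever `python` interpreter is first on the `PATH`, so it is worth smoke-testing the scripts under both interpreter families after a change. A sketch, assuming the scripts are invoked from `bin/` with a cluster description such as `config.json` (the exact arguments are those accepted by each script's `ArgumentParser`):

```
$ cd bin
$ python2 status.py config.json
$ python3 status.py config.json
```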
diff --git a/deployment/README.md b/deployment/README.md
index 85662516e..30754a624 100644
--- a/deployment/README.md
+++ b/deployment/README.md
@@ -2,20 +2,12 @@
We are deprecating Ansible for Hillview's management; the deployment is
now done using just: [Python scripts](../README.md#3-deploying-the-hillview-service-on-a-cluster).

-This folder contains some left-over ansible scripts for managing
-software installation on the cluster; they will be eventually
-deprecated.
+This folder contains a left-over Ansible script for installing
+Java on the cluster.

-* install-java.yaml: installs Java 8 on the machines specified in the
-  configuration file (see below)
+## Cluster configuration file

-* demo-data-cleaner.yaml: creates a flights dataset with only 15
-  columns from the full dataset on all machines in the cluster (the
-  full dataset must be already installed)
-
-### Cluster configuration file
-
-The cluster where the Hillview service is deployed is described by a
+The cluster where the Hillview service is deployed is described by a
`hosts` file containing the list of machines where the service should
be deployed. Here is an example of such a file:

@@ -29,7 +21,7 @@ deployed.
```
The `web` group describes the front-end web server.
- The `backends` group lists all machines running the `hillview`
-service. Individual machines heap sizes can be specified by setting
-the machine's `heap_size` variable.
+service. The script is invoked using:
+
+`ansible-playbook -i hosts -u user install-java.yaml`
diff --git a/deployment/demo-data-cleaner.yaml b/deployment/demo-data-cleaner.yaml
deleted file mode 100644
index ced11f052..000000000
--- a/deployment/demo-data-cleaner.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
----
-- hosts: backends
-  tasks:
-  - name: Cleanup demo data
-    shell: cd hillview; java -jar data-cleaner-jar-with-dependencies.jar
diff --git a/docs/federated-mysql.png b/docs/federated-mysql.png
new file mode 100755
index 000000000..52f8cd24f
Binary files /dev/null and b/docs/federated-mysql.png differ
diff --git a/docs/greenplum-integration.png b/docs/greenplum-integration.png
new file mode 100755
index 000000000..54e8b22d4
Binary files /dev/null and b/docs/greenplum-integration.png differ
diff --git a/docs/hillview-cassandra.png b/docs/hillview-cassandra.png
new file mode 100755
index 000000000..1db120c2e
Binary files /dev/null and b/docs/hillview-cassandra.png differ
diff --git a/docs/userManual.md b/docs/userManual.md
index f085563fa..38b755153 100644
--- a/docs/userManual.md
+++ b/docs/userManual.md
@@ -26,7 +26,7 @@ one row for an airline flight. Columns in this dataset include: the date of the
the origin and destination cities, the origin and destination states,
the origin airport code, the distance flown, the departure and arrival delay.

-Updated on 2020 Sep 02.
+Updated on 2020 Sep 11.

# Contents
* 1 [Basic concepts](#1-basic-concepts)
@@ -38,6 +38,7 @@ Updated on 2020 Sep 02.
  * 2.3 [Interval values](#23-interval-values)
  * 2.4 [Data conversions](#24-data-conversions)
    * 2.4.1 [JavaScript conversions](#241-javascript-conversions)
+   * 2.4.2 [JDBC conversions](#242-jdbc-conversions)
  * 2.5 [Metadata](#25-metadata)
    * 2.5.1 [Mapping a dataset to a metadata directory](#251-mapping-a-dataset-to-a-metadata-directory)
    * 2.5.2 [Data schema](#252-data-schema)
@@ -57,8 +58,10 @@ Updated on 2020 Sep 02.
  * 3.3.5 [Reading JSON files](#335-reading-json-files)
  * 3.3.6 [Reading ORC files](#336-reading-orc-files)
  * 3.3.7 [Reading data from SQL databases](#337-reading-data-from-sql-databases)
-  * 3.3.8 [Reading data from Cassandra databases](#338-reading-data-from-cassandra-databases)
-  * 3.3.9 [Reading Parquet files](#339-reading-parquet-files)
+    * 3.3.7.1 [Reading data from Greenplum databases](#3371-reading-data-from-greenplum-databases)
+    * 3.3.7.2 [Reading from a federated set of MySQL databases](#3372-reading-from-a-federated-set-of-mysql-databases)
+    * 3.3.7.3 [Reading data from Cassandra databases](#3373-reading-data-from-cassandra-databases)
+  * 3.3.8 [Reading Parquet files](#338-reading-parquet-files)
  * 3.4 [Navigating multiple datasets](#34-navigating-multiple-datasets)
* 4 [Data views](#4-data-views)
  * 4.1 [The heading of a view](#41-the-heading-of-a-view)
@@ -209,7 +212,6 @@ in buckets [2,3) and [3,4).

In Javascript the intervals are exposed as arrays with two numeric
values.

-
### 2.4 Data conversions

This section describes how Hillview data is represented in various external
@@ -233,6 +235,21 @@ Here is how various Hillview datatypes are represented in JavaScript:
|`Duration`|A JavaScript number representing the number of milliseconds in the duration.|
|`Interval`|An array with two JavaScript number values.|

+#### 2.4.2 JDBC conversions
+
+When reading data from a JDBC source, Hillview applies the following conversions:
+
+|JDBC datatypes|Hillview representation|
+|-------------|-----------------------|
+|`TINYINT`,`SMALLINT`,`INTEGER`|`Integer`|
+|`BOOLEAN`,`BIT`|`String` ("false", "true")|
+|`BIGINT`,`FLOAT`,`REAL`,`DOUBLE`,`DECIMAL`,`NUMERIC`|`Double`|
+|`CHAR`,`VARCHAR`,`LONGVARCHAR`,`NVARCHAR`,`LONGNVARCHAR`,`SQLXML`|`String`|
+|`DATE`,`TIME`,`TIMESTAMP`|`LocalDate`|
+|`TIME_WITH_TIMEZONE`,`TIMESTAMP_WITH_TIMEZONE`|`Date`|
+|`NULL`|`None`|
+|Other|Error: not supported|
+
### 2.5 Metadata

This section describes various kinds of metadata manipulated by Hillview.
@@ -406,7 +423,7 @@ storage.
files](#335-reading-json-files).

* Parquet files: allows the user to [read the data from a set of
-  Parquet files](#339-reading-parquet-files).
+  Parquet files](#338-reading-parquet-files).

* ORC files: allows the user to [read the data from a set of ORC
files](#336-reading-orc-files).
@@ -617,27 +634,18 @@ file it will perform type conversions at loading time, as follows:

#### 3.3.7 Reading data from SQL databases

-The following menu allows the user to load data from a set of
-federated databases that are exposed as a JDBC service. *Each worker
-machine in the cluster will attempt to connect to the database
-independently.* This works best when a separate database server is
-deployed on each local Hillview machine hosting a worker.
-
-Currently there is no way to load data from a single external database
-when Hillview is deployed as a cloud service; however, data can be
-loaded from a database when Hillview is deployed as a service running
-on the local user machine.
-
+Hillview can load data from a parallel database or from a federated
+set of databases that expose a JDBC service.
The following menu allows the user to specify the data to load.

![Specifying database connections](db-menu-mysql.png)

* database kind: A drop-down menu indicating the kind of database to
-  load data from. Currently we support 'mysql', 'impala', and 'cassandra'.
+  load data from. Currently we support 'mysql',
+  'greenplum', and 'cassandra'. Each of these is discussed in a separate
+  section below.

-* host: The network name of a machine hosting the database. *TODO*
-  this should be a pattern enabling each worker to specify a different
-  machine.
+* host: The network name of a machine hosting the database.

* port: The network port where the database service is listening.
@@ -649,11 +657,70 @@ The following menu allows the user to specify the data to load.

* password: Credentials of the user connecting to the database.

-Numeric values are converted either to integers (if they fit into
-32-bits) or to doubles. Boolean values are read as strings
-containing two values, "true" and "false".
+##### 3.3.7.1 Reading data from Greenplum databases
+
+Hillview can read data from a [Greenplum massively parallel database](https://greenplum.org/).
+The following diagram illustrates how Hillview interacts with Greenplum.
+
+![Hillview-Greenplum integration](greenplum-integration.png)
+
+* The Hillview root node can run anywhere (including the same machine as the Master Host),
+  but it needs to be able to open
+  a JDBC connection to the Greenplum Master Host. The Master Host must be specified
+  as `host` in the connection dialog shown in [the section above](#337-reading-data-from-sql-databases).
+  The default network port for Greenplum is `5432`.
+
+* Each Hillview worker must be deployed on the same machine as a
+  Greenplum segment host, to ensure high-bandwidth access to the data.
+  The Hillview [configuration file](../README.md#31-service-configuration)
+  should have one worker for each segment host. Hillview aggregators are
+  optional. This requires Java to be installed on all segment machines.
+
+* The network firewall must allow connections to the Hillview workers' network port.
+
+* The Hillview workers must be able to read and delete files written by the
+  Greenplum segment hosts.

-#### 3.3.8 Reading data from Cassandra databases
+* We provide a script `../bin/deploy-greenplum.py` to aid in the deployment
+  of Hillview next to a Greenplum database. The script is invoked with a
+  Hillview cluster configuration file as an argument.
+
+The interaction between Hillview and Greenplum proceeds as follows (see the sketch after this list):
+
+1. The user initiates a connection to a Greenplum database by filling the
+   form shown in [the section above](#337-reading-data-from-sql-databases).
+
+2. The Hillview root node initiates a JDBC connection to the Greenplum
+   Master Host. Using this connection the Hillview root node obtains
+   the schema of the data.
+
+3. The Hillview root node instructs Greenplum to dump the data in the
+   table in parallel on all segment hosts (using Greenplum
+   [EXTERNAL WEB TABLES](https://gpdb.docs.pivotal.io/6-10/ref_guide/sql_commands/CREATE_EXTERNAL_TABLE.html)).
+
+4. The Hillview root node instructs all Hillview workers to read the
+   dumped data, passing along the schema previously obtained.
+
+5. From this point on Hillview no longer needs to interact with Greenplum.
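Step 3 above relies on Greenplum's command-based writable external web tables: each segment host runs the configured dump script and pipes its slice of the table to the script's standard input. The statements below sketch that mechanism for illustration only; the table, database, and dump-directory names are hypothetical, and the exact statements Hillview generates may differ:

```
$ psql -d mydb <<'EOF'
CREATE WRITABLE EXTERNAL WEB TABLE hillview_dump (LIKE mytable)
    EXECUTE '/home/gpadmin/hillview/dump-greenplum.sh /tmp/hillview'
    FORMAT 'CSV';
INSERT INTO hillview_dump SELECT * FROM mytable;
DROP EXTERNAL TABLE hillview_dump;
EOF
```

Each segment then leaves its slice in `/tmp/hillview/file<segment-id>`, where the local Hillview worker reads (and afterwards deletes) it.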
+##### 3.3.7.2 Reading from a federated set of MySQL databases
+
+The image below shows a system where Hillview reads directly from a set of
+independent MySQL databases (this can be easily extended
+to any database that supports JDBC).
+
+![Hillview reading from a federated MySQL set of databases](federated-mysql.png)
+
+In this model an independent Hillview worker is deployed on each machine
+hosting a database. The main assumption is that the data management system
+shards tables across databases such that different shards of a table
+are stored with the same table name across different databases.
+Hillview allows the user to visualize the union of all table fragments.
+The JDBC connection parameters introduced by the user in the dialog
+shown in [the section above](#337-reading-data-from-sql-databases) describe
+all of the workers' connections simultaneously.
+
+##### 3.3.7.3 Reading data from Cassandra databases

Hillview can read data from [Cassandra distributed databases](https://cassandra.apache.org/).
For this purpose a Hillview worker should be deployed on each Cassandra node.
@@ -661,6 +728,8 @@ Moreover, Hillview must have read access to Cassandra's SSTables.
Hillview assumes that no writes are in progress while reading the data
from storage.

+![Hillview reading from a Cassandra database](hillview-cassandra.png)
+
The following menu allows the user to specify the data to load.

![Specifying Cassandra database connections](db-menu-cassandra.png)
@@ -687,7 +756,7 @@ The following menu allows the user to specify the data to load.

* password: Credentials of the user connecting to the database.

-#### 3.3.9 Reading Parquet files
+#### 3.3.8 Reading Parquet files

Hillview can read data from [Apache Parquet files](http://parquet.apache.org), a columnar storage format. The
diff --git a/docs/userManual.src b/docs/userManual.src
index 7f55aa87d..7a1df007d 100644
--- a/docs/userManual.src
+++ b/docs/userManual.src
@@ -133,7 +133,6 @@ in buckets [2,3) and [3,4).

In Javascript the intervals are exposed as arrays with two numeric
values.

-
### Data conversions

This section describes how Hillview data is represented in various external
@@ -157,6 +156,21 @@ Here is how various Hillview datatypes are represented in JavaScript:
|`Duration`|A JavaScript number representing the number of milliseconds in the duration.|
|`Interval`|An array with two JavaScript number values.|

+#### JDBC conversions
+
+When reading data from a JDBC source, Hillview applies the following conversions:
+
+|JDBC datatypes|Hillview representation|
+|-------------|-----------------------|
+|`TINYINT`,`SMALLINT`,`INTEGER`|`Integer`|
+|`BOOLEAN`,`BIT`|`String` ("false", "true")|
+|`BIGINT`,`FLOAT`,`REAL`,`DOUBLE`,`DECIMAL`,`NUMERIC`|`Double`|
+|`CHAR`,`VARCHAR`,`LONGVARCHAR`,`NVARCHAR`,`LONGNVARCHAR`,`SQLXML`|`String`|
+|`DATE`,`TIME`,`TIMESTAMP`|`LocalDate`|
+|`TIME_WITH_TIMEZONE`,`TIMESTAMP_WITH_TIMEZONE`|`Date`|
+|`NULL`|`None`|
+|Other|Error: not supported|
+
### Metadata

This section describes various kinds of metadata manipulated by Hillview.
@@ -541,27 +555,18 @@ file it will perform type conversions at loading time, as follows:

#### Reading data from SQL databases

-The following menu allows the user to load data from a set of
-federated databases that are exposed as a JDBC service. *Each worker
-machine in the cluster will attempt to connect to the database
-independently.* This works best when a separate database server is
-deployed on each local Hillview machine hosting a worker.
-
-Currently there is no way to load data from a single external database
-when Hillview is deployed as a cloud service; however, data can be
-loaded from a database when Hillview is deployed as a service running
-on the local user machine.
-
+Hillview can load data from a parallel database or from a federated
+set of databases that expose a JDBC service.
The following menu allows the user to specify the data to load.

![Specifying database connections](db-menu-mysql.png)

* database kind: A drop-down menu indicating the kind of database to
-  load data from. Currently we support 'mysql', 'impala', and 'cassandra'.
+  load data from. Currently we support 'mysql',
+  'greenplum', and 'cassandra'. Each of these is discussed in a separate
+  section below.

-* host: The network name of a machine hosting the database. *TODO*
-  this should be a pattern enabling each worker to specify a different
-  machine.
+* host: The network name of a machine hosting the database.

* port: The network port where the database service is listening.
@@ -573,11 +578,70 @@ The following menu allows the user to specify the data to load.

* password: Credentials of the user connecting to the database.

-Numeric values are converted either to integers (if they fit into
-32-bits) or to doubles. Boolean values are read as strings
-containing two values, "true" and "false".
+##### Reading data from Greenplum databases
+
+Hillview can read data from a [Greenplum massively parallel database](https://greenplum.org/).
+The following diagram illustrates how Hillview interacts with Greenplum.
+
+![Hillview-Greenplum integration](greenplum-integration.png)
+
+* The Hillview root node can run anywhere (including the same machine as the Master Host),
+  but it needs to be able to open
+  a JDBC connection to the Greenplum Master Host. The Master Host must be specified
+  as `host` in the connection dialog shown in [the section above](#reading-data-from-sql-databases).
+  The default network port for Greenplum is `5432`.
+
+* Each Hillview worker must be deployed on the same machine as a
+  Greenplum segment host, to ensure high-bandwidth access to the data.
+  The Hillview [configuration file](../README.md#31-service-configuration)
+  should have one worker for each segment host. Hillview aggregators are
+  optional. This requires Java to be installed on all segment machines.
+
+* The network firewall must allow connections to the Hillview workers' network port.
+
+* The Hillview workers must be able to read and delete files written by the
+  Greenplum segment hosts.

-#### Reading data from Cassandra databases
+* We provide a script `../bin/deploy-greenplum.py` to aid in the deployment
+  of Hillview next to a Greenplum database. The script is invoked with a
+  Hillview cluster configuration file as an argument.
+
+The interaction between Hillview and Greenplum proceeds as follows:
+
+1. The user initiates a connection to a Greenplum database by filling the
+   form shown in [the section above](#reading-data-from-sql-databases).
+
+2. The Hillview root node initiates a JDBC connection to the Greenplum
+   Master Host. Using this connection the Hillview root node obtains
+   the schema of the data.
+
+3. The Hillview root node instructs Greenplum to dump the data in the
+   table in parallel on all segment hosts (using Greenplum
+   [EXTERNAL WEB TABLES](https://gpdb.docs.pivotal.io/6-10/ref_guide/sql_commands/CREATE_EXTERNAL_TABLE.html)).
+
+4. The Hillview root node instructs all Hillview workers to read the
+   dumped data, passing along the schema previously obtained.
+
+5. From this point on Hillview no longer needs to interact with Greenplum.
+
+##### Reading from a federated set of MySQL databases
+
+The image below shows a system where Hillview reads directly from a set of
+independent MySQL databases (this can be easily extended
+to any database that supports JDBC).
+
+![Hillview reading from a federated MySQL set of databases](federated-mysql.png)
+
+In this model an independent Hillview worker is deployed on each machine
+hosting a database. The main assumption is that the data management system
+shards tables across databases such that different shards of a table
+are stored with the same table name across different databases.
+Hillview allows the user to visualize the union of all table fragments.
+The JDBC connection parameters introduced by the user in the dialog
+shown in [the section above](#reading-data-from-sql-databases) describe
+all of the workers' connections simultaneously.
+
+##### Reading data from Cassandra databases

Hillview can read data from [Cassandra distributed databases](https://cassandra.apache.org/).
For this purpose a Hillview worker should be deployed on each Cassandra node.
@@ -585,6 +649,8 @@ Moreover, Hillview must have read access to Cassandra's SSTables.
Hillview assumes that no writes are in progress while reading the data
from storage.

+![Hillview reading from a Cassandra database](hillview-cassandra.png)
+
The following menu allows the user to specify the data to load.

![Specifying Cassandra database connections](db-menu-cassandra.png)
diff --git a/platform/src/main/java/org/hillview/LazySchema.java b/platform/src/main/java/org/hillview/LazySchema.java
new file mode 100644
index 000000000..7c58adbd7
--- /dev/null
+++ b/platform/src/main/java/org/hillview/LazySchema.java
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 VMware Inc. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.hillview;
+
+import org.hillview.table.Schema;
+import org.hillview.utils.Converters;
+import org.hillview.utils.Utilities;
+
+import javax.annotation.Nullable;
+import java.nio.file.Paths;
+
+/**
+ * A LazySchema may contain only a Path, loading the schema on demand from the filesystem.
+ */ +public class LazySchema { + @Nullable + Schema schema; + @Nullable + String schemaPath; + + public LazySchema() { + this.schema = null; + this.schemaPath = null; + } + + public LazySchema(Schema schema) { + this.schema = schema; + this.schemaPath = null; + } + + public LazySchema(@Nullable String schemaPath) { + this.schema = null; + this.schemaPath = schemaPath; + } + + @Nullable + public synchronized Schema getSchema() { + if (this.schema == null) { + if (Utilities.isNullOrEmpty(this.schemaPath)) + return null; + this.schema = Schema.readFromJsonFile( + Paths.get(Converters.checkNull(this.schemaPath))); + } + return this.schema; + } + + public boolean isNull() { + return this.schema == null && Utilities.isNullOrEmpty(this.schemaPath); + } +} diff --git a/platform/src/main/java/org/hillview/main/BatchLogAnalysis.java b/platform/src/main/java/org/hillview/main/BatchLogAnalysis.java index ee0a68012..1a3f7537a 100644 --- a/platform/src/main/java/org/hillview/main/BatchLogAnalysis.java +++ b/platform/src/main/java/org/hillview/main/BatchLogAnalysis.java @@ -57,7 +57,7 @@ private static HeatmapData heatmapErrTime(FileSetDescription desc, int numOfTime /* Load data through file desc */ Empty e = Empty.getInstance(); LocalDataSet local = new LocalDataSet(e); - IMap> finder = new FindFilesMap(desc); + IMap> finder = new FindFilesMap<>(desc); IDataSet found = local.blockingFlatMap(finder); IMap loader = new LoadFilesMap(); IDataSet table = found.blockingMap(loader); diff --git a/platform/src/main/java/org/hillview/main/DataUpload.java b/platform/src/main/java/org/hillview/main/DataUpload.java index b0a71f3e6..46884ae74 100644 --- a/platform/src/main/java/org/hillview/main/DataUpload.java +++ b/platform/src/main/java/org/hillview/main/DataUpload.java @@ -20,6 +20,7 @@ import javax.annotation.Nullable; import org.apache.commons.io.FilenameUtils; +import org.hillview.LazySchema; import org.hillview.management.ClusterConfig; import org.hillview.storage.*; import org.hillview.table.Schema; @@ -196,7 +197,8 @@ public int run(String... 
args) throws Exception { ldr.addFixedColumns = false; // we don't need these loader = ldr; } else { - loader = new CsvFileLoader(parameters.filename, parsConfig, parameters.inputSchemaName); + loader = new CsvFileLoader(parameters.filename, parsConfig, + new LazySchema(parameters.inputSchemaName)); } parts = this.chop(loader, config, parameters); diff --git a/platform/src/main/java/org/hillview/main/DemoDataCleaner.java b/platform/src/main/java/org/hillview/main/DemoDataCleaner.java index 098bf128e..4d2277b42 100644 --- a/platform/src/main/java/org/hillview/main/DemoDataCleaner.java +++ b/platform/src/main/java/org/hillview/main/DemoDataCleaner.java @@ -17,6 +17,7 @@ package org.hillview.main; +import org.hillview.LazySchema; import org.hillview.storage.CsvFileLoader; import org.hillview.storage.CsvFileWriter; import org.hillview.storage.OrcFileWriter; @@ -59,8 +60,8 @@ public static void main(String[] args) throws IOException { CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = true; - CsvFileLoader r = new CsvFileLoader(filename, config, dataFolder + "/On_Time.schema"); - + CsvFileLoader r = new CsvFileLoader(filename, config, + new LazySchema(dataFolder + "/On_Time.schema")); System.out.println("Reading " + f); ITable tbl = r.load(); assert tbl != null; diff --git a/platform/src/main/java/org/hillview/maps/FindFilesMap.java b/platform/src/main/java/org/hillview/maps/FindFilesMap.java index 06ab99dbf..2f29b68d0 100644 --- a/platform/src/main/java/org/hillview/maps/FindFilesMap.java +++ b/platform/src/main/java/org/hillview/maps/FindFilesMap.java @@ -18,7 +18,6 @@ package org.hillview.maps; import org.apache.commons.io.filefilter.WildcardFileFilter; -import org.hillview.dataset.api.Empty; import org.hillview.dataset.api.IMap; import org.hillview.storage.FileSetDescription; import org.hillview.storage.IFileReference; @@ -40,8 +39,9 @@ /** * Scans a folder and finds files matching a pattern. Creates a list of file * loaders that can be invoked to load the actual file data as tables. + * T is not used for anything. */ -public class FindFilesMap implements IMap> { +public class FindFilesMap implements IMap> { static final long serialVersionUID = 1; private final FileSetDescription description; @@ -53,11 +53,10 @@ public FindFilesMap(FileSetDescription description) { * Returns a list of IFileReference objects, one for each of the files that * match the specification. * - * @param empty: - * unused. 
+ * @param unused: not used */ @Override - public List apply(@Nullable Empty empty) { + public List apply(@Nullable T unused) { String[] paths = this.description.fileNamePattern.trim().split("\\s*,\\s*"); HillviewLogger.instance.info("Find files", "pattern: {0}", this.description.fileNamePattern); List files = new ArrayList(); diff --git a/platform/src/main/java/org/hillview/maps/LoadDatabaseTableMap.java b/platform/src/main/java/org/hillview/maps/LoadDatabaseTableMap.java index c1ec7524d..1d2df3ea5 100644 --- a/platform/src/main/java/org/hillview/maps/LoadDatabaseTableMap.java +++ b/platform/src/main/java/org/hillview/maps/LoadDatabaseTableMap.java @@ -19,9 +19,9 @@ import org.hillview.dataset.api.Empty; import org.hillview.dataset.api.IMap; -import org.hillview.storage.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcConnectionInformation; import org.hillview.table.api.ITable; -import org.hillview.storage.JdbcDatabase; +import org.hillview.storage.jdbc.JdbcDatabase; import javax.annotation.Nullable; import java.sql.SQLException; diff --git a/platform/src/main/java/org/hillview/storage/CassandraConnectionInfo.java b/platform/src/main/java/org/hillview/storage/CassandraConnectionInfo.java index b20153a23..3dec596a5 100644 --- a/platform/src/main/java/org/hillview/storage/CassandraConnectionInfo.java +++ b/platform/src/main/java/org/hillview/storage/CassandraConnectionInfo.java @@ -17,6 +17,8 @@ package org.hillview.storage; +import org.hillview.storage.jdbc.JdbcConnectionInformation; + /** * This information is required to open a Cassandra database connection. */ diff --git a/platform/src/main/java/org/hillview/storage/ColumnLimits.java b/platform/src/main/java/org/hillview/storage/ColumnLimits.java index 59c410873..4bfe05f93 100644 --- a/platform/src/main/java/org/hillview/storage/ColumnLimits.java +++ b/platform/src/main/java/org/hillview/storage/ColumnLimits.java @@ -48,7 +48,7 @@ public void put(RangeFilterDescription filter) { this.columnLimits.put(filter.cd.name, filter); } - Collection allFilters() { + public Collection allFilters() { return this.columnLimits.values(); } diff --git a/platform/src/main/java/org/hillview/storage/CsvFileLoader.java b/platform/src/main/java/org/hillview/storage/CsvFileLoader.java index 9af283c7b..0ff17f433 100644 --- a/platform/src/main/java/org/hillview/storage/CsvFileLoader.java +++ b/platform/src/main/java/org/hillview/storage/CsvFileLoader.java @@ -20,6 +20,7 @@ import com.univocity.parsers.csv.CsvFormat; import com.univocity.parsers.csv.CsvParser; import com.univocity.parsers.csv.CsvParserSettings; +import org.hillview.LazySchema; import org.hillview.table.api.*; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; @@ -27,11 +28,9 @@ import org.hillview.table.membership.FullMembershipSet; import org.hillview.table.rows.GuessSchema; import org.hillview.utils.HillviewLogger; -import org.hillview.utils.Utilities; import javax.annotation.Nullable; import java.io.*; -import java.nio.file.Paths; /** * Knows how to read a CSV file (comma-separated file). 
@@ -56,16 +55,15 @@ public static class Config implements Serializable { private final Config configuration; @Nullable private Schema actualSchema; - @Nullable - private final String schemaPath; + private final LazySchema schema; private boolean guessSchema; - public CsvFileLoader(String path, Config configuration, @Nullable String schemaPath) { + public CsvFileLoader(String path, Config configuration, LazySchema schema) { super(path); this.configuration = configuration; - this.schemaPath = schemaPath; + this.schema = schema; this.allowFewerColumns = configuration.allowFewerColumns; - this.guessSchema = Utilities.isNullOrEmpty(schemaPath); + this.guessSchema = this.schema.isNull(); } @Nullable @@ -77,9 +75,7 @@ public CsvFileLoader(String path, Config configuration, @Nullable String schemaP @Override public void prepareLoading() { - if (!Utilities.isNullOrEmpty(this.schemaPath)) - this.actualSchema = Schema.readFromJsonFile(Paths.get(this.schemaPath)); - + this.actualSchema = this.schema.getSchema(); this.file = this.getFileReader(); CsvParserSettings settings = new CsvParserSettings(); CsvFormat format = new CsvFormat(); diff --git a/platform/src/main/java/org/hillview/storage/FileSetDescription.java b/platform/src/main/java/org/hillview/storage/FileSetDescription.java index 5ca16124c..0a9063e11 100644 --- a/platform/src/main/java/org/hillview/storage/FileSetDescription.java +++ b/platform/src/main/java/org/hillview/storage/FileSetDescription.java @@ -17,9 +17,12 @@ package org.hillview.storage; +import org.hillview.LazySchema; import org.hillview.dataset.api.IJson; +import org.hillview.table.Schema; import org.hillview.table.api.ITable; import org.hillview.utils.Converters; +import org.hillview.utils.HillviewLogger; import org.hillview.utils.Utilities; import javax.annotation.Nullable; @@ -49,6 +52,11 @@ public class FileSetDescription implements IJson { */ @Nullable public String schemaFile; + /** + * Schema given explicitly. + */ + @Nullable + public Schema schema; /** * If true the files are expected to have a header row. */ @@ -71,17 +79,23 @@ public class FileSetDescription implements IJson { public Double startTime; @Nullable public Double endTime; + /** + * If true the file is deleted after loading the data. This is + * useful for temporary files. 
+ */ + public boolean deleteAfterLoading; @SuppressWarnings("unused") public String getBasename() { return Utilities.getFolder(this.fileNamePattern); } - @Nullable - private String getSchemaPath() { + public LazySchema getSchema() { + if (this.schema != null) + return new LazySchema(this.schema); if (Utilities.isNullOrEmpty(this.schemaFile)) - return null; - return Paths.get(Utilities.getFolder(this.fileNamePattern), this.schemaFile).toString(); + return new LazySchema((String)null); + return new LazySchema(Paths.get(Utilities.getFolder(this.fileNamePattern), this.schemaFile).toString()); } @Nullable @@ -109,11 +123,11 @@ public ITable load() { config.allowFewerColumns = true; config.hasHeaderRow = FileSetDescription.this.headerRow; loader = new CsvFileLoader( - this.pathname, config, FileSetDescription.this.getSchemaPath()); + this.pathname, config, FileSetDescription.this.getSchema()); break; case "orc": loader = new OrcFileLoader( - this.pathname, FileSetDescription.this.getSchemaPath(), true); + this.pathname, FileSetDescription.this.getSchema(), true); break; case "parquet": loader = new ParquetFileLoader( @@ -121,7 +135,7 @@ public ITable load() { break; case "json": loader = new JsonFileLoader( - this.pathname, FileSetDescription.this.getSchemaPath()); + this.pathname, FileSetDescription.this.getSchema()); break; case "hillviewlog": loader = new HillviewLogs.LogFileLoader(this.pathname); @@ -143,7 +157,16 @@ public ITable load() { throw new RuntimeException( "Unexpected file kind " + FileSetDescription.this.fileKind); } - return Converters.checkNull(loader.load()); + ITable result = Converters.checkNull(loader.load()); + if (FileSetDescription.this.deleteAfterLoading) { + File file = new File(this.pathname); + boolean success = file.delete(); + if (!success) + HillviewLogger.instance.error("Error deleting file", "{0}", this.pathname); + else + HillviewLogger.instance.info("Deleted file", "{0}", this.pathname); + } + return result; } public long getSizeInBytes() { diff --git a/platform/src/main/java/org/hillview/storage/JsonFileLoader.java b/platform/src/main/java/org/hillview/storage/JsonFileLoader.java index a33a59ce7..87dc0453f 100644 --- a/platform/src/main/java/org/hillview/storage/JsonFileLoader.java +++ b/platform/src/main/java/org/hillview/storage/JsonFileLoader.java @@ -23,6 +23,7 @@ import com.google.gson.JsonPrimitive; import com.google.gson.internal.Streams; import com.google.gson.stream.JsonReader; +import org.hillview.LazySchema; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.Table; @@ -30,7 +31,6 @@ import javax.annotation.Nullable; import java.io.*; -import java.nio.file.Paths; import java.util.*; /** @@ -45,21 +45,17 @@ * TODO: add support for streaming reads */ public class JsonFileLoader extends TextFileLoader { - @Nullable - private final String schemaPath; + private final LazySchema lazySchema; - public JsonFileLoader(String filename, @Nullable String schemaPath) { + public JsonFileLoader(String filename, LazySchema lazySchema) { super(filename); - this.schemaPath = schemaPath; + this.lazySchema = lazySchema; this.currentRow = 0; this.currentColumn = -1; } public ITable load() { - Schema schema = null; - if (this.schemaPath != null) - schema = Schema.readFromJsonFile(Paths.get(this.schemaPath)); - + Schema schema = this.lazySchema.getSchema(); Reader file = this.getFileReader(); JsonReader jReader = new JsonReader(file); JsonElement elem = Streams.parse(jReader); diff --git 
a/platform/src/main/java/org/hillview/storage/OrcFileLoader.java b/platform/src/main/java/org/hillview/storage/OrcFileLoader.java index 9918fe590..28908d29f 100644 --- a/platform/src/main/java/org/hillview/storage/OrcFileLoader.java +++ b/platform/src/main/java/org/hillview/storage/OrcFileLoader.java @@ -24,6 +24,7 @@ import org.apache.orc.Reader; import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; +import org.hillview.LazySchema; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.Table; @@ -34,7 +35,6 @@ import javax.annotation.Nullable; import java.io.IOException; -import java.nio.file.Paths; import java.time.LocalDateTime; import java.time.ZoneOffset; import java.util.ArrayList; @@ -47,11 +47,7 @@ public class OrcFileLoader extends TextFileLoader { private final boolean lazy; private final Configuration conf; - /** - * Path of the Hillview Schema if specified. - */ - @Nullable - private final String schemaPath; + private final LazySchema lzschema; /** * Orc schema of the full file. */ @@ -64,10 +60,10 @@ public class OrcFileLoader extends TextFileLoader { @Nullable private Schema hillviewSchema = null; - public OrcFileLoader(String path, @Nullable String schemaPath, boolean lazy) { + public OrcFileLoader(String path, LazySchema lzschema, boolean lazy) { super(path); this.lazy = lazy; - this.schemaPath = schemaPath; + this.lzschema = lzschema; this.conf = new Configuration(); // https://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); @@ -394,8 +390,7 @@ private static List readColumns( @Override public ITable load() { try { - if (this.schemaPath != null) - this.hillviewSchema = Schema.readFromJsonFile(Paths.get(this.schemaPath)); + this.hillviewSchema = this.lzschema.getSchema(); Reader reader = OrcFile.createReader(new Path(this.filename), OrcFile.readerOptions(conf)); this.schema = reader.getSchema(); diff --git a/platform/src/main/java/org/hillview/storage/jdbc/GreenplumJdbcConnection.java b/platform/src/main/java/org/hillview/storage/jdbc/GreenplumJdbcConnection.java new file mode 100644 index 000000000..f2c795f07 --- /dev/null +++ b/platform/src/main/java/org/hillview/storage/jdbc/GreenplumJdbcConnection.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 VMware Inc. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.hillview.storage.jdbc; + +import org.hillview.storage.ColumnLimits; +import org.hillview.utils.Utilities; + +import javax.annotation.Nullable; + +public class GreenplumJdbcConnection extends JdbcConnection { + public static final String DRIVER = "com.pivotal.jdbc.GreenplumDriver"; + + GreenplumJdbcConnection(JdbcConnectionInformation conn) { + super(';', ';', conn); + try { + Class.forName(DRIVER); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + @Override + String getQueryToReadSize(@Nullable ColumnLimits limits) { + return "SELECT COUNT(*) FROM " + this.info.table; + } + + @Override + public String getURL() { + this.addParameterIfNotNullOrEmpty("DatabaseName", info.database); + if (!Utilities.isNullOrEmpty(info.user) && !Utilities.isNullOrEmpty(info.password)) + this.addParameter("AuthenticationMethod", "userIdPassword"); + this.addParameterIfNotNullOrEmpty("User", info.user); + this.addParameterIfNotNullOrEmpty("Password", info.password); + StringBuilder builder = new StringBuilder(); + this.addBaseUrl(builder); + this.appendParametersToUrl(builder); + return builder.toString(); + } + + void addBaseUrl(StringBuilder urlBuilder) { + urlBuilder.append("jdbc:pivotal:"); + urlBuilder.append(info.databaseKind); + urlBuilder.append("://"); + urlBuilder.append(info.host); + if (info.port >= 0) { + urlBuilder.append(":"); + urlBuilder.append(info.port); + } + } +} diff --git a/platform/src/main/java/org/hillview/storage/ImpalaJdbcConnection.java b/platform/src/main/java/org/hillview/storage/jdbc/ImpalaJdbcConnection.java similarity index 83% rename from platform/src/main/java/org/hillview/storage/ImpalaJdbcConnection.java rename to platform/src/main/java/org/hillview/storage/jdbc/ImpalaJdbcConnection.java index e12b44156..faedc35ff 100644 --- a/platform/src/main/java/org/hillview/storage/ImpalaJdbcConnection.java +++ b/platform/src/main/java/org/hillview/storage/jdbc/ImpalaJdbcConnection.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 VMware Inc. All Rights Reserved. + * Copyright (c) 2020 VMware Inc. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.hillview.storage; +package org.hillview.storage.jdbc; import org.hillview.utils.Converters; import org.hillview.utils.Utilities; @@ -25,14 +25,6 @@ public class ImpalaJdbcConnection extends JdbcConnection { super(';', ';', conn); } - @Override - public String getQueryToReadTable(int rowCount) { - String result = "SELECT * FROM " + Converters.checkNull(this.info.table); - if (rowCount >= 0) - result += " LIMIT " + rowCount; - return result; - } - @Override public String getURL() { this.addParameter("UseNativeQuery", "1"); diff --git a/platform/src/main/java/org/hillview/storage/JdbcConnection.java b/platform/src/main/java/org/hillview/storage/jdbc/JdbcConnection.java similarity index 88% rename from platform/src/main/java/org/hillview/storage/JdbcConnection.java rename to platform/src/main/java/org/hillview/storage/jdbc/JdbcConnection.java index de483d44a..fc557b227 100644 --- a/platform/src/main/java/org/hillview/storage/JdbcConnection.java +++ b/platform/src/main/java/org/hillview/storage/jdbc/JdbcConnection.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 VMware Inc. All Rights Reserved. + * Copyright (c) 2020 VMware Inc. All Rights Reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,17 +15,20 @@ * limitations under the License. */ -package org.hillview.storage; +package org.hillview.storage.jdbc; import org.hillview.sketches.results.IHistogramBuckets; +import org.hillview.storage.ColumnLimits; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.columns.ColumnQuantization; import org.hillview.table.columns.DoubleColumnQuantization; +import org.hillview.utils.Converters; import org.hillview.utils.Utilities; import javax.annotation.Nullable; import java.util.HashMap; +import java.util.LinkedHashMap; /** * Base abstract class that handles various specifics of JDBC driver requirements. @@ -40,7 +43,7 @@ abstract class JdbcConnection { */ private final char urlOptionsBegin; public final JdbcConnectionInformation info; - private final HashMap params = new HashMap(); + private final HashMap params = new LinkedHashMap(); static JdbcConnection create(JdbcConnectionInformation conn) { if (Utilities.isNullOrEmpty(conn.databaseKind)) @@ -50,6 +53,8 @@ static JdbcConnection create(JdbcConnectionInformation conn) { return new MySqlJdbcConnection(conn); case "impala": return new ImpalaJdbcConnection(conn); + case "greenplum": + return new GreenplumJdbcConnection(conn); default: throw new RuntimeException("Unsupported JDBC database kind " + conn.databaseKind); } @@ -65,7 +70,12 @@ static JdbcConnection create(JdbcConnectionInformation conn) { * @param rowCount Number of rows to read. * @return A SQL query string that reads the specified number of rows. */ - public abstract String getQueryToReadTable(int rowCount); + public String getQueryToReadTable(int rowCount) { + String result = "SELECT * FROM " + Converters.checkNull(this.info.table); + if (rowCount >= 0) + result += " LIMIT " + rowCount; + return result; + } String getQueryToReadSize(@Nullable ColumnLimits columnLimits) { throw new UnsupportedOperationException(); @@ -110,6 +120,11 @@ void addParameter(String param, String value) { this.params.put(param, value); } + void addParameterIfNotNullOrEmpty(String param, @Nullable String value) { + if (!Utilities.isNullOrEmpty(value)) + this.params.put(param, value); + } + JdbcConnection(char urlOptionsSeparator, char urlOptionsBegin, JdbcConnectionInformation info) { this.urlOptionsSeparator = urlOptionsSeparator; diff --git a/platform/src/main/java/org/hillview/storage/JdbcConnectionInformation.java b/platform/src/main/java/org/hillview/storage/jdbc/JdbcConnectionInformation.java similarity index 94% rename from platform/src/main/java/org/hillview/storage/JdbcConnectionInformation.java rename to platform/src/main/java/org/hillview/storage/jdbc/JdbcConnectionInformation.java index 1d979b715..9eb1dd8a2 100644 --- a/platform/src/main/java/org/hillview/storage/JdbcConnectionInformation.java +++ b/platform/src/main/java/org/hillview/storage/jdbc/JdbcConnectionInformation.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 VMware Inc. All Rights Reserved. + * Copyright (c) 2020 VMware Inc. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.hillview.storage; +package org.hillview.storage.jdbc; import javax.annotation.Nullable; import java.io.Serializable; diff --git a/platform/src/main/java/org/hillview/storage/JdbcDatabase.java b/platform/src/main/java/org/hillview/storage/jdbc/JdbcDatabase.java similarity index 94% rename from platform/src/main/java/org/hillview/storage/JdbcDatabase.java rename to platform/src/main/java/org/hillview/storage/jdbc/JdbcDatabase.java index e77e5903c..0c0342019 100644 --- a/platform/src/main/java/org/hillview/storage/JdbcDatabase.java +++ b/platform/src/main/java/org/hillview/storage/jdbc/JdbcDatabase.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 VMware Inc. All Rights Reserved. + * Copyright (c) 2020 VMware Inc. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,9 +15,10 @@ * limitations under the License. */ -package org.hillview.storage; +package org.hillview.storage.jdbc; import org.hillview.sketches.results.*; +import org.hillview.storage.ColumnLimits; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.SmallTable; @@ -150,7 +151,7 @@ public int distinctCount(String columnName, @Nullable ColumnLimits columnLimits) * last column has the count of each row. */ public SmallTable topFreq(Schema schema, int maxRows, - @Nullable ColumnLimits columnLimits) { + @Nullable ColumnLimits columnLimits) throws SQLException { assert this.conn.info.table != null; String query = this.conn.getQueryToComputeFreqValues(schema, maxRows, columnLimits); ResultSet rs = this.getQueryResult(query); @@ -170,7 +171,7 @@ public JsonGroups histogram( ColumnDescription cd, IHistogramBuckets buckets, @Nullable ColumnLimits columnLimits, @Nullable ColumnQuantization quantization, - int rowCount) { + int rowCount) throws SQLException { String query = this.conn.getQueryForHistogram(cd, columnLimits, buckets, quantization); ResultSet rs = this.getQueryResult(query); List cols = JdbcDatabase.convertResultSet(rs); @@ -201,7 +202,7 @@ public JsonGroups histogram( IHistogramBuckets buckets0, IHistogramBuckets buckets1, @Nullable ColumnLimits columnLimits, @Nullable ColumnQuantization quantization0, - @Nullable ColumnQuantization quantization1) { + @Nullable ColumnQuantization quantization1) throws SQLException { // TODO: this does not currently compute nulls String query = this.conn.getQueryForHeatmap(cd0, cd1, columnLimits, @@ -240,7 +241,7 @@ public JsonGroups histogram( * @param limits Limits on the data to read. 
*/ @SuppressWarnings("MismatchedQueryAndUpdateOfCollection") - public DataRange numericDataRange(ColumnDescription cd, @Nullable ColumnLimits limits) { + public DataRange numericDataRange(ColumnDescription cd, @Nullable ColumnLimits limits) throws SQLException { String query = this.conn.getQueryForNumericRange(cd, null, limits); ResultSet rs = this.getQueryResult(query); List cols = JdbcDatabase.convertResultSet(rs); @@ -261,7 +262,7 @@ public DataRange numericDataRange(ColumnDescription cd, @Nullable ColumnLimits l } public StringQuantiles stringBuckets(ColumnDescription cd, int stringsToSample, - @Nullable ColumnLimits columnLimits) { + @Nullable ColumnLimits columnLimits) throws SQLException { @Nullable String max = null; JsonList boundaries = new JsonList(); long presentCount, missingCount; @@ -292,6 +293,7 @@ public StringQuantiles stringBuckets(ColumnDescription cd, int stringsToSample, List cols = JdbcDatabase.convertResultSet(rs); SmallTable table = new SmallTable(cols); assert table.getNumOfRows() == 1; + @SuppressWarnings("MismatchedQueryAndUpdateOfCollection") RowSnapshot row = new RowSnapshot(table, 0); presentCount = Converters.toLong(row.getDouble("nonnulls")); missingCount = Converters.toLong(row.getDouble("total")) - presentCount; @@ -332,24 +334,25 @@ public List loadColumns(List names) { * Get the data in the JDBC database table. * @param rowCount Maximum number of rows. If negative, bring all rows. */ - private ResultSet getDataInTable(int rowCount) { + private ResultSet getDataInTable(int rowCount) throws SQLException { assert this.conn.info.table != null; String query = this.conn.getQueryToReadTable(rowCount); return this.getQueryResult(query); } - private ResultSet getQueryResult(String query) { - try { - // System.out.println(query); - HillviewLogger.instance.info("Executing SQL query", "{0}", query); - Statement st = Converters.checkNull(this.connection).createStatement(); - return st.executeQuery(query); - } catch (SQLException ex) { - throw new RuntimeException(ex); - } + public void executeUpdate(String query) throws SQLException { + Statement statement = Converters.checkNull(this.connection).createStatement(); + HillviewLogger.instance.info("Executing SQL update query", "{0}", query); + statement.executeUpdate(query); } - public ITable getQueryData(String query) { + private ResultSet getQueryResult(String query) throws SQLException { + HillviewLogger.instance.info("Executing SQL query", "{0}", query); + Statement st = Converters.checkNull(this.connection).createStatement(); + return st.executeQuery(query); + } + + public ITable getQueryData(String query) throws SQLException { ResultSet rs = this.getQueryResult(query); List columns = JdbcDatabase.convertResultSet(rs); return new Table(columns, null, null); @@ -395,10 +398,12 @@ private static ColumnDescription getDescription(ResultSetMetaData meta, int colI case Types.TIMESTAMP_WITH_TIMEZONE: kind = ContentsKind.Date; break; + case Types.NULL: + kind = ContentsKind.None; + break; case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: - case Types.NULL: case Types.OTHER: case Types.JAVA_OBJECT: case Types.DISTINCT: @@ -486,10 +491,12 @@ private static void appendNext(List cols, col.append(Converters.toDouble(instant)); } break; + case Types.NULL: + col.appendMissing(); + break; case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: - case Types.NULL: case Types.OTHER: case Types.JAVA_OBJECT: case Types.DISTINCT: diff --git 
a/platform/src/main/java/org/hillview/storage/MySqlJdbcConnection.java b/platform/src/main/java/org/hillview/storage/jdbc/MySqlJdbcConnection.java similarity index 98% rename from platform/src/main/java/org/hillview/storage/MySqlJdbcConnection.java rename to platform/src/main/java/org/hillview/storage/jdbc/MySqlJdbcConnection.java index 61aa41388..77e6db0f8 100644 --- a/platform/src/main/java/org/hillview/storage/MySqlJdbcConnection.java +++ b/platform/src/main/java/org/hillview/storage/jdbc/MySqlJdbcConnection.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 VMware Inc. All Rights Reserved. + * Copyright (c) 2020 VMware Inc. All Rights Reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,11 +15,12 @@ * limitations under the License. */ -package org.hillview.storage; +package org.hillview.storage.jdbc; import org.hillview.sketches.results.DoubleHistogramBuckets; import org.hillview.sketches.results.IHistogramBuckets; import org.hillview.sketches.results.StringHistogramBuckets; +import org.hillview.storage.ColumnLimits; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.api.ContentsKind; @@ -38,7 +39,6 @@ import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.util.Collection; -import java.util.TimeZone; import java.util.function.Function; public class MySqlJdbcConnection extends JdbcConnection { @@ -382,14 +382,6 @@ String getHistogramQuery() { } } - @Override - public String getQueryToReadTable(int rowCount) { - String result = "SELECT * FROM " + Converters.checkNull(this.info.table); - if (rowCount >= 0) - result += " LIMIT " + rowCount; - return result; - } - @Override public String getURL() { // This stuff is extremely annoying diff --git a/platform/src/main/java/org/hillview/storage/jdbc/package-info.java b/platform/src/main/java/org/hillview/storage/jdbc/package-info.java new file mode 100644 index 000000000..fa98c55a5 --- /dev/null +++ b/platform/src/main/java/org/hillview/storage/jdbc/package-info.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 VMware Inc. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Package that doesn't allow null values as method parameters. + */ + +@ParametersAreNonnullByDefault +@FieldsAreNonnullByDefault +@MethodsAreNonnullByDefault +package org.hillview.storage.jdbc; + +import org.hillview.utils.FieldsAreNonnullByDefault; +import org.hillview.utils.MethodsAreNonnullByDefault; + +import javax.annotation.ParametersAreNonnullByDefault; diff --git a/platform/src/main/java/org/hillview/table/Schema.java b/platform/src/main/java/org/hillview/table/Schema.java index 9e201795d..b7d256e10 100644 --- a/platform/src/main/java/org/hillview/table/Schema.java +++ b/platform/src/main/java/org/hillview/table/Schema.java @@ -38,7 +38,7 @@ * A schema is an ordering of the columns, plus a map from a column name to a column description. 
* Column names are case-sensitive. */ -public final class Schema implements IJson { +public class Schema implements IJson { static final long serialVersionUID = 1; private final LinkedHashMap columns; diff --git a/platform/src/main/java/org/hillview/utils/JsonString.java b/platform/src/main/java/org/hillview/utils/JsonInString.java similarity index 79% rename from platform/src/main/java/org/hillview/utils/JsonString.java rename to platform/src/main/java/org/hillview/utils/JsonInString.java index a89e34437..38bf80153 100644 --- a/platform/src/main/java/org/hillview/utils/JsonString.java +++ b/platform/src/main/java/org/hillview/utils/JsonInString.java @@ -20,6 +20,7 @@ import com.google.gson.JsonElement; import com.google.gson.JsonNull; import com.google.gson.JsonParser; +import org.apache.commons.lang.StringEscapeUtils; import org.hillview.dataset.api.IJsonSketchResult; import javax.annotation.Nullable; @@ -27,16 +28,20 @@ /** * A string whose value is a JSON object. */ -public class JsonString implements IJsonSketchResult { +public class JsonInString implements IJsonSketchResult { static final long serialVersionUID = 1; @Nullable private final String value; - public JsonString(@Nullable String value) { + public JsonInString(@Nullable String value) { this.value = value; } + public static JsonInString makeJsonString(String value) { + return new JsonInString("\"" + StringEscapeUtils.escapeJavaScript(value) + "\""); + } + @Override public JsonElement toJsonTree() { if (this.value == null) diff --git a/platform/src/main/java/org/hillview/utils/TestUtils.java b/platform/src/main/java/org/hillview/utils/TestUtils.java index 970b5ee33..41154737c 100644 --- a/platform/src/main/java/org/hillview/utils/TestUtils.java +++ b/platform/src/main/java/org/hillview/utils/TestUtils.java @@ -17,6 +17,7 @@ package org.hillview.utils; +import org.hillview.LazySchema; import org.hillview.storage.CsvFileLoader; import org.hillview.table.api.ContentsKind; import org.hillview.table.api.ITable; @@ -41,7 +42,7 @@ public static ITable loadTableFromCSV(String dataFolder, String csvFile, String CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = true; - CsvFileLoader r = new CsvFileLoader(path.toString(), config, schemaPath.toString()); + CsvFileLoader r = new CsvFileLoader(path.toString(), config, new LazySchema(schemaPath.toString())); return Converters.checkNull(r.load()); } diff --git a/platform/src/test/java/org/hillview/test/dataStructures/JsonTest.java b/platform/src/test/java/org/hillview/test/dataStructures/JsonTest.java index 9bb87f9a1..5d3be70ae 100644 --- a/platform/src/test/java/org/hillview/test/dataStructures/JsonTest.java +++ b/platform/src/test/java/org/hillview/test/dataStructures/JsonTest.java @@ -19,6 +19,7 @@ import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; +import org.hillview.LazySchema; import org.hillview.dataset.api.IJson; import org.hillview.sketches.results.Count; import org.hillview.sketches.results.NextKList; @@ -114,7 +115,7 @@ public void convert() { public void jsonReaderTest() { final String jsonFolder = "../data/ontime"; final String jsonSample = "short.schema"; - JsonFileLoader reader = new JsonFileLoader(jsonFolder + "/" + jsonSample, null); + JsonFileLoader reader = new JsonFileLoader(jsonFolder + "/" + jsonSample, new LazySchema()); ITable table = reader.load(); Assert.assertNotNull(table); Assert.assertEquals("Table[2x15]", table.toString()); diff --git 
a/platform/src/test/java/org/hillview/test/dataset/LoadRaceTest.java b/platform/src/test/java/org/hillview/test/dataset/LoadRaceTest.java index 1a267129b..fc01dfc83 100644 --- a/platform/src/test/java/org/hillview/test/dataset/LoadRaceTest.java +++ b/platform/src/test/java/org/hillview/test/dataset/LoadRaceTest.java @@ -44,7 +44,7 @@ public void load() throws InterruptedException { desc.fileKind = "csv"; desc.fileNamePattern = "../data/ontime/*_1.csv"; desc.headerRow = true; - IMap> finder = new FindFilesMap(desc); + IMap> finder = new FindFilesMap<>(desc); IDataSet found = local.blockingFlatMap(finder); IMap loader = new LoadFilesMap(); diff --git a/platform/src/test/java/org/hillview/test/storage/CsvFileTest.java b/platform/src/test/java/org/hillview/test/storage/CsvFileTest.java index 16266fe60..155400838 100644 --- a/platform/src/test/java/org/hillview/test/storage/CsvFileTest.java +++ b/platform/src/test/java/org/hillview/test/storage/CsvFileTest.java @@ -20,6 +20,7 @@ import com.univocity.parsers.csv.CsvFormat; import com.univocity.parsers.csv.CsvWriter; import com.univocity.parsers.csv.CsvWriterSettings; +import org.hillview.LazySchema; import org.hillview.storage.CsvFileLoader; import org.hillview.storage.CsvFileWriter; import org.hillview.table.ColumnDescription; @@ -73,7 +74,7 @@ private ITable readTable(String folder, String file, boolean header) { CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = header; - CsvFileLoader r = new CsvFileLoader(path.toString(), config, null); + CsvFileLoader r = new CsvFileLoader(path.toString(), config, new LazySchema()); return r.load(); } @@ -84,7 +85,7 @@ private void readFileWithSchema(String criteoFile) { config.allowFewerColumns = false; config.hasHeaderRow = false; config.separator = '\t'; - CsvFileLoader r = new CsvFileLoader(path.toString(), config, schemaPath.toString()); + CsvFileLoader r = new CsvFileLoader(path.toString(), config, new LazySchema(schemaPath.toString())); ITable t = r.load(); Assert.assertNotNull(t); } @@ -120,7 +121,7 @@ public void readCsvFileWithSchemaTest() { CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = true; - CsvFileLoader r = new CsvFileLoader(path.toString(), config, schemaPath.toString()); + CsvFileLoader r = new CsvFileLoader(path.toString(), config, new LazySchema(schemaPath.toString())); ITable t = r.load(); Assert.assertNotNull(t); } @@ -131,7 +132,7 @@ public void readCsvFileGuessSchemaTest() { CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = true; - CsvFileLoader r = new CsvFileLoader(path.toString(), config, null); + CsvFileLoader r = new CsvFileLoader(path.toString(), config, new LazySchema()); ITable t = r.load(); Assert.assertNotNull(t); } @@ -142,7 +143,7 @@ public void readUTF16FileTest() { CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = true; - CsvFileLoader r = new CsvFileLoader(path.toString(), config, null); + CsvFileLoader r = new CsvFileLoader(path.toString(), config, new LazySchema()); ITable t = r.load(); Assert.assertNotNull(t); Assert.assertEquals("Table[3x5]", t.toString()); @@ -166,7 +167,7 @@ private void writeReadTable(ITable table) throws IOException { CsvFileLoader.Config config = new CsvFileLoader.Config(); config.allowFewerColumns = false; config.hasHeaderRow = true; - CsvFileLoader r = new CsvFileLoader(path, config, 
schemaPath.toString()); + CsvFileLoader r = new CsvFileLoader(path, config, new LazySchema(schemaPath.toString())); ITable t = r.load(); Assert.assertNotNull(t); diff --git a/platform/src/test/java/org/hillview/test/storage/DataUploadTest.java b/platform/src/test/java/org/hillview/test/storage/DataUploadTest.java index 0c99b2dce..e73a91db5 100644 --- a/platform/src/test/java/org/hillview/test/storage/DataUploadTest.java +++ b/platform/src/test/java/org/hillview/test/storage/DataUploadTest.java @@ -19,13 +19,13 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; +import org.hillview.LazySchema; import org.hillview.main.DataUpload; import org.hillview.storage.OrcFileLoader; import org.hillview.table.api.ContentsKind; import org.hillview.table.api.IColumn; import org.hillview.table.api.ITable; import org.hillview.test.BaseTest; -import org.hillview.utils.Converters; import org.junit.Assert; import org.junit.Test; @@ -78,7 +78,7 @@ public void testChopCsvToOrc() throws Exception { "-f", file, "-o", "-l", "200000", "-h", "-d", dir.toString()); - OrcFileLoader loader = new OrcFileLoader(dir.toString() + "/2016_10.orc", null, false); + OrcFileLoader loader = new OrcFileLoader(dir.toString() + "/2016_10.orc", new LazySchema(), false); ITable table = loader.load(); Assert.assertNotNull(table); IColumn date = table.getLoadedColumn("FlightDate"); diff --git a/platform/src/test/java/org/hillview/test/storage/ImpalaTest.java b/platform/src/test/java/org/hillview/test/storage/ImpalaTest.java index 93afbbe49..5519985fa 100644 --- a/platform/src/test/java/org/hillview/test/storage/ImpalaTest.java +++ b/platform/src/test/java/org/hillview/test/storage/ImpalaTest.java @@ -17,7 +17,7 @@ package org.hillview.test.storage; -import org.hillview.storage.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcConnectionInformation; import org.junit.Test; import java.sql.SQLException; diff --git a/platform/src/test/java/org/hillview/test/storage/JdbcTest.java b/platform/src/test/java/org/hillview/test/storage/JdbcTest.java index 06a67f84b..d48590cec 100644 --- a/platform/src/test/java/org/hillview/test/storage/JdbcTest.java +++ b/platform/src/test/java/org/hillview/test/storage/JdbcTest.java @@ -17,8 +17,8 @@ package org.hillview.test.storage; -import org.hillview.storage.JdbcConnectionInformation; -import org.hillview.storage.JdbcDatabase; +import org.hillview.storage.jdbc.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcDatabase; import org.hillview.table.api.ITable; import org.hillview.test.BaseTest; import org.junit.Assert; diff --git a/platform/src/test/java/org/hillview/test/storage/MysqlTest.java b/platform/src/test/java/org/hillview/test/storage/MysqlTest.java index 6f1000316..db11827d0 100644 --- a/platform/src/test/java/org/hillview/test/storage/MysqlTest.java +++ b/platform/src/test/java/org/hillview/test/storage/MysqlTest.java @@ -19,8 +19,8 @@ import org.hillview.sketches.results.*; import org.hillview.storage.ColumnLimits; -import org.hillview.storage.JdbcConnectionInformation; -import org.hillview.storage.JdbcDatabase; +import org.hillview.storage.jdbc.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcDatabase; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.SmallTable; diff --git a/platform/src/test/java/org/hillview/test/storage/OrcFileTest.java b/platform/src/test/java/org/hillview/test/storage/OrcFileTest.java index 4ccef5f73..1b4b417b1 100644 --- 
a/platform/src/test/java/org/hillview/test/storage/OrcFileTest.java +++ b/platform/src/test/java/org/hillview/test/storage/OrcFileTest.java @@ -24,6 +24,7 @@ import org.apache.orc.Reader; import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; +import org.hillview.LazySchema; import org.hillview.storage.CsvFileLoader; import org.hillview.storage.OrcFileLoader; import org.hillview.storage.OrcFileWriter; @@ -75,7 +76,7 @@ public void writeSmallFileTest() { OrcFileWriter ofw = new OrcFileWriter(orcFile); ofw.writeTable(t); - OrcFileLoader loader = new OrcFileLoader(orcFile, null, false); + OrcFileLoader loader = new OrcFileLoader(orcFile, new LazySchema(), false); ITable table = loader.load(); Assert.assertNotNull(table); Assert.assertEquals(t.toLongString(20), table.toLongString(20)); @@ -86,7 +87,7 @@ public void writeSmallFileTest() { public void convertCsvFileTest() { String file = CsvFileTest.ontimeFolder + "/" + CsvFileTest.csvFile; CsvFileLoader.Config config = new CsvFileLoader.Config(); - CsvFileLoader loader = new CsvFileLoader(file, config, null); + CsvFileLoader loader = new CsvFileLoader(file, config, new LazySchema()); ITable table = loader.load(); Assert.assertNotNull(table); String orcFile = "tmpX.orc"; @@ -115,7 +116,7 @@ public void writeNullTest() { OrcFileWriter ofw = new OrcFileWriter(orcFile); ofw.writeTable(tbl); - OrcFileLoader loader = new OrcFileLoader(orcFile, null, false); + OrcFileLoader loader = new OrcFileLoader(orcFile, new LazySchema(), false); ITable table = loader.load(); Assert.assertNotNull(table); Assert.assertEquals(tbl.toLongString(20), table.toLongString(20)); @@ -155,7 +156,7 @@ public void readOrcColumnTest() throws IOException { @Test public void readOrcFileTest() { String orcFile = orcFolder + orcOutFile; - OrcFileLoader loader = new OrcFileLoader(orcFile, null, false); + OrcFileLoader loader = new OrcFileLoader(orcFile, new LazySchema(), false); ITable table = loader.load(); Table ref = TestTables.testRepTable(); Assert.assertNotNull(table); @@ -170,7 +171,7 @@ public void readOrcFileTestWithSchema() { String tmpSchema = "tmpOrcSchema"; schema.writeToJsonFile(Paths.get(tmpSchema)); - OrcFileLoader loader = new OrcFileLoader(orcFile, tmpSchema, false); + OrcFileLoader loader = new OrcFileLoader(orcFile, new LazySchema(tmpSchema), false); ITable table = loader.load(); Assert.assertNotNull(table); Assert.assertEquals(ref.toLongString(20), table.toLongString(20)); @@ -186,7 +187,7 @@ public void readOrcFileTestWithSchema() { @Test public void readOrcFileLazyTest() { String orcFile = orcFolder + orcOutFile; - OrcFileLoader loader = new OrcFileLoader(orcFile, null, true); + OrcFileLoader loader = new OrcFileLoader(orcFile, new LazySchema(), true); ITable table = loader.load(); Table ref = TestTables.testRepTable(); Assert.assertNotNull(table); diff --git a/repository/README.md b/repository/README.md new file mode 100644 index 000000000..89e424d03 --- /dev/null +++ b/repository/README.md @@ -0,0 +1,3 @@ +This directory is a maven repository that contains optional jars that +are not public (e.g., Impala and Greenplum JDBC). These jars are not +checked-in as part of the git repository. 
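For reference, a private jar can be registered in a directory-based Maven repository like this one with Maven's `install:install-file` goal; the coordinates below are illustrative placeholders, not the ones the Hillview build actually requires:

```
$ mvn install:install-file -Dfile=greenplum-jdbc.jar \
      -DgroupId=com.example.jdbc -DartifactId=greenplum-jdbc \
      -Dversion=1.0 -Dpackaging=jar -DlocalRepositoryPath=repository
```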
\ No newline at end of file diff --git a/web/src/main/java/org/hillview/RpcObjectManager.java b/web/src/main/java/org/hillview/RpcObjectManager.java index 06675a8bc..313bee3ef 100644 --- a/web/src/main/java/org/hillview/RpcObjectManager.java +++ b/web/src/main/java/org/hillview/RpcObjectManager.java @@ -25,8 +25,12 @@ import javax.annotation.Nullable; import javax.websocket.Session; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; import java.util.HashMap; import java.util.List; +import java.util.Properties; import java.util.function.Consumer; /** @@ -40,6 +44,12 @@ * This is a singleton pattern. */ public final class RpcObjectManager { + /** + * This file contains the global properties that control hillview. + * This file is read by the root node. + */ + static final String propertiesFile = "hillview.properties"; + /** * Well-known id of the initial object. */ @@ -51,6 +61,9 @@ public final class RpcObjectManager { // the unique global instance. public static final RpcObjectManager instance; + // Global application properties + public final Properties properties; + // Map the session to the targetId object that is replying to the request, if any. private final HashMap<Session, RpcTarget> sessionRequest = new HashMap<Session, RpcTarget>(10); @@ -63,6 +76,14 @@ public final class RpcObjectManager { // Private constructor private RpcObjectManager() { this.objectLog = new RedoLog(); + this.properties = new Properties(); + try (FileInputStream prop = new FileInputStream(propertiesFile)) { + this.properties.load(prop); + } catch (FileNotFoundException ex) { + HillviewLogger.instance.info("No properties file found", "{0}", propertiesFile); + } catch (IOException ex) { + HillviewLogger.instance.error("Error while loading properties from file", ex); + } } synchronized void addSession(Session session, @Nullable RpcTarget target) { diff --git a/web/src/main/java/org/hillview/RpcTarget.java b/web/src/main/java/org/hillview/RpcTarget.java index 093248c33..057a1669e 100644 --- a/web/src/main/java/org/hillview/RpcTarget.java +++ b/web/src/main/java/org/hillview/RpcTarget.java @@ -32,7 +32,7 @@ import javax.annotation.Nullable; import javax.websocket.Session; -import java.lang.reflect.Method; +import java.beans.Statement; import java.util.ArrayList; import java.util.List; import java.util.function.BiFunction; @@ -99,22 +99,6 @@ private synchronized void saveSubscription(RpcRequestContext context, Subscripti RpcObjectManager.instance.addSubscription(context, sub); } - /** - * Use reflection to find a method with a given name that has an @HillviewRpc annotation. - * All these methods should have the following signature: - * method(RpcRequest req, RpcRequestContext context). - */ - @Nullable - private Method getMethod(String method) { - Class<?> type = this.getClass(); - for (Method m : type.getDeclaredMethods()) { - if (m.getName().equals(method) && - m.isAnnotationPresent(HillviewRpc.class)) - return m; - } - return null; - } - /** * Dispatches an RPC request for execution. * This will look up the method in the RpcRequest using reflection @@ -136,12 +120,9 @@ void execute(RpcRequest request, RpcRequestContext context) { TODO: check if computation has happened and just send result back.
*/ try { - Method method = this.getMethod(request.method); - if (method == null) - throw new HillviewException(this.toString() + ": No such method " + request.method); - HillviewLogger.instance.info("Executing", "request={0}, context={1}", - request.toString(), context.toString()); - method.invoke(this, request, context); + Object[] args = { request, context }; + Statement s = new Statement(this, request.method, args); + s.execute(); } catch (Exception ex) { HillviewLogger.instance.error("Exception while invoking method", ex); RpcReply reply = request.createReply(ex); diff --git a/web/src/main/java/org/hillview/benchmarks/DPAccuracyBenchmarks.java b/web/src/main/java/org/hillview/benchmarks/DPAccuracyBenchmarks.java index e34a37fe7..67c88c783 100644 --- a/web/src/main/java/org/hillview/benchmarks/DPAccuracyBenchmarks.java +++ b/web/src/main/java/org/hillview/benchmarks/DPAccuracyBenchmarks.java @@ -78,7 +78,7 @@ IDataSet loadData() { Empty e = Empty.getInstance(); LocalDataSet local = new LocalDataSet(e); - IMap> finder = new FindFilesMap(fsd); + IMap> finder = new FindFilesMap<>(fsd); IDataSet found = local.blockingFlatMap(finder); IMap loader = new LoadFilesMap(); return found.blockingMap(loader); diff --git a/web/src/main/java/org/hillview/benchmarks/DPPerfBenchmarks.java b/web/src/main/java/org/hillview/benchmarks/DPPerfBenchmarks.java index b98bf4abb..cc2c27b12 100644 --- a/web/src/main/java/org/hillview/benchmarks/DPPerfBenchmarks.java +++ b/web/src/main/java/org/hillview/benchmarks/DPPerfBenchmarks.java @@ -32,8 +32,8 @@ import org.hillview.sketches.results.*; import org.hillview.storage.FileSetDescription; import org.hillview.storage.IFileReference; -import org.hillview.storage.JdbcConnectionInformation; -import org.hillview.storage.JdbcDatabase; +import org.hillview.storage.jdbc.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcDatabase; import org.hillview.table.ColumnDescription; import org.hillview.table.PrivacySchema; import org.hillview.table.QuantizationSchema; @@ -161,7 +161,7 @@ private void loadData(HashSet datasets) throws SQLException, IOException } private IDataSet loadTable(IDataSet start, FileSetDescription desc) { - IMap> finder = new FindFilesMap(desc); + IMap> finder = new FindFilesMap<>(desc); IDataSet found = start.blockingFlatMap(finder); IMap loader = new LoadFilesMap(); return found.blockingMap(loader); @@ -253,15 +253,20 @@ private void benchmarkHeatmap(ExperimentConfig conf, ColumnDescription col0, Col Runnable r; if (conf.dataset == Dataset.DB) { r = () -> { - JsonGroups> h = this.database.histogram2D(col0, col1, - c0.buckets, c1.buckets, null, c0.quantization, c1.quantization); - if (conf.usePostProcessing) { - ISketch>> pre = - new PrecomputedSketch>>(h); // not really used - DPHeatmapSketch, JsonGroups>> postProcess = - new DPHeatmapSketch<>(pre, ps.getColumnIndex(col0.name, col1.name), - c0.decomposition, c1.decomposition, epsilon, this.flightsWrapper.laplace); - postProcess.postProcess(h); + JsonGroups> h = null; + try { + h = this.database.histogram2D(col0, col1, + c0.buckets, c1.buckets, null, c0.quantization, c1.quantization); + if (conf.usePostProcessing) { + ISketch>> pre = + new PrecomputedSketch>>(h); // not really used + DPHeatmapSketch, JsonGroups>> postProcess = + new DPHeatmapSketch<>(pre, ps.getColumnIndex(col0.name, col1.name), + c0.decomposition, c1.decomposition, epsilon, this.flightsWrapper.laplace); + postProcess.postProcess(h); + } + } catch (SQLException throwables) { + throwables.printStackTrace(); } }; } else { @@ 
-298,14 +303,19 @@ private void benchmarkHistogram(ExperimentConfig conf, ColumnDescription col) { if (conf.dataset == Dataset.DB) { r = () -> { - JsonGroups histo = this.database.histogram( - col, c.buckets, null, c.quantization, 0); - if (conf.usePostProcessing) { - ISketch> pre = new PrecomputedSketch<>(histo); // not really used - PostProcessedSketch, Two>> post = - new DPHistogram<>(pre, ps.getColumnIndex(col.name), - c.decomposition, epsilon, false, this.flightsWrapper.laplace); - post.postProcess(histo); + JsonGroups histo = null; + try { + histo = this.database.histogram( + col, c.buckets, null, c.quantization, 0); + if (conf.usePostProcessing) { + ISketch> pre = new PrecomputedSketch<>(histo); // not really used + PostProcessedSketch, Two>> post = + new DPHistogram<>(pre, ps.getColumnIndex(col.name), + c.decomposition, epsilon, false, this.flightsWrapper.laplace); + post.postProcess(histo); + } + } catch (SQLException throwables) { + throwables.printStackTrace(); } }; } else { diff --git a/web/src/main/java/org/hillview/targets/GreenplumTarget.java b/web/src/main/java/org/hillview/targets/GreenplumTarget.java new file mode 100644 index 000000000..050a47e67 --- /dev/null +++ b/web/src/main/java/org/hillview/targets/GreenplumTarget.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2020 VMware Inc. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.hillview.targets; + +import org.hillview.*; +import org.hillview.sketches.PrecomputedSketch; +import org.hillview.storage.jdbc.JdbcConnectionInformation; +import org.hillview.table.api.ITable; +import org.hillview.utils.JsonInString; + +import java.sql.SQLException; + +@SuppressWarnings("SqlNoDataSourceInspection") +public class GreenplumTarget extends SimpleDBTarget { + static final String filePrefix = "file"; // Should match the prefix in the dump script + + /* + This is the expected contents of the dump-greenplum.sh script: +#!/bin/sh +DIR=$1 +PREFIX="file" +mkdir -p ${DIR} || exit 1 +echo "$(</dev/stdin)" >${DIR}/${PREFIX}${GP_SEGMENT_ID} + */ + + public GreenplumTarget(JdbcConnectionInformation conn, HillviewComputation c, String dir) { + super(conn, c, dir); + } + + @HillviewRpc + public void dumpTable(RpcRequest request, RpcRequestContext context) throws SQLException { + String tmpTableName = request.parseArgs(String.class); + String dumpScriptName = RpcObjectManager.instance.properties.getProperty( + "greenplumDumpScript", "/home/gpadmin/hillview/dump-greenplum.sh"); + String dumpDirectory = RpcObjectManager.instance.properties.getProperty( + "greenplumDumpDirectory", "/tmp"); + + // This creates a virtual table that will write its partitions + // in files named like ${dumpDirectory}/${tmpTableName}/${filePrefix}Number + String tableName = this.jdbc.table; + String query = "CREATE WRITABLE EXTERNAL WEB TABLE " + + tmpTableName + " (LIKE " + tableName + ") EXECUTE '" + + dumpScriptName + " " + dumpDirectory + "/" + tmpTableName + "' FORMAT 'CSV'"; + + this.database.executeUpdate(query); + query = "INSERT INTO " + tmpTableName + " SELECT * FROM " + tableName; + this.database.executeUpdate(query); + + PrecomputedSketch<ITable, JsonInString> sk = new PrecomputedSketch<ITable, JsonInString>( + JsonInString.makeJsonString(dumpDirectory + "/" + tmpTableName + "/" + filePrefix + "*")); + this.runCompleteSketch(this.table, sk, request, context); + } +} diff --git a/web/src/main/java/org/hillview/targets/InitialObjectTarget.java b/web/src/main/java/org/hillview/targets/InitialObjectTarget.java index 1fa196e40..3daf93597 100644 --- a/web/src/main/java/org/hillview/targets/InitialObjectTarget.java +++ b/web/src/main/java/org/hillview/targets/InitialObjectTarget.java @@ -20,6 +20,7 @@ import org.apache.commons.io.FileUtils; import org.hillview.*; import org.hillview.sketches.PrecomputedSketch; +import org.hillview.storage.jdbc.JdbcConnectionInformation; import org.hillview.table.PrivacySchema; import org.hillview.dataset.RemoteDataSet; import org.hillview.dataset.api.*; @@ -91,17 +92,17 @@ private void initialize(final HostList description) { @HillviewRpc public void getUIConfig(RpcRequest request, RpcRequestContext context) { - JsonString result; + JsonInString result; try { - result = new JsonString(Utilities.textFileContents("uiconfig.json")); + result = new JsonInString(Utilities.textFileContents("uiconfig.json")); result.toJsonTree(); // force parsing of the JSON -- to catch syntax errors } catch (Exception e) { HillviewLogger.instance.warn("File uiconfig.json could not be loaded", "{0}", e.getMessage()); - result = new JsonString("{}"); + result = new JsonInString("{}"); } Converters.checkNull(this.emptyDataset); - PrecomputedSketch<Empty, JsonString> sk = new PrecomputedSketch<Empty, JsonString>(result); + PrecomputedSketch<Empty, JsonInString> sk = new PrecomputedSketch<Empty, JsonInString>(result); this.runCompleteSketch(this.emptyDataset, sk, request, context); } @@ -117,7 +118,7 @@ public void openingBookmark(RpcRequest request, RpcRequestContext context) { throw new RuntimeException(e); }
Converters.checkNull(this.emptyDataset); - PrecomputedSketch<Empty, JsonString> sk = new PrecomputedSketch<Empty, JsonString>(new JsonString(content)); + PrecomputedSketch<Empty, JsonInString> sk = new PrecomputedSketch<Empty, JsonInString>(new JsonInString(content)); this.runCompleteSketch(this.emptyDataset, sk, request, context); } @@ -168,11 +169,25 @@ public void loadDBTable(RpcRequest request, RpcRequestContext context) { this.runMap(this.emptyDataset, mapper, (d, c) -> new TableTarget(d, c, dir), request, context); } + @HillviewRpc + public void loadGreenplumTable(RpcRequest request, RpcRequestContext context) { + // To load the data from Greenplum we first use JDBC to connect to the + // Greenplum root node and retrieve the metadata for the table. This + // path is similar to the one used for simple databases. + JdbcConnectionInformation conn = request.parseArgs(JdbcConnectionInformation.class); + IMap<Empty, Empty> map = new IdMap<>(); + Converters.checkNull(this.emptyDataset); + String dir = Paths.get(Converters.checkNull(conn.databaseKind).toLowerCase(), + Converters.checkNull(conn.database), + conn.table).toString(); + this.runMap(this.emptyDataset, map, (e, c) -> new GreenplumTarget(conn, c, dir), request, context); + } + @HillviewRpc public void findFiles(RpcRequest request, RpcRequestContext context) { FileSetDescription desc = request.parseArgs(FileSetDescription.class); HillviewLogger.instance.info("Finding files", "{0}", desc); - IMap<Empty, List<IFileReference>> finder = new FindFilesMap(desc); + IMap<Empty, List<IFileReference>> finder = new FindFilesMap<>(desc); Converters.checkNull(this.emptyDataset); String folder = Utilities.getFolder(desc.fileNamePattern); @@ -194,7 +209,7 @@ public void findLogs(RpcRequest request, RpcRequestContext context) { desc.fileKind = "hillviewlog"; desc.fileNamePattern = "./hillview*.log"; desc.repeat = 1; - IMap<Empty, List<IFileReference>> finder = new FindFilesMap(desc); + IMap<Empty, List<IFileReference>> finder = new FindFilesMap<>(desc); HillviewLogger.instance.info("Finding log files"); assert this.emptyDataset != null; this.runFlatMap(this.emptyDataset, finder, diff --git a/web/src/main/java/org/hillview/targets/PrivateSimpleDBTarget.java b/web/src/main/java/org/hillview/targets/PrivateSimpleDBTarget.java index 5546acf92..f4e985273 100644 --- a/web/src/main/java/org/hillview/targets/PrivateSimpleDBTarget.java +++ b/web/src/main/java/org/hillview/targets/PrivateSimpleDBTarget.java @@ -27,7 +27,7 @@ import org.hillview.maps.highorder.IdMap; import org.hillview.sketches.PrecomputedSketch; import org.hillview.sketches.results.*; -import org.hillview.storage.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcConnectionInformation; import org.hillview.table.ColumnDescription; import org.hillview.table.PrivacySchema; import org.hillview.table.SmallTable; @@ -67,8 +67,8 @@ private PrivacySchema getPrivacySchema() { public void changePrivacy(RpcRequest request, RpcRequestContext context) { this.wrapper.setPrivacySchema(request.parseArgs(PrivacySchema.class)); HillviewLogger.instance.info("Updated privacy schema"); - PrecomputedSketch<ITable, JsonString> empty = - new PrecomputedSketch<ITable, JsonString>(new JsonString("{}")); + PrecomputedSketch<ITable, JsonInString> empty = + new PrecomputedSketch<ITable, JsonInString>(new JsonInString("{}")); this.runCompleteSketch(this.table, empty, request, context); } @@ -92,7 +92,7 @@ public void getDataQuantiles(RpcRequest request, RpcRequestContext context) { } @HillviewRpc - public void histogramAndCDF(RpcRequest request, RpcRequestContext context) { + public void histogramAndCDF(RpcRequest request, RpcRequestContext context) throws SQLException { HistogramRequestInfo info = request.parseArgs(HistogramRequestInfo.class); assert info.size() == 2; @@ -149,7 +149,7 @@ public void
hLogLog(RpcRequest request, RpcRequestContext context) { } @HillviewRpc - public void histogram2D(RpcRequest request, RpcRequestContext context) { + public void histogram2D(RpcRequest request, RpcRequestContext context) throws SQLException { HistogramRequestInfo info = request.parseArgs(HistogramRequestInfo.class); assert info.size() == 2; JsonGroups> heatmap = this.database.histogram2D( diff --git a/web/src/main/java/org/hillview/targets/PrivateTableTarget.java b/web/src/main/java/org/hillview/targets/PrivateTableTarget.java index 292e989ca..cd96f4ea2 100644 --- a/web/src/main/java/org/hillview/targets/PrivateTableTarget.java +++ b/web/src/main/java/org/hillview/targets/PrivateTableTarget.java @@ -51,8 +51,8 @@ private PrivacySchema getPrivacySchema() { public void changePrivacy(RpcRequest request, RpcRequestContext context) { this.wrapper.setPrivacySchema(request.parseArgs(PrivacySchema.class)); HillviewLogger.instance.info("Updated privacy schema"); - PrecomputedSketch empty = - new PrecomputedSketch(new JsonString("{}")); + PrecomputedSketch empty = + new PrecomputedSketch(new JsonInString("{}")); this.runCompleteSketch(this.table, empty, request, context); } @@ -60,8 +60,8 @@ public void changePrivacy(RpcRequest request, RpcRequestContext context) { public void savePrivacy(RpcRequest request, RpcRequestContext context) { this.wrapper.savePrivacySchema(); HillviewLogger.instance.info("Saved privacy schema"); - PrecomputedSketch empty = - new PrecomputedSketch(new JsonString("{}")); + PrecomputedSketch empty = + new PrecomputedSketch(new JsonInString("{}")); this.runCompleteSketch(this.table, empty, request, context); } diff --git a/web/src/main/java/org/hillview/targets/SimpleDBTarget.java b/web/src/main/java/org/hillview/targets/SimpleDBTarget.java index f9738f383..65c05e13c 100644 --- a/web/src/main/java/org/hillview/targets/SimpleDBTarget.java +++ b/web/src/main/java/org/hillview/targets/SimpleDBTarget.java @@ -27,8 +27,8 @@ import org.hillview.maps.highorder.IdMap; import org.hillview.sketches.results.*; import org.hillview.storage.ColumnLimits; -import org.hillview.storage.JdbcConnectionInformation; -import org.hillview.storage.JdbcDatabase; +import org.hillview.storage.jdbc.JdbcConnectionInformation; +import org.hillview.storage.jdbc.JdbcDatabase; import org.hillview.table.ColumnDescription; import org.hillview.table.Schema; import org.hillview.table.SmallTable; @@ -107,7 +107,7 @@ public void hLogLog(RpcRequest request, RpcRequestContext context) { this.runSketch(this.table, sk, request, context); } - private void heavyHitters(RpcRequest request, RpcRequestContext context) { + private void heavyHitters(RpcRequest request, RpcRequestContext context) throws SQLException { HeavyHittersRequestInfo info = request.parseArgs(HeavyHittersRequestInfo.class); SmallTable tbl = this.database.topFreq( info.columns, Converters.toInt(Math.ceil(info.amount * info.totalRows / 100)), @@ -134,17 +134,17 @@ private void heavyHitters(RpcRequest request, RpcRequestContext context) { } @HillviewRpc - public void heavyHittersMG(RpcRequest request, RpcRequestContext context) { + public void heavyHittersMG(RpcRequest request, RpcRequestContext context) throws SQLException { this.heavyHitters(request, context); } @HillviewRpc - public void heavyHittersSampling(RpcRequest request, RpcRequestContext context) { + public void heavyHittersSampling(RpcRequest request, RpcRequestContext context) throws SQLException { this.heavyHitters(request, context); } @HillviewRpc - public void getDataQuantiles(RpcRequest 
request, RpcRequestContext context) { + public void getDataQuantiles(RpcRequest request, RpcRequestContext context) throws SQLException { QuantilesArgs[] info = request.parseArgs(QuantilesArgs[].class); JsonList result = new JsonList(info.length); for (QuantilesArgs quantilesArgs : info) { @@ -166,7 +166,7 @@ public void getDataQuantiles(RpcRequest request, RpcRequestContext context) { } @HillviewRpc - public void histogramAndCDF(RpcRequest request, RpcRequestContext context) { + public void histogramAndCDF(RpcRequest request, RpcRequestContext context) throws SQLException { HistogramRequestInfo info = request.parseArgs(HistogramRequestInfo.class); assert info.size() == 2; ColumnDescription cd = info.histos[0].cd; // both args should be on the same column @@ -183,7 +183,7 @@ public void histogramAndCDF(RpcRequest request, RpcRequestContext context) { } @HillviewRpc - public void histogram2D(RpcRequest request, RpcRequestContext context) { + public void histogram2D(RpcRequest request, RpcRequestContext context) throws SQLException { HistogramRequestInfo info = request.parseArgs(HistogramRequestInfo.class); assert info.size() == 2; JsonGroups> heatmap = this.database.histogram2D( diff --git a/web/src/main/java/org/hillview/targets/TableTarget.java b/web/src/main/java/org/hillview/targets/TableTarget.java index cbb02454f..3e9adff55 100644 --- a/web/src/main/java/org/hillview/targets/TableTarget.java +++ b/web/src/main/java/org/hillview/targets/TableTarget.java @@ -207,8 +207,8 @@ static class GeoFileInformation { String geoFile; // e.g., geo/us_states/cb_2019_us_state_20m.shp // Supported formats: shapeFile (shp) - public JsonString createJSON(PolygonSet ps) { - return new JsonString( + public JsonInString createJSON(PolygonSet ps) { + return new JsonInString( "{" + "columnName:" + this.columnName + ",\n" + "property:" + this.property + ",\n" + @@ -242,7 +242,7 @@ public void getGeo(RpcRequest request, RpcRequestContext context) throws IOExcep if (geoInfo == null) throw new RuntimeException("No geographic data found for column " + desc.name); PolygonSet ps = new PolygonSet(geoInfo.geoFile); - PrecomputedSketch pk = new PrecomputedSketch<>(geoInfo.createJSON(ps)); + PrecomputedSketch pk = new PrecomputedSketch<>(geoInfo.createJSON(ps)); this.runCompleteSketch(this.table, pk, request, context); } @@ -267,8 +267,8 @@ public void createBookmark(RpcRequest request, RpcRequestContext context) { } catch (Exception e) { throw new RuntimeException(e); } - JsonString bookmarkFile = new JsonString(guid + InitialObjectTarget.bookmarkExtension); - PrecomputedSketch empty = new PrecomputedSketch(bookmarkFile); + JsonInString bookmarkFile = new JsonInString(guid + InitialObjectTarget.bookmarkExtension); + PrecomputedSketch empty = new PrecomputedSketch(bookmarkFile); this.runCompleteSketch(this.table, empty, request, context); } diff --git a/web/src/main/webapp/dataViews/tableView.ts b/web/src/main/webapp/dataViews/tableView.ts index 17e3a1ad3..2a9013134 100644 --- a/web/src/main/webapp/dataViews/tableView.ts +++ b/web/src/main/webapp/dataViews/tableView.ts @@ -605,11 +605,7 @@ export class TableView extends TSViewBase implements IScrollTarget, OnNextK { action: () => this.showColumns(-1, true), help: "Sort the data first on this column, in decreasing order", }, !this.isPrivate()); - const foldoutMenu = this.contextMenu.addExpandableItem({ - text: "Charts", - action: () => null, - help: "Choose a chart to draw.", - }); + const foldoutMenu = this.contextMenu.addExpandableItem("Charts", "Choose a chart to 
draw."); this.contextMenu.addItem({ text: "Rename...", action: () => this.renameColumn(), @@ -698,8 +694,11 @@ export class TableView extends TSViewBase implements IScrollTarget, OnNextK { ); foldoutMenu.addItem({ text: "Quartiles", - action: () => this.chart( - this.schema.getCheckedDescriptions(this.getSelectedColNames()), "QuartileVector"), + action: () => { + console.log("Computing quartiles"); + this.chart( + this.schema.getCheckedDescriptions(this.getSelectedColNames()), "QuartileVector") + }, help: "Plot the data in the selected columns as a vector of quartiles. " + "Applies to one or two columns only.", diff --git a/web/src/main/webapp/initialObject.ts b/web/src/main/webapp/initialObject.ts index bf2f2db25..15f5d0589 100644 --- a/web/src/main/webapp/initialObject.ts +++ b/web/src/main/webapp/initialObject.ts @@ -16,18 +16,18 @@ */ import {DatasetView, IDatasetSerialization} from "./datasetView"; -import {SchemaReceiver} from "./modules"; +import {SchemaReceiver, TableTargetAPI} from "./modules"; import { FileSetDescription, FileSizeSketchInfo, JdbcConnectionInformation, CassandraConnectionInfo, - RemoteObjectId, + RemoteObjectId, FederatedDatabase, TableSummary, } from "./javaBridge"; import {OnCompleteReceiver, RemoteObject} from "./rpc"; import {BaseReceiver} from "./tableTarget"; import {FullPage, PageTitle} from "./ui/fullPage"; -import {ICancellable, significantDigits, getUUID} from "./util"; +import {ICancellable, significantDigits, getUUID, assertNever} from "./util"; export interface FilesLoaded { kind: "Files"; @@ -74,15 +74,17 @@ export function getDescription(data: DataLoaded): PageTitle { * Initiates an RPC to get the file size. */ class FilesReceiver extends OnCompleteReceiver { - constructor(loadMenuPage: FullPage, operation: ICancellable, protected data: DataLoaded) { - super(loadMenuPage, operation, "Get file info"); + constructor(sourcePage: FullPage, operation: ICancellable, + protected data: DataLoaded, + protected newDataset: boolean) { + super(sourcePage, operation, "Get file info"); } public run(remoteObjId: RemoteObjectId): void { const fn = new RemoteObject(remoteObjId); const rr = fn.createStreamingRpcRequest("getFileSize", null); rr.chain(this.operation); - const observer = new FileSizeReceiver(this.page, rr, this.data, fn); + const observer = new FileSizeReceiver(this.page, rr, this.data, fn, this.newDataset); rr.invoke(observer); } } @@ -92,10 +94,11 @@ class FilesReceiver extends OnCompleteReceiver { * It initiates a loadTable RPC request to load data from these files as a table. */ class FileSizeReceiver extends OnCompleteReceiver { - constructor(loadMenuPage: FullPage, operation: ICancellable, + constructor(sourcePage: FullPage, operation: ICancellable, protected data: DataLoaded, - protected remoteObj: RemoteObject) { - super(loadMenuPage, operation, "Load data"); + protected remoteObj: RemoteObject, + protected newDataset: boolean) { + super(sourcePage, operation, "Load data"); } public run(size: FileSizeSketchInfo): void { @@ -109,7 +112,7 @@ class FileSizeReceiver extends OnCompleteReceiver { // TODO: prune seems to be broken. 
const rr = this.remoteObj.createStreamingRpcRequest("prune", null); rr.chain(this.operation); - const observer = new FilePruneReceiver(this.page, rr, this.data, size); + const observer = new FilePruneReceiver(this.page, rr, this.data, size, this.newDataset); rr.invoke(observer); } else { const fileSize = "Loading " + size.fileCount + " file(s), total size " + @@ -117,16 +120,17 @@ class FileSizeReceiver extends OnCompleteReceiver { const fn = new RemoteObject(this.remoteObj.remoteObjectId); const rr = fn.createStreamingRpcRequest("loadTable", null); rr.chain(this.operation); - const observer = new RemoteTableReceiver(this.page, rr, this.data, fileSize); + const observer = new RemoteTableReceiver(this.page, rr, this.data, fileSize, this.newDataset); rr.invoke(observer); } } } class FilePruneReceiver extends OnCompleteReceiver { - constructor(loadMenuPage: FullPage, operation: ICancellable, - protected data: DataLoaded, protected readonly size: FileSizeSketchInfo) { - super(loadMenuPage, operation, "Load data"); + constructor(sourcePage: FullPage, operation: ICancellable, + protected data: DataLoaded, protected readonly size: FileSizeSketchInfo, + protected newDataset: boolean) { + super(sourcePage, operation, "Load data"); } public run(remoteObjId: RemoteObjectId): void { @@ -135,7 +139,7 @@ class FilePruneReceiver extends OnCompleteReceiver { const fn = new RemoteObject(remoteObjId); const rr = fn.createStreamingRpcRequest("loadTable", null); rr.chain(this.operation); - const observer = new RemoteTableReceiver(this.page, rr, this.data, fileSize); + const observer = new RemoteTableReceiver(this.page, rr, this.data, fileSize, this.newDataset); rr.invoke(observer); } } @@ -144,16 +148,52 @@ class FilePruneReceiver extends OnCompleteReceiver { * Receives the ID for a remote table and initiates a request to get the * table schema. */ -export class RemoteTableReceiver extends BaseReceiver { +class RemoteTableReceiver extends BaseReceiver { + /** + * Create a renderer for a new table. + * @param sourcePage Parent page initiating this request. + * @param data Data that has been loaded. + * @param operation Operation that will bring the results. + * @param description Description of the files that are being loaded. + * @param newDataset If true this is a new dataset. + */ + constructor(sourcePage: FullPage, operation: ICancellable, protected data: DataLoaded, + description: string, protected newDataset: boolean) { + super(sourcePage, operation, description, null); + } + + public run(value: RemoteObjectId): void { + super.run(value); + const rr = this.remoteObject.createGetSummaryRequest(); + rr.chain(this.operation); + const title = getDescription(this.data); + if (this.newDataset) { + const dataset = new DatasetView(this.remoteObject.remoteObjectId, title.format, this.data, this.page); + const newPage = dataset.newPage(title, null); + rr.invoke(new SchemaReceiver(newPage, rr, this.remoteObject, dataset, null, null)); + } else { + rr.invoke(new SchemaReceiver(this.page, rr, this.remoteObject, this.page.dataset!, null, null)); + } + } +} + +/** + * Receives the ID for a remote GreenplumTarget and initiates a request to get the + * table schema. + */ +class GreenplumTableReceiver extends BaseReceiver { /** * Create a renderer for a new table. * @param loadMenuPage Parent page initiating this request, always the page of the LoadMenu. * @param data Data that has been loaded. + * @param initialObject Handle to the initial object; used later to load the files + * obtained from dumping the table. 
* @param operation Operation that will bring the results. * @param progressInfo Description of the files that are being loaded. */ - constructor(loadMenuPage: FullPage, operation: ICancellable<RemoteObjectId>, protected data: DataLoaded, - progressInfo: string) { + constructor(loadMenuPage: FullPage, operation: ICancellable<RemoteObjectId>, + protected initialObject: InitialObject, + protected data: DataLoaded, progressInfo: string) { super(loadMenuPage, operation, progressInfo, null); } @@ -164,7 +204,56 @@ public run(value: RemoteObjectId): void { const title = getDescription(this.data); const dataset = new DatasetView(this.remoteObject.remoteObjectId, title.format, this.data, this.page); const newPage = dataset.newPage(title, null); - rr.invoke(new SchemaReceiver(newPage, rr, this.remoteObject, dataset, null, null)); + rr.invoke(new GreenplumSchemaReceiver(newPage, rr, this.initialObject, this.remoteObject)); + } +} + +class GreenplumSchemaReceiver extends OnCompleteReceiver<TableSummary> { + constructor(page: FullPage, operation: ICancellable<TableSummary>, + protected initialObject: InitialObject, + protected remoteObject: TableTargetAPI) { + super(page, operation, "Get schema"); + } + + public run(ts: TableSummary): void { + if (ts.schema == null) { + this.page.reportError("No schema received; empty dataset?"); + return; + } + // Ask Greenplum to dump the data; receive back the name of the temporary files + // where the tables are stored on the remote machines. + // This is the name of the temporary table used. + const tableName = "T" + getUUID().replace(/-/g, ''); + const rr = this.remoteObject.createStreamingRpcRequest<string>("dumpTable", tableName); + rr.invoke(new GreenplumLoader(this.page, ts, this.initialObject, rr)); + } +} + +class GreenplumLoader extends OnCompleteReceiver<string> { + constructor(page: FullPage, protected summary: TableSummary, + protected remoteObject: InitialObject, + operation: ICancellable<string>) { + super(page, operation, "Find table fragments"); + } + + public run(value: string): void { + const files: FileSetDescription = { + fileKind: "csv", + fileNamePattern: value, + schemaFile: null, + headerRow: false, + schema: this.summary.schema, + name: (this.page.dataset?.loaded as TablesLoaded).description.table, + repeat: 1, + logFormat: null, + startTime: null, + endTime: null, + deleteAfterLoading: true + }; + const rr = this.remoteObject.createStreamingRpcRequest<RemoteObjectId>("findFiles", files); + const observer = new FilesReceiver(this.page, rr, + { kind: "Files", description: files }, false); + rr.invoke(observer); } } @@ -186,48 +275,61 @@ export class InitialObject extends RemoteObject { public loadFiles(files: FileSetDescription, loadMenuPage: FullPage): void { const rr = this.createStreamingRpcRequest<RemoteObjectId>("findFiles", files); const observer = new FilesReceiver(loadMenuPage, rr, - { kind: "Files", description: files }); + { kind: "Files", description: files }, true); rr.invoke(observer); } public loadCassandraFiles(conn: CassandraConnectionInfo, loadMenuPage: FullPage): void { const rr = this.createStreamingRpcRequest<RemoteObjectId>("findCassandraFiles", conn); const observer = new FilesReceiver(loadMenuPage, rr, - { kind: "SSTable", description: conn }, true); rr.invoke(observer); } public loadLogs(loadMenuPage: FullPage): void { // Use a guid to force the request to reload every time - const rr = this.createStreamingRpcRequest<RemoteObjectId>("findLogs", getUUID());
+        const observer = new FilesReceiver(loadMenuPage, rr, { kind: "Hillview logs"}, true);
         rr.invoke(observer);
     }

-    public loadDBTable(conn: JdbcConnectionInformation, loadMenuPage: FullPage): void {
-        const rr = this.createStreamingRpcRequest("loadDBTable", conn);
+    protected loadTable(conn: JdbcConnectionInformation, loadMenuPage: FullPage, method: string): void {
+        const rr = this.createStreamingRpcRequest(method, conn);
         const observer = new RemoteTableReceiver(loadMenuPage, rr,
-            { kind: "DB", description: conn }, "loading database table");
+            { kind: "DB", description: conn }, "loading database table", true);
         rr.invoke(observer);
     }

     public loadSimpleDBTable(conn: JdbcConnectionInformation, loadMenuPage: FullPage): void {
-        const rr = this.createStreamingRpcRequest("loadSimpleDBTable", conn);
-        const observer = new RemoteTableReceiver(loadMenuPage, rr,
-            { kind: "DB", description: conn }, "loading database table");
+        this.loadTable(conn, loadMenuPage, "loadSimpleDBTable");
+    }
+
+    protected loadGreenplumTable(conn: JdbcConnectionInformation, loadMenuPage: FullPage, method: string): void {
+        const rr = this.createStreamingRpcRequest(method, conn);
+        const observer = new GreenplumTableReceiver(loadMenuPage, rr, this,
+            { kind: "DB", description: conn }, "loading Greenplum table");
         rr.invoke(observer);
     }

-    public loadFederatedDBTable(conn: JdbcConnectionInformation | CassandraConnectionInfo,
-                                db: String, loadMenuPage: FullPage): void {
+    public loadFederatedDBTable(conn: JdbcConnectionInformation | CassandraConnectionInfo | null,
+                                db: FederatedDatabase | null, loadMenuPage: FullPage): void {
+        if (db == null || conn == null) {
+            loadMenuPage.reportError("Unknown database kind");
+            return;
+        }
         switch (db) {
             case "mysql":
             case "impala":
-                this.loadDBTable(conn, loadMenuPage);
+                this.loadTable(conn as JdbcConnectionInformation, loadMenuPage, "loadDBTable");
                 break;
             case "cassandra":
-                this.loadCassandraFiles(conn, loadMenuPage);
+                this.loadCassandraFiles(conn as CassandraConnectionInfo, loadMenuPage);
+                break;
+            case "greenplum":
+                this.loadGreenplumTable(conn as JdbcConnectionInformation, loadMenuPage, "loadGreenplumTable");
                 break;
+            default:
+                assertNever(db);
         }
     }
 }
diff --git a/web/src/main/webapp/javaBridge.ts b/web/src/main/webapp/javaBridge.ts
index ed393d8fb..714fff301 100644
--- a/web/src/main/webapp/javaBridge.ts
+++ b/web/src/main/webapp/javaBridge.ts
@@ -58,6 +58,8 @@ export function asContentsKind(kind: string): ContentsKind {
     }
 }

+export type FederatedDatabase = "mysql" | "impala" | "cassandra" | "greenplum";
+
 export type SimpleFeatureCollection = FeatureCollection;

 /**
@@ -162,6 +164,7 @@ export interface FileSetDescription {
     fileKind: DataKinds;
     fileNamePattern: string;
     schemaFile: string | null;
+    schema: Schema | null;
     headerRow?: boolean;
     cookie?: string;
     repeat: number;
@@ -169,6 +172,7 @@ export interface FileSetDescription {
     logFormat: string | null;
     startTime: number | null;
     endTime: number | null;
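+    // Assumption from this diff alone: when true, the workers delete the staged
+    // files once loading completes; the Greenplum loader sets it so that its
+    // temporary CSV dumps clean themselves up.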
+    deleteAfterLoading: boolean;
 }

 export interface CountWithConfidence {
diff --git a/web/src/main/webapp/loadView.ts b/web/src/main/webapp/loadView.ts
index 474836869..a5ba4c35a 100644
--- a/web/src/main/webapp/loadView.ts
+++ b/web/src/main/webapp/loadView.ts
@@ -17,7 +17,14 @@
 import {DatasetView} from "./datasetView";
 import {InitialObject} from "./initialObject";
-import {FileSetDescription, JdbcConnectionInformation, CassandraConnectionInfo, Status, UIConfig} from "./javaBridge";
+import {
+    FileSetDescription,
+    JdbcConnectionInformation,
+    CassandraConnectionInfo,
+    Status,
+    UIConfig,
+    FederatedDatabase
+} from "./javaBridge";
 import {OnCompleteReceiver, RemoteObject} from "./rpc";
 import {Test} from "./test";
 import {IDataView} from "./ui/dataview";
@@ -26,7 +33,7 @@ import {ErrorDisplay} from "./ui/errReporter";
 import {FullPage} from "./ui/fullPage";
 import {MenuItem, SubMenu, TopMenu, TopMenuItem} from "./ui/menu";
 import {ViewKind} from "./ui/ui";
-import {Converters, ICancellable, loadFile, getUUID, disableSuggestions} from "./util";
+import {Converters, ICancellable, loadFile, getUUID, disableSuggestions, assertNever} from "./util";
 import {HillviewToplevel} from "./toplevel";

 /**
@@ -80,13 +87,15 @@ export class LoadView extends RemoteObject implements IDataView {
                 const files: FileSetDescription = {
                     fileNamePattern: "data/ontime/????_*.csv*",
                     schemaFile: "short.schema",
+                    schema: null,
                     headerRow: true,
                     repeat: 1,
                     name: "Flights (15 columns)",
                     fileKind: "csv",
                     logFormat: null,
                     startTime: null,
-                    endTime: null
+                    endTime: null,
+                    deleteAfterLoading: false
                 };
                 this.init.loadFiles(files, this.page);
             },
@@ -97,13 +106,15 @@ export class LoadView extends RemoteObject implements IDataView {
                 const files: FileSetDescription = {
                     fileNamePattern: "data/ontime_small_orc/*.orc",
                     schemaFile: "schema",
+                    schema: null,
                     headerRow: true,
                     repeat: 1,
                     name: "Flights (15 columns, ORC)",
                     fileKind: "orc",
                     logFormat: null,
                     startTime: null,
-                    endTime: null
+                    endTime: null,
+                    deleteAfterLoading: false
                 };
                 this.init.loadFiles(files, this.page);
             },
@@ -114,13 +125,15 @@ export class LoadView extends RemoteObject implements IDataView {
                 const files: FileSetDescription = {
                     fileNamePattern: "data/ontime_big_orc/*.orc",
                     schemaFile: "schema",
+                    schema: null,
                     headerRow: true,
                     repeat: 1,
                     name: "Flights (ORC)",
                     fileKind: "orc",
                     logFormat: null,
                     startTime: null,
-                    endTime: null
+                    endTime: null,
+                    deleteAfterLoading: false
                 };
                 this.init.loadFiles(files, this.page);
             },
@@ -132,13 +145,15 @@ export class LoadView extends RemoteObject implements IDataView {
                 const files: FileSetDescription = {
                     fileNamePattern: "data/ontime_private/????_*.csv*",
                     schemaFile: "short.schema",
+                    schema: null,
                     headerRow: true,
                     repeat: 1,
                     name: "Flights (private)",
                     fileKind: "csv",
                     logFormat: null,
                     startTime: null,
-                    endTime: null
+                    endTime: null,
+                    deleteAfterLoading: false
                 };
                 this.init.loadFiles(files, this.page);
             },
@@ -151,13 +166,15 @@ export class LoadView extends RemoteObject implements IDataView {
                 const files: FileSetDescription = {
                     fileNamePattern: "data/ontime_private/*.orc",
                     schemaFile: "schema",
+                    schema: null,
                     headerRow: true,
                     repeat: 1,
                     name: "Flights (private)",
                     fileKind: "orc",
                     logFormat: null,
                     startTime: null,
-                    endTime: null
+                    endTime: null,
+                    deleteAfterLoading: false
                 };
                 this.init.loadFiles(files, this.page);
             },
@@ -238,7 +255,8 @@ export class LoadView extends RemoteObject implements IDataView {
             text: "Federated DB tables...",
             action: () => {
                 const dialog = new DBDialog(true);
-                dialog.setAction(() => this.init.loadFederatedDBTable(dialog.getDBConnection(), dialog.getDatabaseKind() , this.page));
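+                // getDBConnection() and getDbKind() may both return null when the
+                // dialog has no valid selection; loadFederatedDBTable reports
+                // "Unknown database kind" in that case instead of throwing.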
+                dialog.setAction(() =>
+                    this.init.loadFederatedDBTable(dialog.getDBConnection(), dialog.getDbKind(), this.page));
                 dialog.show();
             },
             help: "A set of database tables residing in databases on each worker machine."
@@ -448,6 +466,7 @@ class CSVFileDialog extends Dialog {
     public getFiles(): FileSetDescription {
         return {
             schemaFile: this.getFieldValue("schemaFile"),
+            schema: null,
             fileNamePattern: this.getFieldValue("fileNamePattern"),
             headerRow: this.getBooleanValue("hasHeader"),
             repeat: 1,
@@ -455,7 +474,8 @@ class CSVFileDialog extends Dialog {
             fileKind: "csv",
             logFormat: null,
             startTime: null,
-            endTime: null
+            endTime: null,
+            deleteAfterLoading: false
         };
     }
 }
@@ -485,6 +505,7 @@ class GenericLogDialog extends Dialog {
     public getFiles(): FileSetDescription {
         return {
             schemaFile: null,
+            schema: null,
             logFormat: this.getFieldValue("logFormat"),
             fileNamePattern: this.getFieldValue("fileNamePattern"),
             headerRow: false,
@@ -493,7 +514,8 @@ class GenericLogDialog extends Dialog {
             cookie: getUUID(),
             fileKind: "genericlog",
             startTime: Converters.doubleFromDate(this.getDateTimeValue("startTime")),
-            endTime: Converters.doubleFromDate(this.getDateTimeValue("endTime"))
+            endTime: Converters.doubleFromDate(this.getDateTimeValue("endTime")),
+            deleteAfterLoading: false
         };
     }
 }
@@ -517,6 +539,7 @@ class JsonFileDialog extends Dialog {
     public getFiles(): FileSetDescription {
         return {
             schemaFile: this.getFieldValue("schemaFile"),
+            schema: null,
             fileNamePattern: this.getFieldValue("fileNamePattern"),
             headerRow: false,
             repeat: 1,
@@ -524,7 +547,8 @@ class JsonFileDialog extends Dialog {
             fileKind: "json",
             logFormat: null,
             startTime: null,
-            endTime: null
+            endTime: null,
+            deleteAfterLoading: false
         };
     }
 }
@@ -545,6 +569,7 @@ class ParquetFileDialog extends Dialog {
     public getFiles(): FileSetDescription {
         return {
             schemaFile: null, // not used
+            schema: null,
             fileNamePattern: this.getFieldValue("fileNamePattern"),
             headerRow: false, // not used
             repeat: 1,
@@ -552,7 +577,8 @@ class ParquetFileDialog extends Dialog {
             fileKind: "parquet",
             logFormat: null,
             startTime: null,
-            endTime: null
+            endTime: null,
+            deleteAfterLoading: false
         };
     }
 }
@@ -576,6 +602,7 @@ class OrcFileDialog extends Dialog {
     public getFiles(): FileSetDescription {
         return {
             schemaFile: this.getFieldValue("schemaFile"),
+            schema: null,
             fileNamePattern: this.getFieldValue("fileNamePattern"),
             headerRow: false, // not used
             repeat: 1,
@@ -583,7 +610,8 @@ class OrcFileDialog extends Dialog {
             fileKind: "orc",
             logFormat: null,
             startTime: null,
-            endTime: null
+            endTime: null,
+            deleteAfterLoading: false
         };
     }
 }
@@ -593,10 +621,13 @@
  */
 class DBDialog extends Dialog {
     constructor(isFederated: boolean) {
-        super("Load DB tables", "Loads one table on each machine that is part of the service.");
-        const arrDB = ["mysql", "impala"];
-        if (isFederated) arrDB.push("cassandra");
-        const sel = this.addSelectField("databaseKind", "Database kind", arrDB, "mysql",
+        super("Load DB tables", "Loads data from a parallel or federated database.");
+        const arrDB: FederatedDatabase[] = ["mysql"];
+        if (isFederated) arrDB.push("cassandra", "greenplum");
+        else arrDB.push("impala");
+
+        const sel = this.addSelectFieldAsObject(
+            "databaseKind", "Database kind", arrDB, (l) => l.toString(),
             "The kind of database.");
         sel.onchange = () => this.dbChanged();
         const host = this.addTextField("host", "Host", FieldKind.String, "localhost",
@@ -630,9 +661,15 @@ class DBDialog extends Dialog {
         this.setCacheTitle("DBDialog");
     }

+    public getDbKind(): FederatedDatabase | null {
+        return this.getFieldValueAsObject("databaseKind");
+    }
+
     public dbChanged(): void {
-        const db = this.getFieldValue("databaseKind");
+        const db = this.getDbKind();
         switch (db) {
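+            // Each case below fills in the engine's standard default port and
+            // toggles the fields that only apply to it; Greenplum is
+            // PostgreSQL-based, hence 5432. assertNever (from ./util) is
+            // presumably the usual (x: never) => never exhaustiveness helper,
+            // so a new FederatedDatabase value without a case breaks the build.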
+            case null:
+                break;
             case "mysql":
                 this.setFieldValue("port", "3306");
                 this.hideInputField("jmxPort");
@@ -649,21 +686,29 @@ class DBDialog extends Dialog {
                 this.showInputField("jmxPort");
                 this.showInputField("dbDir");
                 break;
+            case "greenplum":
+                this.setFieldValue("port", "5432");
+                this.hideInputField("jmxPort");
+                this.hideInputField("dbDir");
+                break;
+            default:
+                assertNever(db);
         }
     }

-    public getDatabaseKind(): String {
-        return this.getFieldValue("databaseKind");
-    }
-
-    public getDBConnection(): any {
-        const db = this.getFieldValue("databaseKind");
+    public getDBConnection(): JdbcConnectionInformation | CassandraConnectionInfo | null {
+        const db = this.getDbKind();
         switch (db) {
+            case null:
+                return null;
             case "mysql":
             case "impala":
+            case "greenplum":
                 return this.getJdbcConnection();
             case "cassandra":
                 return this.getCassandraConnection();
+            default:
+                assertNever(db);
         }
     }
diff --git a/web/src/main/webapp/test.ts b/web/src/main/webapp/test.ts
index f44c41103..229fb5c38 100644
--- a/web/src/main/webapp/test.ts
+++ b/web/src/main/webapp/test.ts
@@ -137,8 +137,6 @@ export class Test {
     public createTestProgram(): void {
         /* This produces the following pages:
-         First tab:
-         syslog logs
          Second tab:
          ontime small dataset
          1: a tabular view
          2: schema view
@@ -147,8 +145,8 @@ export class Test {
          5: Histogram of UniqueCarrier, shown as pie chart
          6: 2dHistogram of DepTime, Depdelay
          7: Table view, filtered flights
-         8: Trellis 2D histograms (DepTime, DepDelay) grouped by ActualElapsedTime
-         9: Trellis Histograms of UniqueCarrier grouped by ActualElapsedTime
+         8: Trellis 2D histograms (DepTime, DepDelay) grouped by DayOfWeek
+         9: Trellis Histograms of UniqueCarrier grouped by DayOfWeek
          10: Trellis heatmap plot
          11: Quartiles plot
          12: Non-stacked bar charts plot
@@ -176,9 +174,13 @@ export class Test {
                 confirm.click();
             },
         }, {
-            description: "Load all flights",
+            description: "Close this tab",
             cond: () => Test.existsElement("#hillviewPage1 .idle"),
-            cont: () => findElement("#hillviewPage0 .topMenu #Flights__15_columns__CSV_").click(),
+            cont: () => findElement(".tab .close").click(),
+        }, {
+            description: "Load all flights",
+            cond: () => Test.existsElement("#hillviewPage1 .idle"),
+            cont: () => findElement("#hillviewPage0 .topMenu #Flights__15_columns__CSV_").click(),
         }, {
             description: "Show no columns",
             cond: () => Test.existsElement("#hillviewPage1 .idle"),
@@ -267,7 +269,7 @@ export class Test {
                 this.next(); // no rpc
             }
         }, {
-            description: "Display histogram from schema view",
+            description: "Display histogram",
             cond: () => true,
             cont: () => {
                 const col1 = findElement("#hillviewPage1 thead .col1");
@@ -356,27 +358,23 @@ export class Test {
             },
         }, {
             description: "Quartiles vector",
-            cond: () => Test.existsElement("#hillviewPage10 .idle"),
+            cond: () => Test.existsElement("#hillviewPage6 .idle"),
             cont: () => {
-                const dest = findElement("#hillviewPage1 thead td[data-colname=Dest] .truncated");
-                dest.click();
-                const arrTime = findElement("#hillviewPage1 thead td[data-colname=ArrTime] .truncated");
-                arrTime.dispatchEvent(controlClickEvent());
-                arrTime.dispatchEvent(contextMenuEvent());
-                const qv = findElement("#hillviewPage1 .dropdown #Quartiles");
-                qv.click();
+                findElement("#hillviewPage1 #Chart").click();
+                findElement("#Quartiles___").click();
+                (findElement(".dialog #columnName0") as HTMLInputElement).value = "Dest";
+                (findElement(".dialog #columnName1") as HTMLInputElement).value = "ArrTime";
+                findElement(".dialog .confirm").click();
             }
         }, {
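+            // Like the Quartiles step above, this drives the page's Chart menu and
+            // fills the column-selection dialog directly; ids such as
+            // #I2D_Histogram___ appear to be derived from the menu-item labels.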
findElement("#hillviewPage1 thead td[data-colname=UniqueCarrier] .truncated"); - carrier.click(); - const depDelay = findElement("#hillviewPage1 thead td[data-colname=DepDelay] .truncated"); - depDelay.dispatchEvent(controlClickEvent()); - depDelay.dispatchEvent(contextMenuEvent()); - const qv = findElement("#hillviewPage1 .dropdown #Histogram"); - qv.click(); + findElement("#hillviewPage1 #Chart").click(); + findElement("#I2D_Histogram___").click(); + (findElement(".dialog #columnName0") as HTMLInputElement).value = "UniqueCarrier"; + (findElement(".dialog #columnName1") as HTMLInputElement).value = "DepDelay"; + findElement(".dialog .confirm").click(); } }, { description: "Change buckets for 2D histogram", @@ -414,7 +412,10 @@ export class Test { const cellArr = findElement("#hillviewPage1 thead td[data-colname=ArrDelay] .truncated"); cellArr.dispatchEvent(mouseClickEvent(true, false)); cellArr.dispatchEvent(contextMenuEvent()); + const chart = findElement("#hillviewPage1 .dropdown #Charts"); + chart.click(); const qv = findElement("#hillviewPage1 .dropdown #Correlation"); + console.log(qv + "," + qv.className + "," + qv.parentElement!.className); qv.click(); } }, { diff --git a/web/src/main/webapp/ui/menu.ts b/web/src/main/webapp/ui/menu.ts index 203cef691..b38929738 100644 --- a/web/src/main/webapp/ui/menu.ts +++ b/web/src/main/webapp/ui/menu.ts @@ -199,6 +199,8 @@ abstract class BaseMenu implements IHtmlElement { * A context menu is displayed on right-click on some displayed element. */ export class ContextMenu extends BaseMenu implements IHtmlElement { + protected foldOuts: FoldoutMenu[]; + /** * Create a context menu. * @param parent HTML element where this is inserted. @@ -206,6 +208,7 @@ export class ContextMenu extends BaseMenu implements IHtmlElement { */ constructor(public readonly parent: Element, mis?: MenuItem[]) { super(); + this.foldOuts = []; if (mis != null) this.addItems(mis); this.outer.classList.add("dropdown"); @@ -224,6 +227,13 @@ export class ContextMenu extends BaseMenu implements IHtmlElement { this.outer.focus(); } + public clear(): void { + super.clear(); + for (const f of this.foldOuts) + f.clear(); + this.foldOuts = []; + } + /** * Display the menu. */ @@ -271,15 +281,13 @@ export class ContextMenu extends BaseMenu implements IHtmlElement { } } - public addExpandableItem(mi: MenuItem): FoldoutMenu { - const index = this.cells.length; - const cell = this.addItem(mi, true); - const arrow = document.createElement("span"); - arrow.textContent = "▸"; - arrow.classList.add("menuArrow"); - cell.appendChild(arrow); + public addExpandableItem(text: string, help: string): FoldoutMenu { const fo = new FoldoutMenu(this); - cell.onmouseenter = () => { + this.foldOuts.push(fo); + const show = () => { + // Function executed when displaying the foldout menu. + // It computes the coordinates on the screen where the menu should + // be displayed. 
             const max = browserWindowSize();
             let x = this.outer.offsetLeft + this.outer.offsetWidth;
             let y = arrow.offsetTop + cell.offsetTop + this.outer.offsetTop - ContextMenu.borderSize;
@@ -292,7 +300,19 @@ export class ContextMenu extends BaseMenu implements IHtmlElement {
                 y = max.height - fo.outer.offsetHeight;
             fo.showAt(x, y);
             this.select(index);
+        };
+        const item: MenuItem = {
+            text,
+            help,
+            action: show
         }
+        const index = this.cells.length;
+        const cell = this.addItem(item, true);
+        const arrow = document.createElement("span");
+        arrow.textContent = "▸";
+        arrow.classList.add("menuArrow");
+        cell.appendChild(arrow);
+        cell.onmouseenter = show;
         cell.onmouseleave = () => {
             fo.hide();
             this.select(-1);
@@ -304,18 +324,19 @@
 export class FoldoutMenu extends ContextMenu {
     constructor(protected parentMenu: ContextMenu) {
         super(parentMenu.parent);
+        this.outer.onmouseenter = () => this.show();
     }

     addItem(mi: MenuItem, enabled: boolean): HTMLTableDataCellElement {
-        const result = super.addItem({
+        return super.addItem({
             text: mi.text,
             help: mi.help,
-            action: () => { if (mi.action != null)
-                    mi.action();
-                this.parentMenu.hide(); }
+            action: () => {
+                this.parentMenu.hide();
+                if (mi.action != null)
+                    mi.action();
+            }
         }, enabled);
-        this.outer.onmouseenter = () => this.show();
-        return result;
     }

     public show(): void {