Skip to content
This repository has been archived by the owner on Aug 30, 2022. It is now read-only.

Commit

Permalink
More operations for private local SQL databases (#558)
Browse files Browse the repository at this point in the history
Support histograms and heatmaps for SQL and private SQL databases.
  • Loading branch information
Mihai Budiu authored Oct 21, 2019
1 parent 33e0451 commit 0316485
Show file tree
Hide file tree
Showing 31 changed files with 1,301 additions and 449 deletions.
4 changes: 2 additions & 2 deletions NOTICE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Hillview Project
Hillview Data Visualization Project

Copyright 2017 VMware, Inc. All Rights Reserved.
Copyright 2017-2019 VMware, Inc. All Rights Reserved.

This product is licensed to you under the Apache 2.0 license (the
"License"). You may not use this product except in compliance with
Expand Down
2 changes: 1 addition & 1 deletion data/ontime_private/gen_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def get_metadata(cn):
elif cn == "Distance":
(g, gMin, gMax) = (10, 0, 5000)
elif cn == "FlightDate":
(g, gMin, gMax) = (86400, 1451635200000, 1456732800000)
(g, gMin, gMax) = (86400000, 1451635200000, 1456732800000)
else:
raise Exception("Unexpected column " + cn)
return {'type': "DoubleColumnQuantization",
Expand Down
27 changes: 15 additions & 12 deletions platform/src/main/java/org/hillview/maps/FilterMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
package org.hillview.maps;

import org.hillview.dataset.api.IMap;
import org.hillview.table.filters.FalseTableFilter;
import org.hillview.table.QuantizationSchema;
import org.hillview.table.QuantizedTable;
import org.hillview.table.api.ITableFilterDescription;
import org.hillview.table.api.ITableFilter;
import org.hillview.table.api.IMembershipSet;
import org.hillview.table.api.ITable;
import org.hillview.utils.Converters;
import org.hillview.utils.HillviewLogger;

import javax.annotation.Nullable;
Expand All @@ -36,31 +38,32 @@ public class FilterMap implements IMap<ITable, ITable> {
* Argument to the rowFilterPredicate.test method is a row index.
* Returns true if a row has to be preserved
*/
@Nullable
private final ITableFilterDescription rowFilterPredicate;
@Nullable
private final QuantizationSchema quantization;

public FilterMap() {
this.rowFilterPredicate = null;
/**
 * Creates a map that filters the rows of a table, optionally over a
 * quantized view of the data.
 * @param rowFilterPredicate  Description of the filter used to decide which rows are kept.
 * @param quantization        Optional quantization schema; may be null.  When it is
 *                            not null, apply() wraps the input table in a
 *                            QuantizedTable before building the filter.
 */
public FilterMap(ITableFilterDescription rowFilterPredicate,
                 @Nullable QuantizationSchema quantization) {
    this.rowFilterPredicate = rowFilterPredicate;
    this.quantization = quantization;
}

public FilterMap(ITableFilterDescription rowFilterPredicate) {
this.rowFilterPredicate = rowFilterPredicate;
this(rowFilterPredicate, null);
}

@Override
public ITable apply(@Nullable ITable data) {
assert data != null;
ITableFilter filter;
if (this.rowFilterPredicate == null)
filter = new FalseTableFilter();
else
filter = this.rowFilterPredicate.getFilter(data);
Converters.checkNull(data);
if (this.quantization != null)
data = new QuantizedTable(data, this.quantization);
ITableFilter filter = this.rowFilterPredicate.getFilter(data);
HillviewLogger.instance.info("Filtering", "{0}", filter);
IMembershipSet result = data.getMembershipSet().filter(filter::test);
return data.selectRowsFromFullTable(result);
}

public String asString() {
return (this.rowFilterPredicate == null) ? "<null>" : this.rowFilterPredicate.toString();
return this.rowFilterPredicate.toString();
}
}
62 changes: 62 additions & 0 deletions platform/src/main/java/org/hillview/storage/ColumnLimits.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2019 VMware Inc. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.hillview.storage;

import org.hillview.table.filters.RangeFilterDescription;

import javax.annotation.Nullable;
import java.util.Collection;
import java.util.HashMap;

/**
 * Per-column bounds, each represented as a RangeFilterDescription and
 * indexed by the name of the column it applies to.
 * This is used for doing filtering in SQL databases without materializing views.
 */
public class ColumnLimits {
    /** Maps a column name to the range allowed for that column after filtering. */
    private final HashMap<String, RangeFilterDescription> columnLimits;

    /** Creates an empty set of limits. */
    public ColumnLimits() {
        this.columnLimits = new HashMap<>();
    }

    /**
     * Creates a copy of another set of limits.  The map itself is copied;
     * the filter descriptions it holds are shared with the original.
     * @param other  Limits to copy.
     */
    public ColumnLimits(ColumnLimits other) {
        this.columnLimits = new HashMap<>(other.columnLimits);
    }

    /**
     * Looks up the limits recorded for a column.
     * @param column  Column name.
     * @return The filter stored for the column, or null if there is none.
     */
    @Nullable
    public RangeFilterDescription get(String column) {
        return this.columnLimits.get(column);
    }

    /**
     * Records a filter under the name of the column it describes,
     * replacing any filter previously stored for that column.
     * @param filter  Filter to store.
     */
    public void put(RangeFilterDescription filter) {
        final String key = filter.cd.name;
        this.columnLimits.put(key, filter);
    }

    /**
     * @return The filters currently stored.  This is the values view of the
     * underlying map, not a copy.
     */
    public Collection<RangeFilterDescription> allFilters() {
        return this.columnLimits.values();
    }

    /**
     * Combines a filter with whatever is already stored for the same column.
     * If nothing is stored the filter is recorded as is; otherwise the result of
     * intersecting the stored filter with the new one is recorded.
     * @param filter  Filter to combine with the existing limits.
     */
    public void intersect(RangeFilterDescription filter) {
        final RangeFilterDescription current = this.get(filter.cd.name);
        final RangeFilterDescription narrowed =
                (current != null) ? current.intersect(filter) : filter;
        this.put(narrowed);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.hillview.storage;

import org.hillview.utils.Converters;
import org.hillview.utils.Utilities;

public class ImpalaJdbcConnection extends JdbcConnection {
Expand All @@ -25,8 +26,8 @@ public class ImpalaJdbcConnection extends JdbcConnection {
}

@Override
public String getQueryToReadTable(String table, int rowCount) {
String result = "SELECT * FROM " + table;
public String getQueryToReadTable(int rowCount) {
String result = "SELECT * FROM " + Converters.checkNull(this.info.table);
if (rowCount >= 0)
result += " LIMIT " + rowCount;
return result;
Expand Down
99 changes: 36 additions & 63 deletions platform/src/main/java/org/hillview/storage/JdbcConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@

package org.hillview.storage;

import org.hillview.sketches.results.DoubleHistogramBuckets;
import org.hillview.sketches.results.ExplicitDoubleHistogramBuckets;
import org.hillview.sketches.results.StringHistogramBuckets;
import org.hillview.sketches.results.IHistogramBuckets;
import org.hillview.table.ColumnDescription;
import org.hillview.table.Schema;
import org.hillview.table.columns.ColumnQuantization;
import org.hillview.table.columns.DoubleColumnQuantization;
import org.hillview.utils.Converters;
import org.hillview.utils.Utilities;

import javax.annotation.Nullable;
import java.util.HashMap;

/**
Expand Down Expand Up @@ -60,19 +62,18 @@ static JdbcConnection create(JdbcConnectionInformation conn) {
public abstract String getURL();

/**
* Construct the query string to read the specified table.
* @param table Table to read.
* Construct the query string to read the connection table.
* @param rowCount Number of rows to read.
* @return A SQL query string that reads the specified number of rows.
*/
public abstract String getQueryToReadTable(String table, int rowCount);
public abstract String getQueryToReadTable(int rowCount);

String getQueryToReadSize(String table) {
return "SELECT COUNT(*) FROM " + table;
String getQueryToReadSize(@Nullable ColumnLimits columnLimits) {
throw new UnsupportedOperationException();
}

String getQueryForDistinctCount(String table, String column) {
return "SELECT COUNT(DISTINCT " + column + ") FROM " + table;
String getQueryForDistinctCount(String column, @Nullable ColumnLimits columnLimits) {
throw new UnsupportedOperationException();
}

void addBaseUrl(StringBuilder urlBuilder) {
Expand Down Expand Up @@ -117,74 +118,46 @@ void addParameter(String param, String value) {
this.info = info;
}

String getQueryToComputeFreqValues(String table, Schema schema, int minCt) {
StringBuilder builder = new StringBuilder();
String ctcol = schema.newColumnName("countcol");
/*
e.g., select gender, first_name, count(*) as ct
from employees
group by gender, first_name
order by count desc
having ct > minCt
*/
boolean first = true;
StringBuilder cols = new StringBuilder();
for (String col : schema.getColumnNames()) {
if (!first)
cols.append(", ");
first = false;
cols.append(col);
}
builder.append("select ").append(cols.toString()).append(", count(*) AS ").append(ctcol)
.append(" from ").append(table)
.append(" group by ").append(cols.toString())
.append(" having ").append(ctcol).append(" > " ).append(minCt)
.append(" order by ").append(ctcol).append(" desc")
;
return builder.toString();
}

public String getQueryForExplicitNumericHistogram(
String table, ColumnDescription cd, ExplicitDoubleHistogramBuckets buckets) {
throw new UnsupportedOperationException();
}

public String getQueryForExplicitDateHistogram(
String table, ColumnDescription cd, ExplicitDoubleHistogramBuckets buckets) {
throw new UnsupportedOperationException();
}

public String getQueryForNumericHistogram(
String table, ColumnDescription cd, DoubleHistogramBuckets buckets) {
throw new UnsupportedOperationException();
}

public String getQueryForStringHistogram(
String table, ColumnDescription cd, StringHistogramBuckets buckets) {
String getQueryToComputeFreqValues(Schema schema, int minCt, @Nullable ColumnLimits columnLimits) {
throw new UnsupportedOperationException();
}

/**
* Returns a query that computes 4 values for a given numeric column.
* @param table Table used.
* @param column Column name.
* @param cd Column description.
* @param quantization Optional quantization information for this column.
* @param columnLimits Limits for each column.
* @return A query that computes the min, max, total rows, and non-nulls in the specified column.
* These are returned in columns min, max, total and nonnulls respectively.
*/
public String getQueryForNumericRange(String table, String column) {
public String getQueryForNumericRange(ColumnDescription cd,
@Nullable DoubleColumnQuantization quantization,
@Nullable ColumnLimits columnLimits) {
throw new UnsupportedOperationException();
}

@SuppressWarnings("WeakerAccess")
public String getQueryForCounts(String table, String column) {
return "select COUNT(*) as total, COUNT(" + column + ") as nonnulls from " + table;
public String getQueryForCounts(ColumnDescription cd, @Nullable ColumnQuantization quantization,
@Nullable ColumnLimits columnLimits) {
throw new UnsupportedOperationException();
}

public String getQueryForDistinct(String table, String column) {
return "SELECT DISTINCT " + column + " FROM " + table + " ORDER BY " + column;
public String getQueryForDistinct(String column) {
Converters.checkNull(this.info.table);
return "SELECT DISTINCT " + column + " FROM " + this.info.table + " ORDER BY " + column;
}

/**
 * Query that computes a histogram over one column.
 * This implementation does not build a query: it always throws
 * UnsupportedOperationException.
 * NOTE(review): presumably database-specific subclasses override this; confirm.
 * @param cd            Description of the column to histogram.
 * @param columnLimits  Optional limits for each column; may be null.
 * @param buckets       Buckets of the histogram.
 * @param quantization  Optional quantization information for this column; may be null.
 * @return Never returns normally in this implementation.
 * @throws UnsupportedOperationException always.
 */
public String getQueryForHistogram(ColumnDescription cd,
@Nullable ColumnLimits columnLimits,
IHistogramBuckets buckets,
@Nullable ColumnQuantization quantization) {
throw new UnsupportedOperationException();
}

public String getQueryForDateHistogram(String table, ColumnDescription cd, DoubleHistogramBuckets buckets) {
public String getQueryForHeatmap(ColumnDescription cd0, ColumnDescription cd1,
@Nullable ColumnLimits columnLimits,
IHistogramBuckets buckets0, IHistogramBuckets buckets1,
@Nullable ColumnQuantization quantization0,
@Nullable ColumnQuantization quantization1) {
throw new UnsupportedOperationException();
}
}
Loading

0 comments on commit 0316485

Please sign in to comment.