Skip to content
This repository has been archived by the owner on Aug 30, 2022. It is now read-only.

Commit

Permalink
Support for columns of type LocalDate and Time (#664)
Browse files Browse the repository at this point in the history
* support for localdate columns
  • Loading branch information
Mihai Budiu authored Aug 11, 2020
1 parent d286dfa commit 91d1565
Show file tree
Hide file tree
Showing 53 changed files with 758 additions and 355 deletions.
2 changes: 1 addition & 1 deletion data/ontime/On_Time.schema
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"name":"Year","kind":"Integer"},{"name":"Quarter","kind":"Integer"},{"name":"Month","kind":"Integer"},{"name":"DayofMonth","kind":"Integer"},{"name":"DayOfWeek","kind":"Integer"},{"name":"FlightDate","kind":"Date"},{"name":"UniqueCarrier","kind":"String"},{"name":"AirlineID","kind":"Integer"},{"name":"Carrier","kind":"String"},{"name":"TailNum","kind":"String"},{"name":"FlightNum","kind":"Integer"},{"name":"OriginAirportID","kind":"Integer"},{"name":"OriginAirportSeqID","kind":"Integer"},{"name":"OriginCityMarketID","kind":"Integer"},{"name":"Origin","kind":"String"},{"name":"OriginCityName","kind":"String"},{"name":"OriginState","kind":"String"},{"name":"OriginStateFips","kind":"Integer"},{"name":"OriginStateName","kind":"String"},{"name":"OriginWac","kind":"Integer"},{"name":"DestAirportID","kind":"Integer"},{"name":"DestAirportSeqID","kind":"Integer"},{"name":"DestCityMarketID","kind":"Integer"},{"name":"Dest","kind":"String"},{"name":"DestCityName","kind":"String"},{"name":"DestState","kind":"String"},{"name":"DestStateFips","kind":"Integer"},{"name":"DestStateName","kind":"String"},{"name":"DestWac","kind":"Integer"},{"name":"CRSDepTime","kind":"Integer"},{"name":"DepTime","kind":"Integer"},{"name":"DepDelay","kind":"Double"},{"name":"DepDelayMinutes","kind":"Double"},{"name":"DepDel15","kind":"Double"},{"name":"DepartureDelayGroups","kind":"Integer"},{"name":"DepTimeBlk","kind":"String"},{"name":"TaxiOut","kind":"Double"},{"name":"WheelsOff","kind":"String"},{"name":"WheelsOn","kind":"String"},{"name":"TaxiIn","kind":"Double"},{"name":"CRSArrTime","kind":"Integer"},{"name":"ArrTime","kind":"Integer"},{"name":"ArrDelay","kind":"Double"},{"name":"ArrDelayMinutes","kind":"Double"},{"name":"ArrDel15","kind":"Double"},{"name":"ArrivalDelayGroups","kind":"Integer"},{"name":"ArrTimeBlk","kind":"String"},{"name":"Cancelled","kind":"Double"},{"name":"CancellationCode","kind":"String"},{"name":"Diverted","kind":"Double"},{"name":"CRSElapsedTime","kind":"Double"},{"nam
e":"ActualElapsedTime","kind":"Double"},{"name":"AirTime","kind":"Double"},{"name":"Flights","kind":"Double"},{"name":"Distance","kind":"Double"},{"name":"DistanceGroup","kind":"Integer"},{"name":"CarrierDelay","kind":"Double"},{"name":"WeatherDelay","kind":"Double"},{"name":"NASDelay","kind":"Double"},{"name":"SecurityDelay","kind":"Double"},{"name":"LateAircraftDelay","kind":"Double"},{"name":"FirstDepTime","kind":"Integer"},{"name":"TotalAddGTime","kind":"Double"},{"name":"LongestAddGTime","kind":"Double"},{"name":"DivAirportLandings","kind":"Integer"},{"name":"DivReachedDest","kind":"Double"},{"name":"DivActualElapsedTime","kind":"Double"},{"name":"DivArrDelay","kind":"Double"},{"name":"DivDistance","kind":"Double"},{"name":"Div1Airport","kind":"String"},{"name":"Div1AirportID","kind":"Integer"},{"name":"Div1AirportSeqID","kind":"Integer"},{"name":"Div1WheelsOn","kind":"String"},{"name":"Div1TotalGTime","kind":"Double"},{"name":"Div1LongestGTime","kind":"Double"},{"name":"Div1WheelsOff","kind":"String"},{"name":"Div1TailNum","kind":"String"},{"name":"Div2Airport","kind":"String"},{"name":"Div2AirportID","kind":"Integer"},{"name":"Div2AirportSeqID","kind":"Integer"},{"name":"Div2WheelsOn","kind":"String"},{"name":"Div2TotalGTime","kind":"Double"},{"name":"Div2LongestGTime","kind":"Double"},{"name":"Div2WheelsOff","kind":"String"},{"name":"Div2TailNum","kind":"String"},{"name":"Div3Airport","kind":"String"},{"name":"Div3AirportID","kind":"String"},{"name":"Div3AirportSeqID","kind":"String"},{"name":"Div3WheelsOn","kind":"String"},{"name":"Div3TotalGTime","kind":"String"},{"name":"Div3LongestGTime","kind":"String"},{"name":"Div3WheelsOff","kind":"String"},{"name":"Div3TailNum","kind":"String"},{"name":"Div4Airport","kind":"String"},{"name":"Div4AirportID","kind":"String"},{"name":"Div4AirportSeqID","kind":"String"},{"name":"Div4WheelsOn","kind":"String"},{"name":"Div4TotalGTime","kind":"String"},{"name":"Div4LongestGTime","kind":"String"},{"name":"Div4WheelsOff","k
ind":"String"},{"name":"Div4TailNum","kind":"String"},{"name":"Div5Airport","kind":"String"},{"name":"Div5AirportID","kind":"String"},{"name":"Div5AirportSeqID","kind":"String"},{"name":"Div5WheelsOn","kind":"String"},{"name":"Div5TotalGTime","kind":"String"},{"name":"Div5LongestGTime","kind":"String"},{"name":"Div5WheelsOff","kind":"String"},{"name":"Div5TailNum","kind":"String"},{"name":"Column_109","kind":"String"}]
[{"name":"Year","kind":"Integer"},{"name":"Quarter","kind":"Integer"},{"name":"Month","kind":"Integer"},{"name":"DayofMonth","kind":"Integer"},{"name":"DayOfWeek","kind":"Integer"},{"name":"FlightDate","kind":"LocalDate"},{"name":"UniqueCarrier","kind":"String"},{"name":"AirlineID","kind":"Integer"},{"name":"Carrier","kind":"String"},{"name":"TailNum","kind":"String"},{"name":"FlightNum","kind":"Integer"},{"name":"OriginAirportID","kind":"Integer"},{"name":"OriginAirportSeqID","kind":"Integer"},{"name":"OriginCityMarketID","kind":"Integer"},{"name":"Origin","kind":"String"},{"name":"OriginCityName","kind":"String"},{"name":"OriginState","kind":"String"},{"name":"OriginStateFips","kind":"Integer"},{"name":"OriginStateName","kind":"String"},{"name":"OriginWac","kind":"Integer"},{"name":"DestAirportID","kind":"Integer"},{"name":"DestAirportSeqID","kind":"Integer"},{"name":"DestCityMarketID","kind":"Integer"},{"name":"Dest","kind":"String"},{"name":"DestCityName","kind":"String"},{"name":"DestState","kind":"String"},{"name":"DestStateFips","kind":"Integer"},{"name":"DestStateName","kind":"String"},{"name":"DestWac","kind":"Integer"},{"name":"CRSDepTime","kind":"Integer"},{"name":"DepTime","kind":"Integer"},{"name":"DepDelay","kind":"Double"},{"name":"DepDelayMinutes","kind":"Double"},{"name":"DepDel15","kind":"Double"},{"name":"DepartureDelayGroups","kind":"Integer"},{"name":"DepTimeBlk","kind":"String"},{"name":"TaxiOut","kind":"Double"},{"name":"WheelsOff","kind":"String"},{"name":"WheelsOn","kind":"String"},{"name":"TaxiIn","kind":"Double"},{"name":"CRSArrTime","kind":"Integer"},{"name":"ArrTime","kind":"Integer"},{"name":"ArrDelay","kind":"Double"},{"name":"ArrDelayMinutes","kind":"Double"},{"name":"ArrDel15","kind":"Double"},{"name":"ArrivalDelayGroups","kind":"Integer"},{"name":"ArrTimeBlk","kind":"String"},{"name":"Cancelled","kind":"Double"},{"name":"CancellationCode","kind":"String"},{"name":"Diverted","kind":"Double"},{"name":"CRSElapsedTime","kind":"Double"},
{"name":"ActualElapsedTime","kind":"Double"},{"name":"AirTime","kind":"Double"},{"name":"Flights","kind":"Double"},{"name":"Distance","kind":"Double"},{"name":"DistanceGroup","kind":"Integer"},{"name":"CarrierDelay","kind":"Double"},{"name":"WeatherDelay","kind":"Double"},{"name":"NASDelay","kind":"Double"},{"name":"SecurityDelay","kind":"Double"},{"name":"LateAircraftDelay","kind":"Double"},{"name":"FirstDepTime","kind":"Integer"},{"name":"TotalAddGTime","kind":"Double"},{"name":"LongestAddGTime","kind":"Double"},{"name":"DivAirportLandings","kind":"Integer"},{"name":"DivReachedDest","kind":"Double"},{"name":"DivActualElapsedTime","kind":"Double"},{"name":"DivArrDelay","kind":"Double"},{"name":"DivDistance","kind":"Double"},{"name":"Div1Airport","kind":"String"},{"name":"Div1AirportID","kind":"Integer"},{"name":"Div1AirportSeqID","kind":"Integer"},{"name":"Div1WheelsOn","kind":"String"},{"name":"Div1TotalGTime","kind":"Double"},{"name":"Div1LongestGTime","kind":"Double"},{"name":"Div1WheelsOff","kind":"String"},{"name":"Div1TailNum","kind":"String"},{"name":"Div2Airport","kind":"String"},{"name":"Div2AirportID","kind":"Integer"},{"name":"Div2AirportSeqID","kind":"Integer"},{"name":"Div2WheelsOn","kind":"String"},{"name":"Div2TotalGTime","kind":"Double"},{"name":"Div2LongestGTime","kind":"Double"},{"name":"Div2WheelsOff","kind":"String"},{"name":"Div2TailNum","kind":"String"},{"name":"Div3Airport","kind":"String"},{"name":"Div3AirportID","kind":"String"},{"name":"Div3AirportSeqID","kind":"String"},{"name":"Div3WheelsOn","kind":"String"},{"name":"Div3TotalGTime","kind":"String"},{"name":"Div3LongestGTime","kind":"String"},{"name":"Div3WheelsOff","kind":"String"},{"name":"Div3TailNum","kind":"String"},{"name":"Div4Airport","kind":"String"},{"name":"Div4AirportID","kind":"String"},{"name":"Div4AirportSeqID","kind":"String"},{"name":"Div4WheelsOn","kind":"String"},{"name":"Div4TotalGTime","kind":"String"},{"name":"Div4LongestGTime","kind":"String"},{"name":"Div4WheelsOf
f","kind":"String"},{"name":"Div4TailNum","kind":"String"},{"name":"Div5Airport","kind":"String"},{"name":"Div5AirportID","kind":"String"},{"name":"Div5AirportSeqID","kind":"String"},{"name":"Div5WheelsOn","kind":"String"},{"name":"Div5TotalGTime","kind":"String"},{"name":"Div5LongestGTime","kind":"String"},{"name":"Div5WheelsOff","kind":"String"},{"name":"Div5TailNum","kind":"String"},{"name":"Column_109","kind":"String"}]
2 changes: 1 addition & 1 deletion data/ontime/short.schema
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "DayOfWeek"
},
{
"kind": "Date",
"kind": "LocalDate",
"name": "FlightDate"
},
{
Expand Down
7 changes: 4 additions & 3 deletions docs/userManual.src
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ types:
* JSON (strings that represent JSON values)
* Double (64-bit floating point)
* Integer (32-bit)
* Date+time (the Java Instant class is used to represent such date+time values
on the server side; dates include time zone information)
* Durations (represented using the Java Duration class)
* Date: a date including time and timezone
* LocalDate: a date including time, but without any timezone information
* Time: a time within a day
* Durations (differences between two times)
* Intervals: an interval contains two double values.
Operations on interval values are described in section (#interval-values).

Expand Down
4 changes: 4 additions & 0 deletions platform/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,10 @@
<groupId>io.netty</groupId>
<artifactId>netty-handler</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
Expand Down
19 changes: 0 additions & 19 deletions platform/src/main/java/org/hillview/dataset/api/IJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,11 @@
import org.hillview.utils.HostList;
import org.hillview.sketches.results.NextKList;
import org.hillview.table.Schema;
import org.hillview.utils.Converters;
import org.hillview.utils.HostAndPort;
import org.hillview.utils.RuntimeTypeAdapterFactory;

import java.io.Serializable;
import java.lang.reflect.Type;
import java.time.Instant;

// Unfortunately this module introduces many circular dependencies, because it has
// to register various type adaptors.
Expand Down Expand Up @@ -65,22 +63,6 @@ public JsonElement serialize(Count count, Type type, JsonSerializationContext js
}
}

class DateSerializer implements JsonSerializer<Instant> {
public JsonElement serialize(Instant data, Type typeOfSchema, JsonSerializationContext
unused) {
double d = Converters.toDouble(data);
return new JsonPrimitive(d);
}
}

class DateDeserializer implements JsonDeserializer<Instant> {
public Instant deserialize(JsonElement data, Type typeOfSchema, JsonDeserializationContext
unused) {
double d = data.getAsDouble();
return Converters.toDate(d);
}
}

class NextKSerializer
implements JsonSerializer<NextKList> {
public JsonElement serialize(NextKList data, Type typeOfSchema, JsonSerializationContext unused) {
Expand All @@ -101,7 +83,6 @@ public JsonElement serialize(RowSnapshot rowSnapshot, Type type, JsonSerializati
.registerTypeAdapter(Schema.class, new Schema.Deserializer())
.registerTypeAdapter(RowSnapshot.class, new RowSnapshotSerializer())
.registerTypeAdapter(NextKList.class, new NextKSerializer())
.registerTypeAdapter(Instant.class, new DateSerializer())
.registerTypeAdapter(Count.class, new CountSerializer())
.registerTypeAdapter(Interval.class, new IntervalSerializer())
.registerTypeAdapter(Interval.class, new IntervalDeserializer())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ IColumn createColumn(ITable table) {
double timestampLocal = value.invokeMember("getTime").asDouble();
col.set(r, timestampLocal);
break;
case LocalDate:
double ts = value.invokeMember("getTime").asDouble();
// ts is the local time; we have to adjust for the timezone
double offset = value.invokeMember("getTimezoneOffset").asDouble();
col.set(r, ts - offset * 60 * 1000);
break;
case Integer:
col.set(r, value.asInt());
break;
Expand All @@ -131,6 +137,7 @@ IColumn createColumn(ITable table) {
Value v0 = value.getArrayElement(0);
Value v1 = value.getArrayElement(1);
col.set(r, v0.asDouble());
assert endCol != null;
endCol.set(r, v1.asDouble());
break;
default:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,10 @@ public void scan(final IColumn column,

boolean extractString = false;
boolean numeric = false;
switch (column.getKind()) {
case String:
case Json:
extractString = this.computeStringMax;
break;
default:
numeric = true;
break;
}
if (column.getKind().isString())
extractString = this.computeStringMax;
else
numeric = true;
while (currRow >= 0) {
if (column.isMissing(currRow)) {
this.missingCount++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,10 @@ public ITable load() {
String format = FileSetDescription.this.getLogFormat();
assert format != null;
GrokLogs genLog = new GrokLogs(format);
//noinspection ConstantConditions
loader = genLog.getFileLoader(this.pathname,
Converters.toDate(FileSetDescription.this.startTime),
Converters.toDate(FileSetDescription.this.endTime));
Converters.toLocalDate(FileSetDescription.this.startTime),
Converters.toLocalDate(FileSetDescription.this.endTime));
break;
default:
throw new RuntimeException(
Expand Down
24 changes: 16 additions & 8 deletions platform/src/main/java/org/hillview/storage/GrokLogs.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
import org.hillview.utils.GrokExtra;
import org.hillview.utils.HillviewLogger;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.List;
import java.util.Map;
import java.io.BufferedReader;
Expand All @@ -49,9 +50,9 @@ public class LogFileLoader extends BaseLogLoader {
private final Grok grok;

@Nullable
private final Instant start;
private final LocalDateTime start;
@Nullable
private final Instant end;
private final LocalDateTime end;
@Nullable
DateParsing dateTimeParser = null;
/**
Expand All @@ -62,7 +63,7 @@ public class LogFileLoader extends BaseLogLoader {
@Nullable
private List<ColumnDescription> columnDescriptions = null;

LogFileLoader(final String path, @Nullable Instant start, @Nullable Instant end) {
LogFileLoader(final String path, @Nullable LocalDateTime start, @Nullable LocalDateTime end) {
super(path);
GrokCompiler grokCompiler = GrokCompiler.newInstance();
grokCompiler.registerDefaultPatterns();
Expand Down Expand Up @@ -183,10 +184,17 @@ else if (this.first)
(this.start != null || this.end != null)) {
if (this.dateTimeParser == null)
this.dateTimeParser = new DateParsing(currentTimestamp);
Instant parsed = this.dateTimeParser.parse(currentTimestamp);
if (this.start != null && this.start.isAfter(parsed))

LocalDateTime date;
if (this.dateTimeParser.isLocalDate()) {
date = this.dateTimeParser.parseLocalDate(currentTimestamp);
} else {
date = LocalDateTime.ofInstant(
this.dateTimeParser.parseDate(currentTimestamp), ZoneOffset.UTC);
}
if (this.start != null && this.start.isAfter(date))
continue;
if (this.end != null && this.end.isBefore(parsed))
if (this.end != null && this.end.isBefore(date))
// We assume timestamps are monotone, and thus
// we won't see another one smaller. So we end
// parsing here.
Expand Down Expand Up @@ -228,7 +236,7 @@ public void endLoading() {
}
}

public LogFileLoader getFileLoader(String path, @Nullable Instant start, @Nullable Instant end) {
public LogFileLoader getFileLoader(String path, @Nullable LocalDateTime start, @Nullable LocalDateTime end) {
return new LogFileLoader(path, start, end);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class HillviewLogs extends LogFiles {
"([^,]*),([^,]*),([^,]*),?(.*)");

static {
HillviewLogs.schema.append(new ColumnDescription(LogFiles.timestampColumnName, ContentsKind.Date));
HillviewLogs.schema.append(new ColumnDescription(LogFiles.timestampColumnName, ContentsKind.LocalDate));
HillviewLogs.schema.append(new ColumnDescription("Role", ContentsKind.String));
HillviewLogs.schema.append(new ColumnDescription("Level", ContentsKind.String));
HillviewLogs.schema.append(new ColumnDescription("Machine", ContentsKind.String));
Expand Down
14 changes: 13 additions & 1 deletion platform/src/main/java/org/hillview/storage/JdbcDatabase.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import javax.annotation.Nullable;
import java.sql.*;
import java.time.Instant;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;

Expand Down Expand Up @@ -388,6 +389,8 @@ private static ColumnDescription getDescription(ResultSetMetaData meta, int colI
case Types.DATE:
case Types.TIME:
case Types.TIMESTAMP:
kind = ContentsKind.LocalDate;
break;
case Types.TIME_WITH_TIMEZONE:
case Types.TIMESTAMP_WITH_TIMEZONE:
kind = ContentsKind.Date;
Expand Down Expand Up @@ -463,7 +466,16 @@ private static void appendNext(List<IAppendableColumn> cols,
break;
case Types.DATE:
case Types.TIME:
case Types.TIMESTAMP:
case Types.TIMESTAMP: {
Timestamp ts = data.getTimestamp(colIndex);
if (ts == null) {
col.appendMissing();
} else {
LocalDateTime ldt = ts.toLocalDateTime();
col.append(Converters.toDouble(ldt));
}
break;
}
case Types.TIME_WITH_TIMEZONE:
case Types.TIMESTAMP_WITH_TIMEZONE:
Timestamp ts = data.getTimestamp(colIndex);
Expand Down
Loading

0 comments on commit 91d1565

Please sign in to comment.