Skip to content

Commit

Permalink
Merge pull request #53 from xingyutangyuan/master
Browse files Browse the repository at this point in the history
Add mug-bigquery artifact to apply StringFormat templating for BigQuery
  • Loading branch information
fluentfuture authored Dec 3, 2023
2 parents a5008d1 + b5da8c0 commit 02b42b7
Show file tree
Hide file tree
Showing 6 changed files with 633 additions and 0 deletions.
1 change: 1 addition & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ maven_install(
"com.google.protobuf:protobuf-java:[3.0.0,)",
"com.google.protobuf:protobuf-java-util:[3.0.0,)",
"com.google.code.findbugs:jsr305:3.0.2",
"com.google.cloud:google-cloud-bigquery:[2.34.2,)",
],
repositories = [
"https://repo1.maven.org/maven2",
Expand Down
31 changes: 31 additions & 0 deletions mug-bigquery/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Bazel build definitions for the mug-bigquery module.
# `junit_tests` is loaded from the gerrit bazlets repository.
load("@com_googlesource_gerrit_bazlets//tools:junit.bzl", "junit_tests")

# Library target: compiles every Java source under src/main/java.
java_library(
name = "template",
srcs = glob(["src/main/java/**/*.java"]),
deps = [
"//mug:base",
"//mug:format",
"@maven//:com_google_errorprone_error_prone_annotations",
"@maven//:com_google_cloud_google_cloud_bigquery",
]
)

# Test target: picks up every file matching *Test.java under src/test/java
# and links it against the ":template" library declared above.
junit_tests(
name = "AllTests",
srcs = glob(["src/test/java/**/*Test.java"]),
deps = [
":template",
"//mug:base",
"//mug:format",
"//mug-guava",
"@maven//:com_google_guava_guava",
"@maven//:com_google_guava_guava_testlib",
"@maven//:com_google_truth_truth",
"@maven//:com_google_truth_extensions_truth_java8_extension",
"@maven//:com_google_errorprone_error_prone_annotations",
"@maven//:com_google_cloud_google_cloud_bigquery",
"@maven//:junit_junit",
"@maven//:org_junit_jupiter_junit_jupiter_api",
],
)
93 changes: 93 additions & 0 deletions mug-bigquery/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Maven module descriptor for mug-bigquery: BigQuery query templating built on mug's StringFormat. -->
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the repository's root POM, located one directory up. -->
<parent>
<groupId>com.google.mug</groupId>
<artifactId>mug-root</artifactId>
<version>7.1-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>mug-bigquery</artifactId>
<packaging>jar</packaging>
<name>BigQuery Utils</name>

<build>
<!-- NOTE(review): this sits under pluginManagement, so it only supplies configuration for
maven-compiler-plugin; confirm the plugin itself is activated by the parent POM. -->
<pluginManagement>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<!-- Annotation processors run at compile time: Error Prone core plus this project's own
mug-errorprone checks. -->
<annotationProcessorPaths>
<path>
<groupId>com.google.errorprone</groupId>
<artifactId>error_prone_core</artifactId>
<version>2.23.0</version>
</path>
<path>
<groupId>${project.groupId}</groupId>
<artifactId>mug-errorprone</artifactId>
<version>${project.version}</version>
</path>
<!-- Other annotation processors go here.
If 'annotationProcessorPaths' is set, processors will no longer be
discovered on the regular -classpath; see also 'Using Error Prone
together with other annotation processors' below. -->
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<dependencies>
<!-- Sibling modules of this project, pinned to the same version as this module. -->
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>mug</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>mug-guava</artifactId>
<version>${project.version}</version>
</dependency>
<!-- NOTE(review): mug-errorprone is declared here as a regular dependency and also above as an
annotation processor path; confirm both declarations are needed. -->
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>mug-errorprone</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Test-only dependencies. No versions are given here; presumably they are managed by the
parent POM (TODO confirm). -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.truth</groupId>
<artifactId>truth</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.truth.extensions</groupId>
<artifactId>truth-java8-extension</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-testlib</artifactId>
<scope>test</scope>
</dependency>
<!-- BigQuery client library, pinned to a fixed version in this module. -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-bigquery</artifactId>
<version>2.34.2</version>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
package com.google.mu.bigquery;

import static java.util.Objects.requireNonNull;

import java.math.BigDecimal;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collector;

import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.JobException;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.QueryParameterValue;
import com.google.cloud.bigquery.TableResult;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import com.google.errorprone.annotations.CompileTimeConstant;
import com.google.errorprone.annotations.Immutable;
import com.google.mu.util.StringFormat;
import com.google.mu.util.stream.BiStream;

/**
 * Facade class to create BigQuery parameterized queries using a template string and parameters.
 *
 * <p>The string template syntax is defined by {@link StringFormat} and protected by the same
 * compile-time checks.
 *
 * <p>An instance holds the final query text together with the named query parameters referenced
 * by that text. Both are fixed at construction time.
 *
 * @since 7.1
 */
@Immutable
public final class ParameterizedQuery {
  /** Pattern of the timestamp string handed to {@link QueryParameterValue#timestamp(String)}. */
  private static final DateTimeFormatter TIMESTAMP_FORMATTER =
      DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZZ");

  private final String query;

  @SuppressWarnings("Immutable") // Wrapped in an unmodifiable defensive copy by the constructor.
  private final Map<String, QueryParameterValue> parameters;

  private ParameterizedQuery(String query, Map<String, QueryParameterValue> parameters) {
    this.query = requireNonNull(query);
    // Defensive copy. Not worth pulling in Guava dependency just for this
    this.parameters = Collections.unmodifiableMap(new LinkedHashMap<>(parameters));
  }

  /**
   * Convenience method when you need to create the {@link ParameterizedQuery} inline, with both the
   * query template and the arguments.
   *
   * <p>For example:
   *
   * <pre>{@code
   * TableResult result = ParameterizedQuery.of("select * from JOBS where id = {id}", jobId).run();
   * }</pre>
   *
   * @param query the template string, using the {@code {placeholder}} syntax of {@link
   *     StringFormat}
   * @param args the placeholder values, in the order the placeholders appear in {@code query}
   * @throws IllegalArgumentException if an argument is of an unsupported type, or if two
   *     parameters end up with the same name
   */
  @SuppressWarnings("StringFormatArgsCheck") // Called immediately, runtime error is good enough.
  public static ParameterizedQuery of(@CompileTimeConstant String query, Object... args) {
    return template(query).with(args);
  }

  /**
   * Returns a template of {@link ParameterizedQuery} based on the {@code template} string.
   *
   * <p>For example:
   *
   * <pre>{@code
   * private static final StringFormat.To<ParameterizedQuery> GET_JOB_IDS_BY_QUERY =
   *     ParameterizedQuery.template(
   *         """
   *         SELECT job_id from INFORMATION_SCHEMA.JOBS_BY_PROJECT
   *         WHERE configuration.query LIKE CONCAT('%', {keyword}, '%')
   *         """);
   *
   * TableResult result = GET_JOB_IDS_BY_QUERY.with("sensitive word").run();
   * }</pre>
   *
   * <p>Each placeholder is replaced by a named query parameter reference (for example {@code
   * {keyword}} becomes {@code @keyword}), so placeholders must not be put inside a quoted SQL
   * string literal: the reference would be read as plain text there.
   *
   * <p>Except {@link ParameterizedQuery} itself, which are directly substituted into the query
   * (along with their parameters), and {@code null}, which is rendered as the literal {@code
   * NULL}, all other placeholder arguments are passed into the QueryJobConfiguration as query
   * parameters.
   *
   * <p>Placeholder types supported:
   *
   * <ul>
   *   <li>CharSequence
   *   <li>Enum (passed by {@link Enum#name name})
   *   <li>java.time.Instant (translated to TIMESTAMP)
   *   <li>java.time.LocalDate (translated to DATE)
   *   <li>Boolean
   *   <li>Integer
   *   <li>Long
   *   <li>BigDecimal
   *   <li>Double
   *   <li>Float
   *   <li>byte[]
   *   <li>Arrays of objects (primitive arrays other than {@code byte[]} are not supported)
   *   <li>{@link QueryParameterValue} (used as is)
   * </ul>
   *
   * If you need to supply other types, consider to wrap them explicitly using one of the static
   * factory methods of {@link QueryParameterValue}.
   *
   * @param template the template string, using the {@code {placeholder}} syntax of {@link
   *     StringFormat}
   */
  public static StringFormat.To<ParameterizedQuery> template(@CompileTimeConstant String template) {
    return StringFormat.template(
        template,
        (fragments, placeholders) -> {
          // Fragments are the literal pieces around the placeholders: one precedes each
          // placeholder and a final one trails the last placeholder.
          Iterator<String> it = fragments.iterator();
          return placeholders
              .collect(
                  new Builder(),
                  (builder, placeholder, value) -> {
                    builder.append(it.next());
                    if (value == null) {
                      builder.append("NULL");
                    } else if (value instanceof ParameterizedQuery) {
                      builder.addSubQuery((ParameterizedQuery) value);
                    } else {
                      // Strip the surrounding curly braces to get the parameter name.
                      String paramName = placeholder.skip(1, 1).toString().trim();
                      builder.append("@" + paramName);
                      builder.addParameter(paramName, toQueryParameter(value));
                    }
                  })
              .append(it.next())
              .build();
        });
  }

  /**
   * Returns a joiner that joins ParameterizedQuery elements using {@code delim}.
   *
   * <p>The delimiter is inserted between every two adjacent elements, including elements whose
   * query text is empty. The result is the same for sequential and parallel streams.
   *
   * @throws IllegalArgumentException (from the returned collector) if two joined queries carry a
   *     parameter of the same name
   */
  public static Collector<ParameterizedQuery, ?, ParameterizedQuery> joining(
      @CompileTimeConstant String delim) {
    return Collector.of(
        Builder::new,
        (builder, element) -> builder.join(delim, element),
        (left, right) -> left.mergeJoined(delim, right),
        Builder::build);
  }

  /**
   * Sends this query to BigQuery using the default options.
   *
   * <p>To use alternative options, pass {@link #jobConfiguration} to the {@link BigQueryOptions} of
   * your choice.
   */
  public TableResult run() throws JobException, InterruptedException {
    return BigQueryOptions.getDefaultInstance().getService().query(jobConfiguration());
  }

  /** Returns the {@link QueryJobConfiguration} that can be sent to BigQuery. */
  public QueryJobConfiguration jobConfiguration() {
    return BiStream.from(parameters)
        .collect(
            QueryJobConfiguration.newBuilder(query),
            QueryJobConfiguration.Builder::addNamedParameter)
        .build();
  }

  /** Mutable accumulator of the query text and the named parameters it references. */
  private static final class Builder {
    private final StringBuilder queryText = new StringBuilder();
    private final Map<String, QueryParameterValue> parameters = new LinkedHashMap<>();

    // Whether join() has added at least one element. Tracked explicitly rather than derived from
    // queryText.length(), because a joined element may legitimately have empty query text.
    private boolean joinedAny;

    /** Appends {@code snippet} verbatim to the query text. */
    @CanIgnoreReturnValue
    Builder append(String snippet) {
      queryText.append(snippet);
      return this;
    }

    /** Adds {@code element} as the next joined element, preceded by {@code delim} if not first. */
    @CanIgnoreReturnValue
    Builder join(String delim, ParameterizedQuery element) {
      if (joinedAny) {
        queryText.append(delim);
      }
      joinedAny = true;
      return addSubQuery(element);
    }

    /**
     * Merges the elements joined into {@code that} after the elements joined into this builder.
     * A partial builder that saw no element (possible in parallel streams) contributes nothing,
     * not even a delimiter.
     */
    Builder mergeJoined(String delim, Builder that) {
      return that.joinedAny ? join(delim, that.build()) : this;
    }

    /**
     * Registers a named parameter.
     *
     * @throws IllegalArgumentException if a parameter called {@code name} was already registered
     */
    @CanIgnoreReturnValue
    Builder addParameter(String name, QueryParameterValue value) {
      if (parameters.put(name, value) != null) {
        throw new IllegalArgumentException("Duplicate placeholder name " + name);
      }
      return this;
    }

    /** Appends the text of {@code subQuery} and adopts all of its parameters. */
    @CanIgnoreReturnValue
    Builder addSubQuery(ParameterizedQuery subQuery) {
      queryText.append(subQuery.query);
      BiStream.from(subQuery.parameters).forEachOrdered(this::addParameter);
      return this;
    }

    ParameterizedQuery build() {
      return new ParameterizedQuery(queryText.toString(), parameters);
    }
  }

  /**
   * Converts a non-null placeholder argument to the {@link QueryParameterValue} to send.
   *
   * @throws IllegalArgumentException if the runtime type of {@code value} is not supported
   */
  private static QueryParameterValue toQueryParameter(Object value) {
    if (value instanceof CharSequence) {
      return QueryParameterValue.string(value.toString());
    }
    if (value instanceof Instant) {
      Instant time = (Instant) value;
      return QueryParameterValue.timestamp(
          time.atZone(ZoneId.of("UTC")).format(TIMESTAMP_FORMATTER));
    }
    if (value instanceof LocalDate) {
      return QueryParameterValue.date(((LocalDate) value).toString());
    }
    if (value instanceof Boolean) {
      return QueryParameterValue.bool((Boolean) value);
    }
    if (value instanceof Integer) {
      return QueryParameterValue.int64((Integer) value);
    }
    if (value instanceof Long) {
      return QueryParameterValue.int64((Long) value);
    }
    if (value instanceof Double) {
      return QueryParameterValue.float64((Double) value);
    }
    if (value instanceof Float) {
      return QueryParameterValue.float64((Float) value);
    }
    if (value instanceof BigDecimal) {
      return QueryParameterValue.bigNumeric((BigDecimal) value);
    }
    if (value instanceof byte[]) {
      return QueryParameterValue.bytes((byte[]) value);
    }
    if (value instanceof QueryParameterValue) {
      return (QueryParameterValue) value;
    }
    if (value instanceof Enum) {
      return QueryParameterValue.string(((Enum<?>) value).name());
    }
    // Only arrays of objects can be cast to Object[]. Primitive arrays such as int[] fall through
    // to the IllegalArgumentException below instead of failing with a ClassCastException.
    if (value instanceof Object[]) {
      @SuppressWarnings("rawtypes")
      Class componentType = value.getClass().getComponentType();
      return QueryParameterValue.array((Object[]) value, componentType);
    }
    throw new IllegalArgumentException(
        "Unsupported parameter type: "
            + value.getClass().getName()
            + ". Consider manually converting it to QueryParameterValue.");
  }

  @Override
  public int hashCode() {
    return Objects.hash(query, parameters);
  }

  /** Two queries are equal if they have equal query text and equal named parameters. */
  @Override
  public boolean equals(Object obj) {
    if (obj instanceof ParameterizedQuery) {
      ParameterizedQuery that = (ParameterizedQuery) obj;
      return query.equals(that.query) && parameters.equals(that.parameters);
    }
    return false;
  }

  /** Returns the query text, with parameters shown as {@code @name} references. */
  @Override
  public String toString() {
    return query;
  }
}
Loading

0 comments on commit 02b42b7

Please sign in to comment.