Skip to content

Commit

Permalink
[Coral-schema] Generalize operand schema inference on ordinal return …
Browse files Browse the repository at this point in the history
…type UDF calls (#548)

* fix nested nullability inference for nested udf calls

* spotless

* generalize operand schema inference for ordinal return type UDF calls

* add unit test + documentation
  • Loading branch information
KevinGe00 authored Feb 10, 2025
1 parent 15fc504 commit f6ce2be
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
* Copyright 2019-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -433,25 +433,20 @@ public RexNode visitLiteral(RexLiteral rexLiteral) {
@Override
public RexNode visitCall(RexCall rexCall) {
/**
* For SqlUserDefinedFunction and SqlOperator RexCall, no need to handle it recursively
* and only return type of udf or sql operator is relevant
*/

/**
* If the return type of RexCall is based on the ordinal of its input argument
* and the corresponding input argument refers to a field from the input schema,
* use the field's schema as is.
* If the return type of RexCall is based on an ordinal of its input arguments, then leverage SchemaRexShuttle
* to visit the input argument and use the argument's schema as is to infer the return type of the call
*/
if (rexCall.getOperator().getReturnTypeInference() instanceof OrdinalReturnTypeInferenceV2) {
int index = ((OrdinalReturnTypeInferenceV2) rexCall.getOperator().getReturnTypeInference()).getOrdinal();
RexNode operand = rexCall.operands.get(index);

if (operand instanceof RexInputRef) {
appendRexInputRefField((RexInputRef) operand);
return rexCall;
}
operand.accept(this);
return rexCall;
}

/**
* For SqlUserDefinedFunction and SqlOperator RexCall, no need to handle it recursively
* and just directly use the return type of udf or sql operator as the field's schema
*/
RelDataType fieldType = rexCall.getType();
boolean isNullable = SchemaUtilities.isFieldNullable(rexCall, inputSchema);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
* Copyright 2019-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -177,7 +177,7 @@ private static void initializeUdfs() {
executeCreateFunctionQuery("default", Collections.singletonList("foo_udf_return_struct"), "FuncIsEven",
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnStruct");

executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStuct",
executeCreateFunctionQuery("default", Collections.singletonList("innerfield_with_udf"), "ReturnInnerStruct",
"com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg");
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
* Copyright 2019-2025 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -236,9 +236,9 @@ public void testUdfLessThanHundred() {
@Test
public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() {
String viewSql = "CREATE VIEW innerfield_with_udf "
+ "tblproperties('functions' = 'ReturnInnerStuct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
+ "tblproperties('functions' = 'ReturnInnerStruct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
+ " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS "
+ "SELECT default_innerfield_with_udf_ReturnInnerStuct('foo', innerRecord) AS innerRecord "
+ "SELECT default_innerfield_with_udf_ReturnInnerStruct('foo', innerRecord) AS innerRecord "
+ "FROM basecomplexmixednullabilities";

TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql);
Expand All @@ -252,6 +252,28 @@ public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF() {
TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc"));
}

@Test
public void testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDFForNestedCalls() {
String viewSql = "CREATE VIEW innerfield_with_udf "
+ "tblproperties('functions' = 'ReturnInnerStruct:com.linkedin.coral.hive.hive2rel.CoralTestUDFReturnSecondArg', "
+ " 'dependencies' = 'ivy://com.linkedin:udf:1.0') " + "AS "
+ "SELECT default_innerfield_with_udf_ReturnInnerStruct('foo', default_innerfield_with_udf_ReturnInnerStruct('foo', innerRecord)) AS innerRecord "
+ "FROM basecomplexmixednullabilities";

TestUtils.executeCreateViewQuery("default", "innerfield_with_udf", viewSql);

ViewToAvroSchemaConverter viewToAvroSchemaConverter = ViewToAvroSchemaConverter.create(hiveMetastoreClient);
Schema actualSchema = viewToAvroSchemaConverter.toAvroSchema("default", "innerfield_with_udf");

// Inner ReturnInnerStruct call return type == Return type of it's second argument, innerRecord
// Outer ReturnInnerStruct call return type == Return type of it's second argument, Inner ReturnInnerStruct call return type
// Therefore, Outer ReturnInnerStruct call return type == Return type of innerRecord
//
// We also expect all fields to retain their nullability after applying the UDF calls
Assert.assertEquals(actualSchema.toString(true),
TestUtils.loadSchema("testPreserveNullabilitiesAfterApplyingOrdinalReturnTypeUDF-expected.avsc"));
}

@Test
public void testUdfGreaterThanHundred() {
String viewSql = "CREATE VIEW foo_dali_udf2 "
Expand Down

0 comments on commit f6ce2be

Please sign in to comment.