Skip to content

Commit

Permalink
Revert "fix(expr-common): Coerce to Decimal(20, 0) when combining UIn…
Browse files Browse the repository at this point in the history
…t64 with signed integers (apache#14223)"

This reverts commit e0f9f65.
  • Loading branch information
alamb committed Jan 25, 2025
1 parent 144ebf9 commit 658ad43
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 69 deletions.
58 changes: 25 additions & 33 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -777,19 +777,29 @@ pub fn binary_numeric_coercion(
(_, Float32) | (Float32, _) => Some(Float32),
// The following match arms encode the following logic: Given the two
// integral types, we choose the narrowest possible integral type that
// accommodates all values of both types. Note that to avoid information
// loss when combining UInt64 with signed integers we use Decimal128(20, 0).
(UInt64, Int64 | Int32 | Int16 | Int8)
| (Int64 | Int32 | Int16 | Int8, UInt64) => Some(Decimal128(20, 0)),
(UInt64, _) | (_, UInt64) => Some(UInt64),
// accommodates all values of both types. Note that some information
// loss is inevitable when we have a signed type and a `UInt64`, in
// which case we use `Int64`;i.e. the widest signed integral type.

// TODO: For i64 and u64, we can use decimal or float64
// Postgres has no unsigned type :(
// DuckDB v.0.10.0 has double (double precision floating-point number (8 bytes))
// for largest signed (signed sixteen-byte integer) and unsigned integer (unsigned sixteen-byte integer)
(Int64, _)
| (_, Int64)
| (UInt64, Int8)
| (Int8, UInt64)
| (UInt64, Int16)
| (Int16, UInt64)
| (UInt64, Int32)
| (Int32, UInt64)
| (UInt32, Int8)
| (Int8, UInt32)
| (UInt32, Int16)
| (Int16, UInt32)
| (UInt32, Int32)
| (Int32, UInt32) => Some(Int64),
(UInt64, _) | (_, UInt64) => Some(UInt64),
(Int32, _)
| (_, Int32)
| (UInt16, Int16)
Expand Down Expand Up @@ -918,16 +928,16 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result<DataType> {
}

/// Convert the numeric data type to the decimal data type.
/// We support signed and unsigned integer types and floating-point type.
/// Now, we just support the signed integer type and floating-point type.
fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
// This conversion rule is from spark
// https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127
match numeric_type {
Int8 | UInt8 => Some(Decimal128(3, 0)),
Int16 | UInt16 => Some(Decimal128(5, 0)),
Int32 | UInt32 => Some(Decimal128(10, 0)),
Int64 | UInt64 => Some(Decimal128(20, 0)),
Int8 => Some(Decimal128(3, 0)),
Int16 => Some(Decimal128(5, 0)),
Int32 => Some(Decimal128(10, 0)),
Int64 => Some(Decimal128(20, 0)),
// TODO if we convert the floating-point data to the decimal type, it maybe overflow.
Float32 => Some(Decimal128(14, 7)),
Float64 => Some(Decimal128(30, 15)),
Expand All @@ -936,16 +946,16 @@ fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option<DataType> {
}

/// Convert the numeric data type to the decimal data type.
/// We support signed and unsigned integer types and floating-point type.
/// Now, we just support the signed integer type and floating-point type.
fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
// This conversion rule is from spark
// https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127
match numeric_type {
Int8 | UInt8 => Some(Decimal256(3, 0)),
Int16 | UInt16 => Some(Decimal256(5, 0)),
Int32 | UInt32 => Some(Decimal256(10, 0)),
Int64 | UInt64 => Some(Decimal256(20, 0)),
Int8 => Some(Decimal256(3, 0)),
Int16 => Some(Decimal256(5, 0)),
Int32 => Some(Decimal256(10, 0)),
Int64 => Some(Decimal256(20, 0)),
// TODO if we convert the floating-point data to the decimal type, it maybe overflow.
Float32 => Some(Decimal256(14, 7)),
Float64 => Some(Decimal256(30, 15)),
Expand Down Expand Up @@ -1984,18 +1994,6 @@ mod tests {
Operator::Gt,
DataType::UInt32
);
test_coercion_binary_rule!(
DataType::UInt64,
DataType::UInt8,
Operator::Eq,
DataType::UInt64
);
test_coercion_binary_rule!(
DataType::UInt64,
DataType::Int64,
Operator::Eq,
DataType::Decimal128(20, 0)
);
// numeric/decimal
test_coercion_binary_rule!(
DataType::Int64,
Expand Down Expand Up @@ -2027,12 +2025,6 @@ mod tests {
Operator::GtEq,
DataType::Decimal128(15, 3)
);
test_coercion_binary_rule!(
DataType::UInt64,
DataType::Decimal128(20, 0),
Operator::Eq,
DataType::Decimal128(20, 0)
);

// Binary
test_coercion_binary_rule!(
Expand Down
6 changes: 3 additions & 3 deletions datafusion/sqllogictest/test_files/coalesce.slt
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ select
----
2 Int64

# i64 and u64, cast to decimal128(20, 0)
query RT
# TODO: Got types (i64, u64), casting to decimal or double or even i128 if supported
query IT
select
coalesce(2, arrow_cast(3, 'UInt64')),
arrow_typeof(coalesce(2, arrow_cast(3, 'UInt64')));
----
2 Decimal128(20, 0)
2 Int64

statement ok
create table t1 (a bigint, b int, c int) as values (null, null, 1), (null, 2, null);
Expand Down
27 changes: 0 additions & 27 deletions datafusion/sqllogictest/test_files/insert.slt
Original file line number Diff line number Diff line change
Expand Up @@ -433,30 +433,3 @@ drop table test_column_defaults

statement error DataFusion error: Error during planning: Column reference is not allowed in the DEFAULT expression : Schema error: No field named a.
create table test_column_defaults(a int, b int default a+1)


# test inserting UInt64 and signed integers into a bigint unsigned column
statement ok
create table unsigned_bigint_test (v bigint unsigned)

query I
insert into unsigned_bigint_test values (10000000000000000000), (18446744073709551615)
----
2

query I
insert into unsigned_bigint_test values (10000000000000000001), (1), (10000000000000000002)
----
3

query I rowsort
select * from unsigned_bigint_test
----
1
10000000000000000000
10000000000000000001
10000000000000000002
18446744073709551615

statement ok
drop table unsigned_bigint_test
2 changes: 1 addition & 1 deletion datafusion/sqllogictest/test_files/joins.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1828,7 +1828,7 @@ AS VALUES
('BB', 6, 1),
('BB', 6, 1);

query TIR
query TII
select col1, col2, coalesce(sum_col3, 0) as sum_col3
from (select distinct col2 from tbl) AS q1
cross join (select distinct col1 from tbl) AS q2
Expand Down
10 changes: 5 additions & 5 deletions datafusion/sqllogictest/test_files/union.slt
Original file line number Diff line number Diff line change
Expand Up @@ -413,23 +413,23 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre
logical_plan
01)Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5
02)--Union
03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Decimal128(20, 0)) AS c9
03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9
04)------TableScan: aggregate_test_100 projection=[c1, c9]
05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Decimal128(20, 0)) AS c9
05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9
06)------TableScan: aggregate_test_100 projection=[c1, c3]
physical_plan
01)SortPreservingMergeExec: [c9@1 DESC], fetch=5
02)--UnionExec
03)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true]
04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Decimal128(20, 0)) as c9]
04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9]
05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true
07)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true]
08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Decimal128(20, 0)) as c9]
08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9]
09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true

query TR
query TI
SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5
----
c 4268716378
Expand Down

0 comments on commit 658ad43

Please sign in to comment.