diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index 83f47883add9..c775d3131692 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -777,19 +777,29 @@ pub fn binary_numeric_coercion( (_, Float32) | (Float32, _) => Some(Float32), // The following match arms encode the following logic: Given the two // integral types, we choose the narrowest possible integral type that - // accommodates all values of both types. Note that to avoid information - // loss when combining UInt64 with signed integers we use Decimal128(20, 0). - (UInt64, Int64 | Int32 | Int16 | Int8) - | (Int64 | Int32 | Int16 | Int8, UInt64) => Some(Decimal128(20, 0)), - (UInt64, _) | (_, UInt64) => Some(UInt64), + // accommodates all values of both types. Note that some information + // loss is inevitable when we have a signed type and a `UInt64`, in + // which case we use `Int64`; i.e. the widest signed integral type. + + // TODO: For i64 and u64, we can use decimal or float64 + // Postgres has no unsigned type :( + // DuckDB v.0.10.0 has double (double precision floating-point number (8 bytes)) + // for largest signed (signed sixteen-byte integer) and unsigned integer (unsigned sixteen-byte integer) (Int64, _) | (_, Int64) + | (UInt64, Int8) + | (Int8, UInt64) + | (UInt64, Int16) + | (Int16, UInt64) + | (UInt64, Int32) + | (Int32, UInt64) | (UInt32, Int8) | (Int8, UInt32) | (UInt32, Int16) | (Int16, UInt32) | (UInt32, Int32) | (Int32, UInt32) => Some(Int64), + (UInt64, _) | (_, UInt64) => Some(UInt64), (Int32, _) | (_, Int32) | (UInt16, Int16) @@ -918,16 +928,16 @@ pub fn get_wider_type(lhs: &DataType, rhs: &DataType) -> Result { } /// Convert the numeric data type to the decimal data type. -/// We support signed and unsigned integer types and floating-point type. +/// Currently, we only support signed integer types and floating-point types. 
fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option { use arrow::datatypes::DataType::*; // This conversion rule is from spark // https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127 match numeric_type { - Int8 | UInt8 => Some(Decimal128(3, 0)), - Int16 | UInt16 => Some(Decimal128(5, 0)), - Int32 | UInt32 => Some(Decimal128(10, 0)), - Int64 | UInt64 => Some(Decimal128(20, 0)), + Int8 => Some(Decimal128(3, 0)), + Int16 => Some(Decimal128(5, 0)), + Int32 => Some(Decimal128(10, 0)), + Int64 => Some(Decimal128(20, 0)), // TODO if we convert the floating-point data to the decimal type, it maybe overflow. Float32 => Some(Decimal128(14, 7)), Float64 => Some(Decimal128(30, 15)), @@ -936,16 +946,16 @@ fn coerce_numeric_type_to_decimal(numeric_type: &DataType) -> Option { } /// Convert the numeric data type to the decimal data type. -/// We support signed and unsigned integer types and floating-point type. +/// Currently, we only support signed integer types and floating-point types. fn coerce_numeric_type_to_decimal256(numeric_type: &DataType) -> Option { use arrow::datatypes::DataType::*; // This conversion rule is from spark // https://github.com/apache/spark/blob/1c81ad20296d34f137238dadd67cc6ae405944eb/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala#L127 match numeric_type { - Int8 | UInt8 => Some(Decimal256(3, 0)), - Int16 | UInt16 => Some(Decimal256(5, 0)), - Int32 | UInt32 => Some(Decimal256(10, 0)), - Int64 | UInt64 => Some(Decimal256(20, 0)), + Int8 => Some(Decimal256(3, 0)), + Int16 => Some(Decimal256(5, 0)), + Int32 => Some(Decimal256(10, 0)), + Int64 => Some(Decimal256(20, 0)), // TODO if we convert the floating-point data to the decimal type, it maybe overflow. 
Float32 => Some(Decimal256(14, 7)), Float64 => Some(Decimal256(30, 15)), @@ -1984,18 +1994,6 @@ mod tests { Operator::Gt, DataType::UInt32 ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::UInt8, - Operator::Eq, - DataType::UInt64 - ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::Int64, - Operator::Eq, - DataType::Decimal128(20, 0) - ); // numeric/decimal test_coercion_binary_rule!( DataType::Int64, @@ -2027,12 +2025,6 @@ mod tests { Operator::GtEq, DataType::Decimal128(15, 3) ); - test_coercion_binary_rule!( - DataType::UInt64, - DataType::Decimal128(20, 0), - Operator::Eq, - DataType::Decimal128(20, 0) - ); // Binary test_coercion_binary_rule!( diff --git a/datafusion/sqllogictest/test_files/coalesce.slt b/datafusion/sqllogictest/test_files/coalesce.slt index a624d1def980..06460a005c20 100644 --- a/datafusion/sqllogictest/test_files/coalesce.slt +++ b/datafusion/sqllogictest/test_files/coalesce.slt @@ -105,13 +105,13 @@ select ---- 2 Int64 -# i64 and u64, cast to decimal128(20, 0) -query RT +# TODO: Got types (i64, u64), casting to decimal or double or even i128 if supported +query IT select coalesce(2, arrow_cast(3, 'UInt64')), arrow_typeof(coalesce(2, arrow_cast(3, 'UInt64'))); ---- -2 Decimal128(20, 0) +2 Int64 statement ok create table t1 (a bigint, b int, c int) as values (null, null, 1), (null, 2, null); diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index c32b4b9ea397..804612287246 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -433,30 +433,3 @@ drop table test_column_defaults statement error DataFusion error: Error during planning: Column reference is not allowed in the DEFAULT expression : Schema error: No field named a. 
create table test_column_defaults(a int, b int default a+1) - - -# test inserting UInt64 and signed integers into a bigint unsigned column -statement ok -create table unsigned_bigint_test (v bigint unsigned) - -query I -insert into unsigned_bigint_test values (10000000000000000000), (18446744073709551615) ----- -2 - -query I -insert into unsigned_bigint_test values (10000000000000000001), (1), (10000000000000000002) ----- -3 - -query I rowsort -select * from unsigned_bigint_test ----- -1 -10000000000000000000 -10000000000000000001 -10000000000000000002 -18446744073709551615 - -statement ok -drop table unsigned_bigint_test diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 496c6c609e45..68426f180d99 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -1828,7 +1828,7 @@ AS VALUES ('BB', 6, 1), ('BB', 6, 1); -query TIR +query TII select col1, col2, coalesce(sum_col3, 0) as sum_col3 from (select distinct col2 from tbl) AS q1 cross join (select distinct col1 from tbl) AS q2 diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 352c01ca295c..7b8992b966ad 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -413,23 +413,23 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre logical_plan 01)Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 02)--Union -03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Decimal128(20, 0)) AS c9 +03)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 04)------TableScan: aggregate_test_100 projection=[c1, c9] -05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Decimal128(20, 0)) AS c9 +05)----Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 06)------TableScan: 
aggregate_test_100 projection=[c1, c3] physical_plan 01)SortPreservingMergeExec: [c9@1 DESC], fetch=5 02)--UnionExec 03)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] -04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Decimal128(20, 0)) as c9] +04)------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true 07)----SortExec: TopK(fetch=5), expr=[c9@1 DESC], preserve_partitioning=[true] -08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Decimal128(20, 0)) as c9] +08)------ProjectionExec: expr=[c1@0 as c1, CAST(c3@1 AS Int64) as c9] 09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c3], has_header=true -query TR +query TI SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggregate_test_100 ORDER BY c9 DESC LIMIT 5 ---- c 4268716378