From ec4a6da798f7d02979e80d518ea6ca9546f8f15e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 17 Apr 2024 13:50:07 -0400 Subject: [PATCH] coercion vec[Dictionary, Utf8] to Dictionary for coalesce function (#9958) (#10104) * for debug finish remove print add space * fix clippy * finish * fix clippy Co-authored-by: Lordworms <48054792+Lordworms@users.noreply.github.com> --- .../expr/src/type_coercion/functions.rs | 57 ++++++++++++------- datafusion/sqllogictest/test_files/scalar.slt | 42 +++++++++++++- 2 files changed, 75 insertions(+), 24 deletions(-) diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index d4095a72fe3e..34b607d0884d 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -311,17 +311,25 @@ fn coerced_from<'a>( type_from: &'a DataType, ) -> Option { use self::DataType::*; - - match type_into { + // match Dictionary first + match (type_into, type_from) { + // coerced dictionary first + (cur_type, Dictionary(_, value_type)) | (Dictionary(_, value_type), cur_type) + if coerced_from(cur_type, value_type).is_some() => + { + Some(type_into.clone()) + } // coerced into type_into - Int8 if matches!(type_from, Null | Int8) => Some(type_into.clone()), - Int16 if matches!(type_from, Null | Int8 | Int16 | UInt8) => { + (Int8, _) if matches!(type_from, Null | Int8) => Some(type_into.clone()), + (Int16, _) if matches!(type_from, Null | Int8 | Int16 | UInt8) => { Some(type_into.clone()) } - Int32 if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => { + (Int32, _) + if matches!(type_from, Null | Int8 | Int16 | Int32 | UInt8 | UInt16) => + { Some(type_into.clone()) } - Int64 + (Int64, _) if matches!( type_from, Null | Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 @@ -329,15 +337,17 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - UInt8 if matches!(type_from, Null | UInt8) => Some(type_into.clone()), - UInt16 if matches!(type_from, Null | UInt8 | UInt16) => Some(type_into.clone()), - UInt32 if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => { + (UInt8, _) if matches!(type_from, Null | UInt8) => Some(type_into.clone()), + (UInt16, _) if matches!(type_from, Null | UInt8 | UInt16) => { + Some(type_into.clone()) + } + (UInt32, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32) => { Some(type_into.clone()) } - UInt64 if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) => { + (UInt64, _) if matches!(type_from, Null | UInt8 | UInt16 | UInt32 | UInt64) => { Some(type_into.clone()) } - Float32 + (Float32, _) if matches!( type_from, Null | Int8 @@ -353,7 +363,7 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - Float64 + (Float64, _) if matches!( type_from, Null | Int8 @@ -371,7 +381,7 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - Timestamp(TimeUnit::Nanosecond, None) + (Timestamp(TimeUnit::Nanosecond, None), _) if matches!( type_from, Null | Timestamp(_, None) | Date32 | Utf8 | LargeUtf8 @@ -379,23 +389,27 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - Interval(_) if matches!(type_from, Utf8 | LargeUtf8) => Some(type_into.clone()), + (Interval(_), _) if matches!(type_from, Utf8 | LargeUtf8) => { + Some(type_into.clone()) + } // Any type can be coerced into strings - Utf8 | LargeUtf8 => Some(type_into.clone()), - Null if can_cast_types(type_from, type_into) => Some(type_into.clone()), + (Utf8 | LargeUtf8, _) => Some(type_into.clone()), + (Null, _) if can_cast_types(type_from, type_into) => Some(type_into.clone()), - List(_) if matches!(type_from, FixedSizeList(_, _)) => Some(type_into.clone()), + (List(_), _) if matches!(type_from, FixedSizeList(_, _)) => { + Some(type_into.clone()) + } // Only accept list and largelist with the same number of dimensions unless the type is Null. // List or LargeList with different dimensions should be handled in TypeSignature or other places before this - List(_) | LargeList(_) + (List(_) | LargeList(_), _) if datafusion_common::utils::base_type(type_from).eq(&Null) || list_ndims(type_from) == list_ndims(type_into) => { Some(type_into.clone()) } // should be able to coerce wildcard fixed size list to non wildcard fixed size list - FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD) => match type_from { + (FixedSizeList(f_into, FIXED_SIZE_LIST_WILDCARD), _) => match type_from { FixedSizeList(f_from, size_from) => { match coerced_from(f_into.data_type(), f_from.data_type()) { Some(data_type) if &data_type != f_into.data_type() => { @@ -410,7 +424,7 @@ fn coerced_from<'a>( _ => None, }, - Timestamp(unit, Some(tz)) if tz.as_ref() == TIMEZONE_WILDCARD => { + (Timestamp(unit, Some(tz)), _) if tz.as_ref() == TIMEZONE_WILDCARD => { match type_from { Timestamp(_, Some(from_tz)) => { Some(Timestamp(unit.clone(), Some(from_tz.clone()))) @@ -422,7 +436,7 @@ fn coerced_from<'a>( _ => None, } } - Timestamp(_, Some(_)) + (Timestamp(_, Some(_)), _) if matches!( type_from, Null | Timestamp(_, _) | Date32 | Utf8 | LargeUtf8 @@ -430,7 +444,6 @@ fn coerced_from<'a>( { Some(type_into.clone()) } - // More coerce rules. // Note that not all rules in `comparison_coercion` can be reused here. // For example, all numeric types can be coerced into Utf8 for comparison, diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index a77a2bf4059c..1911b0cef599 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -1779,6 +1779,46 @@ SELECT COALESCE(NULL, 'test') ---- test + +statement ok +create table test1 as values (arrow_cast('foo', 'Dictionary(Int32, Utf8)')), (null); + +# test coercion string +query ? +select coalesce(column1, 'none_set') from test1; +---- +foo +none_set + +# test coercion Int +query I +select coalesce(34, arrow_cast(123, 'Dictionary(Int32, Int8)')); +---- +34 + +# test with Int +query I +select coalesce(arrow_cast(123, 'Dictionary(Int32, Int8)'),34); +---- +123 + +# test with null +query I +select coalesce(null, 34, arrow_cast(123, 'Dictionary(Int32, Int8)')); +---- +34 + +# test with null +query T +select coalesce(null, column1, 'none_set') from test1; +---- +foo +none_set + +statement ok +drop table test1 + + statement ok CREATE TABLE test( c1 INT, @@ -2162,5 +2202,3 @@ query I select strpos('joséésoj', arrow_cast(null, 'Utf8')); ---- NULL - -