From 9ae676ccdb0866ba302bace4fdb226a5b2ec3b0e Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 18 Jun 2024 00:51:40 +0800 Subject: [PATCH 1/2] support unparsing ScalarValue::IntervalMonthDayNano to String --- datafusion/expr/src/expr_fn.rs | 8 +- datafusion/sql/src/unparser/expr.rs | 125 ++++++++++++++++++++++++++-- 2 files changed, 127 insertions(+), 6 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 099851aece46..a87412ee6356 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -31,8 +31,9 @@ use crate::{ Signature, Volatility, }; use crate::{AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowUDF, WindowUDFImpl}; +use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{Column, Result}; +use datafusion_common::{Column, Result, ScalarValue}; use std::any::Any; use std::fmt::Debug; use std::ops::Not; @@ -670,6 +671,11 @@ impl WindowUDFImpl for SimpleWindowUDF { } } +pub fn interval_month_day_nano_lit(value: &str) -> Expr { + let interval = parse_interval_month_day_nano(value).ok(); + Expr::Literal(ScalarValue::IntervalMonthDayNano(interval)) +} + #[cfg(test)] mod test { use super::*; diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 12c48054f1a7..ed075e5011d1 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -20,8 +20,9 @@ use std::{fmt::Display, vec}; use arrow_array::{Date32Array, Date64Array}; use arrow_schema::DataType; +use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ - self, Expr as AstExpr, Function, FunctionArg, Ident, UnaryOperator, + self, Expr as AstExpr, Function, FunctionArg, Ident, Interval, UnaryOperator, }; use datafusion_common::{ @@ -825,8 +826,26 @@ impl Unparser<'_> { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::IntervalDayTime(None) => 
Ok(ast::Expr::Value(ast::Value::Null)), - ScalarValue::IntervalMonthDayNano(Some(_i)) => { - not_impl_err!("Unsupported scalar: {v:?}") + ScalarValue::IntervalMonthDayNano(Some(i)) => { + let mut s = vec![]; + if i.months != 0 { + s.push(format!("{} MONTH", i.months)); + } + if i.days != 0 { + s.push(format!("{} DAY", i.days)); + } + if i.nanoseconds != 0 { + s.push(Self::process_interval_nanosecond(i.nanoseconds)); + } + + let interval = Interval { + value: Box::new(ast::Expr::Value(SingleQuotedString(s.join(" ")))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }; + Ok(ast::Expr::Interval(interval)) } ScalarValue::IntervalMonthDayNano(None) => { Ok(ast::Expr::Value(ast::Value::Null)) @@ -859,6 +878,35 @@ impl Unparser<'_> { } } + fn process_interval_nanosecond(nano: i64) -> String { + let mut s = vec![]; + let hour = nano / 3_600_000_000_000; + let minute = nano / 60_000_000_000 % 60; + let second = nano / 1_000_000_000 % 60; + let millisecond = nano / 1_000_000 % 1_000; + let microsecond = nano / 1_000 % 1_000; + let nanosecond = nano % 1_000; + if hour != 0 { + s.push(format!("{} HOUR", hour)); + } + if minute != 0 { + s.push(format!("{} MINUTE", minute)); + } + if second != 0 { + s.push(format!("{} SECOND", second)); + } + if millisecond != 0 { + s.push(format!("{} MILLISECOND", millisecond)); + } + if microsecond != 0 { + s.push(format!("{} MICROSECOND", microsecond)); + } + if nanosecond != 0 { + s.push(format!("{} NANOSECOND", nanosecond)); + } + s.join(" ") + } + fn arrow_dtype_to_ast_dtype(&self, data_type: &DataType) -> Result { match data_type { DataType::Null => { @@ -954,19 +1002,19 @@ impl Unparser<'_> { #[cfg(test)] mod tests { + use std::ops::{Add, Sub}; use std::{any::Any, sync::Arc, vec}; use arrow::datatypes::{Field, Schema}; use arrow_schema::DataType::Int8; - use datafusion_common::TableReference; - use datafusion_expr::AggregateExt; use datafusion_expr::{ case, col, cube, exists, 
grouping_set, lit, not, not_exists, out_ref_col, placeholder, rollup, table_scan, try_cast, when, wildcard, ColumnarValue, ScalarUDF, ScalarUDFImpl, Signature, Volatility, WindowFrame, WindowFunctionDefinition, }; + use datafusion_expr::{interval_month_day_nano_lit, AggregateExt}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; @@ -1256,6 +1304,73 @@ mod tests { ), (col("need-quoted").eq(lit(1)), r#"("need-quoted" = 1)"#), (col("need quoted").eq(lit(1)), r#"("need quoted" = 1)"#), + ( + interval_month_day_nano_lit("3 NANOSECOND"), + r#"INTERVAL '3 NANOSECOND'"#, + ), + ( + interval_month_day_nano_lit("1000 NANOSECOND"), + r#"INTERVAL '1 MICROSECOND'"#, + ), + ( + interval_month_day_nano_lit("1000000 NANOSECOND"), + r#"INTERVAL '1 MILLISECOND'"#, + ), + ( + interval_month_day_nano_lit("1000000000 NANOSECOND"), + r#"INTERVAL '1 SECOND'"#, + ), + ( + interval_month_day_nano_lit("1001001001 NANOSECOND"), + r#"INTERVAL '1 SECOND 1 MILLISECOND 1 MICROSECOND 1 NANOSECOND'"#, + ), + ( + interval_month_day_nano_lit("3 SECOND"), + r#"INTERVAL '3 SECOND'"#, + ), + ( + interval_month_day_nano_lit("3 MINUTE"), + r#"INTERVAL '3 MINUTE'"#, + ), + ( + interval_month_day_nano_lit("3 HOUR"), + r#"INTERVAL '3 HOUR'"#, + ), + ( + interval_month_day_nano_lit("3 HOUR 10 MINUTE 20 SECOND"), + r#"INTERVAL '3 HOUR 10 MINUTE 20 SECOND'"#, + ), + (interval_month_day_nano_lit("3 DAY"), r#"INTERVAL '3 DAY'"#), + ( + interval_month_day_nano_lit("3 MONTH"), + r#"INTERVAL '3 MONTH'"#, + ), + ( + interval_month_day_nano_lit("1 MONTH 1 DAY 10 SECOND"), + r#"INTERVAL '1 MONTH 1 DAY 10 SECOND'"#, + ), + ( + interval_month_day_nano_lit("15 MONTH"), + r#"INTERVAL '15 MONTH'"#, + ), + ( + interval_month_day_nano_lit("1.5 MONTH"), + r#"INTERVAL '1 MONTH 15 DAY'"#, + ), + ( + interval_month_day_nano_lit("-3 MONTH"), + r#"INTERVAL '-3 MONTH'"#, + ), + ( + interval_month_day_nano_lit("1 MONTH") + .add(interval_month_day_nano_lit("1 DAY")), + 
r#"(INTERVAL '1 MONTH' + INTERVAL '1 DAY')"#, + ), + ( + interval_month_day_nano_lit("1 MONTH") + .sub(interval_month_day_nano_lit("1 DAY")), + r#"(INTERVAL '1 MONTH' - INTERVAL '1 DAY')"#, + ), ]; for (expr, expected) in tests { From e6222b50edcf269ec029710300a4dadcb60de5f7 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Tue, 18 Jun 2024 20:26:03 +0800 Subject: [PATCH 2/2] use array formatter to format the interval string --- datafusion/sql/src/unparser/expr.rs | 112 +++++----------------------- 1 file changed, 19 insertions(+), 93 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index ed075e5011d1..13053b88d722 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use arrow::util::display::array_value_to_string; use core::fmt; use std::{fmt::Display, vec}; @@ -826,20 +827,17 @@ impl Unparser<'_> { not_impl_err!("Unsupported scalar: {v:?}") } ScalarValue::IntervalDayTime(None) => Ok(ast::Expr::Value(ast::Value::Null)), - ScalarValue::IntervalMonthDayNano(Some(i)) => { - let mut s = vec![]; - if i.months != 0 { - s.push(format!("{} MONTH", i.months)); - } - if i.days != 0 { - s.push(format!("{} DAY", i.days)); - } - if i.nanoseconds != 0 { - s.push(Self::process_interval_nanosecond(i.nanoseconds)); - } - + ScalarValue::IntervalMonthDayNano(Some(_i)) => { + let wrap_array = v.to_array()?; + let Some(result) = array_value_to_string(&wrap_array, 0).ok() else { + return internal_err!( + "Unable to convert IntervalMonthDayNano to string" + ); + }; let interval = Interval { - value: Box::new(ast::Expr::Value(SingleQuotedString(s.join(" ")))), + value: Box::new(ast::Expr::Value(SingleQuotedString( + result.to_uppercase(), + ))), leading_field: None, leading_precision: None, last_field: None, @@ -878,35 +876,6 @@ impl Unparser<'_> { } } - fn process_interval_nanosecond(nano: i64) -> 
String { - let mut s = vec![]; - let hour = nano / 3_600_000_000_000; - let minute = nano / 60_000_000_000 % 60; - let second = nano / 1_000_000_000 % 60; - let millisecond = nano / 1_000_000 % 1_000; - let microsecond = nano / 1_000 % 1_000; - let nanosecond = nano % 1_000; - if hour != 0 { - s.push(format!("{} HOUR", hour)); - } - if minute != 0 { - s.push(format!("{} MINUTE", minute)); - } - if second != 0 { - s.push(format!("{} SECOND", second)); - } - if millisecond != 0 { - s.push(format!("{} MILLISECOND", millisecond)); - } - if microsecond != 0 { - s.push(format!("{} MICROSECOND", microsecond)); - } - if nanosecond != 0 { - s.push(format!("{} NANOSECOND", nanosecond)); - } - s.join(" ") - } - fn arrow_dtype_to_ast_dtype(&self, data_type: &DataType) -> Result { match data_type { DataType::Null => { @@ -1305,71 +1274,28 @@ mod tests { (col("need-quoted").eq(lit(1)), r#"("need-quoted" = 1)"#), (col("need quoted").eq(lit(1)), r#"("need quoted" = 1)"#), ( - interval_month_day_nano_lit("3 NANOSECOND"), - r#"INTERVAL '3 NANOSECOND'"#, - ), - ( - interval_month_day_nano_lit("1000 NANOSECOND"), - r#"INTERVAL '1 MICROSECOND'"#, - ), - ( - interval_month_day_nano_lit("1000000 NANOSECOND"), - r#"INTERVAL '1 MILLISECOND'"#, - ), - ( - interval_month_day_nano_lit("1000000000 NANOSECOND"), - r#"INTERVAL '1 SECOND'"#, - ), - ( - interval_month_day_nano_lit("1001001001 NANOSECOND"), - r#"INTERVAL '1 SECOND 1 MILLISECOND 1 MICROSECOND 1 NANOSECOND'"#, - ), - ( - interval_month_day_nano_lit("3 SECOND"), - r#"INTERVAL '3 SECOND'"#, - ), - ( - interval_month_day_nano_lit("3 MINUTE"), - r#"INTERVAL '3 MINUTE'"#, - ), - ( - interval_month_day_nano_lit("3 HOUR"), - r#"INTERVAL '3 HOUR'"#, - ), - ( - interval_month_day_nano_lit("3 HOUR 10 MINUTE 20 SECOND"), - r#"INTERVAL '3 HOUR 10 MINUTE 20 SECOND'"#, - ), - (interval_month_day_nano_lit("3 DAY"), r#"INTERVAL '3 DAY'"#), - ( - interval_month_day_nano_lit("3 MONTH"), - r#"INTERVAL '3 MONTH'"#, - ), - ( - 
interval_month_day_nano_lit("1 MONTH 1 DAY 10 SECOND"), - r#"INTERVAL '1 MONTH 1 DAY 10 SECOND'"#, - ), - ( - interval_month_day_nano_lit("15 MONTH"), - r#"INTERVAL '15 MONTH'"#, + interval_month_day_nano_lit( + "1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND", + ), + r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, ), ( interval_month_day_nano_lit("1.5 MONTH"), - r#"INTERVAL '1 MONTH 15 DAY'"#, + r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, ), ( interval_month_day_nano_lit("-3 MONTH"), - r#"INTERVAL '-3 MONTH'"#, + r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, ), ( interval_month_day_nano_lit("1 MONTH") .add(interval_month_day_nano_lit("1 DAY")), - r#"(INTERVAL '1 MONTH' + INTERVAL '1 DAY')"#, + r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, ), ( interval_month_day_nano_lit("1 MONTH") .sub(interval_month_day_nano_lit("1 DAY")), - r#"(INTERVAL '1 MONTH' - INTERVAL '1 DAY')"#, + r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, ), ];