From a46fa72581380d39c92a6a1b55a597ead1505276 Mon Sep 17 00:00:00 2001 From: jonahgao Date: Thu, 29 Feb 2024 17:06:01 +0800 Subject: [PATCH 1/5] feat: support `unnest` with additional columns --- datafusion/sql/src/select.rs | 39 ++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index 59ef16d758ad..db3e5da9ce9d 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -24,8 +24,8 @@ use crate::utils::{ resolve_columns, resolve_positions_to_exprs, }; -use datafusion_common::Column; use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; +use datafusion_common::{Column, UnnestOptions}; use datafusion_expr::expr::{Alias, Unnest}; use datafusion_expr::expr_rewriter::{ normalize_col, normalize_col_with_schemas_and_ambiguity_check, @@ -282,30 +282,35 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { input: LogicalPlan, select_exprs: Vec, ) -> Result { - let mut exprs_to_unnest = vec![]; - - for expr in select_exprs.iter() { - if let Expr::Unnest(Unnest { exprs }) = expr { - exprs_to_unnest.push(exprs[0].clone()); - } - } + let mut unnest_columns = vec![]; + // Map unnest expressions to their argument + let projection_exprs = select_exprs + .into_iter() + .map(|expr| { + if let Expr::Unnest(Unnest { exprs }) = expr { + unnest_columns.push(exprs[0].display_name()?); + Ok(exprs[0].clone()) + } else { + Ok(expr) + } + }) + .collect::>>()?; // Do the final projection - if exprs_to_unnest.is_empty() { + if unnest_columns.is_empty() { LogicalPlanBuilder::from(input) - .project(select_exprs)? + .project(projection_exprs)? .build() } else { - if exprs_to_unnest.len() > 1 { + if unnest_columns.len() > 1 { return not_impl_err!("Only support single unnest expression for now"); } - - let expr = exprs_to_unnest[0].clone(); - let column = expr.display_name()?; - + let unnest_column = unnest_columns.pop().unwrap(); + // Set preserve_nulls to false to compatible with DuckDB and PostgreSQL + let unnest_options = UnnestOptions::new().with_preserve_nulls(false); LogicalPlanBuilder::from(input) - .project(vec![expr])? - .unnest_column(column)? + .project(projection_exprs)? + .unnest_column_with_options(unnest_column, unnest_options)? .build() } } From 7b48a73492be882e8e8de47efd939a4a967f818f Mon Sep 17 00:00:00 2001 From: jonahgao Date: Thu, 29 Feb 2024 17:08:46 +0800 Subject: [PATCH 2/5] add test from issue --- datafusion/sqllogictest/test_files/unnest.slt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index ccdd07875927..c1780829e4db 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -91,6 +91,20 @@ NULL 42 NULL +## Unnest with additional column +## Issue: https://github.com/apache/arrow-datafusion/issues/9349 +query II +select unnest(column1), column3 from unnest_table; +---- +1 1 +2 1 +3 1 +4 2 +5 2 +6 3 +12 NULL + + ## Unnest column with scalars query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null select unnest(column3) from unnest_table; From 26226d234f65e098df7f8b92aef74d9e575d9d4d Mon Sep 17 00:00:00 2001 From: jonahgao Date: Thu, 29 Feb 2024 17:45:49 +0800 Subject: [PATCH 3/5] add test to verify preserve_nulls --- datafusion/sql/src/select.rs | 2 +- datafusion/sqllogictest/test_files/unnest.slt | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index db3e5da9ce9d..f361bcf7cad7 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -306,7 +306,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return not_impl_err!("Only support single unnest expression for now"); } let unnest_column = unnest_columns.pop().unwrap(); - // Set preserve_nulls to false to compatible with DuckDB and PostgreSQL + // Set preserve_nulls to false to ensure compatibility with DuckDB and PostgreSQL let unnest_options = UnnestOptions::new().with_preserve_nulls(false); LogicalPlanBuilder::from(input) .project(projection_exprs)? diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index c1780829e4db..3aafbbb96c3f 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -25,7 +25,9 @@ AS VALUES ([1,2,3], [7], 1), ([4,5], [8,9,10], 2), ([6], [11,12], 3), - ([12], [null, 42, null], null) + ([12], [null, 42, null], null), + -- null array to verify the `preserve_nulls` option + (null, null, null) ; ## Basic unnest expression in select list @@ -104,6 +106,19 @@ select unnest(column1), column3 from unnest_table; 6 3 12 NULL +query ?II +select column1, unnest(column2), column3 from unnest_table; +---- +[1, 2, 3] 7 1 +[4, 5] 8 2 +[4, 5] 9 2 +[4, 5] 10 2 +[6] 11 3 +[6] 12 3 +[12] NULL NULL +[12] 42 NULL +[12] NULL NULL + ## Unnest column with scalars query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null From 6d57dbbb5ce16717fd5e4f73183a683e91a9ded0 Mon Sep 17 00:00:00 2001 From: jonahgao Date: Thu, 29 Feb 2024 17:50:54 +0800 Subject: [PATCH 4/5] update test --- datafusion/sqllogictest/test_files/unnest.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 3aafbbb96c3f..03bc993ea12f 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -27,7 +27,7 @@ AS VALUES ([6], [11,12], 3), ([12], [null, 42, null], null), -- null array to verify the `preserve_nulls` option - (null, null, null) + (null, null, 4) ; ## Basic unnest expression in select list @@ -241,7 +241,7 @@ select * from unnest([1,2,(select sum(column3) from unnest_table)]); ---- 1 2 -6 +10 statement ok drop table unnest_table; From 60772c6737818c1ebed2c35e03e1a1a41a6ed08b Mon Sep 17 00:00:00 2001 From: jonahgao Date: Thu, 29 Feb 2024 21:08:13 +0800 Subject: [PATCH 5/5] fix name conflicts --- datafusion/sql/src/select.rs | 9 ++++--- datafusion/sqllogictest/test_files/unnest.slt | 27 +++++++++++++------ 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index f361bcf7cad7..c5c80bd6acf0 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -287,9 +287,12 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let projection_exprs = select_exprs .into_iter() .map(|expr| { - if let Expr::Unnest(Unnest { exprs }) = expr { - unnest_columns.push(exprs[0].display_name()?); - Ok(exprs[0].clone()) + if let Expr::Unnest(Unnest { ref exprs }) = expr { + let column_name = expr.display_name()?; + unnest_columns.push(column_name.clone()); + // Add alias for the argument expression, to avoid naming conflicts with other expressions + // in the select list. For example: `select unnest(col1), col1 from t`. + Ok(exprs[0].clone().alias(column_name)) } else { Ok(expr) } diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 03bc993ea12f..f60f715242cf 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -106,15 +106,26 @@ select unnest(column1), column3 from unnest_table; 6 3 12 NULL +query I? +select unnest(column1), column1 from unnest_table; +---- +1 [1, 2, 3] +2 [1, 2, 3] +3 [1, 2, 3] +4 [4, 5] +5 [4, 5] +6 [6] +12 [12] + query ?II -select column1, unnest(column2), column3 from unnest_table; ----- -[1, 2, 3] 7 1 -[4, 5] 8 2 -[4, 5] 9 2 -[4, 5] 10 2 -[6] 11 3 -[6] 12 3 +select array_remove(column1, 4), unnest(column2), column3 * 10 from unnest_table; +---- +[1, 2, 3] 7 10 +[5] 8 20 +[5] 9 20 +[5] 10 20 +[6] 11 30 +[6] 12 30 [12] NULL NULL [12] 42 NULL [12] NULL NULL