From 9efb41705d511d9b50f7357896b9678af827ed7f Mon Sep 17 00:00:00 2001 From: jcsherin Date: Wed, 2 Oct 2024 17:39:31 +0530 Subject: [PATCH 01/46] Move `lead-lag` to `functions-window` package --- .../src/window => functions-window/src}/lead_lag.rs | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename datafusion/{physical-expr/src/window => functions-window/src}/lead_lag.rs (100%) diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs similarity index 100% rename from datafusion/physical-expr/src/window/lead_lag.rs rename to datafusion/functions-window/src/lead_lag.rs From 6f05a9cbf4a0dd85b16cf98e7e45cecdcbebaecf Mon Sep 17 00:00:00 2001 From: jcsherin Date: Wed, 2 Oct 2024 21:53:44 +0530 Subject: [PATCH 02/46] Builds with warnings --- .../core/tests/fuzz_cases/window_fuzz.rs | 109 ++-- .../expr/src/built_in_window_function.rs | 25 +- datafusion/expr/src/expr.rs | 25 +- datafusion/expr/src/window_function.rs | 6 +- datafusion/functions-window/src/lead_lag.rs | 518 ++++++++++-------- datafusion/functions-window/src/lib.rs | 1 + .../physical-expr/src/expressions/mod.rs | 1 - datafusion/physical-expr/src/window/mod.rs | 1 - datafusion/physical-plan/src/windows/mod.rs | 67 +-- datafusion/proto/proto/datafusion.proto | 10 +- datafusion/proto/src/generated/pbjson.rs | 6 - datafusion/proto/src/generated/prost.rs | 8 +- .../proto/src/logical_plan/from_proto.rs | 2 - datafusion/proto/src/logical_plan/to_proto.rs | 2 - .../proto/src/physical_plan/to_proto.rs | 43 +- 15 files changed, 436 insertions(+), 388 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index a6c2cf700cc4..775c6a95ffc1 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -194,34 +194,34 @@ async fn bounded_window_causal_non_causal() -> Result<()> { // LAG(x) OVER ( // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING // ) - ( - // Window function - WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lag), - // its name - "LAG", - // no argument - vec![col("x", &schema)?], - // Expected causality, for None cases causality will be determined from window frame boundaries - Some(true), - ), - // Simulate cases of the following form: - // LEAD(x) OVER ( - // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING - // ) - ( - // Window function - WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lead), - // its name - "LEAD", - // no argument - vec![col("x", &schema)?], - // Expected causality, for None cases causality will be determined from window frame boundaries - Some(false), - ), - // Simulate cases of the following form: - // RANK() OVER ( - // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING - // ) + /* ( + // Window function + WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lag), + // its name + "LAG", + // no argument + vec![col("x", &schema)?], + // Expected causality, for None cases causality will be determined from window frame boundaries + Some(true), + ), + */ // Simulate cases of the following form: + // LEAD(x) OVER ( + // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING + // ) + /* ( + // Window function + WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lead), + // its name + "LEAD", + // no argument + vec![col("x", &schema)?], + // Expected causality, for None cases causality will be determined from window frame boundaries + Some(false), + ), + */ // Simulate cases of the following form: + // RANK() OVER ( + // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING + // ) ( // Window function WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Rank), @@ -398,32 +398,33 @@ fn get_random_function( vec![], ), ); - window_fn_map.insert( - "lead", - ( - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::Lead, - ), - vec![ - arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), - lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), - ], - ), - ); - window_fn_map.insert( - "lag", - ( - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::Lag, - ), - vec![ - arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), - lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), - ], - ), - ); + /* window_fn_map.insert( + "lead", + ( + WindowFunctionDefinition::BuiltInWindowFunction( + BuiltInWindowFunction::Lead, + ), + vec![ + arg.clone(), + lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), + ], + ), + ); + window_fn_map.insert( + "lag", + ( + WindowFunctionDefinition::BuiltInWindowFunction( + BuiltInWindowFunction::Lag, + ), + vec![ + arg.clone(), + lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), + ], + ), + ); + */ } window_fn_map.insert( "first_value", diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index b136d6cacec8..0591c2c75396 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -22,7 +22,7 @@ use std::str::FromStr; use crate::type_coercion::functions::data_types; use crate::utils; -use crate::{Signature, TypeSignature, Volatility}; +use crate::{Signature, Volatility}; use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, Result}; use arrow::datatypes::DataType; @@ -50,16 +50,17 @@ pub enum BuiltInWindowFunction { CumeDist, /// integer ranging from 1 to the argument value, dividing the partition as equally as possible Ntile, + // TODO: Preserve comments when converting to udwf /// returns value evaluated at the row that is offset rows before the current row within the partition; /// if there is no such row, instead return default (which must be of the same type as value). /// Both offset and default are evaluated with respect to the current row. /// If omitted, offset defaults to 1 and default to null - Lag, + // Lag, /// returns value evaluated at the row that is offset rows after the current row within the partition; /// if there is no such row, instead return default (which must be of the same type as value). /// Both offset and default are evaluated with respect to the current row. /// If omitted, offset defaults to 1 and default to null - Lead, + // Lead, /// returns value evaluated at the row that is the first row of the window frame FirstValue, /// returns value evaluated at the row that is the last row of the window frame @@ -77,8 +78,6 @@ impl BuiltInWindowFunction { PercentRank => "PERCENT_RANK", CumeDist => "CUME_DIST", Ntile => "NTILE", - Lag => "LAG", - Lead => "LEAD", FirstValue => "first_value", LastValue => "last_value", NthValue => "NTH_VALUE", @@ -95,8 +94,6 @@ impl FromStr for BuiltInWindowFunction { "PERCENT_RANK" => BuiltInWindowFunction::PercentRank, "CUME_DIST" => BuiltInWindowFunction::CumeDist, "NTILE" => BuiltInWindowFunction::Ntile, - "LAG" => BuiltInWindowFunction::Lag, - "LEAD" => BuiltInWindowFunction::Lead, "FIRST_VALUE" => BuiltInWindowFunction::FirstValue, "LAST_VALUE" => BuiltInWindowFunction::LastValue, "NTH_VALUE" => BuiltInWindowFunction::NthValue, @@ -133,9 +130,7 @@ impl BuiltInWindowFunction { BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => { Ok(DataType::Float64) } - BuiltInWindowFunction::Lag - | BuiltInWindowFunction::Lead - | BuiltInWindowFunction::FirstValue + BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue | BuiltInWindowFunction::NthValue => Ok(input_expr_types[0].clone()), } @@ -149,16 +144,6 @@ impl BuiltInWindowFunction { | BuiltInWindowFunction::DenseRank | BuiltInWindowFunction::PercentRank | BuiltInWindowFunction::CumeDist => Signature::any(0, Volatility::Immutable), - BuiltInWindowFunction::Lag | BuiltInWindowFunction::Lead => { - Signature::one_of( - vec![ - TypeSignature::Any(1), - TypeSignature::Any(2), - TypeSignature::Any(3), - ], - Volatility::Immutable, - ) - } BuiltInWindowFunction::FirstValue | BuiltInWindowFunction::LastValue => { Signature::any(1, Volatility::Immutable) } diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 02a2edb98016..366ff228abbb 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2672,18 +2672,19 @@ mod test { built_in_window_function::BuiltInWindowFunction::LastValue )) ); - assert_eq!( - find_df_window_func("LAG"), - Some(WindowFunctionDefinition::BuiltInWindowFunction( - built_in_window_function::BuiltInWindowFunction::Lag - )) - ); - assert_eq!( - find_df_window_func("LEAD"), - Some(WindowFunctionDefinition::BuiltInWindowFunction( - built_in_window_function::BuiltInWindowFunction::Lead - )) - ); + /* assert_eq!( + find_df_window_func("LAG"), + Some(WindowFunctionDefinition::BuiltInWindowFunction( + built_in_window_function::BuiltInWindowFunction::Lag + )) + ); + assert_eq!( + find_df_window_func("LEAD"), + Some(WindowFunctionDefinition::BuiltInWindowFunction( + built_in_window_function::BuiltInWindowFunction::Lead + )) + ); + */ assert_eq!(find_df_window_func("not_exist"), None) } diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index a80718147c3a..cd9139a971d8 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::ScalarValue; - use crate::{expr::WindowFunction, BuiltInWindowFunction, Expr, Literal}; /// Create an expression to represent the `rank` window function @@ -50,7 +48,7 @@ pub fn ntile(arg: Expr) -> Expr { Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::Ntile, vec![arg])) } -/// Create an expression to represent the `lag` window function +/*/// Create an expression to represent the `lag` window function pub fn lag( arg: Expr, shift_offset: Option, @@ -81,7 +79,7 @@ pub fn lead( vec![arg, shift_offset_lit, default_lit], )) } - +*/ /// Create an expression to represent the `nth_value` window function pub fn nth_value(arg: Expr, n: i64) -> Expr { Expr::WindowFunction(WindowFunction::new( diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 1656b7c3033a..0cb1f7d0de76 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -17,123 +17,197 @@ //! Defines physical expression for `lead` and `lag` that can evaluated //! at runtime during query execution -use crate::window::BuiltInWindowFunctionExpr; -use crate::PhysicalExpr; -use arrow::array::ArrayRef; -use arrow::datatypes::{DataType, Field}; -use arrow_array::Array; +use datafusion_common::arrow::array::ArrayRef; +use datafusion_common::arrow::datatypes::Field; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; -use datafusion_expr::PartitionEvaluator; +use datafusion_expr::{ + PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, Volatility, WindowUDFImpl, +}; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; use std::any::Any; use std::cmp::min; use std::collections::VecDeque; -use std::ops::{Neg, Range}; +use std::ops::Range; use std::sync::Arc; -/// window shift expression #[derive(Debug)] -pub struct WindowShift { - name: String, - /// Output data type - data_type: DataType, - shift_offset: i64, - expr: Arc, - default_value: ScalarValue, - ignore_nulls: bool, +enum WindowShiftKind { + Lag, + Lead, } -impl WindowShift { - /// Get shift_offset of window shift expression - pub fn get_shift_offset(&self) -> i64 { - self.shift_offset +impl WindowShiftKind { + fn name(&self) -> &'static str { + match self { + WindowShiftKind::Lag => "lag", + WindowShiftKind::Lead => "lead", + } } +} - /// Get the default_value for window shift expression. - pub fn get_default_value(&self) -> ScalarValue { - self.default_value.clone() - } +/// window shift expression +#[derive(Debug)] +pub struct WindowShift { + signature: Signature, + kind: WindowShiftKind, } -/// lead() window function -pub fn lead( - name: String, - data_type: DataType, - expr: Arc, - shift_offset: Option, - default_value: ScalarValue, - ignore_nulls: bool, -) -> WindowShift { - WindowShift { - name, - data_type, - shift_offset: shift_offset.map(|v| v.neg()).unwrap_or(-1), - expr, - default_value, - ignore_nulls, +impl WindowShift { + pub fn new() -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Any(1), + TypeSignature::Any(2), + TypeSignature::Any(3), + ], + Volatility::Immutable, + ), + kind: WindowShiftKind::Lag, + } } } -/// lag() window function -pub fn lag( - name: String, - data_type: DataType, - expr: Arc, - shift_offset: Option, - default_value: ScalarValue, - ignore_nulls: bool, -) -> WindowShift { - WindowShift { - name, - data_type, - shift_offset: shift_offset.unwrap_or(1), - expr, - default_value, - ignore_nulls, +impl Default for WindowShift { + fn default() -> Self { + Self::new() } } -impl BuiltInWindowFunctionExpr for WindowShift { - /// Return a reference to Any that can be used for downcasting +impl WindowUDFImpl for WindowShift { fn as_any(&self) -> &dyn Any { self } - fn field(&self) -> Result { - let nullable = true; - Ok(Field::new(&self.name, self.data_type.clone(), nullable)) + fn name(&self) -> &str { + self.kind.name() } - fn expressions(&self) -> Vec> { - vec![Arc::clone(&self.expr)] + fn signature(&self) -> &Signature { + &self.signature } - fn name(&self) -> &str { - &self.name + fn partition_evaluator(&self) -> Result> { + todo!() } - fn create_evaluator(&self) -> Result> { - Ok(Box::new(WindowShiftEvaluator { - shift_offset: self.shift_offset, - default_value: self.default_value.clone(), - ignore_nulls: self.ignore_nulls, - non_null_offsets: VecDeque::new(), - })) + fn field(&self, field_args: WindowUDFFieldArgs) -> Result { + Ok(Field::new( + field_args.name(), + field_args.get_input_type(0).unwrap(), + false, + )) } - fn reverse_expr(&self) -> Option> { - Some(Arc::new(Self { - name: self.name.clone(), - data_type: self.data_type.clone(), - shift_offset: -self.shift_offset, - expr: Arc::clone(&self.expr), - default_value: self.default_value.clone(), - ignore_nulls: self.ignore_nulls, - })) + fn reverse_expr(&self) -> ReversedUDWF { + todo!() } } +/// /// window shift expression +/// #[derive(Debug)] +/// pub struct WindowShift { +/// name: String, +/// /// Output data type +/// data_type: DataType, +/// shift_offset: i64, +/// expr: Arc, +/// default_value: ScalarValue, +/// ignore_nulls: bool, +/// } +/// +/// impl WindowShift { +/// /// Get shift_offset of window shift expression +/// pub fn get_shift_offset(&self) -> i64 { +/// self.shift_offset +/// } +/// +/// /// Get the default_value for window shift expression. +/// pub fn get_default_value(&self) -> ScalarValue { +/// self.default_value.clone() +/// } +/// } +/// +/// /// lead() window function +/// pub fn lead( +/// name: String, +/// data_type: DataType, +/// expr: Arc, +/// shift_offset: Option, +/// default_value: ScalarValue, +/// ignore_nulls: bool, +/// ) -> WindowShift { +/// WindowShift { +/// name, +/// data_type, +/// shift_offset: shift_offset.map(|v| v.neg()).unwrap_or(-1), +/// expr, +/// default_value, +/// ignore_nulls, +/// } +/// } +/// +/// /// lag() window function +/// pub fn lag( +/// name: String, +/// data_type: DataType, +/// expr: Arc, +/// shift_offset: Option, +/// default_value: ScalarValue, +/// ignore_nulls: bool, +/// ) -> WindowShift { +/// WindowShift { +/// name, +/// data_type, +/// shift_offset: shift_offset.unwrap_or(1), +/// expr, +/// default_value, +/// ignore_nulls, +/// } +/// } +/// +/// impl BuiltInWindowFunctionExpr for WindowShift { +/// /// Return a reference to Any that can be used for downcasting +/// fn as_any(&self) -> &dyn Any { +/// self +/// } +/// +/// fn field(&self) -> Result { +/// let nullable = true; +/// Ok(Field::new(&self.name, self.data_type.clone(), nullable)) +/// } +/// +/// fn expressions(&self) -> Vec> { +/// vec![Arc::clone(&self.expr)] +/// } +/// +/// fn name(&self) -> &str { +/// &self.name +/// } +/// +/// fn create_evaluator(&self) -> Result> { +/// Ok(Box::new(WindowShiftEvaluator { +/// shift_offset: self.shift_offset, +/// default_value: self.default_value.clone(), +/// ignore_nulls: self.ignore_nulls, +/// non_null_offsets: VecDeque::new(), +/// })) +/// } +/// +/// fn reverse_expr(&self) -> Option> { +/// Some(Arc::new(Self { +/// name: self.name.clone(), +/// data_type: self.data_type.clone(), +/// shift_offset: -self.shift_offset, +/// expr: Arc::clone(&self.expr), +/// default_value: self.default_value.clone(), +/// ignore_nulls: self.ignore_nulls, +/// })) +/// } +/// } + #[derive(Debug)] -pub(crate) struct WindowShiftEvaluator { +struct WindowShiftEvaluator { shift_offset: i64, default_value: ScalarValue, ignore_nulls: bool, @@ -205,7 +279,7 @@ fn shift_with_default_value( offset: i64, default_value: &ScalarValue, ) -> Result { - use arrow::compute::concat; + use datafusion_common::arrow::compute::concat; let value_len = array.len() as i64; if offset == 0 { @@ -399,142 +473,142 @@ impl PartitionEvaluator for WindowShiftEvaluator { } } -#[cfg(test)] -mod tests { - use super::*; - use crate::expressions::Column; - use arrow::{array::*, datatypes::*}; - use datafusion_common::cast::as_int32_array; - - fn test_i32_result(expr: WindowShift, expected: Int32Array) -> Result<()> { - let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); - let values = vec![arr]; - let schema = Schema::new(vec![Field::new("arr", DataType::Int32, false)]); - let batch = RecordBatch::try_new(Arc::new(schema), values.clone())?; - let values = expr.evaluate_args(&batch)?; - let result = expr - .create_evaluator()? - .evaluate_all(&values, batch.num_rows())?; - let result = as_int32_array(&result)?; - assert_eq!(expected, *result); - Ok(()) - } - - #[test] - fn lead_lag_get_range() -> Result<()> { - // LAG(2) - let lag_fn = WindowShiftEvaluator { - shift_offset: 2, - default_value: ScalarValue::Null, - ignore_nulls: false, - non_null_offsets: Default::default(), - }; - assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 4, end: 7 }); - assert_eq!(lag_fn.get_range(0, 10)?, Range { start: 0, end: 1 }); - - // LAG(2 ignore nulls) - let lag_fn = WindowShiftEvaluator { - shift_offset: 2, - default_value: ScalarValue::Null, - ignore_nulls: true, - // models data received [, , , NULL, , NULL, , ...] - non_null_offsets: vec![2, 2].into(), // [1, 1, 2, 2] actually, just last 2 is used - }; - assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 2, end: 7 }); - - // LEAD(2) - let lead_fn = WindowShiftEvaluator { - shift_offset: -2, - default_value: ScalarValue::Null, - ignore_nulls: false, - non_null_offsets: Default::default(), - }; - assert_eq!(lead_fn.get_range(6, 10)?, Range { start: 6, end: 8 }); - assert_eq!(lead_fn.get_range(9, 10)?, Range { start: 9, end: 10 }); - - // LEAD(2 ignore nulls) - let lead_fn = WindowShiftEvaluator { - shift_offset: -2, - default_value: ScalarValue::Null, - ignore_nulls: true, - // models data received [..., , NULL, , NULL, , ..] - non_null_offsets: vec![2, 2].into(), - }; - assert_eq!(lead_fn.get_range(4, 10)?, Range { start: 4, end: 9 }); - - Ok(()) - } - - #[test] - fn lead_lag_window_shift() -> Result<()> { - test_i32_result( - lead( - "lead".to_owned(), - DataType::Int32, - Arc::new(Column::new("c3", 0)), - None, - ScalarValue::Null.cast_to(&DataType::Int32)?, - false, - ), - [ - Some(-2), - Some(3), - Some(-4), - Some(5), - Some(-6), - Some(7), - Some(8), - None, - ] - .iter() - .collect::(), - )?; - - test_i32_result( - lag( - "lead".to_owned(), - DataType::Int32, - Arc::new(Column::new("c3", 0)), - None, - ScalarValue::Null.cast_to(&DataType::Int32)?, - false, - ), - [ - None, - Some(1), - Some(-2), - Some(3), - Some(-4), - Some(5), - Some(-6), - Some(7), - ] - .iter() - .collect::(), - )?; - - test_i32_result( - lag( - "lead".to_owned(), - DataType::Int32, - Arc::new(Column::new("c3", 0)), - None, - ScalarValue::Int32(Some(100)), - false, - ), - [ - Some(100), - Some(1), - Some(-2), - Some(3), - Some(-4), - Some(5), - Some(-6), - Some(7), - ] - .iter() - .collect::(), - )?; - Ok(()) - } -} +// #[cfg(test)] +// mod tests { +// use super::*; +// use crate::expressions::Column; +// use arrow::{array::*, datatypes::*}; +// use datafusion_common::cast::as_int32_array; +// +// fn test_i32_result(expr: WindowShift, expected: Int32Array) -> Result<()> { +// let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); +// let values = vec![arr]; +// let schema = Schema::new(vec![Field::new("arr", DataType::Int32, false)]); +// let batch = RecordBatch::try_new(Arc::new(schema), values.clone())?; +// let values = expr.evaluate_args(&batch)?; +// let result = expr +// .create_evaluator()? +// .evaluate_all(&values, batch.num_rows())?; +// let result = as_int32_array(&result)?; +// assert_eq!(expected, *result); +// Ok(()) +// } +// +// #[test] +// fn lead_lag_get_range() -> Result<()> { +// // LAG(2) +// let lag_fn = WindowShiftEvaluator { +// shift_offset: 2, +// default_value: ScalarValue::Null, +// ignore_nulls: false, +// non_null_offsets: Default::default(), +// }; +// assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 4, end: 7 }); +// assert_eq!(lag_fn.get_range(0, 10)?, Range { start: 0, end: 1 }); +// +// // LAG(2 ignore nulls) +// let lag_fn = WindowShiftEvaluator { +// shift_offset: 2, +// default_value: ScalarValue::Null, +// ignore_nulls: true, +// // models data received [, , , NULL, , NULL, , ...] +// non_null_offsets: vec![2, 2].into(), // [1, 1, 2, 2] actually, just last 2 is used +// }; +// assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 2, end: 7 }); +// +// // LEAD(2) +// let lead_fn = WindowShiftEvaluator { +// shift_offset: -2, +// default_value: ScalarValue::Null, +// ignore_nulls: false, +// non_null_offsets: Default::default(), +// }; +// assert_eq!(lead_fn.get_range(6, 10)?, Range { start: 6, end: 8 }); +// assert_eq!(lead_fn.get_range(9, 10)?, Range { start: 9, end: 10 }); +// +// // LEAD(2 ignore nulls) +// let lead_fn = WindowShiftEvaluator { +// shift_offset: -2, +// default_value: ScalarValue::Null, +// ignore_nulls: true, +// // models data received [..., , NULL, , NULL, , ..] +// non_null_offsets: vec![2, 2].into(), +// }; +// assert_eq!(lead_fn.get_range(4, 10)?, Range { start: 4, end: 9 }); +// +// Ok(()) +// } +// +// #[test] +// fn lead_lag_window_shift() -> Result<()> { +// test_i32_result( +// lead( +// "lead".to_owned(), +// DataType::Int32, +// Arc::new(Column::new("c3", 0)), +// None, +// ScalarValue::Null.cast_to(&DataType::Int32)?, +// false, +// ), +// [ +// Some(-2), +// Some(3), +// Some(-4), +// Some(5), +// Some(-6), +// Some(7), +// Some(8), +// None, +// ] +// .iter() +// .collect::(), +// )?; +// +// test_i32_result( +// lag( +// "lead".to_owned(), +// DataType::Int32, +// Arc::new(Column::new("c3", 0)), +// None, +// ScalarValue::Null.cast_to(&DataType::Int32)?, +// false, +// ), +// [ +// None, +// Some(1), +// Some(-2), +// Some(3), +// Some(-4), +// Some(5), +// Some(-6), +// Some(7), +// ] +// .iter() +// .collect::(), +// )?; +// +// test_i32_result( +// lag( +// "lead".to_owned(), +// DataType::Int32, +// Arc::new(Column::new("c3", 0)), +// None, +// ScalarValue::Int32(Some(100)), +// false, +// ), +// [ +// Some(100), +// Some(1), +// Some(-2), +// Some(3), +// Some(-4), +// Some(5), +// Some(-6), +// Some(7), +// ] +// .iter() +// .collect::(), +// )?; +// Ok(()) +// } +// } diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs index 790a500f1f3f..9d6e2997c7b8 100644 --- a/datafusion/functions-window/src/lib.rs +++ b/datafusion/functions-window/src/lib.rs @@ -29,6 +29,7 @@ use log::debug; use datafusion_expr::registry::FunctionRegistry; use datafusion_expr::WindowUDF; +pub mod lead_lag; pub mod row_number; /// Fluent-style API for creating `Expr`s diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 177fd799ae79..f428db4254fb 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -36,7 +36,6 @@ mod unknown_column; /// Module with some convenient methods used in expression building pub use crate::aggregate::stats::StatsType; pub use crate::window::cume_dist::{cume_dist, CumeDist}; -pub use crate::window::lead_lag::{lag, lead, WindowShift}; pub use crate::window::nth_value::NthValue; pub use crate::window::ntile::Ntile; pub use crate::window::rank::{dense_rank, percent_rank, rank, Rank, RankType}; diff --git a/datafusion/physical-expr/src/window/mod.rs b/datafusion/physical-expr/src/window/mod.rs index 2aeb05333102..1ea7631d782e 100644 --- a/datafusion/physical-expr/src/window/mod.rs +++ b/datafusion/physical-expr/src/window/mod.rs @@ -19,7 +19,6 @@ mod aggregate; mod built_in; mod built_in_window_function_expr; pub(crate) mod cume_dist; -pub(crate) mod lead_lag; pub(crate) mod nth_value; pub(crate) mod ntile; pub(crate) mod rank; diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 6aafaad0ad77..e4106fddcece 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::{ expressions::{ - cume_dist, dense_rank, lag, lead, percent_rank, rank, Literal, NthValue, Ntile, + cume_dist, dense_rank, percent_rank, rank, Literal, NthValue, Ntile, PhysicalSortExpr, }, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, @@ -256,38 +256,39 @@ fn create_built_in_window_expr( Arc::new(Ntile::new(name, n as u64, out_data_type)) } } - BuiltInWindowFunction::Lag => { - let arg = Arc::clone(&args[0]); - let shift_offset = get_scalar_value_from_args(args, 1)? - .map(get_signed_integer) - .map_or(Ok(None), |v| v.map(Some))?; - let default_value = - get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; - Arc::new(lag( - name, - out_data_type.clone(), - arg, - shift_offset, - default_value, - ignore_nulls, - )) - } - BuiltInWindowFunction::Lead => { - let arg = Arc::clone(&args[0]); - let shift_offset = get_scalar_value_from_args(args, 1)? - .map(get_signed_integer) - .map_or(Ok(None), |v| v.map(Some))?; - let default_value = - get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; - Arc::new(lead( - name, - out_data_type.clone(), - arg, - shift_offset, - default_value, - ignore_nulls, - )) - } + /* BuiltInWindowFunction::Lag => { + let arg = Arc::clone(&args[0]); + let shift_offset = get_scalar_value_from_args(args, 1)? + .map(get_signed_integer) + .map_or(Ok(None), |v| v.map(Some))?; + let default_value = + get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; + Arc::new(lag( + name, + out_data_type.clone(), + arg, + shift_offset, + default_value, + ignore_nulls, + )) + } + BuiltInWindowFunction::Lead => { + let arg = Arc::clone(&args[0]); + let shift_offset = get_scalar_value_from_args(args, 1)? + .map(get_signed_integer) + .map_or(Ok(None), |v| v.map(Some))?; + let default_value = + get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; + Arc::new(lead( + name, + out_data_type.clone(), + arg, + shift_offset, + default_value, + ignore_nulls, + )) + } + */ BuiltInWindowFunction::NthValue => { let arg = Arc::clone(&args[0]); let n = get_signed_integer( diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index e36c91e7d004..25addb4dec4c 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -508,13 +508,13 @@ message ScalarUDFExprNode { enum BuiltInWindowFunction { UNSPECIFIED = 0; // https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum // ROW_NUMBER = 0; - RANK = 1; - DENSE_RANK = 2; - PERCENT_RANK = 3; + // RANK = 1; + // DENSE_RANK = 2; + // PERCENT_RANK = 3; CUME_DIST = 4; NTILE = 5; - LAG = 6; - LEAD = 7; + // LAG = 6; + // LEAD = 7; FIRST_VALUE = 8; LAST_VALUE = 9; NTH_VALUE = 10; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 004798b3ba93..56d2aaff3859 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -1667,8 +1667,6 @@ impl serde::Serialize for BuiltInWindowFunction { Self::PercentRank => "PERCENT_RANK", Self::CumeDist => "CUME_DIST", Self::Ntile => "NTILE", - Self::Lag => "LAG", - Self::Lead => "LEAD", Self::FirstValue => "FIRST_VALUE", Self::LastValue => "LAST_VALUE", Self::NthValue => "NTH_VALUE", @@ -1689,8 +1687,6 @@ impl<'de> serde::Deserialize<'de> for BuiltInWindowFunction { "PERCENT_RANK", "CUME_DIST", "NTILE", - "LAG", - "LEAD", "FIRST_VALUE", "LAST_VALUE", "NTH_VALUE", @@ -1740,8 +1736,6 @@ impl<'de> serde::Deserialize<'de> for BuiltInWindowFunction { "PERCENT_RANK" => Ok(BuiltInWindowFunction::PercentRank), "CUME_DIST" => Ok(BuiltInWindowFunction::CumeDist), "NTILE" => Ok(BuiltInWindowFunction::Ntile), - "LAG" => Ok(BuiltInWindowFunction::Lag), - "LEAD" => Ok(BuiltInWindowFunction::Lead), "FIRST_VALUE" => Ok(BuiltInWindowFunction::FirstValue), "LAST_VALUE" => Ok(BuiltInWindowFunction::LastValue), "NTH_VALUE" => Ok(BuiltInWindowFunction::NthValue), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 436347330d92..02397baf47f2 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -1824,8 +1824,8 @@ pub enum BuiltInWindowFunction { PercentRank = 3, CumeDist = 4, Ntile = 5, - Lag = 6, - Lead = 7, + /// LAG = 6; + /// LEAD = 7; FirstValue = 8, LastValue = 9, NthValue = 10, @@ -1843,8 +1843,6 @@ impl BuiltInWindowFunction { Self::PercentRank => "PERCENT_RANK", Self::CumeDist => "CUME_DIST", Self::Ntile => "NTILE", - Self::Lag => "LAG", - Self::Lead => "LEAD", Self::FirstValue => "FIRST_VALUE", Self::LastValue => "LAST_VALUE", Self::NthValue => "NTH_VALUE", @@ -1859,8 +1857,6 @@ impl BuiltInWindowFunction { "PERCENT_RANK" => Some(Self::PercentRank), "CUME_DIST" => Some(Self::CumeDist), "NTILE" => Some(Self::Ntile), - "LAG" => Some(Self::Lag), - "LEAD" => Some(Self::Lead), "FIRST_VALUE" => Some(Self::FirstValue), "LAST_VALUE" => Some(Self::LastValue), "NTH_VALUE" => Some(Self::NthValue), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 893255ccc8ce..e154a516fba7 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -145,8 +145,6 @@ impl From for BuiltInWindowFunction { protobuf::BuiltInWindowFunction::Rank => Self::Rank, protobuf::BuiltInWindowFunction::PercentRank => Self::PercentRank, protobuf::BuiltInWindowFunction::DenseRank => Self::DenseRank, - protobuf::BuiltInWindowFunction::Lag => Self::Lag, - protobuf::BuiltInWindowFunction::Lead => Self::Lead, protobuf::BuiltInWindowFunction::FirstValue => Self::FirstValue, protobuf::BuiltInWindowFunction::CumeDist => Self::CumeDist, protobuf::BuiltInWindowFunction::Ntile => Self::Ntile, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 63d1a007c1e5..c68504ffa09b 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -121,8 +121,6 @@ impl From<&BuiltInWindowFunction> for protobuf::BuiltInWindowFunction { BuiltInWindowFunction::CumeDist => Self::CumeDist, BuiltInWindowFunction::PercentRank => Self::PercentRank, BuiltInWindowFunction::Rank => Self::Rank, - BuiltInWindowFunction::Lag => Self::Lag, - BuiltInWindowFunction::Lead => Self::Lead, BuiltInWindowFunction::DenseRank => Self::DenseRank, } } diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 6f6065a1c284..0813ad134e3e 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -25,7 +25,7 @@ use datafusion::physical_expr::{PhysicalSortExpr, ScalarFunctionExpr}; use datafusion::physical_plan::expressions::{ BinaryExpr, CaseExpr, CastExpr, Column, CumeDist, InListExpr, IsNotNullExpr, IsNullExpr, Literal, NegativeExpr, NotExpr, NthValue, Ntile, Rank, RankType, - TryCastExpr, WindowShift, + TryCastExpr, }; use datafusion::physical_plan::udaf::AggregateFunctionExpr; use datafusion::physical_plan::windows::{BuiltInWindowExpr, PlainAggregateWindowExpr}; @@ -126,26 +126,29 @@ pub fn serialize_physical_window_expr( )))), ); protobuf::BuiltInWindowFunction::Ntile - } else if let Some(window_shift_expr) = - built_in_fn_expr.downcast_ref::() - { - args.insert( - 1, - Arc::new(Literal::new(datafusion_common::ScalarValue::Int64(Some( - window_shift_expr.get_shift_offset(), - )))), - ); - args.insert( - 2, - Arc::new(Literal::new(window_shift_expr.get_default_value())), - ); + } + /* else if let Some(window_shift_expr) = + built_in_fn_expr.downcast_ref::() + { + args.insert( + 1, + Arc::new(Literal::new(datafusion_common::ScalarValue::Int64(Some( + window_shift_expr.get_shift_offset(), + )))), + ); + args.insert( + 2, + Arc::new(Literal::new(window_shift_expr.get_default_value())), + ); - if window_shift_expr.get_shift_offset() >= 0 { - protobuf::BuiltInWindowFunction::Lag - } else { - protobuf::BuiltInWindowFunction::Lead - } - } else if let Some(nth_value_expr) = built_in_fn_expr.downcast_ref::() { + if window_shift_expr.get_shift_offset() >= 0 { + protobuf::BuiltInWindowFunction::Lag + } else { + protobuf::BuiltInWindowFunction::Lead + } + } + */ + else if let Some(nth_value_expr) = built_in_fn_expr.downcast_ref::() { match nth_value_expr.get_kind() { NthValueKind::First => protobuf::BuiltInWindowFunction::FirstValue, NthValueKind::Last => protobuf::BuiltInWindowFunction::LastValue, From d2ebd3acca2a50a6610cb1d53a61b1356f4ba2ed Mon Sep 17 00:00:00 2001 From: jcsherin Date: Wed, 2 Oct 2024 23:15:47 +0530 Subject: [PATCH 03/46] Adds `PartitionEvaluatorArgs` --- datafusion-examples/examples/advanced_udwf.rs | 5 +- .../examples/simplify_udwf_expression.rs | 5 +- .../user_defined_window_functions.rs | 6 ++- datafusion/expr/src/expr_fn.rs | 10 ++-- datafusion/expr/src/udwf.rs | 36 ++++++++++---- datafusion/functions-window-common/Cargo.toml | 1 + datafusion/functions-window-common/src/lib.rs | 1 + .../functions-window-common/src/partition.rs | 47 +++++++++++++++++++ datafusion/functions-window/Cargo.toml | 1 + datafusion/functions-window/src/lead_lag.rs | 22 ++++++--- datafusion/functions-window/src/row_number.rs | 7 ++- .../simplify_expressions/expr_simplifier.rs | 11 +++-- datafusion/physical-plan/src/windows/mod.rs | 9 +++- .../tests/cases/roundtrip_logical_plan.rs | 6 ++- 14 files changed, 137 insertions(+), 30 deletions(-) create mode 100644 datafusion/functions-window-common/src/partition.rs diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index fd1b84070cf6..03d435abc535 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -74,7 +74,10 @@ impl WindowUDFImpl for SmoothItUdf { /// Create a `PartitionEvaluator` to evaluate this function on a new /// partition. - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { Ok(Box::new(MyPartitionEvaluator::new())) } diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 1ff629eef196..12df9d1184fd 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -60,7 +60,10 @@ impl WindowUDFImpl for SimplifySmoothItUdf { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { todo!() } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index d96bb23953ae..b6e3569d6eee 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -36,6 +36,7 @@ use datafusion_expr::{ PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// A query with a window function evaluated over the entire partition const UNBOUNDED_WINDOW_QUERY: &str = "SELECT x, y, val, \ @@ -552,7 +553,10 @@ impl OddCounter { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { Ok(Box::new(OddCounter::new(Arc::clone(&self.test_state)))) } diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 2975e36488dc..2433314d7637 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -27,8 +27,8 @@ use crate::function::{ }; use crate::{ conditional_expressions::CaseBuilder, expr::Sort, logical_plan::Subquery, - AggregateUDF, Expr, LogicalPlan, Operator, ScalarFunctionImplementation, ScalarUDF, - Signature, Volatility, + AggregateUDF, Expr, LogicalPlan, Operator, PartitionEvaluator, + ScalarFunctionImplementation, ScalarUDF, Signature, Volatility, }; use crate::{ AggregateUDFImpl, ColumnarValue, ScalarUDFImpl, WindowFrame, WindowUDF, WindowUDFImpl, @@ -39,6 +39,7 @@ use arrow::compute::kernels::cast_utils::{ use arrow::datatypes::{DataType, Field}; use datafusion_common::{plan_err, Column, Result, ScalarValue, TableReference}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use sqlparser::ast::NullTreatment; use std::any::Any; use std::fmt::Debug; @@ -658,7 +659,10 @@ impl WindowUDFImpl for SimpleWindowUDF { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { (self.partition_evaluator_factory)() } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 678a0b62cd9a..d5cfc81673c6 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -28,13 +28,13 @@ use std::{ use arrow::datatypes::{DataType, Field}; -use datafusion_common::{not_impl_err, Result}; -use datafusion_functions_window_common::field::WindowUDFFieldArgs; - use crate::expr::WindowFunction; use crate::{ function::WindowFunctionSimplification, Expr, PartitionEvaluator, Signature, }; +use datafusion_common::{not_impl_err, Result}; +use datafusion_functions_window_common::field::WindowUDFFieldArgs; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// Logical representation of a user-defined window function (UDWF) /// A UDWF is different from a UDF in that it is stateful across batches. @@ -149,8 +149,11 @@ impl WindowUDF { } /// Return a `PartitionEvaluator` for evaluating this window function - pub fn partition_evaluator_factory(&self) -> Result> { - self.inner.partition_evaluator() + pub fn partition_evaluator_factory( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { + self.inner.partition_evaluator(partition_evaluator_args) } /// Returns the field of the final result of evaluating this window function. @@ -265,7 +268,10 @@ pub trait WindowUDFImpl: Debug + Send + Sync { fn signature(&self) -> &Signature; /// Invoke the function, returning the [`PartitionEvaluator`] instance - fn partition_evaluator(&self) -> Result>; + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result>; /// Returns any aliases (alternate names) for this function. /// @@ -427,8 +433,11 @@ impl WindowUDFImpl for AliasedWindowUDFImpl { self.inner.signature() } - fn partition_evaluator(&self) -> Result> { - self.inner.partition_evaluator() + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { + self.inner.partition_evaluator(partition_evaluator_args) } fn aliases(&self) -> &[String] { @@ -474,6 +483,7 @@ mod test { use datafusion_common::Result; use datafusion_expr_common::signature::{Signature, Volatility}; use datafusion_functions_window_common::field::WindowUDFFieldArgs; + use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use std::any::Any; use std::cmp::Ordering; @@ -505,7 +515,10 @@ mod test { fn signature(&self) -> &Signature { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { unimplemented!() } fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { @@ -541,7 +554,10 @@ mod test { fn signature(&self) -> &Signature { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { unimplemented!() } fn field(&self, _field_args: WindowUDFFieldArgs) -> Result { diff --git a/datafusion/functions-window-common/Cargo.toml b/datafusion/functions-window-common/Cargo.toml index 98b6f8c6dba5..b5df212b7d2a 100644 --- a/datafusion/functions-window-common/Cargo.toml +++ b/datafusion/functions-window-common/Cargo.toml @@ -39,3 +39,4 @@ path = "src/lib.rs" [dependencies] datafusion-common = { workspace = true } +datafusion-physical-expr-common = { workspace = true } diff --git a/datafusion/functions-window-common/src/lib.rs b/datafusion/functions-window-common/src/lib.rs index 2e4bcbbc83b9..53f9eb1c9ac6 100644 --- a/datafusion/functions-window-common/src/lib.rs +++ b/datafusion/functions-window-common/src/lib.rs @@ -19,3 +19,4 @@ //! //! [DataFusion]: pub mod field; +pub mod partition; diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs new file mode 100644 index 000000000000..cfaec21269ff --- /dev/null +++ b/datafusion/functions-window-common/src/partition.rs @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::arrow::datatypes::DataType; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use std::sync::Arc; + +pub struct PartitionEvaluatorArgs<'a> { + input_exprs: &'a [Arc], + input_types: &'a [DataType], + is_reversed: bool, + ignore_nulls: bool, +} + +impl<'a> PartitionEvaluatorArgs<'a> { + pub fn new( + input_exprs: &'a [Arc], + input_types: &'a [DataType], + is_reversed: bool, + ignore_nulls: bool, + ) -> Self { + Self { + input_exprs, + input_types, + is_reversed, + ignore_nulls, + } + } + + pub fn input_expr_at(&self, index: usize) -> Option<&Arc> { + self.input_exprs.get(index) + } +} diff --git a/datafusion/functions-window/Cargo.toml b/datafusion/functions-window/Cargo.toml index 8dcec6bc964b..f9a1961e722d 100644 --- a/datafusion/functions-window/Cargo.toml +++ b/datafusion/functions-window/Cargo.toml @@ -41,6 +41,7 @@ path = "src/lib.rs" datafusion-common = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions-window-common = { workspace = true } +datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } log = { workspace = true } diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 0cb1f7d0de76..b028a7c5547a 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -24,6 +24,9 @@ use datafusion_expr::{ PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, Volatility, WindowUDFImpl, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion_physical_expr::expressions::Literal; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::cmp::min; use std::collections::VecDeque; @@ -53,7 +56,7 @@ pub struct WindowShift { } impl WindowShift { - pub fn new() -> Self { + fn new(kind: WindowShiftKind) -> Self { Self { signature: Signature::one_of( vec![ @@ -63,14 +66,16 @@ impl WindowShift { ], Volatility::Immutable, ), - kind: WindowShiftKind::Lag, + kind, } } -} -impl Default for WindowShift { - fn default() -> Self { - Self::new() + pub fn lag() -> Self { + Self::new(WindowShiftKind::Lag) + } + + pub fn lead() -> Self { + Self::new(WindowShiftKind::Lead) } } @@ -87,7 +92,10 @@ impl WindowUDFImpl for WindowShift { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { todo!() } diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 7f348bf9d2a0..1d084f9fc8e0 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -30,6 +30,7 @@ use datafusion_common::{Result, ScalarValue}; use datafusion_expr::expr::WindowFunction; use datafusion_expr::{Expr, PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; use datafusion_functions_window_common::field; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use field::WindowUDFFieldArgs; /// Create a [`WindowFunction`](Expr::WindowFunction) expression for @@ -87,7 +88,10 @@ impl WindowUDFImpl for RowNumber { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + _partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { Ok(Box::::default()) } @@ -139,6 +143,7 @@ impl PartitionEvaluator for NumRowsEvaluator { } } +#[ignore] #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index a78a54a57123..7826be4e212a 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1789,6 +1789,8 @@ fn inlist_except(mut l1: InList, l2: &InList) -> Result { #[cfg(test)] mod tests { + use crate::simplify_expressions::SimplifyContext; + use crate::test::test_table_scan_with_name; use datafusion_common::{assert_contains, DFSchemaRef, ToDFSchema}; use datafusion_expr::{ function::{ @@ -1799,15 +1801,13 @@ mod tests { *, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; + use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use std::{ collections::HashMap, ops::{BitAnd, BitOr, BitXor}, sync::Arc, }; - use crate::simplify_expressions::SimplifyContext; - use crate::test::test_table_scan_with_name; - use super::*; // ------------------------------ @@ -3910,7 +3910,10 @@ mod tests { } } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { unimplemented!("not needed for tests") } diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index e4106fddcece..647e6616fe3f 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -52,6 +52,7 @@ mod window_agg_exec; pub use bounded_window_agg_exec::BoundedWindowAggExec; use datafusion_functions_window_common::field::WindowUDFFieldArgs; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_physical_expr::expressions::Column; pub use datafusion_physical_expr::window::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowExpr, @@ -386,7 +387,13 @@ impl BuiltInWindowFunctionExpr for WindowUDFExpr { } fn create_evaluator(&self) -> Result> { - self.fun.partition_evaluator_factory() + self.fun + .partition_evaluator_factory(PartitionEvaluatorArgs::new( + &self.args, + &self.input_types, + self.is_reversed, + self.ignore_nulls, + )) } fn name(&self) -> &str { diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 8a94f905812c..8f7c4a0ddfa7 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -75,6 +75,7 @@ use datafusion_functions_aggregate::expr_fn::{ }; use datafusion_functions_aggregate::string_agg::string_agg; use datafusion_functions_window_common::field::WindowUDFFieldArgs; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_proto::bytes::{ logical_plan_from_bytes, logical_plan_from_bytes_with_extension_codec, logical_plan_to_bytes, logical_plan_to_bytes_with_extension_codec, @@ -2455,7 +2456,10 @@ fn roundtrip_window() { &self.signature } - fn partition_evaluator(&self) -> Result> { + fn partition_evaluator( + &self, + partition_evaluator_args: PartitionEvaluatorArgs, + ) -> Result> { make_partition_evaluator() } From e5e5ab9d5df6adcbf0107f6c2d37667616e27dc9 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 16:50:10 +0530 Subject: [PATCH 04/46] Extracts `shift_offset` from input expressions --- datafusion/functions-window/src/lead_lag.rs | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index b028a7c5547a..b36d60f252df 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -18,6 +18,7 @@ //! Defines physical expression for `lead` and `lag` that can evaluated //! at runtime during query execution use datafusion_common::arrow::array::ArrayRef; +use datafusion_common::arrow::datatypes::DataType; use datafusion_common::arrow::datatypes::Field; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::{ @@ -96,6 +97,9 @@ impl WindowUDFImpl for WindowShift { &self, partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { + let shift_offset = try_get_literal(&partition_evaluator_args, 1) + .and_then(try_get_signed_integer); + todo!() } @@ -112,6 +116,29 @@ impl WindowUDFImpl for WindowShift { } } +fn try_get_literal<'a>( + partition_evaluator_args: &'a PartitionEvaluatorArgs, + index: usize, +) -> Result<&'a ScalarValue> { + partition_evaluator_args + .input_expr_at(index) + .and_then(|expr| expr.as_any().downcast_ref::()) + .ok_or_else(|| DataFusionError::NotImplemented( + format!("There is only support for Literal types at field idx: {index} in Window Function") + )).map(|lit| lit.value()) +} + +fn try_get_signed_integer(value: &ScalarValue) -> Result { + if value.data_type().is_integer() { + value.cast_to(&DataType::Int64)?.try_into() + } else { + Err(DataFusionError::Execution(format!( + "Expected an integer value, but got {:?}", + value.data_type() + ))) + } +} + /// /// window shift expression /// #[derive(Debug)] /// pub struct WindowShift { From 303b74b5e3bea16bcfd940812fc64268a5160451 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 17:05:29 +0530 Subject: [PATCH 05/46] Computes shift offset --- .../functions-window-common/src/partition.rs | 4 +++ datafusion/functions-window/src/lead_lag.rs | 30 +++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs index cfaec21269ff..e1e67c0055f1 100644 --- a/datafusion/functions-window-common/src/partition.rs +++ b/datafusion/functions-window-common/src/partition.rs @@ -44,4 +44,8 @@ impl<'a> PartitionEvaluatorArgs<'a> { pub fn input_expr_at(&self, index: usize) -> Option<&Arc> { self.input_exprs.get(index) } + + pub fn is_reversed(&self) -> bool { + self.is_reversed + } } diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index b36d60f252df..3a4a36aa8947 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -31,7 +31,7 @@ use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::cmp::min; use std::collections::VecDeque; -use std::ops::Range; +use std::ops::{Neg, Range}; use std::sync::Arc; #[derive(Debug)] @@ -47,6 +47,13 @@ impl WindowShiftKind { WindowShiftKind::Lead => "lead", } } + + fn shift_offset(&self, value: Option) -> i64 { + match self { + WindowShiftKind::Lag => value.unwrap_or(1), + WindowShiftKind::Lead => value.map(|v| v.neg()).unwrap_or(-1), + } + } } /// window shift expression @@ -97,8 +104,7 @@ impl WindowUDFImpl for WindowShift { &self, partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { - let shift_offset = try_get_literal(&partition_evaluator_args, 1) - .and_then(try_get_signed_integer); + let shift_offset = try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; todo!() } @@ -139,6 +145,24 @@ fn try_get_signed_integer(value: &ScalarValue) -> Result { } } +fn try_get_shift_offset( + kind: &WindowShiftKind, + partition_evaluator_args: &PartitionEvaluatorArgs, + index: usize, +) -> Result { + try_get_literal(partition_evaluator_args, index) + .and_then(try_get_signed_integer) + .map_or(Ok(None), |n| Ok(Some(n))) + .map(|n| kind.shift_offset(n)) + .map(|offset| { + if partition_evaluator_args.is_reversed() { + offset.neg() + } else { + offset + } + }) +} + /// /// window shift expression /// #[derive(Debug)] /// pub struct WindowShift { From 689ed3a14efbb7a3410dd2ac4a7bd6630213d2e6 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 19:17:58 +0530 Subject: [PATCH 06/46] Get default value from input expression --- .../functions-window-common/src/partition.rs | 4 ++++ datafusion/functions-window/src/lead_lag.rs | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs index e1e67c0055f1..b80c7695f21e 100644 --- a/datafusion/functions-window-common/src/partition.rs +++ b/datafusion/functions-window-common/src/partition.rs @@ -45,6 +45,10 @@ impl<'a> PartitionEvaluatorArgs<'a> { self.input_exprs.get(index) } + pub fn input_types_at(&self, index: usize) -> Option<&DataType> { + self.input_types.get(index) + } + pub fn is_reversed(&self) -> bool { self.is_reversed } diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 3a4a36aa8947..373fb3cdda72 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -104,8 +104,9 @@ impl WindowUDFImpl for WindowShift { &self, partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { - let shift_offset = try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; - + let shift_offset = + try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; + let default_value = try_get_default_value(&partition_evaluator_args, 2)?; todo!() } @@ -163,6 +164,19 @@ fn try_get_shift_offset( }) } +fn try_get_default_value( + partition_evaluator_args: &PartitionEvaluatorArgs, + index: usize, +) -> Result { + let return_type = partition_evaluator_args.input_types_at(0).unwrap(); + match try_get_literal(partition_evaluator_args, index) { + Ok(default_value) if !default_value.is_null() => { + default_value.cast_to(return_type) + } + _ => ScalarValue::try_from(return_type), + } +} + /// /// window shift expression /// #[derive(Debug)] /// pub struct WindowShift { From 0774a5845c35354d895ce740c3cb248c0e83bce9 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 19:20:37 +0530 Subject: [PATCH 07/46] Implements `partition_evaluator` --- datafusion/functions-window-common/src/partition.rs | 4 ++++ datafusion/functions-window/src/lead_lag.rs | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs index b80c7695f21e..715c16b87618 100644 --- a/datafusion/functions-window-common/src/partition.rs +++ b/datafusion/functions-window-common/src/partition.rs @@ -52,4 +52,8 @@ impl<'a> PartitionEvaluatorArgs<'a> { pub fn is_reversed(&self) -> bool { self.is_reversed } + + pub fn ignore_nulls(&self) -> bool { + self.ignore_nulls + } } diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 373fb3cdda72..4357c3872f55 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -107,7 +107,13 @@ impl WindowUDFImpl for WindowShift { let shift_offset = try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; let default_value = try_get_default_value(&partition_evaluator_args, 2)?; - todo!() + + Ok(Box::new(WindowShiftEvaluator { + shift_offset, + default_value, + ignore_nulls: partition_evaluator_args.ignore_nulls(), + non_null_offsets: VecDeque::new(), + })) } fn field(&self, field_args: WindowUDFFieldArgs) -> Result { From 009a1be0cb5fedf3a4a87b2f8a13587ea5e8c1bc Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 19:22:26 +0530 Subject: [PATCH 08/46] Fixes compiler warnings --- datafusion/expr/src/expr_fn.rs | 2 +- datafusion/functions-window/src/lead_lag.rs | 1 - datafusion/physical-plan/src/windows/mod.rs | 11 ----------- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 2433314d7637..ea053b9fb195 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -661,7 +661,7 @@ impl WindowUDFImpl for SimpleWindowUDF { fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { (self.partition_evaluator_factory)() } diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 4357c3872f55..0ff7be0fb6f7 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -27,7 +27,6 @@ use datafusion_expr::{ use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; use datafusion_physical_expr::expressions::Literal; -use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use std::any::Any; use std::cmp::min; use std::collections::VecDeque; diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 647e6616fe3f..937342e64fc6 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -209,17 +209,6 @@ fn get_unsigned_integer(value: ScalarValue) -> Result { value.cast_to(&DataType::UInt64)?.try_into() } -fn get_casted_value( - default_value: Option, - dtype: &DataType, -) -> Result { - match default_value { - Some(v) if !v.data_type().is_null() => v.cast_to(dtype), - // If None or Null datatype - _ => ScalarValue::try_from(dtype), - } -} - fn create_built_in_window_expr( fun: &BuiltInWindowFunction, args: &[Arc], From 8dc161fb220306e187a79a55f5611caad555d811 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 19:25:40 +0530 Subject: [PATCH 09/46] Comments out failing tests --- datafusion/functions-window/src/row_number.rs | 83 +++++++++---------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/datafusion/functions-window/src/row_number.rs b/datafusion/functions-window/src/row_number.rs index 1d084f9fc8e0..1246dc71cb78 100644 --- a/datafusion/functions-window/src/row_number.rs +++ b/datafusion/functions-window/src/row_number.rs @@ -143,45 +143,44 @@ impl PartitionEvaluator for NumRowsEvaluator { } } -#[ignore] -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use datafusion_common::arrow::array::{Array, BooleanArray}; - use datafusion_common::cast::as_uint64_array; - - use super::*; - - #[test] - fn row_number_all_null() -> Result<()> { - let values: ArrayRef = Arc::new(BooleanArray::from(vec![ - None, None, None, None, None, None, None, None, - ])); - let num_rows = values.len(); - - let actual = RowNumber::default() - .partition_evaluator()? - .evaluate_all(&[values], num_rows)?; - let actual = as_uint64_array(&actual)?; - - assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *actual.values()); - Ok(()) - } - - #[test] - fn row_number_all_values() -> Result<()> { - let values: ArrayRef = Arc::new(BooleanArray::from(vec![ - true, false, true, false, false, true, false, true, - ])); - let num_rows = values.len(); - - let actual = RowNumber::default() - .partition_evaluator()? - .evaluate_all(&[values], num_rows)?; - let actual = as_uint64_array(&actual)?; - - assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *actual.values()); - Ok(()) - } -} +// #[cfg(test)] +// mod tests { +// use std::sync::Arc; +// +// use datafusion_common::arrow::array::{Array, BooleanArray}; +// use datafusion_common::cast::as_uint64_array; +// +// use super::*; +// +// #[test] +// fn row_number_all_null() -> Result<()> { +// let values: ArrayRef = Arc::new(BooleanArray::from(vec![ +// None, None, None, None, None, None, None, None, +// ])); +// let num_rows = values.len(); +// +// let actual = RowNumber::default() +// .partition_evaluator()? +// .evaluate_all(&[values], num_rows)?; +// let actual = as_uint64_array(&actual)?; +// +// assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *actual.values()); +// Ok(()) +// } +// +// #[test] +// fn row_number_all_values() -> Result<()> { +// let values: ArrayRef = Arc::new(BooleanArray::from(vec![ +// true, false, true, false, false, true, false, true, +// ])); +// let num_rows = values.len(); +// +// let actual = RowNumber::default() +// .partition_evaluator()? +// .evaluate_all(&[values], num_rows)?; +// let actual = as_uint64_array(&actual)?; +// +// assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8], *actual.values()); +// Ok(()) +// } +// } From 45e259e00a27479f3f8fc1bc9c7e1d1ece5f63b0 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 19:33:39 +0530 Subject: [PATCH 10/46] Fixes `cargo test` errors and warnings --- datafusion-examples/Cargo.toml | 1 + datafusion-examples/examples/advanced_udwf.rs | 3 ++- datafusion-examples/examples/simplify_udwf_expression.rs | 3 ++- .../core/tests/user_defined/user_defined_window_functions.rs | 2 +- datafusion/expr/src/udwf.rs | 4 ++-- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 2 +- datafusion/proto/tests/cases/roundtrip_logical_plan.rs | 2 +- 7 files changed, 10 insertions(+), 7 deletions(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index f430a87e190d..d868ae88b841 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -62,6 +62,7 @@ dashmap = { workspace = true } datafusion = { workspace = true, default-features = true, features = ["avro"] } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } +datafusion-functions-window-common = { workspace = true } datafusion-optimizer = { workspace = true, default-features = true } datafusion-physical-expr = { workspace = true, default-features = true } datafusion-proto = { workspace = true } diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 03d435abc535..1c20e292f091 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -30,6 +30,7 @@ use datafusion_expr::function::WindowUDFFieldArgs; use datafusion_expr::{ PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, }; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// This example shows how to use the full WindowUDFImpl API to implement a user /// defined window function. As in the `simple_udwf.rs` example, this struct implements @@ -76,7 +77,7 @@ impl WindowUDFImpl for SmoothItUdf { /// partition. fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { Ok(Box::new(MyPartitionEvaluator::new())) } diff --git a/datafusion-examples/examples/simplify_udwf_expression.rs b/datafusion-examples/examples/simplify_udwf_expression.rs index 12df9d1184fd..d95f1147bc37 100644 --- a/datafusion-examples/examples/simplify_udwf_expression.rs +++ b/datafusion-examples/examples/simplify_udwf_expression.rs @@ -27,6 +27,7 @@ use datafusion_expr::{ expr::WindowFunction, simplify::SimplifyInfo, Expr, PartitionEvaluator, Signature, Volatility, WindowUDF, WindowUDFImpl, }; +use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// This UDWF will show how to use the WindowUDFImpl::simplify() API #[derive(Debug, Clone)] @@ -62,7 +63,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { todo!() } diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs index b6e3569d6eee..3760328934bc 100644 --- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs @@ -555,7 +555,7 @@ impl OddCounter { fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { Ok(Box::new(OddCounter::new(Arc::clone(&self.test_state)))) } diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index d5cfc81673c6..67420df37e2e 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -517,7 +517,7 @@ mod test { } fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { unimplemented!() } @@ -556,7 +556,7 @@ mod test { } fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { unimplemented!() } diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 7826be4e212a..54526c91fdab 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -3912,7 +3912,7 @@ mod tests { fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { unimplemented!("not needed for tests") } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 8f7c4a0ddfa7..04f8073141a9 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -2458,7 +2458,7 @@ fn roundtrip_window() { fn partition_evaluator( &self, - partition_evaluator_args: PartitionEvaluatorArgs, + _partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { make_partition_evaluator() } From 0d2aa9945cca7ab4697a2223c3b151a08da00085 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 19:36:51 +0530 Subject: [PATCH 11/46] Minor: taplo formatting --- datafusion-cli/Cargo.lock | 2 ++ datafusion-examples/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 8bf62a53cc47..21e337b4f38c 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1444,6 +1444,7 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-functions-window-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", ] @@ -1453,6 +1454,7 @@ name = "datafusion-functions-window-common" version = "42.0.0" dependencies = [ "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index d868ae88b841..e2432abdc138 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -62,7 +62,7 @@ dashmap = { workspace = true } datafusion = { workspace = true, default-features = true, features = ["avro"] } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } -datafusion-functions-window-common = { workspace = true } +datafusion-functions-window-common = { workspace = true } datafusion-optimizer = { workspace = true, default-features = true } datafusion-physical-expr = { workspace = true, default-features = true } datafusion-proto = { workspace = true } From 5ccb7959778f8d7b8e74d4842de40d0e6723efbf Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 20:01:49 +0530 Subject: [PATCH 12/46] Delete code --- datafusion/functions-window/src/lead_lag.rs | 102 -------------------- 1 file changed, 102 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 0ff7be0fb6f7..3379d3eb9687 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -182,108 +182,6 @@ fn try_get_default_value( } } -/// /// window shift expression -/// #[derive(Debug)] -/// pub struct WindowShift { -/// name: String, -/// /// Output data type -/// data_type: DataType, -/// shift_offset: i64, -/// expr: Arc, -/// default_value: ScalarValue, -/// ignore_nulls: bool, -/// } -/// -/// impl WindowShift { -/// /// Get shift_offset of window shift expression -/// pub fn get_shift_offset(&self) -> i64 { -/// self.shift_offset -/// } -/// -/// /// Get the default_value for window shift expression. -/// pub fn get_default_value(&self) -> ScalarValue { -/// self.default_value.clone() -/// } -/// } -/// -/// /// lead() window function -/// pub fn lead( -/// name: String, -/// data_type: DataType, -/// expr: Arc, -/// shift_offset: Option, -/// default_value: ScalarValue, -/// ignore_nulls: bool, -/// ) -> WindowShift { -/// WindowShift { -/// name, -/// data_type, -/// shift_offset: shift_offset.map(|v| v.neg()).unwrap_or(-1), -/// expr, -/// default_value, -/// ignore_nulls, -/// } -/// } -/// -/// /// lag() window function -/// pub fn lag( -/// name: String, -/// data_type: DataType, -/// expr: Arc, -/// shift_offset: Option, -/// default_value: ScalarValue, -/// ignore_nulls: bool, -/// ) -> WindowShift { -/// WindowShift { -/// name, -/// data_type, -/// shift_offset: shift_offset.unwrap_or(1), -/// expr, -/// default_value, -/// ignore_nulls, -/// } -/// } -/// -/// impl BuiltInWindowFunctionExpr for WindowShift { -/// /// Return a reference to Any that can be used for downcasting -/// fn as_any(&self) -> &dyn Any { -/// self -/// } -/// -/// fn field(&self) -> Result { -/// let nullable = true; -/// Ok(Field::new(&self.name, self.data_type.clone(), nullable)) -/// } -/// -/// fn expressions(&self) -> Vec> { -/// vec![Arc::clone(&self.expr)] -/// } -/// -/// fn name(&self) -> &str { -/// &self.name -/// } -/// -/// fn create_evaluator(&self) -> Result> { -/// Ok(Box::new(WindowShiftEvaluator { -/// shift_offset: self.shift_offset, -/// default_value: self.default_value.clone(), -/// ignore_nulls: self.ignore_nulls, -/// non_null_offsets: VecDeque::new(), -/// })) -/// } -/// -/// fn reverse_expr(&self) -> Option> { -/// Some(Arc::new(Self { -/// name: self.name.clone(), -/// data_type: self.data_type.clone(), -/// shift_offset: -self.shift_offset, -/// expr: Arc::clone(&self.expr), -/// default_value: self.default_value.clone(), -/// ignore_nulls: self.ignore_nulls, -/// })) -/// } -/// } - #[derive(Debug)] struct WindowShiftEvaluator { shift_offset: i64, From 7d8b3e6158ac15b07473c2473835f1f6b3278ab0 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 20:06:29 +0530 Subject: [PATCH 13/46] Define `lead`, `lag` user-defined window functions --- datafusion/functions-window/src/lead_lag.rs | 54 ++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 3379d3eb9687..e60dd5bbcffc 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -22,7 +22,8 @@ use datafusion_common::arrow::datatypes::DataType; use datafusion_common::arrow::datatypes::Field; use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; use datafusion_expr::{ - PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, Volatility, WindowUDFImpl, + Literal, PartitionEvaluator, ReversedUDWF, Signature, TypeSignature, Volatility, + WindowUDFImpl, }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; @@ -32,6 +33,57 @@ use std::cmp::min; use std::collections::VecDeque; use std::ops::{Neg, Range}; use std::sync::Arc; +#[allow(non_upper_case_globals)] +static STATIC_Lag: std::sync::OnceLock> = + std::sync::OnceLock::new(); + +pub fn lag_udwf() -> std::sync::Arc { + STATIC_Lag + .get_or_init(|| { + std::sync::Arc::new(datafusion_expr::WindowUDF::from(WindowShift::lag())) + }) + .clone() +} + +/// Create an expression to represent the `lag` window function +pub fn lag( + arg: datafusion_expr::Expr, + shift_offset: Option, + default_value: Option, +) -> datafusion_expr::Expr { + let shift_offset_lit = shift_offset + .map(|v| v.lit()) + .unwrap_or(ScalarValue::Null.lit()); + let default_lit = default_value.unwrap_or(ScalarValue::Null).lit(); + + lag_udwf().call(vec![arg, shift_offset_lit, default_lit]) +} + +#[allow(non_upper_case_globals)] +static STATIC_Lead: std::sync::OnceLock> = + std::sync::OnceLock::new(); + +pub fn lead_udwf() -> std::sync::Arc { + STATIC_Lead + .get_or_init(|| { + std::sync::Arc::new(datafusion_expr::WindowUDF::from(WindowShift::lead())) + }) + .clone() +} + +/// Create an expression to represent the `lead` window function +pub fn lead( + arg: datafusion_expr::Expr, + shift_offset: Option, + default_value: Option, +) -> datafusion_expr::Expr { + let shift_offset_lit = shift_offset + .map(|v| v.lit()) + .unwrap_or(ScalarValue::Null.lit()); + let default_lit = default_value.unwrap_or(ScalarValue::Null).lit(); + + lead_udwf().call(vec![arg, shift_offset_lit, default_lit]) +} #[derive(Debug)] enum WindowShiftKind { From ba9d24a0aac0812e4cc9ea910d2e3333ed6274c5 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 20:08:33 +0530 Subject: [PATCH 14/46] Fixes `cargo build` errors --- datafusion/functions-window/src/lead_lag.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index e60dd5bbcffc..811bcd351cab 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -27,7 +27,6 @@ use datafusion_expr::{ }; use datafusion_functions_window_common::field::WindowUDFFieldArgs; use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; -use datafusion_physical_expr::expressions::Literal; use std::any::Any; use std::cmp::min; use std::collections::VecDeque; @@ -186,7 +185,7 @@ fn try_get_literal<'a>( ) -> Result<&'a ScalarValue> { partition_evaluator_args .input_expr_at(index) - .and_then(|expr| expr.as_any().downcast_ref::()) + .and_then(|expr| expr.as_any().downcast_ref::()) .ok_or_else(|| DataFusionError::NotImplemented( format!("There is only support for Literal types at field idx: {index} in Window Function") )).map(|lit| lit.value()) From 402dcacf0a5f55dda07723bec2b1899f67b1cd87 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 20:11:04 +0530 Subject: [PATCH 15/46] Export udwf and expression public APIs --- datafusion/functions-window/src/lib.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs index 9d6e2997c7b8..2a98ca2c54a6 100644 --- a/datafusion/functions-window/src/lib.rs +++ b/datafusion/functions-window/src/lib.rs @@ -34,12 +34,18 @@ pub mod row_number; /// Fluent-style API for creating `Expr`s pub mod expr_fn { + pub use super::lead_lag::lag; + pub use super::lead_lag::lead; pub use super::row_number::row_number; } /// Returns all default window functions pub fn all_default_window_functions() -> Vec> { - vec![row_number::row_number_udwf()] + vec![ + row_number::row_number_udwf(), + lead_lag::lead_udwf(), + lead_lag::lag_udwf(), + ] } /// Registers all enabled packages with a [`FunctionRegistry`] pub fn register_all( From 2ce088360eec6852c9cf2b258e2588c542a6c20d Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 20:25:41 +0530 Subject: [PATCH 16/46] Mark result field as nullable --- datafusion/functions-window/src/lead_lag.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 811bcd351cab..9e9ee15197da 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -170,7 +170,7 @@ impl WindowUDFImpl for WindowShift { Ok(Field::new( field_args.name(), field_args.get_input_type(0).unwrap(), - false, + true, )) } From 95e8f876b2c9bd4f849079095ce5a23a62b9262e Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 20:26:31 +0530 Subject: [PATCH 17/46] Delete `return_type` tests for `lead` and `lag` --- datafusion/expr/src/expr.rs | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 366ff228abbb..257a93904f73 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2560,30 +2560,6 @@ mod test { Ok(()) } - #[test] - fn test_lead_return_type() -> Result<()> { - let fun = find_df_window_func("lead").unwrap(); - let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; - assert_eq!(DataType::Utf8, observed); - - let observed = fun.return_type(&[DataType::Float64], &[true], "")?; - assert_eq!(DataType::Float64, observed); - - Ok(()) - } - - #[test] - fn test_lag_return_type() -> Result<()> { - let fun = find_df_window_func("lag").unwrap(); - let observed = fun.return_type(&[DataType::Utf8], &[true], "")?; - assert_eq!(DataType::Utf8, observed); - - let observed = fun.return_type(&[DataType::Float64], &[true], "")?; - assert_eq!(DataType::Float64, observed); - - Ok(()) - } - #[test] fn test_nth_value_return_type() -> Result<()> { let fun = find_df_window_func("nth_value").unwrap(); From 04f30eccdd5ef6f627dfabbb5eb576e0770de70f Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:23:51 +0530 Subject: [PATCH 18/46] Disables test: window function case insensitive --- datafusion/expr/src/expr.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 257a93904f73..e3e2c5aa5086 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2609,8 +2609,8 @@ mod test { "percent_rank", "cume_dist", "ntile", - "lag", - "lead", + // "lag", + // "lead", "first_value", "last_value", "nth_value", From d034082e7a3fcc34699604d2dd8ddcae38d64842 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:29:04 +0530 Subject: [PATCH 19/46] Fixes: lowercase name in logical plan --- datafusion/sqllogictest/test_files/union.slt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index a3d0ff4383ae..fb7afdda2ea8 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -503,9 +503,9 @@ logical_plan 12)----Projection: Int64(1) AS cnt 13)------Limit: skip=0, fetch=3 14)--------EmptyRelation -15)----Projection: LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt +15)----Projection: lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS cnt 16)------Limit: skip=0, fetch=3 -17)--------WindowAggr: windowExpr=[[LEAD(b.c1, Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +17)--------WindowAggr: windowExpr=[[lead(b.c1, Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] 18)----------SubqueryAlias: b 19)------------Projection: Int64(1) AS c1 20)--------------EmptyRelation @@ -528,8 +528,8 @@ physical_plan 16)------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c13], has_header=true 17)------ProjectionExec: expr=[1 as cnt] 18)--------PlaceholderRowExec -19)------ProjectionExec: expr=[LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as cnt] -20)--------BoundedWindowAggExec: wdw=[LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "LEAD(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +19)------ProjectionExec: expr=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as cnt] +20)--------BoundedWindowAggExec: wdw=[lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "lead(b.c1,Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] 21)----------ProjectionExec: expr=[1 as c1] 22)------------PlaceholderRowExec From 513df2ad304fcdac8f7147e3782826beb75e3a24 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:32:20 +0530 Subject: [PATCH 20/46] Reverts to old methods for computing `shift_offset`, `default_value` --- datafusion/functions-window/src/lead_lag.rs | 66 +++++++++++++++++++-- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 9e9ee15197da..73bdb2d33d7c 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -154,10 +154,27 @@ impl WindowUDFImpl for WindowShift { &self, partition_evaluator_args: PartitionEvaluatorArgs, ) -> Result> { - let shift_offset = - try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; - let default_value = try_get_default_value(&partition_evaluator_args, 2)?; - + let shift_offset = scalar_at(&partition_evaluator_args, 1)? + .map(get_signed_integer) + .map_or(Ok(None), |v| v.map(Some)) + .map(|n| self.kind.shift_offset(n)) + .map(|offset| { + if partition_evaluator_args.is_reversed() { + -offset + } else { + offset + } + })?; + let return_type = partition_evaluator_args + .input_types_at(0) + .unwrap_or(&DataType::Null); + let default_value = scalar_at(&partition_evaluator_args, 2) + .and_then(|scalar| get_default_value(return_type, scalar))?; + + /* let shift_offset = + try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; + let default_value = try_get_default_value(&partition_evaluator_args, 2)?; + */ Ok(Box::new(WindowShiftEvaluator { shift_offset, default_value, @@ -178,7 +195,48 @@ impl WindowUDFImpl for WindowShift { todo!() } } +fn get_signed_integer(value: ScalarValue) -> Result { + if value.data_type().is_integer() { + value.cast_to(&DataType::Int64)?.try_into() + } else { + Err(DataFusionError::Execution( + "Expected an integer value".to_string(), + )) + } +} + +fn get_default_value( + return_type: &DataType, + value: Option, +) -> Result { + match value { + Some(default_value) if !default_value.is_null() => { + default_value.cast_to(return_type) + } + // If None or Null datatype + _ => ScalarValue::try_from(return_type), + } +} +fn scalar_at( + partition_evaluator_args: &PartitionEvaluatorArgs, + index: usize, +) -> Result> { + let value = if let Some(expr) = partition_evaluator_args.input_expr_at(index) { + let inner = expr + .as_any() + .downcast_ref::() + .ok_or_else(|| DataFusionError::NotImplemented( + format!("There is only support Literal types for field at idx: {index} in Window Function."), + ))? + .value() + .clone(); + Some(inner) + } else { + None + }; + Ok(value) +} fn try_get_literal<'a>( partition_evaluator_args: &'a PartitionEvaluatorArgs, index: usize, From bb0bd8b59246fc7710e2afdc87dd1fbd2be2bc78 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:34:59 +0530 Subject: [PATCH 21/46] Implements expression reversal --- datafusion/functions-window/src/lead_lag.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 73bdb2d33d7c..b71da6b28718 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -192,7 +192,10 @@ impl WindowUDFImpl for WindowShift { } fn reverse_expr(&self) -> ReversedUDWF { - todo!() + match self.kind { + WindowShiftKind::Lag => ReversedUDWF::Reversed(lag_udwf()), + WindowShiftKind::Lead => ReversedUDWF::Reversed(lead_udwf()), + } } } fn get_signed_integer(value: ScalarValue) -> Result { From 0e576bb5d92f12ef5d85b0dc5b1a32a027a8536a Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:41:12 +0530 Subject: [PATCH 22/46] Fixes: lowercase name in logical plans --- datafusion/sqllogictest/test_files/window.slt | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index cb6c6a5ace76..602284d4fb5c 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1376,16 +1376,16 @@ EXPLAIN SELECT LIMIT 5 ---- logical_plan -01)Projection: aggregate_test_100.c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv2, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lag1, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead1, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2 +01)Projection: aggregate_test_100.c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING AS fv2, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lag1, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS lead1, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2 02)--Limit: skip=0, fetch=5 -03)----WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, LAG(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, LEAD(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] -04)------WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, LAG(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +03)----WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, lag(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, lead(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] +04)------WindowAggr: windowExpr=[[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, lag(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(aggregate_test_100.c9, Int64(2), Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 05)--------TableScan: aggregate_test_100 projection=[c9] physical_plan -01)ProjectionExec: expr=[c9@0 as c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as fv2, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as lag1, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as lag2, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as lead1, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as lead2] +01)ProjectionExec: expr=[c9@0 as c9, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as fv1, first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as fv2, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as lag1, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as lag2, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as lead1, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as lead2] 02)--GlobalLimitExec: skip=0, fetch=5 -03)----BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +03)----BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)), is_causal: false }, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "first_value(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)), is_causal: false }, lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] 05)--------SortExec: expr=[c9@0 DESC], preserve_partitioning=[false] 06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true @@ -2636,15 +2636,15 @@ EXPLAIN SELECT ---- logical_plan 01)Sort: annotated_data_finite.ts DESC NULLS FIRST, fetch=5 -02)--Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 -03)----WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] -04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LAG(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, LEAD(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +02)--Projection: annotated_data_finite.ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS dense_rank2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lag1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lag2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lead1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lvr2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS lagr1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS lagr2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING AS leadr1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING AS leadr2 +03)----WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, NTH_VALUE(annotated_data_finite.inc_col, Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] +04)------WindowAggr: windowExpr=[[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lag(annotated_data_finite.inc_col, Int64(2), Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(-1), Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING, lead(annotated_data_finite.inc_col, Int64(4), Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING]] 05)--------TableScan: annotated_data_finite projection=[ts, inc_col] physical_plan 01)SortExec: TopK(fetch=5), expr=[ts@0 DESC], preserve_partitioning=[false] -02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] -03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] -04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] +02)--ProjectionExec: expr=[ts@0 as ts, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2] +03)----BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "row_number() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)), is_causal: false }], mode=[Sorted] +04)------BoundedWindowAggExec: wdw=[first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "first_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)), is_causal: false }, last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "last_value(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lag(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)), is_causal: false }, lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "lead(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)), is_causal: false }], mode=[Sorted] 05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true query IIIIIIIIIIIIIIIIIIIIIIIII From 54908608a212a1e527457f5823d8e4a9f302369b Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:48:45 +0530 Subject: [PATCH 23/46] Fixes: doc test compilation errors Fixes: doc test build errors --- datafusion/expr/src/udwf.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 67420df37e2e..90aad2f11c5b 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -212,6 +212,8 @@ where /// # use datafusion_expr::{col, Signature, Volatility, PartitionEvaluator, WindowFrame, ExprFunctionExt}; /// # use datafusion_expr::{WindowUDFImpl, WindowUDF}; /// use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +/// /// #[derive(Debug, Clone)] /// struct SmoothIt { /// signature: Signature @@ -231,7 +233,12 @@ where /// fn name(&self) -> &str { "smooth_it" } /// fn signature(&self) -> &Signature { &self.signature } /// // The actual implementation would add one to the argument -/// fn partition_evaluator(&self) -> Result> { unimplemented!() } +/// fn partition_evaluator( +/// &self, +/// _partition_evaluator_args: PartitionEvaluatorArgs, +/// ) -> Result> { +/// unimplemented!() +/// } /// fn field(&self, field_args: WindowUDFFieldArgs) -> Result { /// if let Some(DataType::Int32) = field_args.get_input_type(0) { /// Ok(Field::new(field_args.name(), DataType::Int32, false)) From f0c0e72f969a2959a3a5f34974a6b4979ab50ae4 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 21:50:18 +0530 Subject: [PATCH 24/46] Temporarily quite clippy errors --- datafusion/functions-window/src/lead_lag.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index b71da6b28718..4becb6c95a9c 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -240,7 +240,7 @@ fn scalar_at( }; Ok(value) } -fn try_get_literal<'a>( +/*fn try_get_literal<'a>( partition_evaluator_args: &'a PartitionEvaluatorArgs, index: usize, ) -> Result<&'a ScalarValue> { @@ -293,7 +293,7 @@ fn try_get_default_value( _ => ScalarValue::try_from(return_type), } } - +*/ #[derive(Debug)] struct WindowShiftEvaluator { shift_offset: i64, From 9051d2f8167d98ae78d3fbf0e6b4e65917f04860 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 22:00:30 +0530 Subject: [PATCH 25/46] Fixes proto defintion --- datafusion/proto/proto/datafusion.proto | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 25addb4dec4c..67a31100f6da 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -508,9 +508,9 @@ message ScalarUDFExprNode { enum BuiltInWindowFunction { UNSPECIFIED = 0; // https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum // ROW_NUMBER = 0; - // RANK = 1; - // DENSE_RANK = 2; - // PERCENT_RANK = 3; + RANK = 1; + DENSE_RANK = 2; + PERCENT_RANK = 3; CUME_DIST = 4; NTILE = 5; // LAG = 6; From be743227b71e2a10cb3705855d5f2900f743581e Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 3 Oct 2024 23:07:11 +0530 Subject: [PATCH 26/46] Minor: fixes formatting --- datafusion/functions-window/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs index c60f72c21b83..5e5388c5b21d 100644 --- a/datafusion/functions-window/src/lib.rs +++ b/datafusion/functions-window/src/lib.rs @@ -31,8 +31,8 @@ use datafusion_expr::WindowUDF; #[macro_use] pub mod macros; -pub mod row_number; pub mod lead_lag; +pub mod row_number; /// Fluent-style API for creating `Expr`s pub mod expr_fn { From c9ac517f169d2528b75280de991e9e4e3a41b40f Mon Sep 17 00:00:00 2001 From: jcsherin Date: Fri, 4 Oct 2024 17:20:33 +0530 Subject: [PATCH 27/46] Fixes: doc tests --- datafusion/functions-window/src/macros.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/datafusion/functions-window/src/macros.rs b/datafusion/functions-window/src/macros.rs index 843d8ecb38cc..e934f883b101 100644 --- a/datafusion/functions-window/src/macros.rs +++ b/datafusion/functions-window/src/macros.rs @@ -45,6 +45,7 @@ /// # /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window::get_or_init_udwf; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # /// /// Defines the `simple_udwf()` user-defined window function. /// get_or_init_udwf!( @@ -80,6 +81,7 @@ /// # } /// # fn partition_evaluator( /// # &self, +/// # _partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } @@ -145,6 +147,8 @@ macro_rules! get_or_init_udwf { /// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; /// # use datafusion_functions_window::{create_udwf_expr, get_or_init_udwf}; /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +/// /// # get_or_init_udwf!( /// # RowNumber, /// # row_number, @@ -193,6 +197,7 @@ macro_rules! get_or_init_udwf { /// # } /// # fn partition_evaluator( /// # &self, +/// # _partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } @@ -216,6 +221,7 @@ macro_rules! get_or_init_udwf { /// # use datafusion_common::arrow::datatypes::Field; /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{col, lit}; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # /// # get_or_init_udwf!(Lead, lead, "user-defined window function"); /// # @@ -278,6 +284,7 @@ macro_rules! get_or_init_udwf { /// # } /// # fn partition_evaluator( /// # &self, +/// # partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } @@ -355,6 +362,7 @@ macro_rules! create_udwf_expr { /// # /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; /// # use datafusion_functions_window::{define_udwf_and_expr, get_or_init_udwf, create_udwf_expr}; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # /// /// 1. Defines the `simple_udwf()` user-defined window function. /// /// @@ -397,6 +405,7 @@ macro_rules! create_udwf_expr { /// # } /// # fn partition_evaluator( /// # &self, +/// # partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } @@ -415,6 +424,7 @@ macro_rules! create_udwf_expr { /// # use datafusion_expr::{PartitionEvaluator, Signature, Volatility, WindowUDFImpl}; /// # use datafusion_functions_window::{create_udwf_expr, define_udwf_and_expr, get_or_init_udwf}; /// # use datafusion_functions_window_common::field::WindowUDFFieldArgs; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # /// /// 1. Defines the `row_number_udwf()` user-defined window function. /// /// @@ -459,6 +469,7 @@ macro_rules! create_udwf_expr { /// # } /// # fn partition_evaluator( /// # &self, +/// # _partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } @@ -484,6 +495,7 @@ macro_rules! create_udwf_expr { /// # use datafusion_common::arrow::datatypes::Field; /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{col, lit}; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # /// /// 1. Defines the `lead_udwf()` user-defined window function. /// /// @@ -543,6 +555,7 @@ macro_rules! create_udwf_expr { /// # } /// # fn partition_evaluator( /// # &self, +/// # _partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } @@ -570,6 +583,7 @@ macro_rules! create_udwf_expr { /// # use datafusion_common::arrow::datatypes::Field; /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{col, lit}; +/// # use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; /// # /// /// 1. Defines the `lead_udwf()` user-defined window function. /// /// @@ -630,6 +644,7 @@ macro_rules! create_udwf_expr { /// # } /// # fn partition_evaluator( /// # &self, +/// # _partition_evaluator_args: PartitionEvaluatorArgs, /// # ) -> datafusion_common::Result> { /// # unimplemented!() /// # } From e37752f205870f009ae8c652945697cd28458d83 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Fri, 4 Oct 2024 17:55:56 +0530 Subject: [PATCH 28/46] Uses macro for defining `lag_udwf()` and `leag_udwf()` --- datafusion/functions-window/src/lead_lag.rs | 39 +++++++++------------ 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 4becb6c95a9c..815f2fe4f359 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -32,17 +32,22 @@ use std::cmp::min; use std::collections::VecDeque; use std::ops::{Neg, Range}; use std::sync::Arc; -#[allow(non_upper_case_globals)] -static STATIC_Lag: std::sync::OnceLock> = - std::sync::OnceLock::new(); - -pub fn lag_udwf() -> std::sync::Arc { - STATIC_Lag - .get_or_init(|| { - std::sync::Arc::new(datafusion_expr::WindowUDF::from(WindowShift::lag())) - }) - .clone() -} + +get_or_init_udwf!(Lag, lag, "lag udwf", WindowShift::lag); +get_or_init_udwf!(Lead, lead, "lead udwf", WindowShift::lead); + +// This workaround necessary to avoid multiple definitions of +// `STATIC_WindowShift` being created during macro expansion for lazily +// initializing the `WindowUDF` (exactly once). +// +// This happens when `WindowShift` is passed as the `$UDWF` parameter +// twice to create `lag_udwf()` and `lead_udwf()` respectively. Now, +// it will expand into `STATIC_Lag` and `STATIC_lead` which are unique, +// avoiding the issue. +#[allow(dead_code)] +struct Lag {} +#[allow(dead_code)] +struct Lead {} /// Create an expression to represent the `lag` window function pub fn lag( @@ -58,18 +63,6 @@ pub fn lag( lag_udwf().call(vec![arg, shift_offset_lit, default_lit]) } -#[allow(non_upper_case_globals)] -static STATIC_Lead: std::sync::OnceLock> = - std::sync::OnceLock::new(); - -pub fn lead_udwf() -> std::sync::Arc { - STATIC_Lead - .get_or_init(|| { - std::sync::Arc::new(datafusion_expr::WindowUDF::from(WindowShift::lead())) - }) - .clone() -} - /// Create an expression to represent the `lead` window function pub fn lead( arg: datafusion_expr::Expr, From 6fb12e6718f57c2b43ef0d6fb504bab9b42de8f8 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:09:06 +0530 Subject: [PATCH 29/46] Fixes: window fuzz test cases --- .../core/tests/fuzz_cases/window_fuzz.rs | 106 +++++++++--------- 1 file changed, 51 insertions(+), 55 deletions(-) diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 775c6a95ffc1..3e81ebe66740 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -45,6 +45,7 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; use test_utils::add_empty_batches; use datafusion::functions_window::row_number::row_number_udwf; +use datafusion_functions_window::lead_lag::{lag_udwf, lead_udwf}; use hashbrown::HashMap; use rand::distributions::Alphanumeric; use rand::rngs::StdRng; @@ -194,34 +195,34 @@ async fn bounded_window_causal_non_causal() -> Result<()> { // LAG(x) OVER ( // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING // ) - /* ( - // Window function - WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lag), - // its name - "LAG", - // no argument - vec![col("x", &schema)?], - // Expected causality, for None cases causality will be determined from window frame boundaries - Some(true), - ), - */ // Simulate cases of the following form: - // LEAD(x) OVER ( - // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING - // ) - /* ( - // Window function - WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Lead), - // its name - "LEAD", - // no argument - vec![col("x", &schema)?], - // Expected causality, for None cases causality will be determined from window frame boundaries - Some(false), - ), - */ // Simulate cases of the following form: - // RANK() OVER ( - // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING - // ) + ( + // Window function + WindowFunctionDefinition::WindowUDF(lag_udwf()), + // its name + "LAG", + // no argument + vec![col("x", &schema)?], + // Expected causality, for None cases causality will be determined from window frame boundaries + Some(true), + ), + // Simulate cases of the following form: + // LEAD(x) OVER ( + // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING + // ) + ( + // Window function + WindowFunctionDefinition::WindowUDF(lead_udwf()), + // its name + "LEAD", + // no argument + vec![col("x", &schema)?], + // Expected causality, for None cases causality will be determined from window frame boundaries + Some(false), + ), + // Simulate cases of the following form: + // RANK() OVER ( + // ROWS BETWEEN UNBOUNDED PRECEDING AND PRECEDING/FOLLOWING + // ) ( // Window function WindowFunctionDefinition::BuiltInWindowFunction(BuiltInWindowFunction::Rank), @@ -398,33 +399,28 @@ fn get_random_function( vec![], ), ); - /* window_fn_map.insert( - "lead", - ( - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::Lead, - ), - vec![ - arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), - lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), - ], - ), - ); - window_fn_map.insert( - "lag", - ( - WindowFunctionDefinition::BuiltInWindowFunction( - BuiltInWindowFunction::Lag, - ), - vec![ - arg.clone(), - lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), - lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), - ], - ), - ); - */ + window_fn_map.insert( + "lead", + ( + WindowFunctionDefinition::WindowUDF(lead_udwf()), + vec![ + arg.clone(), + lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), + ], + ), + ); + window_fn_map.insert( + "lag", + ( + WindowFunctionDefinition::WindowUDF(lag_udwf()), + vec![ + arg.clone(), + lit(ScalarValue::Int64(Some(rng.gen_range(1..10)))), + lit(ScalarValue::Int64(Some(rng.gen_range(1..1000)))), + ], + ), + ); } window_fn_map.insert( "first_value", From ae26cb661c668fcfdf607fb4cc392c97f5bd6f2a Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:11:30 +0530 Subject: [PATCH 30/46] Copies doc comments verbatim from `BuiltInWindowFunction` enum --- datafusion/functions-window/src/lead_lag.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 815f2fe4f359..ab7bf3970cb6 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -50,6 +50,11 @@ struct Lag {} struct Lead {} /// Create an expression to represent the `lag` window function +/// +/// returns value evaluated at the row that is offset rows before the current row within the partition; +/// if there is no such row, instead return default (which must be of the same type as value). +/// Both offset and default are evaluated with respect to the current row. +/// If omitted, offset defaults to 1 and default to null pub fn lag( arg: datafusion_expr::Expr, shift_offset: Option, @@ -64,6 +69,11 @@ pub fn lag( } /// Create an expression to represent the `lead` window function +/// +/// returns value evaluated at the row that is offset rows after the current row within the partition; +/// if there is no such row, instead return default (which must be of the same type as value). +/// Both offset and default are evaluated with respect to the current row. +/// If omitted, offset defaults to 1 and default to null pub fn lead( arg: datafusion_expr::Expr, shift_offset: Option, From 51652d73105de9d59db126c7136b293c9c16c5d9 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:17:23 +0530 Subject: [PATCH 31/46] Deletes from window function case insensitive test --- datafusion/expr/src/expr.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index e3e2c5aa5086..261be1d6ea39 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2609,8 +2609,6 @@ mod test { "percent_rank", "cume_dist", "ntile", - // "lag", - // "lead", "first_value", "last_value", "nth_value", From 723ca6801969810639882089415723259a3b3825 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:22:34 +0530 Subject: [PATCH 32/46] Deletes `BuiltInWindowFunction` expression APIs --- datafusion/expr/src/window_function.rs | 32 -------------------------- 1 file changed, 32 deletions(-) diff --git a/datafusion/expr/src/window_function.rs b/datafusion/expr/src/window_function.rs index cd9139a971d8..4685af1aff95 100644 --- a/datafusion/expr/src/window_function.rs +++ b/datafusion/expr/src/window_function.rs @@ -48,38 +48,6 @@ pub fn ntile(arg: Expr) -> Expr { Expr::WindowFunction(WindowFunction::new(BuiltInWindowFunction::Ntile, vec![arg])) } -/*/// Create an expression to represent the `lag` window function -pub fn lag( - arg: Expr, - shift_offset: Option, - default_value: Option, -) -> Expr { - let shift_offset_lit = shift_offset - .map(|v| v.lit()) - .unwrap_or(ScalarValue::Null.lit()); - let default_lit = default_value.unwrap_or(ScalarValue::Null).lit(); - Expr::WindowFunction(WindowFunction::new( - BuiltInWindowFunction::Lag, - vec![arg, shift_offset_lit, default_lit], - )) -} - -/// Create an expression to represent the `lead` window function -pub fn lead( - arg: Expr, - shift_offset: Option, - default_value: Option, -) -> Expr { - let shift_offset_lit = shift_offset - .map(|v| v.lit()) - .unwrap_or(ScalarValue::Null.lit()); - let default_lit = default_value.unwrap_or(ScalarValue::Null).lit(); - Expr::WindowFunction(WindowFunction::new( - BuiltInWindowFunction::Lead, - vec![arg, shift_offset_lit, default_lit], - )) -} -*/ /// Create an expression to represent the `nth_value` window function pub fn nth_value(arg: Expr, n: i64) -> Expr { Expr::WindowFunction(WindowFunction::new( From 742c196485066538a7a20e4ac413f6cbac7ed27d Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:28:21 +0530 Subject: [PATCH 33/46] Delete from `create_built_in_window_expr` --- datafusion/physical-plan/src/windows/mod.rs | 33 --------------------- 1 file changed, 33 deletions(-) diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 937342e64fc6..4050b44569c8 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -246,39 +246,6 @@ fn create_built_in_window_expr( Arc::new(Ntile::new(name, n as u64, out_data_type)) } } - /* BuiltInWindowFunction::Lag => { - let arg = Arc::clone(&args[0]); - let shift_offset = get_scalar_value_from_args(args, 1)? - .map(get_signed_integer) - .map_or(Ok(None), |v| v.map(Some))?; - let default_value = - get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; - Arc::new(lag( - name, - out_data_type.clone(), - arg, - shift_offset, - default_value, - ignore_nulls, - )) - } - BuiltInWindowFunction::Lead => { - let arg = Arc::clone(&args[0]); - let shift_offset = get_scalar_value_from_args(args, 1)? - .map(get_signed_integer) - .map_or(Ok(None), |v| v.map(Some))?; - let default_value = - get_casted_value(get_scalar_value_from_args(args, 2)?, out_data_type)?; - Arc::new(lead( - name, - out_data_type.clone(), - arg, - shift_offset, - default_value, - ignore_nulls, - )) - } - */ BuiltInWindowFunction::NthValue => { let arg = Arc::clone(&args[0]); let n = get_signed_integer( From 99093f5c3f6fc40010088f0bb0d6a854101431ab Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:33:51 +0530 Subject: [PATCH 34/46] Deletes proto serialization --- .../proto/src/physical_plan/to_proto.rs | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/datafusion/proto/src/physical_plan/to_proto.rs b/datafusion/proto/src/physical_plan/to_proto.rs index 0813ad134e3e..fdb2e5b30b52 100644 --- a/datafusion/proto/src/physical_plan/to_proto.rs +++ b/datafusion/proto/src/physical_plan/to_proto.rs @@ -126,29 +126,7 @@ pub fn serialize_physical_window_expr( )))), ); protobuf::BuiltInWindowFunction::Ntile - } - /* else if let Some(window_shift_expr) = - built_in_fn_expr.downcast_ref::() - { - args.insert( - 1, - Arc::new(Literal::new(datafusion_common::ScalarValue::Int64(Some( - window_shift_expr.get_shift_offset(), - )))), - ); - args.insert( - 2, - Arc::new(Literal::new(window_shift_expr.get_default_value())), - ); - - if window_shift_expr.get_shift_offset() >= 0 { - protobuf::BuiltInWindowFunction::Lag - } else { - protobuf::BuiltInWindowFunction::Lead - } - } - */ - else if let Some(nth_value_expr) = built_in_fn_expr.downcast_ref::() { + } else if let Some(nth_value_expr) = built_in_fn_expr.downcast_ref::() { match nth_value_expr.get_kind() { NthValueKind::First => protobuf::BuiltInWindowFunction::FirstValue, NthValueKind::Last => protobuf::BuiltInWindowFunction::LastValue, From 092578557d09fb6fd0860ca422f6fa2f6e673119 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:38:54 +0530 Subject: [PATCH 35/46] Delete from `BuiltInWindowFunction` enum --- datafusion/expr/src/built_in_window_function.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/datafusion/expr/src/built_in_window_function.rs b/datafusion/expr/src/built_in_window_function.rs index 0591c2c75396..a4653f396ac9 100644 --- a/datafusion/expr/src/built_in_window_function.rs +++ b/datafusion/expr/src/built_in_window_function.rs @@ -50,17 +50,6 @@ pub enum BuiltInWindowFunction { CumeDist, /// integer ranging from 1 to the argument value, dividing the partition as equally as possible Ntile, - // TODO: Preserve comments when converting to udwf - /// returns value evaluated at the row that is offset rows before the current row within the partition; - /// if there is no such row, instead return default (which must be of the same type as value). - /// Both offset and default are evaluated with respect to the current row. - /// If omitted, offset defaults to 1 and default to null - // Lag, - /// returns value evaluated at the row that is offset rows after the current row within the partition; - /// if there is no such row, instead return default (which must be of the same type as value). - /// Both offset and default are evaluated with respect to the current row. - /// If omitted, offset defaults to 1 and default to null - // Lead, /// returns value evaluated at the row that is the first row of the window frame FirstValue, /// returns value evaluated at the row that is the last row of the window frame From 6f058188139d31a36f264a2763e40738c0b392d7 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Mon, 7 Oct 2024 16:40:58 +0530 Subject: [PATCH 36/46] Deletes test for finding built-in window function --- datafusion/expr/src/expr.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 261be1d6ea39..a3ef35b2187b 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2646,19 +2646,6 @@ mod test { built_in_window_function::BuiltInWindowFunction::LastValue )) ); - /* assert_eq!( - find_df_window_func("LAG"), - Some(WindowFunctionDefinition::BuiltInWindowFunction( - built_in_window_function::BuiltInWindowFunction::Lag - )) - ); - assert_eq!( - find_df_window_func("LEAD"), - Some(WindowFunctionDefinition::BuiltInWindowFunction( - built_in_window_function::BuiltInWindowFunction::Lead - )) - ); - */ assert_eq!(find_df_window_func("not_exist"), None) } From e69463f439918e67dcd075acfceaa4bcb1294818 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 15:13:43 +0530 Subject: [PATCH 37/46] Fixes build errors + deletes redundant code --- datafusion/functions-window/src/lead_lag.rs | 58 ++------------------- 1 file changed, 3 insertions(+), 55 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index ab7bf3970cb6..cb27cd7abed4 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -169,7 +169,8 @@ impl WindowUDFImpl for WindowShift { } })?; let return_type = partition_evaluator_args - .input_types_at(0) + .input_types() + .get(0) .unwrap_or(&DataType::Null); let default_value = scalar_at(&partition_evaluator_args, 2) .and_then(|scalar| get_default_value(return_type, scalar))?; @@ -228,7 +229,7 @@ fn scalar_at( partition_evaluator_args: &PartitionEvaluatorArgs, index: usize, ) -> Result> { - let value = if let Some(expr) = partition_evaluator_args.input_expr_at(index) { + let value = if let Some(expr) = partition_evaluator_args.input_exprs().get(index) { let inner = expr .as_any() .downcast_ref::() @@ -243,60 +244,7 @@ fn scalar_at( }; Ok(value) } -/*fn try_get_literal<'a>( - partition_evaluator_args: &'a PartitionEvaluatorArgs, - index: usize, -) -> Result<&'a ScalarValue> { - partition_evaluator_args - .input_expr_at(index) - .and_then(|expr| expr.as_any().downcast_ref::()) - .ok_or_else(|| DataFusionError::NotImplemented( - format!("There is only support for Literal types at field idx: {index} in Window Function") - )).map(|lit| lit.value()) -} - -fn try_get_signed_integer(value: &ScalarValue) -> Result { - if value.data_type().is_integer() { - value.cast_to(&DataType::Int64)?.try_into() - } else { - Err(DataFusionError::Execution(format!( - "Expected an integer value, but got {:?}", - value.data_type() - ))) - } -} -fn try_get_shift_offset( - kind: &WindowShiftKind, - partition_evaluator_args: &PartitionEvaluatorArgs, - index: usize, -) -> Result { - try_get_literal(partition_evaluator_args, index) - .and_then(try_get_signed_integer) - .map_or(Ok(None), |n| Ok(Some(n))) - .map(|n| kind.shift_offset(n)) - .map(|offset| { - if partition_evaluator_args.is_reversed() { - offset.neg() - } else { - offset - } - }) -} - -fn try_get_default_value( - partition_evaluator_args: &PartitionEvaluatorArgs, - index: usize, -) -> Result { - let return_type = partition_evaluator_args.input_types_at(0).unwrap(); - match try_get_literal(partition_evaluator_args, index) { - Ok(default_value) if !default_value.is_null() => { - default_value.cast_to(return_type) - } - _ => ScalarValue::try_from(return_type), - } -} -*/ #[derive(Debug)] struct WindowShiftEvaluator { shift_offset: i64, From d0baa9451316aa2b355bcb916c01221374eb8e7e Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 15:18:58 +0530 Subject: [PATCH 38/46] Deletes more code --- datafusion/functions-window/src/lead_lag.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index cb27cd7abed4..97cebe4649b6 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -175,10 +175,6 @@ impl WindowUDFImpl for WindowShift { let default_value = scalar_at(&partition_evaluator_args, 2) .and_then(|scalar| get_default_value(return_type, scalar))?; - /* let shift_offset = - try_get_shift_offset(&self.kind, &partition_evaluator_args, 1)?; - let default_value = try_get_default_value(&partition_evaluator_args, 2)?; - */ Ok(Box::new(WindowShiftEvaluator { shift_offset, default_value, From 919994322832c838817e8d743123f8b2a5a013ea Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 15:46:28 +0530 Subject: [PATCH 39/46] Delete unnecessary structs --- datafusion/functions-window/src/lead_lag.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 97cebe4649b6..434cf64013dd 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -36,19 +36,6 @@ use std::sync::Arc; get_or_init_udwf!(Lag, lag, "lag udwf", WindowShift::lag); get_or_init_udwf!(Lead, lead, "lead udwf", WindowShift::lead); -// This workaround necessary to avoid multiple definitions of -// `STATIC_WindowShift` being created during macro expansion for lazily -// initializing the `WindowUDF` (exactly once). -// -// This happens when `WindowShift` is passed as the `$UDWF` parameter -// twice to create `lag_udwf()` and `lead_udwf()` respectively. Now, -// it will expand into `STATIC_Lag` and `STATIC_lead` which are unique, -// avoiding the issue. -#[allow(dead_code)] -struct Lag {} -#[allow(dead_code)] -struct Lead {} - /// Create an expression to represent the `lag` window function /// /// returns value evaluated at the row that is offset rows before the current row within the partition; From 000ceb76409e66230f9c5017a30fa3c9bb1e6575 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 15:55:33 +0530 Subject: [PATCH 40/46] Refactors shift offset computation --- datafusion/functions-window/src/lead_lag.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 434cf64013dd..9f9b7125bd92 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -88,10 +88,10 @@ impl WindowShiftKind { } } - fn shift_offset(&self, value: Option) -> i64 { + fn shift_offset(&self, value: i64) -> i64 { match self { - WindowShiftKind::Lag => value.unwrap_or(1), - WindowShiftKind::Lead => value.map(|v| v.neg()).unwrap_or(-1), + WindowShiftKind::Lag => value, + WindowShiftKind::Lead => value.neg(), } } } @@ -146,11 +146,11 @@ impl WindowUDFImpl for WindowShift { ) -> Result> { let shift_offset = scalar_at(&partition_evaluator_args, 1)? .map(get_signed_integer) - .map_or(Ok(None), |v| v.map(Some)) + .unwrap_or(Ok(1)) .map(|n| self.kind.shift_offset(n)) .map(|offset| { if partition_evaluator_args.is_reversed() { - -offset + offset.neg() } else { offset } From ae0b91bc44401ee6531ef7cf8e8adedffe70f40e Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 19:12:00 +0530 Subject: [PATCH 41/46] Passes range unit test --- datafusion/functions-window/src/lead_lag.rs | 277 ++++++++++---------- 1 file changed, 138 insertions(+), 139 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 9f9b7125bd92..5f1b9caa1c7b 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -495,142 +495,141 @@ impl PartitionEvaluator for WindowShiftEvaluator { } } -// #[cfg(test)] -// mod tests { -// use super::*; -// use crate::expressions::Column; -// use arrow::{array::*, datatypes::*}; -// use datafusion_common::cast::as_int32_array; -// -// fn test_i32_result(expr: WindowShift, expected: Int32Array) -> Result<()> { -// let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); -// let values = vec![arr]; -// let schema = Schema::new(vec![Field::new("arr", DataType::Int32, false)]); -// let batch = RecordBatch::try_new(Arc::new(schema), values.clone())?; -// let values = expr.evaluate_args(&batch)?; -// let result = expr -// .create_evaluator()? -// .evaluate_all(&values, batch.num_rows())?; -// let result = as_int32_array(&result)?; -// assert_eq!(expected, *result); -// Ok(()) -// } -// -// #[test] -// fn lead_lag_get_range() -> Result<()> { -// // LAG(2) -// let lag_fn = WindowShiftEvaluator { -// shift_offset: 2, -// default_value: ScalarValue::Null, -// ignore_nulls: false, -// non_null_offsets: Default::default(), -// }; -// assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 4, end: 7 }); -// assert_eq!(lag_fn.get_range(0, 10)?, Range { start: 0, end: 1 }); -// -// // LAG(2 ignore nulls) -// let lag_fn = WindowShiftEvaluator { -// shift_offset: 2, -// default_value: ScalarValue::Null, -// ignore_nulls: true, -// // models data received [, , , NULL, , NULL, , ...] -// non_null_offsets: vec![2, 2].into(), // [1, 1, 2, 2] actually, just last 2 is used -// }; -// assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 2, end: 7 }); -// -// // LEAD(2) -// let lead_fn = WindowShiftEvaluator { -// shift_offset: -2, -// default_value: ScalarValue::Null, -// ignore_nulls: false, -// non_null_offsets: Default::default(), -// }; -// assert_eq!(lead_fn.get_range(6, 10)?, Range { start: 6, end: 8 }); -// assert_eq!(lead_fn.get_range(9, 10)?, Range { start: 9, end: 10 }); -// -// // LEAD(2 ignore nulls) -// let lead_fn = WindowShiftEvaluator { -// shift_offset: -2, -// default_value: ScalarValue::Null, -// ignore_nulls: true, -// // models data received [..., , NULL, , NULL, , ..] -// non_null_offsets: vec![2, 2].into(), -// }; -// assert_eq!(lead_fn.get_range(4, 10)?, Range { start: 4, end: 9 }); -// -// Ok(()) -// } -// -// #[test] -// fn lead_lag_window_shift() -> Result<()> { -// test_i32_result( -// lead( -// "lead".to_owned(), -// DataType::Int32, -// Arc::new(Column::new("c3", 0)), -// None, -// ScalarValue::Null.cast_to(&DataType::Int32)?, -// false, -// ), -// [ -// Some(-2), -// Some(3), -// Some(-4), -// Some(5), -// Some(-6), -// Some(7), -// Some(8), -// None, -// ] -// .iter() -// .collect::(), -// )?; -// -// test_i32_result( -// lag( -// "lead".to_owned(), -// DataType::Int32, -// Arc::new(Column::new("c3", 0)), -// None, -// ScalarValue::Null.cast_to(&DataType::Int32)?, -// false, -// ), -// [ -// None, -// Some(1), -// Some(-2), -// Some(3), -// Some(-4), -// Some(5), -// Some(-6), -// Some(7), -// ] -// .iter() -// .collect::(), -// )?; -// -// test_i32_result( -// lag( -// "lead".to_owned(), -// DataType::Int32, -// Arc::new(Column::new("c3", 0)), -// None, -// ScalarValue::Int32(Some(100)), -// false, -// ), -// [ -// Some(100), -// Some(1), -// Some(-2), -// Some(3), -// Some(-4), -// Some(5), -// Some(-6), -// Some(7), -// ] -// .iter() -// .collect::(), -// )?; -// Ok(()) -// } -// } +#[cfg(test)] +mod tests { + use super::*; + // use arrow::{array::*, datatypes::*}; + // use datafusion_common::cast::as_int32_array; + + // fn test_i32_result(expr: WindowShift, expected: Int32Array) -> Result<()> { + // let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); + // let values = vec![arr]; + // let schema = Schema::new(vec![Field::new("arr", DataType::Int32, false)]); + // let batch = RecordBatch::try_new(Arc::new(schema), values.clone())?; + // let values = expr.evaluate_args(&batch)?; + // let result = expr + // .create_evaluator()? + // .evaluate_all(&values, batch.num_rows())?; + // let result = as_int32_array(&result)?; + // assert_eq!(expected, *result); + // Ok(()) + // } + + #[test] + fn lead_lag_get_range() -> Result<()> { + // LAG(2) + let lag_fn = WindowShiftEvaluator { + shift_offset: 2, + default_value: ScalarValue::Null, + ignore_nulls: false, + non_null_offsets: Default::default(), + }; + assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 4, end: 7 }); + assert_eq!(lag_fn.get_range(0, 10)?, Range { start: 0, end: 1 }); + + // LAG(2 ignore nulls) + let lag_fn = WindowShiftEvaluator { + shift_offset: 2, + default_value: ScalarValue::Null, + ignore_nulls: true, + // models data received [, , , NULL, , NULL, , ...] + non_null_offsets: vec![2, 2].into(), // [1, 1, 2, 2] actually, just last 2 is used + }; + assert_eq!(lag_fn.get_range(6, 10)?, Range { start: 2, end: 7 }); + + // LEAD(2) + let lead_fn = WindowShiftEvaluator { + shift_offset: -2, + default_value: ScalarValue::Null, + ignore_nulls: false, + non_null_offsets: Default::default(), + }; + assert_eq!(lead_fn.get_range(6, 10)?, Range { start: 6, end: 8 }); + assert_eq!(lead_fn.get_range(9, 10)?, Range { start: 9, end: 10 }); + + // LEAD(2 ignore nulls) + let lead_fn = WindowShiftEvaluator { + shift_offset: -2, + default_value: ScalarValue::Null, + ignore_nulls: true, + // models data received [..., , NULL, , NULL, , ..] + non_null_offsets: vec![2, 2].into(), + }; + assert_eq!(lead_fn.get_range(4, 10)?, Range { start: 4, end: 9 }); + + Ok(()) + } + + // #[test] + // fn lead_lag_window_shift() -> Result<()> { + // test_i32_result( + // lead( + // "lead".to_owned(), + // DataType::Int32, + // Arc::new(Column::new("c3", 0)), + // None, + // ScalarValue::Null.cast_to(&DataType::Int32)?, + // false, + // ), + // [ + // Some(-2), + // Some(3), + // Some(-4), + // Some(5), + // Some(-6), + // Some(7), + // Some(8), + // None, + // ] + // .iter() + // .collect::(), + // )?; + // + // test_i32_result( + // lag( + // "lead".to_owned(), + // DataType::Int32, + // Arc::new(Column::new("c3", 0)), + // None, + // ScalarValue::Null.cast_to(&DataType::Int32)?, + // false, + // ), + // [ + // None, + // Some(1), + // Some(-2), + // Some(3), + // Some(-4), + // Some(5), + // Some(-6), + // Some(7), + // ] + // .iter() + // .collect::(), + // )?; + // + // test_i32_result( + // lag( + // "lead".to_owned(), + // DataType::Int32, + // Arc::new(Column::new("c3", 0)), + // None, + // ScalarValue::Int32(Some(100)), + // false, + // ), + // [ + // Some(100), + // Some(1), + // Some(-2), + // Some(3), + // Some(-4), + // Some(5), + // Some(-6), + // Some(7), + // ] + // .iter() + // .collect::(), + // )?; + // Ok(()) + // } +} From a0973f940291f55a531c6eced77f6a2a5a62929c Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 19:16:01 +0530 Subject: [PATCH 42/46] Fixes: clippy::get-first error --- datafusion/functions-window/src/lead_lag.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 5f1b9caa1c7b..fb005f6f97e3 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -157,7 +157,7 @@ impl WindowUDFImpl for WindowShift { })?; let return_type = partition_evaluator_args .input_types() - .get(0) + .first() .unwrap_or(&DataType::Null); let default_value = scalar_at(&partition_evaluator_args, 2) .and_then(|scalar| get_default_value(return_type, scalar))?; From 9ddf1c92967d76e2a0578ca95672561012f349fb Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 20:36:56 +0530 Subject: [PATCH 43/46] Rewrite unit tests for WindowUDF --- datafusion/functions-window/src/lead_lag.rs | 139 +++++++++++++++++--- 1 file changed, 123 insertions(+), 16 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index fb005f6f97e3..9e400afc5e00 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -498,22 +498,10 @@ impl PartitionEvaluator for WindowShiftEvaluator { #[cfg(test)] mod tests { use super::*; - // use arrow::{array::*, datatypes::*}; - // use datafusion_common::cast::as_int32_array; - - // fn test_i32_result(expr: WindowShift, expected: Int32Array) -> Result<()> { - // let arr: ArrayRef = Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); - // let values = vec![arr]; - // let schema = Schema::new(vec![Field::new("arr", DataType::Int32, false)]); - // let batch = RecordBatch::try_new(Arc::new(schema), values.clone())?; - // let values = expr.evaluate_args(&batch)?; - // let result = expr - // .create_evaluator()? - // .evaluate_all(&values, batch.num_rows())?; - // let result = as_int32_array(&result)?; - // assert_eq!(expected, *result); - // Ok(()) - // } + use arrow::array::*; + use datafusion_common::cast::as_int32_array; + use datafusion_physical_expr::expressions::{Column, Literal}; + use datafusion_physical_expr_common::physical_expr::PhysicalExpr; #[test] fn lead_lag_get_range() -> Result<()> { @@ -560,6 +548,125 @@ mod tests { Ok(()) } + #[test] + fn test_lead_window_shift() -> Result<()> { + let values: ArrayRef = + Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); + let num_rows = values.len(); + + let input_exprs: &[Arc] = &[Arc::new( + datafusion_physical_expr::expressions::Column::new("c3", 0), + )]; + let input_types: &[DataType] = &[DataType::Int32]; + + let actual = WindowShift::lead() + .partition_evaluator(PartitionEvaluatorArgs::new( + input_exprs, + input_types, + false, + false, + ))? + .evaluate_all(&[values], num_rows)?; + let actual = as_int32_array(actual.as_ref())?; + + let expected = [ + Some(-2), + Some(3), + Some(-4), + Some(5), + Some(-6), + Some(7), + Some(8), + None, + ] + .iter() + .collect::(); + + assert_eq!(expected, *actual); + + Ok(()) + } + + #[test] + fn test_lag_window_shift() -> Result<()> { + let values: ArrayRef = + Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); + let num_rows = values.len(); + + let input_exprs: &[Arc] = &[Arc::new( + datafusion_physical_expr::expressions::Column::new("c3", 0), + )]; + let input_types: &[DataType] = &[DataType::Int32]; + + let actual = WindowShift::lag() + .partition_evaluator(PartitionEvaluatorArgs::new( + input_exprs, + input_types, + false, + false, + ))? + .evaluate_all(&[values], num_rows)?; + let actual = as_int32_array(actual.as_ref())?; + + let expected = [ + None, + Some(1), + Some(-2), + Some(3), + Some(-4), + Some(5), + Some(-6), + Some(7), + ] + .iter() + .collect::(); + + assert_eq!(expected, *actual); + + Ok(()) + } + + #[test] + fn test_lag_with_default() -> Result<()> { + let values: ArrayRef = + Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); + let num_rows = values.len(); + + let first = Arc::new(Column::new("c3", 0)) as Arc; + let second = Arc::new(Literal::new(ScalarValue::Null)) as Arc; + let third = Arc::new(Literal::new(ScalarValue::Int32(Some(100)))) + as Arc; + let input_exprs = &[first, second, third]; + let input_types: &[DataType] = + &[DataType::Int32, DataType::Null, DataType::Int32]; + + let actual = WindowShift::lag() + .partition_evaluator(PartitionEvaluatorArgs::new( + input_exprs, + input_types, + false, + false, + ))? + .evaluate_all(&[values], num_rows)?; + let actual = as_int32_array(actual.as_ref())?; + + let expected = [ + Some(1), + Some(1), + Some(-2), + Some(3), + Some(-4), + Some(5), + Some(-6), + Some(7), + ] + .iter() + .collect::(); + + assert_eq!(expected, *actual); + + Ok(()) + } // #[test] // fn lead_lag_window_shift() -> Result<()> { // test_i32_result( From 3a084ed9209768400137df9e2958321a0f5466b3 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 22:27:14 +0530 Subject: [PATCH 44/46] Fixes: unit test for lag with default value --- datafusion/functions-window/src/lead_lag.rs | 79 ++------------------- 1 file changed, 4 insertions(+), 75 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 9e400afc5e00..afae50bc06ac 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -633,12 +633,13 @@ mod tests { let num_rows = values.len(); let first = Arc::new(Column::new("c3", 0)) as Arc; - let second = Arc::new(Literal::new(ScalarValue::Null)) as Arc; + let second = + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; let third = Arc::new(Literal::new(ScalarValue::Int32(Some(100)))) as Arc; let input_exprs = &[first, second, third]; let input_types: &[DataType] = - &[DataType::Int32, DataType::Null, DataType::Int32]; + &[DataType::Int32, DataType::Int32, DataType::Int32]; let actual = WindowShift::lag() .partition_evaluator(PartitionEvaluatorArgs::new( @@ -651,7 +652,7 @@ mod tests { let actual = as_int32_array(actual.as_ref())?; let expected = [ - Some(1), + Some(100), Some(1), Some(-2), Some(3), @@ -667,76 +668,4 @@ mod tests { Ok(()) } - // #[test] - // fn lead_lag_window_shift() -> Result<()> { - // test_i32_result( - // lead( - // "lead".to_owned(), - // DataType::Int32, - // Arc::new(Column::new("c3", 0)), - // None, - // ScalarValue::Null.cast_to(&DataType::Int32)?, - // false, - // ), - // [ - // Some(-2), - // Some(3), - // Some(-4), - // Some(5), - // Some(-6), - // Some(7), - // Some(8), - // None, - // ] - // .iter() - // .collect::(), - // )?; - // - // test_i32_result( - // lag( - // "lead".to_owned(), - // DataType::Int32, - // Arc::new(Column::new("c3", 0)), - // None, - // ScalarValue::Null.cast_to(&DataType::Int32)?, - // false, - // ), - // [ - // None, - // Some(1), - // Some(-2), - // Some(3), - // Some(-4), - // Some(5), - // Some(-6), - // Some(7), - // ] - // .iter() - // .collect::(), - // )?; - // - // test_i32_result( - // lag( - // "lead".to_owned(), - // DataType::Int32, - // Arc::new(Column::new("c3", 0)), - // None, - // ScalarValue::Int32(Some(100)), - // false, - // ), - // [ - // Some(100), - // Some(1), - // Some(-2), - // Some(3), - // Some(-4), - // Some(5), - // Some(-6), - // Some(7), - // ] - // .iter() - // .collect::(), - // )?; - // Ok(()) - // } } From 82abc5cd6d8ea7c5d3c8740526b7eab0aaaa4c57 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 22:44:12 +0530 Subject: [PATCH 45/46] Consistent input expressions and data types in unit tests --- datafusion/functions-window/src/lead_lag.rs | 22 +++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index afae50bc06ac..8fe8ff3fd809 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -554,10 +554,13 @@ mod tests { Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); let num_rows = values.len(); - let input_exprs: &[Arc] = &[Arc::new( - datafusion_physical_expr::expressions::Column::new("c3", 0), - )]; - let input_types: &[DataType] = &[DataType::Int32]; + let first = Arc::new(Column::new("c3", 0)) as Arc; + let second = + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; + let third = + Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) as Arc; + let input_exprs = &[first, second, third]; + let input_types: &[DataType] = &[DataType::Int32, DataType::Int32, DataType::Int32]; let actual = WindowShift::lead() .partition_evaluator(PartitionEvaluatorArgs::new( @@ -593,10 +596,13 @@ mod tests { Arc::new(Int32Array::from(vec![1, -2, 3, -4, 5, -6, 7, 8])); let num_rows = values.len(); - let input_exprs: &[Arc] = &[Arc::new( - datafusion_physical_expr::expressions::Column::new("c3", 0), - )]; - let input_types: &[DataType] = &[DataType::Int32]; + let first = Arc::new(Column::new("c3", 0)) as Arc; + let second = + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; + let third = + Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) as Arc; + let input_exprs = &[first, second, third]; + let input_types: &[DataType] = &[DataType::Int32, DataType::Int32, DataType::Int32]; let actual = WindowShift::lag() .partition_evaluator(PartitionEvaluatorArgs::new( From e648033535b3fa646cbc95982d272f0db659abc0 Mon Sep 17 00:00:00 2001 From: jcsherin Date: Thu, 10 Oct 2024 23:15:48 +0530 Subject: [PATCH 46/46] Minor: fixes formatting --- datafusion/functions-window/src/lead_lag.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/datafusion/functions-window/src/lead_lag.rs b/datafusion/functions-window/src/lead_lag.rs index 8fe8ff3fd809..ca19e7e09a08 100644 --- a/datafusion/functions-window/src/lead_lag.rs +++ b/datafusion/functions-window/src/lead_lag.rs @@ -555,12 +555,13 @@ mod tests { let num_rows = values.len(); let first = Arc::new(Column::new("c3", 0)) as Arc; - let second = - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; - let third = - Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) as Arc; + let second = Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) + as Arc; + let third = Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) + as Arc; let input_exprs = &[first, second, third]; - let input_types: &[DataType] = &[DataType::Int32, DataType::Int32, DataType::Int32]; + let input_types: &[DataType] = + &[DataType::Int32, DataType::Int32, DataType::Int32]; let actual = WindowShift::lead() .partition_evaluator(PartitionEvaluatorArgs::new( @@ -599,10 +600,11 @@ mod tests { let first = Arc::new(Column::new("c3", 0)) as Arc; let second = Arc::new(Literal::new(ScalarValue::Int32(Some(1)))) as Arc; - let third = - Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) as Arc; + let third = Arc::new(Literal::new(ScalarValue::try_from(&DataType::Int32)?)) + as Arc; let input_exprs = &[first, second, third]; - let input_types: &[DataType] = &[DataType::Int32, DataType::Int32, DataType::Int32]; + let input_types: &[DataType] = + &[DataType::Int32, DataType::Int32, DataType::Int32]; let actual = WindowShift::lag() .partition_evaluator(PartitionEvaluatorArgs::new(