pola-rs · ritchie46 · Jan 25, 2023 · Jan 25, 2023 · Jan 25, 2023 · Jan 25, 2023
diff --git a/polars/polars-lazy/src/dsl/eval.rs b/polars/polars-lazy/src/dsl/eval.rs
@@ -6,6 +6,37 @@ use crate::physical_plan::planner::create_physical_expr;
 use crate::physical_plan::state::ExecutionState;
 use crate::prelude::*;
 
+pub(crate) fn eval_field_to_dtype(f: &Field, expr: &Expr, list: bool) -> Field {
+    // dummy df to determine output dtype
+    let dtype = f
+        .data_type()
+        .inner_dtype()
+        .cloned()
+        .unwrap_or_else(|| f.data_type().clone());
+
+    let df = Series::new_empty("", &dtype).into_frame();
+
+    #[cfg(feature = "python")]
+    let out = {
+        use pyo3::Python;
+        Python::with_gil(|py| py.allow_threads(|| df.lazy().select([expr.clone()]).collect()))
+    };
+    #[cfg(not(feature = "python"))]
+    let out = { df.lazy().select([expr.clone()]).collect() };
+
+    match out {
+        Ok(out) => {
+            let dtype = out.get_columns()[0].dtype();
+            if list {
+                Field::new(f.name(), DataType::List(Box::new(dtype.clone())))
+            } else {
+                Field::new(f.name(), dtype.clone())
+            }
+        }
+        Err(_) => Field::new(f.name(), DataType::Null),
+    }
+}
+
 pub trait ExprEvalExtension: IntoExpr + Sized {
     /// Run an expression over a sliding window that increases `1` slot every iteration.
     ///
@@ -18,6 +49,10 @@ pub trait ExprEvalExtension: IntoExpr + Sized {
         let func = move |mut s: Series| {
             let name = s.name().to_string();
             s.rename("");
+
+            // ensure we get the new schema
+            let output_field = eval_field_to_dtype(s.field().as_ref(), &expr, false);
+
             let expr = expr.clone();
             let mut arena = Arena::with_capacity(10);
             let aexpr = to_aexpr(expr, &mut arena);
@@ -70,39 +105,18 @@ pub trait ExprEvalExtension: IntoExpr + Sized {
                     })
                     .collect::<PolarsResult<Vec<_>>>()?
             };
-            Ok(Series::new(&name, avs))
+            let s = Series::new(&name, avs);
+
+            if s.dtype() != output_field.data_type() {
+                s.cast(output_field.data_type())
+            } else {
+                Ok(s)
+            }
         };
 
         this.apply(
             func,
-            GetOutput::map_field(move |f| {
-                // dummy df to determine output dtype
-                let dtype = f
-                    .data_type()
-                    .inner_dtype()
-                    .cloned()
-                    .unwrap_or_else(|| f.data_type().clone());
-
-                let df = Series::new_empty("", &dtype).into_frame();
-
-                #[cfg(feature = "python")]
-                let out = {
-                    use pyo3::Python;
-                    Python::with_gil(|py| {
-                        py.allow_threads(|| df.lazy().select([expr2.clone()]).collect())
-                    })
-                };
-                #[cfg(not(feature = "python"))]
-                let out = { df.lazy().select([expr2.clone()]).collect() };
-
-                match out {
-                    Ok(out) => {
-                        let dtype = out.get_columns()[0].dtype();
-                        Field::new(f.name(), dtype.clone())
-                    }
-                    Err(_) => Field::new(f.name(), DataType::Null),
-                }
-            }),
+            GetOutput::map_field(move |f| eval_field_to_dtype(f, &expr2, false)),
         )
         .with_fmt("expanding_eval")
     }

diff --git a/polars/polars-lazy/src/dsl/list.rs b/polars/polars-lazy/src/dsl/list.rs
@@ -54,10 +54,10 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized {
             }
 
             let lst = s.list()?;
+            // ensure we get the new schema
+            let output_field = eval_field_to_dtype(lst.ref_field(), &expr, true);
             if lst.is_empty() {
-                // ensure we get the new schema
-                let fld = field_to_dtype(lst.ref_field(), &expr);
-                return Ok(Series::new_empty(s.name(), fld.data_type()));
+                return Ok(Series::new_empty(s.name(), output_field.data_type()));
             }
 
             let phys_expr =
@@ -109,47 +109,23 @@ pub trait ListNameSpaceExtension: IntoListNameSpace + Sized {
 
             ca.rename(s.name());
 
-            match err {
-                None => Ok(ca.into_series()),
-                Some(e) => Err(e),
+            if ca.dtype() != output_field.data_type() {
+                ca.cast(output_field.data_type())
+            } else {
+                match err {
+                    None => Ok(ca.into_series()),
+                    Some(e) => Err(e),
+                }
             }
         };
 
         this.0
             .map(
                 func,
-                GetOutput::map_field(move |f| field_to_dtype(f, &expr2)),
+                GetOutput::map_field(move |f| eval_field_to_dtype(f, &expr2, true)),
             )
             .with_fmt("eval")
     }
 }
 
-#[cfg(feature = "list_eval")]
-fn field_to_dtype(f: &Field, expr: &Expr) -> Field {
-    // dummy df to determine output dtype
-    let dtype = f
-        .data_type()
-        .inner_dtype()
-        .cloned()
-        .unwrap_or_else(|| f.data_type().clone());
-
-    let df = Series::new_empty("", &dtype).into_frame();
-
-    #[cfg(feature = "python")]
-    let out = {
-        use pyo3::Python;
-        Python::with_gil(|py| py.allow_threads(|| df.lazy().select([expr.clone()]).collect()))
-    };
-    #[cfg(not(feature = "python"))]
-    let out = { df.lazy().select([expr.clone()]).collect() };
-
-    match out {
-        Ok(out) => {
-            let dtype = out.get_columns()[0].dtype();
-            Field::new(f.name(), DataType::List(Box::new(dtype.clone())))
-        }
-        Err(_) => Field::new(f.name(), DataType::Null),
-    }
-}
-
 impl ListNameSpaceExtension for ListNameSpace {}
diff --git a/polars/polars-lazy/src/dsl/mod.rs b/polars/polars-lazy/src/dsl/mod.rs
@@ -1,14 +1,14 @@
 //! Domain specific language for the Lazy api.
-#[cfg(feature = "cumulative_eval")]
+#[cfg(any(feature = "cumulative_eval", feature = "list_eval"))]
 mod eval;
 pub mod functions;
 mod into;
 mod list;
 
-#[cfg(feature = "cumulative_eval")]
+#[cfg(any(feature = "cumulative_eval", feature = "list_eval"))]
 pub use eval::*;
 pub use functions::*;
-#[cfg(feature = "cumulative_eval")]
+#[cfg(any(feature = "cumulative_eval", feature = "list_eval"))]
 use into::IntoExpr;
 pub use list::*;
 pub use polars_plan::dsl::*;

diff --git a/py-polars/tests/unit/test_window.py b/py-polars/tests/unit/test_window.py
@@ -158,6 +158,12 @@ def test_cumulative_eval_window_functions() -> None:
         "cumulative_eval_max": [20, 40, 40, 2, 4, 4],
     }
 
+    # 6394
+    df = pl.DataFrame({"group": [1, 1, 2, 3], "value": [1, None, 3, None]})
+    assert df.select(
+        pl.col("value").cumulative_eval(pl.element().mean()).over("group")
+    ).to_dict(False) == {"value": [1.0, 1.0, 3.0, None]}
+
 
 def test_count_window() -> None:
     assert (