diff --git a/crates/polars-lazy/src/physical_plan/expressions/aggregation.rs b/crates/polars-lazy/src/physical_plan/expressions/aggregation.rs index 5393d2a6a4a03..03894f1c43c22 100644 --- a/crates/polars-lazy/src/physical_plan/expressions/aggregation.rs +++ b/crates/polars-lazy/src/physical_plan/expressions/aggregation.rs @@ -174,35 +174,46 @@ impl PhysicalExpr for AggregationExpr { GroupsProxy::Idx(idx) => { let s = s.rechunk(); let array = &s.chunks()[0]; - let validity = array.validity().unwrap(); - - let out: IdxCa = idx - .iter() - .map(|(_, g)| { - let mut count = 0 as IdxSize; - // Count valid values - g.iter().for_each(|i| { - count += validity.get_bit_unchecked(*i as usize) - as IdxSize; - }); - count - }) - .collect_ca_trusted_with_dtype(&keep_name, IDX_DTYPE); + let out: IdxCa = if matches!(s.dtype(), &DataType::Null) { + IdxCa::full(s.name(), 0, idx.first().len()) + } else { + let validity = array.validity().unwrap(); + idx.iter() + .map(|(_, g)| { + let mut count = 0 as IdxSize; + // Count valid values + g.iter().for_each(|i| { + count += validity + .get_bit_unchecked(*i as usize) + as IdxSize; + }); + count + }) + .collect_ca_trusted_with_dtype( + &keep_name, IDX_DTYPE, + ) + }; AggregatedScalar(out.into_series()) }, GroupsProxy::Slice { groups, .. } => { - // Slice and use computed null count - let out: IdxCa = groups - .iter() - .map(|g| { - let start = g[0]; - let len = g[1]; - len - s - .slice(start as i64, len as usize) - .null_count() - as IdxSize - }) - .collect_ca_trusted_with_dtype(&keep_name, IDX_DTYPE); + let out: IdxCa = if matches!(s.dtype(), &DataType::Null) { + IdxCa::full(s.name(), 0, groups.len()) + } else { + // Slice and use computed null count + groups + .iter() + .map(|g| { + let start = g[0]; + let len = g[1]; + len - s + .slice(start as i64, len as usize) + .null_count() + as IdxSize + }) + .collect_ca_trusted_with_dtype( + &keep_name, IDX_DTYPE, + ) + }; AggregatedScalar(out.into_series()) }, } diff --git a/py-polars/tests/unit/operations/test_aggregations.py b/py-polars/tests/unit/operations/test_aggregations.py index 64ea9551dffe4..ca3153cdfc1b6 100644 --- a/py-polars/tests/unit/operations/test_aggregations.py +++ b/py-polars/tests/unit/operations/test_aggregations.py @@ -530,3 +530,11 @@ def test_horizontal_mean_in_groupby_15115() -> None: } ), ) + + +def test_group_count_over_null_column_15705() -> None: + df = pl.DataFrame( + {"a": [1, 1, 2, 2, 3, 3], "c": [None, None, None, None, None, None]} + ) + out = df.group_by("a", maintain_order=True).agg(pl.col("c").count()) + assert out["c"].to_list() == [0, 0, 0]