diff --git a/crates/polars-io/src/csv/read/options.rs b/crates/polars-io/src/csv/read/options.rs index 3741bd6d9e47..2764d085d093 100644 --- a/crates/polars-io/src/csv/read/options.rs +++ b/crates/polars-io/src/csv/read/options.rs @@ -133,17 +133,6 @@ pub(super) enum NullValuesCompiled { } impl NullValuesCompiled { - pub(super) fn apply_projection(&mut self, projections: &[usize]) { - if let Self::Columns(nv) = self { - let nv = projections - .iter() - .map(|i| std::mem::take(&mut nv[*i])) - .collect::>(); - - *self = NullValuesCompiled::Columns(nv); - } - } - /// # Safety /// /// The caller must ensure that `index` is in bounds diff --git a/crates/polars-io/src/csv/read/parser.rs b/crates/polars-io/src/csv/read/parser.rs index 29845c990621..eff02dd4f19e 100644 --- a/crates/polars-io/src/csv/read/parser.rs +++ b/crates/polars-io/src/csv/read/parser.rs @@ -511,7 +511,7 @@ pub(super) fn parse_lines( // SAFETY: // process fields is in bounds - add_null = unsafe { null_values.is_null(field, processed_fields) } + add_null = unsafe { null_values.is_null(field, idx as usize) } } if add_null { buf.add_null(!missing_is_null && field.is_empty()) diff --git a/crates/polars-io/src/csv/read/read_impl.rs b/crates/polars-io/src/csv/read/read_impl.rs index a44c2ba8c90e..5805d1898fdc 100644 --- a/crates/polars-io/src/csv/read/read_impl.rs +++ b/crates/polars-io/src/csv/read/read_impl.rs @@ -224,8 +224,8 @@ impl<'a> CoreReader<'a> { } } - // create a null value for every column - let mut null_values = null_values.map(|nv| nv.compile(&schema)).transpose()?; + // Create a null value for every column + let null_values = null_values.map(|nv| nv.compile(&schema)).transpose()?; if let Some(cols) = columns { let mut prj = Vec::with_capacity(cols.len()); @@ -233,12 +233,6 @@ impl<'a> CoreReader<'a> { let i = schema.try_index_of(&col)?; prj.push(i); } - - // update null values with projection - if let Some(nv) = null_values.as_mut() { - nv.apply_projection(&prj); - } - projection = Some(prj); } diff --git a/py-polars/tests/unit/io/test_lazy_csv.py b/py-polars/tests/unit/io/test_lazy_csv.py index 59bb84d72658..59e7291ea522 100644 --- a/py-polars/tests/unit/io/test_lazy_csv.py +++ b/py-polars/tests/unit/io/test_lazy_csv.py @@ -1,5 +1,6 @@ from __future__ import annotations +import tempfile from collections import OrderedDict from typing import TYPE_CHECKING @@ -285,3 +286,25 @@ def test_scan_empty_csv_with_row_index(tmp_path: Path) -> None: read = pl.scan_csv(file_path).with_row_index("idx") assert read.collect().schema == OrderedDict([("idx", pl.UInt32), ("a", pl.String)]) + + +@pytest.mark.write_disk() +def test_csv_null_values_with_projection_15515() -> None: + data = """IndCode,SireCode,BirthDate,Flag +ID00316,.,19940315, +""" + + with tempfile.NamedTemporaryFile() as f: + f.write(data.encode()) + f.seek(0) + + q = ( + pl.scan_csv(f.name, null_values={"SireCode": "."}) + .with_columns(pl.col("SireCode").alias("SireKey")) + .select("SireKey", "BirthDate") + ) + + assert q.collect().to_dict(as_series=False) == { + "SireKey": [None], + "BirthDate": [19940315], + }