From 8cdbdb69ed8f8f657d07c7f41965677c150f7b84 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Tue, 31 Jan 2023 08:43:09 +0000 Subject: [PATCH] fix(python): don't convert "ns"-precision temporal types via pyarrow --- py-polars/polars/internals/dataframe/frame.py | 14 +++++++------- py-polars/tests/unit/test_datelike.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py index be93bf614f55f..4496b0e554a09 100644 --- a/py-polars/polars/internals/dataframe/frame.py +++ b/py-polars/polars/internals/dataframe/frame.py @@ -6771,18 +6771,13 @@ def iter_rows( Returns ------- - An iterator of tuples (default) or dictionaries of row values. + An iterator of tuples (default) or dictionaries of python row values. Warnings -------- Row iteration is not optimal as the underlying data is stored in columnar form; where possible, prefer export via one of the dedicated export/output methods. - Notes - ----- - If you are planning to materialise all frame data at once you should prefer - calling ``rows()``, which will be faster. - Examples -------- >>> df = pl.DataFrame( @@ -6809,7 +6804,12 @@ def iter_rows( if buffer_size: for offset in range(0, self.height, buffer_size): zerocopy_slice = self.slice(offset, buffer_size) - if named and _PYARROW_AVAILABLE: + if ( + named + and _PYARROW_AVAILABLE + # note: 'ns' precision instantiates values as pandas types - avoid + and not any((getattr(tp, "tu", None) == "ns") for tp in self.dtypes) + ): yield from zerocopy_slice.to_arrow().to_batches()[0].to_pylist() else: rows_chunk = zerocopy_slice.rows(named=False) diff --git a/py-polars/tests/unit/test_datelike.py b/py-polars/tests/unit/test_datelike.py index a92a31f7a9b7e..cdc2bd7e362e4 100644 --- a/py-polars/tests/unit/test_datelike.py +++ b/py-polars/tests/unit/test_datelike.py @@ -356,6 +356,25 @@ def test_timezone() -> None: assert s.cast(int).series_equal(tz_s.cast(int)) +def test_to_dicts() -> None: + now = datetime.now() + data = { + "a": now, + "b": now.date(), + "c": now.time(), + "d": timedelta(days=1, seconds=43200), + } + df = pl.DataFrame( + data, schema_overrides={"a": pl.Datetime("ns"), "d": pl.Duration("ns")} + ) + assert len(df) == 1 + + d = df.to_dicts()[0] + for col in data: + assert d[col] == data[col] + assert isinstance(d[col], type(data[col])) + + def test_to_list() -> None: s = pl.Series("date", [123543, 283478, 1243]).cast(pl.Date)