From 63a66bd017c87f6bcec129b2b306f7c2682865d9 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 26 Jan 2023 19:26:54 +0900 Subject: [PATCH] refactor(python): deprecate "iterrows" in favour of "iter_rows" --- .../reference/dataframe/modify_select.rst | 2 +- py-polars/polars/internals/dataframe/frame.py | 22 +++++++++------- .../polars/internals/dataframe/groupby.py | 4 +-- py-polars/polars/utils.py | 26 +++++++++++++++++++ py-polars/tests/unit/test_fmt.py | 14 +++++----- py-polars/tests/unit/test_rows.py | 18 +++++++------ 6 files changed, 59 insertions(+), 27 deletions(-) diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst index 93d1501174e3b..591cbf7a45c88 100644 --- a/py-polars/docs/source/reference/dataframe/modify_select.rst +++ b/py-polars/docs/source/reference/dataframe/modify_select.rst @@ -27,8 +27,8 @@ Manipulation/selection DataFrame.insert_at_idx DataFrame.interpolate DataFrame.item + DataFrame.iter_rows DataFrame.iter_slices - DataFrame.iterrows DataFrame.join DataFrame.join_asof DataFrame.limit diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py index e1301471c8deb..b8f450a2a5185 100644 --- a/py-polars/polars/internals/dataframe/frame.py +++ b/py-polars/polars/internals/dataframe/frame.py @@ -84,6 +84,7 @@ is_str_sequence, normalise_filepath, range_to_slice, + renamed_methods, scale_bytes, ) @@ -145,6 +146,7 @@ def wrap_df(df: PyDataFrame) -> DataFrame: return DataFrame._from_pydf(df) +@renamed_methods({"iterrows": "iter_rows"}) class DataFrame: """ Two-dimensional data structure representing data as a table with rows and columns. @@ -1840,7 +1842,7 @@ def to_dicts(self) -> list[dict[str, Any]]: """ dict_, zip_, columns = dict, zip, self.columns - return [dict_(zip_(columns, row)) for row in self.iterrows()] + return [dict_(zip_(columns, row)) for row in self.iter_rows()] def to_numpy(self) -> np.ndarray[Any, Any]: """ @@ -6604,7 +6606,7 @@ def row( Warning ------- You should NEVER use this method to iterate over a DataFrame; if you absolutely - require row-iteration you should strongly prefer ``iterrows()`` instead. + require row-iteration you should strongly prefer ``iter_rows()`` instead. Examples -------- @@ -6632,7 +6634,7 @@ def row( See Also -------- - iterrows : Row iterator over frame data (does not materialise all rows). + iter_rows : Row iterator over frame data (does not materialise all rows). rows : Materialises all frame data as a list of rows. """ @@ -6720,7 +6722,7 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]: See Also -------- - iterrows : Row iterator over frame data (does not materialise all rows). + iter_rows : Row iterator over frame data (does not materialise all rows). """ if named: @@ -6736,18 +6738,18 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]: return self._df.row_tuples() @overload - def iterrows( + def iter_rows( self, named: Literal[False] = ..., buffer_size: int = ... ) -> Iterator[tuple[Any, ...]]: ... @overload - def iterrows( + def iter_rows( self, named: Literal[True] = ..., buffer_size: int = ... ) -> Iterator[Any]: ... - def iterrows( + def iter_rows( self, named: bool = False, buffer_size: int = 500 ) -> Iterator[tuple[Any, ...]] | Iterator[Any]: """ @@ -6784,9 +6786,9 @@ def iterrows( ... "b": [2, 4, 6], ... } ... ) - >>> [row[0] for row in df.iterrows()] + >>> [row[0] for row in df.iter_rows()] [1, 3, 5] - >>> [row.b for row in df.iterrows(named=True)] + >>> [row.b for row in df.iter_rows(named=True)] [2, 4, 6] See Also @@ -6866,7 +6868,7 @@ def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]: See Also -------- - iterrows : Row iterator over frame data (does not materialise all rows). + iter_rows : Row iterator over frame data (does not materialise all rows). partition_by : Split into multiple DataFrames, partitioned by groups. """ diff --git a/py-polars/polars/internals/dataframe/groupby.py b/py-polars/polars/internals/dataframe/groupby.py index 4501b4ab0a54f..35287d4f2e2e9 100644 --- a/py-polars/polars/internals/dataframe/groupby.py +++ b/py-polars/polars/internals/dataframe/groupby.py @@ -795,7 +795,7 @@ def __iter__(self) -> RollingGroupBy[DF]: if self.by is None: self._group_names = iter(group_names.to_series()) else: - self._group_names = group_names.iterrows() + self._group_names = group_names.iter_rows() self._group_indices = groups_df.select(temp_col).to_series() self._current_index = 0 @@ -891,7 +891,7 @@ def __iter__(self) -> DynamicGroupBy[DF]: if self.by is None: self._group_names = iter(group_names.to_series()) else: - self._group_names = group_names.iterrows() + self._group_names = group_names.iter_rows() self._group_indices = groups_df.select(temp_col).to_series() self._current_index = 0 diff --git a/py-polars/polars/utils.py b/py-polars/polars/utils.py index 805f644b8ee81..1baea512892e3 100644 --- a/py-polars/polars/utils.py +++ b/py-polars/polars/utils.py @@ -397,6 +397,32 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: return deco +def renamed_methods(old_new: dict[str, str | None]) -> Callable[[type[T]], type[T]]: + """ + Class decorator allowing deprecation/transition from one method name to another. + + The parameters must be the same (unless they are being renamed, in + which case you can use this in conjunction with @deprecated_alias). + """ + + def _redirecting_getattr_(obj: T, item: Any) -> Any: + if isinstance(item, str) and item in old_new: + redirect = old_new[item] + warnings.warn( + f"`{type(obj).__name__}.{item}` has been renamed and this" + f" redirect is temporary; please use `.{redirect}` instead", + category=DeprecationWarning, + ) + item = redirect + return getattr(obj, item) + + def _cls_(cls: type[T]) -> type[T]: + cls.__getattr__ = _redirecting_getattr_ # type: ignore[attr-defined] + return cls + + return _cls_ + + def _rename_kwargs( func_name: str, kwargs: dict[str, object], aliases: dict[str, str] ) -> None: diff --git a/py-polars/tests/unit/test_fmt.py b/py-polars/tests/unit/test_fmt.py index 527f937400964..9b7941983db20 100644 --- a/py-polars/tests/unit/test_fmt.py +++ b/py-polars/tests/unit/test_fmt.py @@ -119,9 +119,11 @@ def test_duration_smallest_units() -> None: def test_fmt_float_full() -> None: - pl.Config.set_fmt_float("full") - assert ( - str(pl.Series([1.2304980958725870923])) - == "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]" - ) - pl.Config.restore_defaults() + fmt_float_full = "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]" + s = pl.Series([1.2304980958725870923]) + + with pl.Config() as cfg: + cfg.set_fmt_float("full") + assert str(s) == fmt_float_full + + assert str(s) != fmt_float_full diff --git a/py-polars/tests/unit/test_rows.py b/py-polars/tests/unit/test_rows.py index fffb1098bc405..95e17371aad8f 100644 --- a/py-polars/tests/unit/test_rows.py +++ b/py-polars/tests/unit/test_rows.py @@ -69,16 +69,18 @@ def test_iterrows() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": [None, False, None]}) # Default iterrows behaviour - it = df.iterrows() - assert next(it) == (1, None) - assert next(it) == (2, False) - assert next(it) == (3, None) - with pytest.raises(StopIteration): - next(it) + # TODO: remove reference to deprecated "iterrows" once it is retired + for iter_method in ("iter_rows", "iterrows"): + it = getattr(df, iter_method)() + assert next(it) == (1, None) + assert next(it) == (2, False) + assert next(it) == (3, None) + with pytest.raises(StopIteration): + next(it) # Apply explicit row-buffer size for sz in (0, 1, 2, 3, 4): - it = df.iterrows(buffer_size=sz) + it = df.iter_rows(buffer_size=sz) assert next(it) == (1, None) assert next(it) == (2, False) assert next(it) == (3, None) @@ -86,7 +88,7 @@ def test_iterrows() -> None: next(it) # Return rows as namedtuples - it_named = df.iterrows(named=True, buffer_size=sz) + it_named = df.iter_rows(named=True, buffer_size=sz) row = next(it_named) assert row.a == 1