refactor(python): deprecate "iterrows" in favour of "iter_rows"

pola-rs · Jan 26, 2023 · 7b245dc · 7b245dc
1 parent f2502de
commit 7b245dc
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 27 deletions.
diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst
@@ -27,8 +27,8 @@ Manipulation/selection
     DataFrame.insert_at_idx
     DataFrame.interpolate
     DataFrame.item
+    DataFrame.iter_rows
     DataFrame.iter_slices
-    DataFrame.iterrows
     DataFrame.join
     DataFrame.join_asof
     DataFrame.limit

diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py
@@ -84,6 +84,7 @@
     is_str_sequence,
     normalise_filepath,
     range_to_slice,
+    redirect,
     scale_bytes,
 )
 
@@ -145,6 +146,7 @@ def wrap_df(df: PyDataFrame) -> DataFrame:
     return DataFrame._from_pydf(df)
 
 
+@redirect({"iterrows": "iter_rows"})
 class DataFrame:
     """
     Two-dimensional data structure representing data as a table with rows and columns.
@@ -1840,7 +1842,7 @@ def to_dicts(self) -> list[dict[str, Any]]:
 
         """
         dict_, zip_, columns = dict, zip, self.columns
-        return [dict_(zip_(columns, row)) for row in self.iterrows()]
+        return [dict_(zip_(columns, row)) for row in self.iter_rows()]
 
     def to_numpy(self) -> np.ndarray[Any, Any]:
         """
@@ -6604,7 +6606,7 @@ def row(
         Warning
         -------
         You should NEVER use this method to iterate over a DataFrame; if you absolutely
-        require row-iteration you should strongly prefer ``iterrows()`` instead.
+        require row-iteration you should strongly prefer ``iter_rows()`` instead.
 
         Examples
         --------
@@ -6632,7 +6634,7 @@ def row(
 
         See Also
         --------
-        iterrows : Row iterator over frame data (does not materialise all rows).
+        iter_rows : Row iterator over frame data (does not materialise all rows).
         rows : Materialises all frame data as a list of rows.
 
         """
@@ -6720,7 +6722,7 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
 
         See Also
         --------
-        iterrows : Row iterator over frame data (does not materialise all rows).
+        iter_rows : Row iterator over frame data (does not materialise all rows).
 
         """
         if named:
@@ -6736,18 +6738,18 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
             return self._df.row_tuples()
 
     @overload
-    def iterrows(
+    def iter_rows(
         self, named: Literal[False] = ..., buffer_size: int = ...
     ) -> Iterator[tuple[Any, ...]]:
         ...
 
     @overload
-    def iterrows(
+    def iter_rows(
         self, named: Literal[True] = ..., buffer_size: int = ...
     ) -> Iterator[Any]:
         ...
 
-    def iterrows(
+    def iter_rows(
         self, named: bool = False, buffer_size: int = 500
     ) -> Iterator[tuple[Any, ...]] | Iterator[Any]:
         """
@@ -6784,9 +6786,9 @@ def iterrows(
         ...         "b": [2, 4, 6],
         ...     }
         ... )
-        >>> [row[0] for row in df.iterrows()]
+        >>> [row[0] for row in df.iter_rows()]
         [1, 3, 5]
-        >>> [row.b for row in df.iterrows(named=True)]
+        >>> [row.b for row in df.iter_rows(named=True)]
         [2, 4, 6]
 
         See Also
@@ -6866,7 +6868,7 @@ def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]:
 
         See Also
         --------
-        iterrows : Row iterator over frame data (does not materialise all rows).
+        iter_rows : Row iterator over frame data (does not materialise all rows).
         partition_by : Split into multiple DataFrames, partitioned by groups.
 
         """

diff --git a/py-polars/polars/internals/dataframe/groupby.py b/py-polars/polars/internals/dataframe/groupby.py
@@ -795,7 +795,7 @@ def __iter__(self) -> RollingGroupBy[DF]:
         if self.by is None:
             self._group_names = iter(group_names.to_series())
         else:
-            self._group_names = group_names.iterrows()
+            self._group_names = group_names.iter_rows()
 
         self._group_indices = groups_df.select(temp_col).to_series()
         self._current_index = 0
@@ -891,7 +891,7 @@ def __iter__(self) -> DynamicGroupBy[DF]:
         if self.by is None:
             self._group_names = iter(group_names.to_series())
         else:
-            self._group_names = group_names.iterrows()
+            self._group_names = group_names.iter_rows()
 
         self._group_indices = groups_df.select(temp_col).to_series()
         self._current_index = 0

diff --git a/py-polars/polars/utils.py b/py-polars/polars/utils.py
@@ -397,6 +397,33 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
     return deco
 
 
+def redirect(from_to: dict[str, str]) -> Callable[[type[T]], type[T]]:
+    """
+    Class decorator allowing deprecation/transition from one method name to another.
+
+    ``from_to`` maps old attribute names to new ones; the parameters must be the same
+    (if they are also being renamed, use this in conjunction with @deprecated_alias).
+    """
+
+    def _redirecting_getattr_(obj: T, item: Any) -> Any:
+        if isinstance(item, str) and item in from_to:
+            warnings.warn(
+                f"`{type(obj).__name__}.{item}` has been renamed and this"
+                f" redirect is temporary; please use `.{from_to[item]}` instead",
+                category=DeprecationWarning,
+                stacklevel=2,  # report the caller's line, not this helper
+            )
+            item = from_to[item]
+        return obj.__getattribute__(item)  # raises AttributeError if still unknown
+
+    def _cls_(cls: type[T]) -> type[T]:
+        # override __getattr__ as it will only be called if the item doesn't exist
+        cls.__getattr__ = _redirecting_getattr_  # type: ignore[attr-defined]
+        return cls
+
+    return _cls_
+
+
 def _rename_kwargs(
     func_name: str, kwargs: dict[str, object], aliases: dict[str, str]
 ) -> None:

diff --git a/py-polars/tests/unit/test_fmt.py b/py-polars/tests/unit/test_fmt.py
@@ -119,9 +119,11 @@ def test_duration_smallest_units() -> None:
 
 
 def test_fmt_float_full() -> None:
-    pl.Config.set_fmt_float("full")
-    assert (
-        str(pl.Series([1.2304980958725870923]))
-        == "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"
-    )
-    pl.Config.restore_defaults()
+    fmt_float_full = "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"
+    s = pl.Series([1.2304980958725870923])
+
+    with pl.Config() as cfg:  # the "full" float format is set only inside this block
+        cfg.set_fmt_float("full")
+        assert str(s) == fmt_float_full
+
+    assert str(s) != fmt_float_full  # after the block, the "full" rendering must not apply
diff --git a/py-polars/tests/unit/test_rows.py b/py-polars/tests/unit/test_rows.py
@@ -69,24 +69,26 @@ def test_iterrows() -> None:
     df = pl.DataFrame({"a": [1, 2, 3], "b": [None, False, None]})
 
     # Default iterrows behaviour
-    it = df.iterrows()
-    assert next(it) == (1, None)
-    assert next(it) == (2, False)
-    assert next(it) == (3, None)
-    with pytest.raises(StopIteration):
-        next(it)
+    # TODO: remove reference to deprecated "iterrows" once it is retired
+    for iter_method in ("iter_rows", "iterrows"):
+        it = getattr(df, iter_method)()
+        assert next(it) == (1, None)
+        assert next(it) == (2, False)
+        assert next(it) == (3, None)
+        with pytest.raises(StopIteration):
+            next(it)
 
     # Apply explicit row-buffer size
     for sz in (0, 1, 2, 3, 4):
-        it = df.iterrows(buffer_size=sz)
+        it = df.iter_rows(buffer_size=sz)
         assert next(it) == (1, None)
         assert next(it) == (2, False)
         assert next(it) == (3, None)
         with pytest.raises(StopIteration):
             next(it)
 
         # Return rows as namedtuples
-        it_named = df.iterrows(named=True, buffer_size=sz)
+        it_named = df.iter_rows(named=True, buffer_size=sz)
 
         row = next(it_named)
         assert row.a == 1