Skip to content

Commit

Permalink
refactor(python): deprecate "iterrows" in favour of "iter_rows"
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jan 26, 2023
1 parent f2502de commit 7b245dc
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ Manipulation/selection
DataFrame.insert_at_idx
DataFrame.interpolate
DataFrame.item
DataFrame.iter_rows
DataFrame.iter_slices
DataFrame.iterrows
DataFrame.join
DataFrame.join_asof
DataFrame.limit
Expand Down
22 changes: 12 additions & 10 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
is_str_sequence,
normalise_filepath,
range_to_slice,
redirect,
scale_bytes,
)

Expand Down Expand Up @@ -145,6 +146,7 @@ def wrap_df(df: PyDataFrame) -> DataFrame:
return DataFrame._from_pydf(df)


@redirect({"iterrows": "iter_rows"})
class DataFrame:
"""
Two-dimensional data structure representing data as a table with rows and columns.
Expand Down Expand Up @@ -1840,7 +1842,7 @@ def to_dicts(self) -> list[dict[str, Any]]:
"""
dict_, zip_, columns = dict, zip, self.columns
return [dict_(zip_(columns, row)) for row in self.iterrows()]
return [dict_(zip_(columns, row)) for row in self.iter_rows()]

def to_numpy(self) -> np.ndarray[Any, Any]:
"""
Expand Down Expand Up @@ -6604,7 +6606,7 @@ def row(
Warning
-------
You should NEVER use this method to iterate over a DataFrame; if you absolutely
require row-iteration you should strongly prefer ``iterrows()`` instead.
require row-iteration you should strongly prefer ``iter_rows()`` instead.
Examples
--------
Expand Down Expand Up @@ -6632,7 +6634,7 @@ def row(
See Also
--------
iterrows : Row iterator over frame data (does not materialise all rows).
iter_rows : Row iterator over frame data (does not materialise all rows).
rows : Materialises all frame data as a list of rows.
"""
Expand Down Expand Up @@ -6720,7 +6722,7 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
See Also
--------
iterrows : Row iterator over frame data (does not materialise all rows).
iter_rows : Row iterator over frame data (does not materialise all rows).
"""
if named:
Expand All @@ -6736,18 +6738,18 @@ def rows(self, named: bool = False) -> list[tuple[Any, ...]] | list[Any]:
return self._df.row_tuples()

@overload
def iterrows(
def iter_rows(
self, named: Literal[False] = ..., buffer_size: int = ...
) -> Iterator[tuple[Any, ...]]:
...

@overload
def iterrows(
def iter_rows(
self, named: Literal[True] = ..., buffer_size: int = ...
) -> Iterator[Any]:
...

def iterrows(
def iter_rows(
self, named: bool = False, buffer_size: int = 500
) -> Iterator[tuple[Any, ...]] | Iterator[Any]:
"""
Expand Down Expand Up @@ -6784,9 +6786,9 @@ def iterrows(
... "b": [2, 4, 6],
... }
... )
>>> [row[0] for row in df.iterrows()]
>>> [row[0] for row in df.iter_rows()]
[1, 3, 5]
>>> [row.b for row in df.iterrows(named=True)]
>>> [row.b for row in df.iter_rows(named=True)]
[2, 4, 6]
See Also
Expand Down Expand Up @@ -6866,7 +6868,7 @@ def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]:
See Also
--------
iterrows : Row iterator over frame data (does not materialise all rows).
iter_rows : Row iterator over frame data (does not materialise all rows).
partition_by : Split into multiple DataFrames, partitioned by groups.
"""
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/internals/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ def __iter__(self) -> RollingGroupBy[DF]:
if self.by is None:
self._group_names = iter(group_names.to_series())
else:
self._group_names = group_names.iterrows()
self._group_names = group_names.iter_rows()

self._group_indices = groups_df.select(temp_col).to_series()
self._current_index = 0
Expand Down Expand Up @@ -891,7 +891,7 @@ def __iter__(self) -> DynamicGroupBy[DF]:
if self.by is None:
self._group_names = iter(group_names.to_series())
else:
self._group_names = group_names.iterrows()
self._group_names = group_names.iter_rows()

self._group_indices = groups_df.select(temp_col).to_series()
self._current_index = 0
Expand Down
27 changes: 27 additions & 0 deletions py-polars/polars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,33 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
return deco


def redirect(from_to: dict[str, str]) -> Callable[[type[T]], type[T]]:
    """
    Class decorator allowing deprecation/transition from one method name to another.

    The parameters must be the same (unless they are being renamed, in
    which case you can use this in conjunction with @deprecated_alias).

    Parameters
    ----------
    from_to
        Mapping of ``{old_attribute_name: new_attribute_name}``. Accessing an
        old name on an instance emits a ``DeprecationWarning`` and returns the
        attribute found under the new name.

    Returns
    -------
    A decorator that installs a redirecting ``__getattr__`` on the class and
    returns that same class object.

    Notes
    -----
    ``__getattr__`` is only consulted after normal attribute lookup has failed,
    so attributes that actually exist on the class are never intercepted.

    """

    def _cls_(cls: type[T]) -> type[T]:
        # Remember any ``__getattr__`` the class already has (its own, an
        # inherited one, or one installed by an earlier ``@redirect``) so that
        # installing this redirect does not silently discard it.
        fallback = getattr(cls, "__getattr__", None)

        def _redirecting_getattr_(obj: T, item: Any) -> Any:
            if isinstance(item, str) and item in from_to:
                new_item = from_to[item]
                warnings.warn(
                    f"`{type(obj).__name__}.{item}` has been renamed and this"
                    f" redirect is temporary; please use `.{new_item}` instead",
                    category=DeprecationWarning,
                    # attribute the warning to the code that used the old name,
                    # not to this helper
                    stacklevel=2,
                )
                # go through __getattribute__ directly: if the new name is
                # missing this raises AttributeError without re-entering
                # __getattr__
                return obj.__getattribute__(new_item)
            if fallback is not None:
                return fallback(obj, item)
            # not a redirected name; re-raise the usual AttributeError
            return obj.__getattribute__(item)

        # override __getattr__ as it will only be called if the item doesn't exist
        cls.__getattr__ = _redirecting_getattr_  # type: ignore[attr-defined]
        return cls

    return _cls_


def _rename_kwargs(
func_name: str, kwargs: dict[str, object], aliases: dict[str, str]
) -> None:
Expand Down
14 changes: 8 additions & 6 deletions py-polars/tests/unit/test_fmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,11 @@ def test_duration_smallest_units() -> None:


def test_fmt_float_full() -> None:
pl.Config.set_fmt_float("full")
assert (
str(pl.Series([1.2304980958725870923]))
== "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"
)
pl.Config.restore_defaults()
fmt_float_full = "shape: (1,)\nSeries: '' [f64]\n[\n\t1.230498095872587\n]"
s = pl.Series([1.2304980958725870923])

with pl.Config() as cfg:
cfg.set_fmt_float("full")
assert str(s) == fmt_float_full

assert str(s) != fmt_float_full
18 changes: 10 additions & 8 deletions py-polars/tests/unit/test_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,26 @@ def test_iterrows() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [None, False, None]})

# Default iterrows behaviour
it = df.iterrows()
assert next(it) == (1, None)
assert next(it) == (2, False)
assert next(it) == (3, None)
with pytest.raises(StopIteration):
next(it)
# TODO: remove reference to deprecated "iterrows" once it is retired
for iter_method in ("iter_rows", "iterrows"):
it = getattr(df, iter_method)()
assert next(it) == (1, None)
assert next(it) == (2, False)
assert next(it) == (3, None)
with pytest.raises(StopIteration):
next(it)

# Apply explicit row-buffer size
for sz in (0, 1, 2, 3, 4):
it = df.iterrows(buffer_size=sz)
it = df.iter_rows(buffer_size=sz)
assert next(it) == (1, None)
assert next(it) == (2, False)
assert next(it) == (3, None)
with pytest.raises(StopIteration):
next(it)

# Return rows as namedtuples
it_named = df.iterrows(named=True, buffer_size=sz)
it_named = df.iter_rows(named=True, buffer_size=sz)

row = next(it_named)
assert row.a == 1
Expand Down

0 comments on commit 7b245dc

Please sign in to comment.