From ab4fe922ae756e0679f86b4831b809719cfa3d2e Mon Sep 17 00:00:00 2001
From: Priyansh Agrawal
Date: Sat, 29 Jun 2024 10:11:09 +0100
Subject: [PATCH] feat: add DataFrame.rows

---
 docs/api-reference/dataframe.md    |  1 +
 narwhals/_pandas_like/dataframe.py |  8 ++++
 narwhals/dataframe.py              | 61 ++++++++++++++++++++++++++++++
 tests/frame/rows_test.py           |  4 +-
 4 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/docs/api-reference/dataframe.md b/docs/api-reference/dataframe.md
index 6165c7061..67520527a 100644
--- a/docs/api-reference/dataframe.md
+++ b/docs/api-reference/dataframe.md
@@ -20,6 +20,7 @@
         - null_count
         - pipe
         - rename
+        - rows
         - schema
         - select
         - shape
diff --git a/narwhals/_pandas_like/dataframe.py b/narwhals/_pandas_like/dataframe.py
index a06e9c850..f86ef2e2a 100644
--- a/narwhals/_pandas_like/dataframe.py
+++ b/narwhals/_pandas_like/dataframe.py
@@ -116,6 +116,14 @@ def __getitem__(self, item: str | range | slice) -> PandasSeries | PandasDataFra
     def columns(self) -> list[str]:
         return self._dataframe.columns.tolist()  # type: ignore[no-any-return]
 
+    def rows(
+        self, *, named: bool = False
+    ) -> list[tuple[Any, ...]] | list[dict[str, Any]]:
+        if not named:
+            return list(self._dataframe.itertuples(index=False, name=None))
+
+        return self._dataframe.to_dict("records")  # type: ignore[no-any-return]
+
     def iter_rows(
         self,
         *,
diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py
index d0056dbfd..8b3722b64 100644
--- a/narwhals/dataframe.py
+++ b/narwhals/dataframe.py
@@ -629,6 +629,67 @@ def columns(self) -> list[str]:
         """
         return super().columns
 
+    @overload
+    def rows(
+        self,
+        *,
+        named: Literal[False] = ...,
+    ) -> list[tuple[Any, ...]]: ...
+
+    @overload
+    def rows(
+        self,
+        *,
+        named: Literal[True],
+    ) -> list[dict[str, Any]]: ...
+
+    @overload
+    def rows(
+        self,
+        *,
+        named: bool,
+    ) -> list[tuple[Any, ...]] | list[dict[str, Any]]: ...
+
+    def rows(
+        self,
+        *,
+        named: bool = False,
+    ) -> list[tuple[Any, ...]] | list[dict[str, Any]]:
+        """
+        Returns all data in the DataFrame as a list of rows of python-native values.
+
+        Arguments:
+            named: By default, each row is returned as a tuple of values given
+                in the same order as the frame columns. Setting named=True will
+                return rows of dictionaries instead.
+
+        Examples:
+            >>> import pandas as pd
+            >>> import polars as pl
+            >>> import narwhals as nw
+            >>> df = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0], "ham": ["a", "b", "c"]}
+            >>> df_pd = pd.DataFrame(df)
+            >>> df_pl = pl.DataFrame(df)
+
+            We define a library agnostic function:
+
+            >>> def func(df_any, *, named):
+            ...     df = nw.from_native(df_any)
+            ...     return df.rows(named=named)
+
+            We can then pass either pandas or Polars to `func`:
+
+            >>> func(df_pd, named=False)
+            [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+            >>> func(df_pd, named=True)
+            [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+            >>> func(df_pl, named=False)
+            [(1, 6.0, 'a'), (2, 7.0, 'b'), (3, 8.0, 'c')]
+            >>> func(df_pl, named=True)
+            [{'foo': 1, 'bar': 6.0, 'ham': 'a'}, {'foo': 2, 'bar': 7.0, 'ham': 'b'}, {'foo': 3, 'bar': 8.0, 'ham': 'c'}]
+        """
+        return self._dataframe.rows(named=named)  # type: ignore[no-any-return]
+
     @overload
     def iter_rows(
         self, *, named: Literal[False], buffer_size: int = ...
diff --git a/tests/frame/rows_test.py b/tests/frame/rows_test.py
index 089f70dcd..0a0089fa8 100644
--- a/tests/frame/rows_test.py
+++ b/tests/frame/rows_test.py
@@ -40,6 +40,7 @@
 df_polars_na = pl.DataFrame({"a": [None, 3, 2], "b": [4, 4, 6], "z": [7.0, None, 9]})
 
 
+@pytest.mark.parametrize("method_name", ["iter_rows", "rows"])
 @pytest.mark.parametrize(
     "df_raw", [df_pandas, df_pandas_nullable, df_pandas_pyarrow, df_mpd, df_polars]
 )
@@ -59,6 +60,7 @@
 )
 @pytest.mark.filterwarnings("ignore::FutureWarning")
 def test_rows(
+    method_name: str,
     df_raw: Any,
     named: bool,  # noqa: FBT001
     expected: list[tuple[Any, ...]] | list[dict[str, Any]],
@@ -67,7 +69,7 @@ def test_rows(
     df = nw.DataFrame(df_raw)
 
     # WHEN
-    result = list(df.iter_rows(named=named))
+    result = list(getattr(df, method_name)(named=named))
 
     # THEN
     assert result == expected