Skip to content

Commit

Permalink
Add support for get_group in GroupBy (#9070)
Browse files Browse the repository at this point in the history
This PR adds `get_group` functionality to `GroupBy`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: #9070
  • Loading branch information
galipremsagar authored Aug 31, 2021
1 parent 549bcb7 commit 1935a8a
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 0 deletions.
37 changes: 37 additions & 0 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,43 @@ def groups(self):
zip(group_names.to_pandas(), grouped_index._split(offsets[1:-1]))
)

def get_group(self, name, obj=None):
    """
    Return the rows that belong to the group called ``name``.

    Parameters
    ----------
    name : object
        Key identifying the group to extract.
    obj : DataFrame, default None
        Object the rows are taken from. When it is None, the
        object that ``groupby`` was called on (``self.obj``)
        is used instead.

    Returns
    -------
    group : same type as obj

    Notes
    -----
    The entry stored under ``name`` in ``self.groups`` is used to
    select from ``obj`` through ``.loc``.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]})
    >>> df
       X  Y
    0  A  1
    1  B  4
    2  A  3
    3  B  2
    >>> df.groupby("X").get_group("A")
       X  Y
    0  A  1
    2  A  3
    """
    # Fall back to the frame the groupby was built from when the
    # caller does not supply one.
    source = self.obj if obj is None else obj

    # NOTE(review): ``.loc`` selects by label, so this presumably relies
    # on ``obj`` carrying the same index labels as the grouped object --
    # confirm behaviour for duplicated or unrelated indexes.
    # NOTE(review): an unknown ``name`` presumably surfaces as whatever
    # ``self.groups[name]`` raises (likely KeyError) -- confirm.
    label_indexer = source.loc
    return label_indexer[self.groups[name]]

def size(self):
"""
Return the size of each group.
Expand Down
49 changes: 49 additions & 0 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2114,3 +2114,52 @@ def foo(x):
expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(foo)

assert_groupby_results_equal(expect, got)


@pytest.mark.parametrize(
    "pdf, group, name, obj",
    [
        # Group on the string column; rows come from the grouped frame.
        pytest.param(
            pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}),
            "X",
            "A",
            None,
        ),
        pytest.param(
            pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}),
            "X",
            "B",
            None,
        ),
        # Rows are taken from a separate frame passed as ``obj``.
        pytest.param(
            pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}),
            "X",
            "A",
            pd.DataFrame({"a": [1, 2, 4, 5, 10, 11]}),
        ),
        # Group on the integer column, again with a separate ``obj``.
        pytest.param(
            pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}),
            "Y",
            1,
            pd.DataFrame({"a": [1, 2, 4, 5, 10, 11]}),
        ),
        pytest.param(
            pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}),
            "Y",
            3,
            pd.DataFrame({"a": [1, 2, 0, 11]}),
        ),
    ],
)
def test_groupby_get_group(pdf, group, name, obj):
    """Check that cudf ``get_group`` matches pandas for the same inputs.

    ``pdf`` is grouped by ``group`` in both libraries and the group
    ``name`` is extracted; ``obj`` is either None or a pandas frame that
    is converted to cudf before being handed to the cudf call.
    """
    gdf = cudf.from_pandas(pdf)

    # Only pandas frames need converting; None is forwarded unchanged.
    gobj = cudf.from_pandas(obj) if isinstance(obj, pd.DataFrame) else obj

    pandas_result = pdf.groupby(group).get_group(name=name, obj=obj)
    cudf_result = gdf.groupby(group).get_group(name=name, obj=gobj)

    assert_groupby_results_equal(pandas_result, cudf_result)

0 comments on commit 1935a8a

Please sign in to comment.