diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index b5538a0f0a8..8347c2bd94e 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -201,11 +201,11 @@ def cumcount(self): cudf.Series( cudf.core.column.column_empty( len(self.obj), "int8", masked=False - ) + ), + index=self.obj.index, ) .groupby(self.grouping, sort=self._sort) .agg("cumcount") - .reset_index(drop=True) ) def rank( @@ -343,7 +343,6 @@ def agg(self, func): if not self._as_index: result = result.reset_index() - if libgroupby._is_all_scan_aggregate(normalized_aggs): # Scan aggregations return rows in original index order return self._mimic_pandas_order(result) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 99ee95a8f18..0750a36461b 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1116,13 +1116,15 @@ def test_groupby_size(): ) -def test_groupby_cumcount(): +@pytest.mark.parametrize("index", [None, [1, 2, 3, 4]]) +def test_groupby_cumcount(index): pdf = pd.DataFrame( { "a": [1, 1, 3, 4], "b": ["bob", "bob", "alice", "cooper"], "c": [1, 2, 3, 4], - } + }, + index=index, ) gdf = cudf.from_pandas(pdf) @@ -1138,7 +1140,7 @@ def test_groupby_cumcount(): check_dtype=False, ) - sr = pd.Series(range(len(pdf))) + sr = pd.Series(range(len(pdf)), index=index) assert_groupby_results_equal( pdf.groupby(sr).cumcount(), gdf.groupby(sr).cumcount(),