rapidsai · rapids-bot · Jul 27, 2022 · May 18, 2022 · Jun 8, 2022 · Jun 8, 2022
@@ -21,7 +21,12 @@
 import cudf
 from cudf.utils.utils import _dask_cudf_nvtx_annotate
 
-SUPPORTED_AGGS = (
+CUMULATIVE_AGGS = (  # cumulative groupby methods
+    "cumsum",
+    "cumcount",
+)
+
+AGGS = (
     "count",
     "mean",
     "std",
@@ -34,6 +39,8 @@
     "last",
 )
 
+SUPPORTED_AGGS = AGGS + CUMULATIVE_AGGS  # AGGS first, then cumulative
+
 
 def _check_groupby_supported(func):
     """

@@ -11,12 +11,10 @@
 from cudf.core._compat import PANDAS_GE_120
 
 import dask_cudf
-from dask_cudf.groupby import SUPPORTED_AGGS, _aggs_supported
+from dask_cudf.groupby import AGGS, _aggs_supported, CUMULATIVE_AGGS
 
-
-@pytest.mark.parametrize("aggregation", SUPPORTED_AGGS)
-@pytest.mark.parametrize("series", [False, True])
-def test_groupby_basic(series, aggregation):
+@pytest.fixture
+def pdf():
     np.random.seed(0)
 
     # note that column name "x" is a substring of the groupby key;
@@ -28,6 +26,12 @@ def test_groupby_basic(series, aggregation):
             "y": np.random.normal(size=10000),
         }
     )
+    return pdf
+
+
+@pytest.mark.parametrize("aggregation", AGGS)
+@pytest.mark.parametrize("series", [False, True])
+def test_groupby_basic(series, aggregation, pdf):
 
     gdf = cudf.DataFrame.from_pandas(pdf)
     gdf_grouped = gdf.groupby("xx")
@@ -53,6 +57,18 @@ def test_groupby_basic(series, aggregation):
     else:
         dd.assert_eq(a, b)
 
+@pytest.mark.parametrize("aggregation", CUMULATIVE_AGGS)
+def test_groupby_cumulative(aggregation, pdf):  # cudf vs. dask_cudf
+    gdf = cudf.DataFrame.from_pandas(pdf)
+    gdf_grouped = gdf.groupby("xx")  # single-frame reference
+    ddf_grouped = dask_cudf.from_cudf(gdf, npartitions=5).groupby("xx")
+
+    a = getattr(gdf_grouped, aggregation)()  # expected (cudf)
+    b = getattr(ddf_grouped, aggregation)().compute()  # 5 partitions
+
+    dd.assert_eq(a, b)
+
+
 
 @pytest.mark.parametrize(
     "func",
@@ -679,7 +695,7 @@ def test_groupby_agg_redirect(aggregations):
     ],
 )
 def test_is_supported(arg, supported):
-    assert _aggs_supported(arg, SUPPORTED_AGGS) is supported
+    assert _aggs_supported(arg, AGGS) is supported  # the imported tuple
 
 
 def test_groupby_unique_lists():