Remove deprecated Series.applymap. (#11031)

This PR removes the deprecated `Series.applymap` method. pandas has no `Series.applymap`, so users should switch to `Series.apply`. (Note that `DataFrame.applymap` does exist in both pandas and cudf.) `Series.applymap` was deprecated in #10497.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller
  - Michael Wang (https://github.com/isVoid)

URL: #11031
rapidsai · Aug 26, 2022 · 48dc168 · 48dc168
1 parent 05a553b
commit 48dc168
Show file tree

Hide file tree

Showing 6 changed files with 52 additions and 194 deletions.
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -6,7 +6,6 @@
 import inspect
 import pickle
 import textwrap
-import warnings
 from collections import abc
 from shutil import get_terminal_size
 from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union
@@ -1157,10 +1156,7 @@ def map(self, arg, na_action=None) -> "Series":
             result.name = self.name
             result.index = self.index
         else:
-            # TODO: switch to `apply`
-            with warnings.catch_warnings():
-                warnings.simplefilter("ignore", category=FutureWarning)
-                result = self.applymap(arg)
+            result = self.apply(arg)
         return result
 
     @_cudf_nvtx_annotate
@@ -2246,7 +2242,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         ``apply`` relies on Numba to JIT compile ``func``.
         Thus the allowed operations within ``func`` are limited to `those
         supported by the CUDA Python Numba target
-        <https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html>`__.
+        <https://numba.readthedocs.io/en/stable/cuda/cudapysupported.html>`__.
         For more information, see the `cuDF guide to user defined functions
         <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__.
 
@@ -2265,6 +2261,11 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         **kwargs
             Not supported
 
+        Returns
+        -------
+        result : Series
+            The mask and index are preserved.
+
         Notes
         -----
         UDFs are cached in memory to avoid recompilation. The first
@@ -2275,7 +2276,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
 
         Examples
         --------
-        Apply a basic function to a series
+        Apply a basic function to a series:
+
         >>> sr = cudf.Series([1,2,3])
         >>> def f(x):
         ...     return x + 1
@@ -2333,124 +2335,6 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         result.name = self.name
         return result
 
-    @_cudf_nvtx_annotate
-    def applymap(self, udf, out_dtype=None):
-        """Apply an elementwise function to transform the values in the Column.
-
-        The user function is expected to take one argument and return the
-        result, which will be stored to the output Series.  The function
-        cannot reference globals except for other simple scalar objects.
-
-        Parameters
-        ----------
-        udf : function
-            Either a callable python function or a python function already
-            decorated by ``numba.cuda.jit`` for call on the GPU as a device
-
-        out_dtype : :class:`numpy.dtype`; optional
-            The dtype for use in the output.
-            Only used for ``numba.cuda.jit`` decorated udf.
-            By default, the result will have the same dtype as the source.
-
-        Returns
-        -------
-        result : Series
-            The mask and index are preserved.
-
-        Notes
-        -----
-        The supported Python features are listed in
-
-          https://numba.pydata.org/numba-doc/dev/cuda/cudapysupported.html
-
-        with these exceptions:
-
-        * Math functions in `cmath` are not supported since `libcudf` does not
-          have complex number support and output of `cmath` functions are most
-          likely complex numbers.
-
-        * These five functions in `math` are not supported since numba
-          generates multiple PTX functions from them
-
-          * math.sin()
-          * math.cos()
-          * math.tan()
-          * math.gamma()
-          * math.lgamma()
-
-        * Series with string dtypes are not supported in `applymap` method.
-
-        * Global variables need to be re-defined explicitly inside
-          the udf, as numba considers them to be compile-time constants
-          and there is no known way to obtain value of the global variable.
-
-        Examples
-        --------
-        Returning a Series of booleans using only a literal pattern.
-
-        >>> import cudf
-        >>> s = cudf.Series([1, 10, -10, 200, 100])
-        >>> s.applymap(lambda x: x)
-        0      1
-        1     10
-        2    -10
-        3    200
-        4    100
-        dtype: int64
-        >>> s.applymap(lambda x: x in [1, 100, 59])
-        0     True
-        1    False
-        2    False
-        3    False
-        4     True
-        dtype: bool
-        >>> s.applymap(lambda x: x ** 2)
-        0        1
-        1      100
-        2      100
-        3    40000
-        4    10000
-        dtype: int64
-        >>> s.applymap(lambda x: (x ** 2) + (x / 2))
-        0        1.5
-        1      105.0
-        2       95.0
-        3    40100.0
-        4    10050.0
-        dtype: float64
-        >>> def cube_function(a):
-        ...     return a ** 3
-        ...
-        >>> s.applymap(cube_function)
-        0          1
-        1       1000
-        2      -1000
-        3    8000000
-        4    1000000
-        dtype: int64
-        >>> def custom_udf(x):
-        ...     if x > 0:
-        ...         return x + 5
-        ...     else:
-        ...         return x - 5
-        ...
-        >>> s.applymap(custom_udf)
-        0      6
-        1     15
-        2    -15
-        3    205
-        4    105
-        dtype: int64
-        """
-        warnings.warn(
-            "Series.applymap is deprecated and will be removed "
-            "in a future cuDF release. Use Series.apply instead.",
-            FutureWarning,
-        )
-        if not callable(udf):
-            raise ValueError("Input UDF must be a callable object.")
-        return self._from_data({self.name: self._unaryop(udf)}, self._index)
-
     #
     # Stats
     #

diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
@@ -335,8 +335,7 @@ def count(self):
 
     def apply(self, func, *args, **kwargs):
         """
-        Counterpart of `pandas.core.window.Rolling.apply
-        <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.window.rolling.Rolling.apply.html>`_.
+        Calculate the rolling custom aggregation function.
 
         Parameters
         ----------
@@ -349,12 +348,35 @@ def apply(self, func, *args, **kwargs):
 
         See Also
         --------
-        cudf.Series.applymap : Apply an elementwise function to
+        cudf.Series.apply: Apply an elementwise function to
             transform the values in the Column.
 
         Notes
         -----
-        See notes of the :meth:`cudf.Series.applymap`
+        The supported Python features are listed in
+
+        https://numba.readthedocs.io/en/stable/cuda/cudapysupported.html
+
+        with these exceptions:
+
+        * Math functions in `cmath` are not supported since `libcudf` does not
+          have complex number support and output of `cmath` functions are most
+          likely complex numbers.
+
+        * These five functions in `math` are not supported since numba
+          generates multiple PTX functions from them:
+
+          * math.sin()
+          * math.cos()
+          * math.tan()
+          * math.gamma()
+          * math.lgamma()
+
+        * Series with string dtypes are not supported.
+
+        * Global variables need to be re-defined explicitly inside
+          the udf, as numba considers them to be compile-time constants
+          and there is no known way to obtain value of the global variable.
 
         Examples
         --------

diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py
@@ -1,65 +1,11 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
-from itertools import product
-from math import floor
-
-import numpy as np
 import pytest
 
-from cudf import NA, DataFrame, Series
+from cudf import NA, DataFrame
 from cudf.testing import _utils as utils
 
 
-@pytest.mark.parametrize(
-    "nelem,masked", list(product([2, 10, 100, 1000], [True, False]))
-)
-def test_applymap_round(nelem, masked):
-    # Generate data
-    np.random.seed(0)
-    data = np.random.random(nelem) * 100
-
-    if masked:
-        # Make mask
-        bitmask = utils.random_bitmask(nelem)
-        boolmask = np.asarray(
-            utils.expand_bits_to_bytes(bitmask), dtype=np.bool_
-        )[:nelem]
-        data[~boolmask] = None
-
-    sr = Series(data)
-
-    # Call applymap
-    with pytest.warns(FutureWarning):
-        out = sr.applymap(
-            lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x))
-        )
-
-    if masked:
-        # Fill masked values
-        out = out.fillna(np.nan)
-
-    # Check
-    expect = np.round(data)
-    got = out.to_numpy()
-    np.testing.assert_array_almost_equal(expect, got)
-
-
-def test_applymap_change_out_dtype():
-    # Test for changing the out_dtype using applymap
-
-    data = list(range(10))
-
-    sr = Series(data)
-
-    with pytest.warns(FutureWarning):
-        out = sr.applymap(lambda x: float(x), out_dtype=float)
-
-    # Check
-    expect = np.array(data, dtype=float)
-    got = out.to_numpy()
-    np.testing.assert_array_equal(expect, got)
-
-
 @pytest.mark.parametrize(
     "data",
     [

diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py
@@ -52,7 +52,7 @@ def test_series_map_callable_numeric_random(nelem):
     sr = Series(data)
     pdsr = pd.Series(data)
 
-    # Call applymap
+    # Call map
     got = sr.map(lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x)))
     expect = pdsr.map(
         lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x))
@@ -63,7 +63,7 @@ def test_series_map_callable_numeric_random(nelem):
 
 
 def test_series_map_callable_numeric_random_dtype_change():
-    # Test for changing the out_dtype using applymap
+    # Test for changing the out_dtype using map
 
     data = list(range(10))
 

diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py
@@ -22,14 +22,13 @@ def _generic_function(a):
         (lambda x: x in [1, 2, 3, 4], lambda ser: np.isin(ser, [1, 2, 3, 4])),
     ],
 )
-def test_applymap_python_lambda(dtype, udf, testfunc):
+def test_apply_python_lambda(dtype, udf, testfunc):
 
     size = 500
 
     lhs_arr = np.random.random(size).astype(dtype)
     lhs_ser = Series(lhs_arr)
 
-    with pytest.warns(FutureWarning):
-        out_ser = lhs_ser.applymap(udf)
+    out_ser = lhs_ser.apply(udf)
     result = testfunc(lhs_arr)
     np.testing.assert_almost_equal(result, out_ser.to_numpy())
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -243,6 +243,15 @@ def func(row):
     run_masked_udf_test(func, gdf, check_dtype=False)
 
 
+def test_apply_contains():
+    def func(row):
+        x = row["a"]
+        return x in [1, 2]
+
+    gdf = cudf.DataFrame({"a": [1, 3]})
+    run_masked_udf_test(func, gdf, check_dtype=False)
+
+
 @parametrize_numeric_dtypes_pairwise
 @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq])
 def test_apply_mixed_dtypes(left_dtype, right_dtype, op):
@@ -647,18 +656,16 @@ def test_masked_udf_caching():
     # recompile
 
     data = cudf.Series([1, 2, 3])
-    expect = data**2
-    with pytest.warns(FutureWarning):
-        got = data.applymap(lambda x: x**2)
 
+    expect = data**2
+    got = data.apply(lambda x: x**2)
     assert_eq(expect, got, check_dtype=False)
 
     # update the constant value being used and make sure
     # it does not result in a cache hit
 
     expect = data**3
-    with pytest.warns(FutureWarning):
-        got = data.applymap(lambda x: x**3)
+    got = data.apply(lambda x: x**3)
     assert_eq(expect, got, check_dtype=False)
 
     # make sure we get a hit when reapplying