Skip to content

Commit

Permalink
Remove deprecated Series.applymap. (#11031)
Browse files Browse the repository at this point in the history
This PR removes the deprecated `Series.applymap` function. This function does not exist in pandas. Users should switch to using `Series.apply`. (Note that `DataFrame.applymap` does exist in both pandas and cudf.) Deprecated in #10497.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller
  - Michael Wang (https://github.com/isVoid)

URL: #11031
  • Loading branch information
bdice authored Aug 26, 2022
1 parent 05a553b commit 48dc168
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 194 deletions.
134 changes: 9 additions & 125 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import inspect
import pickle
import textwrap
import warnings
from collections import abc
from shutil import get_terminal_size
from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union
Expand Down Expand Up @@ -1157,10 +1156,7 @@ def map(self, arg, na_action=None) -> "Series":
result.name = self.name
result.index = self.index
else:
# TODO: switch to `apply`
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
result = self.applymap(arg)
result = self.apply(arg)
return result

@_cudf_nvtx_annotate
Expand Down Expand Up @@ -2246,7 +2242,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
``apply`` relies on Numba to JIT compile ``func``.
Thus the allowed operations within ``func`` are limited to `those
supported by the CUDA Python Numba target
<https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html>`__.
<https://numba.readthedocs.io/en/stable/cuda/cudapysupported.html>`__.
For more information, see the `cuDF guide to user defined functions
<https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__.
Expand All @@ -2265,6 +2261,11 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
**kwargs
Not supported
Returns
-------
result : Series
The mask and index are preserved.
Notes
-----
UDFs are cached in memory to avoid recompilation. The first
Expand All @@ -2275,7 +2276,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
Examples
--------
Apply a basic function to a series
Apply a basic function to a series:
>>> sr = cudf.Series([1,2,3])
>>> def f(x):
... return x + 1
Expand Down Expand Up @@ -2333,124 +2335,6 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
result.name = self.name
return result

@_cudf_nvtx_annotate
def applymap(self, udf, out_dtype=None):
"""Apply an elementwise function to transform the values in the Column.
The user function is expected to take one argument and return the
result, which will be stored to the output Series. The function
cannot reference globals except for other simple scalar objects.
Parameters
----------
udf : function
Either a callable python function or a python function already
decorated by ``numba.cuda.jit`` for call on the GPU as a device
out_dtype : :class:`numpy.dtype`; optional
The dtype for use in the output.
Only used for ``numba.cuda.jit`` decorated udf.
By default, the result will have the same dtype as the source.
Returns
-------
result : Series
The mask and index are preserved.
Notes
-----
The supported Python features are listed in
https://numba.pydata.org/numba-doc/dev/cuda/cudapysupported.html
with these exceptions:
* Math functions in `cmath` are not supported since `libcudf` does not
have complex number support and output of `cmath` functions are most
likely complex numbers.
* These five functions in `math` are not supported since numba
generates multiple PTX functions from them
* math.sin()
* math.cos()
* math.tan()
* math.gamma()
* math.lgamma()
* Series with string dtypes are not supported in `applymap` method.
* Global variables need to be re-defined explicitly inside
the udf, as numba considers them to be compile-time constants
and there is no known way to obtain value of the global variable.
Examples
--------
Returning a Series of booleans using only a literal pattern.
>>> import cudf
>>> s = cudf.Series([1, 10, -10, 200, 100])
>>> s.applymap(lambda x: x)
0 1
1 10
2 -10
3 200
4 100
dtype: int64
>>> s.applymap(lambda x: x in [1, 100, 59])
0 True
1 False
2 False
3 False
4 True
dtype: bool
>>> s.applymap(lambda x: x ** 2)
0 1
1 100
2 100
3 40000
4 10000
dtype: int64
>>> s.applymap(lambda x: (x ** 2) + (x / 2))
0 1.5
1 105.0
2 95.0
3 40100.0
4 10050.0
dtype: float64
>>> def cube_function(a):
... return a ** 3
...
>>> s.applymap(cube_function)
0 1
1 1000
2 -1000
3 8000000
4 1000000
dtype: int64
>>> def custom_udf(x):
... if x > 0:
... return x + 5
... else:
... return x - 5
...
>>> s.applymap(custom_udf)
0 6
1 15
2 -15
3 205
4 105
dtype: int64
"""
warnings.warn(
"Series.applymap is deprecated and will be removed "
"in a future cuDF release. Use Series.apply instead.",
FutureWarning,
)
if not callable(udf):
raise ValueError("Input UDF must be a callable object.")
return self._from_data({self.name: self._unaryop(udf)}, self._index)

#
# Stats
#
Expand Down
30 changes: 26 additions & 4 deletions python/cudf/cudf/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,7 @@ def count(self):

def apply(self, func, *args, **kwargs):
"""
Counterpart of `pandas.core.window.Rolling.apply
<https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.window.rolling.Rolling.apply.html>`_.
Apply a custom aggregation function over the rolling window.
Parameters
----------
Expand All @@ -349,12 +348,35 @@ def apply(self, func, *args, **kwargs):
See Also
--------
cudf.Series.applymap : Apply an elementwise function to
cudf.Series.apply: Apply an elementwise function to
transform the values in the Column.
Notes
-----
See notes of the :meth:`cudf.Series.applymap`
The supported Python features are listed in
https://numba.readthedocs.io/en/stable/cuda/cudapysupported.html
with these exceptions:
* Math functions in `cmath` are not supported since `libcudf` does not
have complex number support and the output of `cmath` functions is most
likely a complex number.
* These five functions in `math` are not supported since numba
generates multiple PTX functions from them:
* math.sin()
* math.cos()
* math.tan()
* math.gamma()
* math.lgamma()
* Series with string dtypes are not supported.
* Global variables need to be re-defined explicitly inside
the udf, as numba considers them to be compile-time constants
and there is no known way to obtain the value of the global variable.
Examples
--------
Expand Down
56 changes: 1 addition & 55 deletions python/cudf/cudf/tests/test_applymap.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,11 @@
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

from itertools import product
from math import floor

import numpy as np
import pytest

from cudf import NA, DataFrame, Series
from cudf import NA, DataFrame
from cudf.testing import _utils as utils


@pytest.mark.parametrize(
"nelem,masked", list(product([2, 10, 100, 1000], [True, False]))
)
def test_applymap_round(nelem, masked):
# Generate data
np.random.seed(0)
data = np.random.random(nelem) * 100

if masked:
# Make mask
bitmask = utils.random_bitmask(nelem)
boolmask = np.asarray(
utils.expand_bits_to_bytes(bitmask), dtype=np.bool_
)[:nelem]
data[~boolmask] = None

sr = Series(data)

# Call applymap
with pytest.warns(FutureWarning):
out = sr.applymap(
lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x))
)

if masked:
# Fill masked values
out = out.fillna(np.nan)

# Check
expect = np.round(data)
got = out.to_numpy()
np.testing.assert_array_almost_equal(expect, got)


def test_applymap_change_out_dtype():
# Test for changing the out_dtype using applymap

data = list(range(10))

sr = Series(data)

with pytest.warns(FutureWarning):
out = sr.applymap(lambda x: float(x), out_dtype=float)

# Check
expect = np.array(data, dtype=float)
got = out.to_numpy()
np.testing.assert_array_equal(expect, got)


@pytest.mark.parametrize(
"data",
[
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_seriesmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_series_map_callable_numeric_random(nelem):
sr = Series(data)
pdsr = pd.Series(data)

# Call applymap
# Call map
got = sr.map(lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x)))
expect = pdsr.map(
lambda x: (floor(x) + 1 if x - floor(x) >= 0.5 else floor(x))
Expand All @@ -63,7 +63,7 @@ def test_series_map_callable_numeric_random(nelem):


def test_series_map_callable_numeric_random_dtype_change():
# Test for changing the out_dtype using applymap
# Test for changing the out_dtype using map

data = list(range(10))

Expand Down
5 changes: 2 additions & 3 deletions python/cudf/cudf/tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,13 @@ def _generic_function(a):
(lambda x: x in [1, 2, 3, 4], lambda ser: np.isin(ser, [1, 2, 3, 4])),
],
)
def test_applymap_python_lambda(dtype, udf, testfunc):
def test_apply_python_lambda(dtype, udf, testfunc):

size = 500

lhs_arr = np.random.random(size).astype(dtype)
lhs_ser = Series(lhs_arr)

with pytest.warns(FutureWarning):
out_ser = lhs_ser.applymap(udf)
out_ser = lhs_ser.apply(udf)
result = testfunc(lhs_arr)
np.testing.assert_almost_equal(result, out_ser.to_numpy())
17 changes: 12 additions & 5 deletions python/cudf/cudf/tests/test_udf_masked_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,15 @@ def func(row):
run_masked_udf_test(func, gdf, check_dtype=False)


def test_apply_contains():
def func(row):
x = row["a"]
return x in [1, 2]

gdf = cudf.DataFrame({"a": [1, 3]})
run_masked_udf_test(func, gdf, check_dtype=False)


@parametrize_numeric_dtypes_pairwise
@pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq])
def test_apply_mixed_dtypes(left_dtype, right_dtype, op):
Expand Down Expand Up @@ -647,18 +656,16 @@ def test_masked_udf_caching():
# recompile

data = cudf.Series([1, 2, 3])
expect = data**2
with pytest.warns(FutureWarning):
got = data.applymap(lambda x: x**2)

expect = data**2
got = data.apply(lambda x: x**2)
assert_eq(expect, got, check_dtype=False)

# update the constant value being used and make sure
# it does not result in a cache hit

expect = data**3
with pytest.warns(FutureWarning):
got = data.applymap(lambda x: x**3)
got = data.apply(lambda x: x**3)
assert_eq(expect, got, check_dtype=False)

# make sure we get a hit when reapplying
Expand Down

0 comments on commit 48dc168

Please sign in to comment.