Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix warnings in test_categorical.py. #10354

Merged
merged 8 commits into from
Feb 25, 2022
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import pandas as pd
from packaging import version
Expand All @@ -9,4 +9,5 @@
PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
PANDAS_GE_130 = PANDAS_VERSION >= version.parse("1.3.0")
PANDAS_GE_134 = PANDAS_VERSION >= version.parse("1.3.4")
PANDAS_LT_140 = PANDAS_VERSION < version.parse("1.4.0")
199 changes: 117 additions & 82 deletions python/cudf/cudf/tests/test_categorical.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,48 @@
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.

import operator
import string
import warnings
from contextlib import contextmanager
from textwrap import dedent

import numpy as np
import pandas as pd
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_110
from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_134
from cudf.testing._utils import (
NUMERIC_TYPES,
assert_eq,
assert_exceptions_equal,
)


@contextmanager
def _hide_deprecated_pandas_categorical_inplace_warnings(function_name):
    """Silence the pandas ``inplace`` deprecation warning for one method.

    While the context is active, ``FutureWarning``s whose text starts with
    "The `inplace` parameter in pandas.Categorical.<function_name> is
    deprecated and will be removed in a future version." are ignored.
    All other warnings are left untouched, and the previous warning
    filters are restored on exit.

    Parameters
    ----------
    function_name : str
        Name of the ``pandas.Categorical`` method that appears in the
        warning text (for example ``"add_categories"``).
    """
    import re  # local import: only this helper needs it

    # ``warnings.filterwarnings`` interprets ``message`` as a regular
    # expression matched against the start of the warning text. Escape it
    # so the dots and ``function_name`` are matched literally rather than
    # as regex metacharacters.
    message = re.escape(
        "The `inplace` parameter in "
        f"pandas.Categorical.{function_name} is deprecated and will "
        "be removed in a future version."
    )
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message, category=FutureWarning)
        yield


@contextmanager
def _hide_cudf_safe_casting_warning():
    """Ignore "Can't safely cast column" ``UserWarning``s inside the context.

    Only ``UserWarning``s whose text starts with that phrase are filtered.
    The warning filters that were active before entering are restored on
    exit.
    """
    filter_scope = warnings.catch_warnings()
    with filter_scope:
        warnings.filterwarnings(
            action="ignore",
            message="Can't safely cast column",
            category=UserWarning,
        )
        yield


@pytest.fixture
def pd_str_cat():
categories = list("abc")
Expand Down Expand Up @@ -51,9 +78,8 @@ def test_categorical_basic():
assert_eq(cat.codes, cudf_cat.codes.to_numpy())


@pytest.mark.skipif(not PANDAS_GE_110, reason="requires pandas>=1.1.0")
def test_categorical_integer():
if not PANDAS_GE_110:
pytest.xfail(reason="pandas >=1.1 required")
cat = pd.Categorical(["a", "_", "_", "c", "a"], categories=["a", "b", "c"])
pdsr = pd.Series(cat)
sr = cudf.Series(cat)
Expand All @@ -67,17 +93,17 @@ def test_categorical_integer():
sr.cat.codes.astype(pdsr.cat.codes.dtype).fillna(-1).to_numpy(),
)

string = str(sr)
expect_str = """
0 a
1 <NA>
2 <NA>
3 c
4 a
dtype: category
Categories (3, object): ['a', 'b', 'c']
"""
assert string.split() == expect_str.split()
expect_str = dedent(
"""\
0 a
1 <NA>
2 <NA>
3 c
4 a
dtype: category
Categories (3, object): ['a', 'b', 'c']"""
)
assert str(sr) == expect_str


def test_categorical_compare_unordered():
Expand Down Expand Up @@ -152,23 +178,9 @@ def test_categorical_binary_add():
rfunc=operator.add,
lfunc_args_and_kwargs=([pdsr, pdsr],),
rfunc_args_and_kwargs=([sr, sr],),
expected_error_message="Series of dtype `category` cannot perform "
"the operation: add",
)


def test_categorical_unary_ceil():
cat = pd.Categorical(["a", "a", "b", "c", "a"], categories=["a", "b", "c"])
pdsr = pd.Series(cat)
sr = cudf.Series(cat)

assert_exceptions_equal(
lfunc=getattr,
rfunc=sr.ceil,
lfunc_args_and_kwargs=([pdsr, "ceil"],),
check_exception_type=False,
expected_error_message="Series of dtype `category` cannot "
"perform the operation: ceil",
expected_error_message=(
"Series of dtype `category` cannot perform the operation: add"
),
)


Expand Down Expand Up @@ -238,26 +250,25 @@ def test_cat_series_binop_error():
df["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc"))
df["b"] = np.arange(len(df))

dfa = df["a"]
dfb = df["b"]
pdf = df.to_pandas()

# lhs is a categorical
# lhs is categorical
assert_exceptions_equal(
lfunc=operator.add,
rfunc=operator.add,
lfunc_args_and_kwargs=([dfa, dfb],),
rfunc_args_and_kwargs=([dfa, dfb],),
check_exception_type=False,
expected_error_message="Series of dtype `category` cannot "
"perform the operation: add",
lfunc_args_and_kwargs=([pdf["a"], pdf["b"]],),
rfunc_args_and_kwargs=([df["a"], df["b"]],),
expected_error_message=(
"Series of dtype `category` cannot perform the operation: add"
),
)
# if lhs is a numerical

# lhs is numerical
assert_exceptions_equal(
lfunc=operator.add,
rfunc=operator.add,
lfunc_args_and_kwargs=([dfb, dfa],),
rfunc_args_and_kwargs=([dfb, dfa],),
check_exception_type=False,
lfunc_args_and_kwargs=([pdf["b"], pdf["a"]],),
rfunc_args_and_kwargs=([df["b"], df["a"]],),
expected_error_message="'add' operator not supported",
)

Expand Down Expand Up @@ -367,8 +378,9 @@ def test_categorical_as_ordered(pd_str_cat, inplace):

pd_sr_1 = pd_sr.cat.as_ordered(inplace=inplace)
cd_sr_1 = cd_sr.cat.as_ordered(inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert cd_sr_1.cat.ordered is True
assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered
Expand All @@ -386,8 +398,9 @@ def test_categorical_as_unordered(pd_str_cat, inplace):

pd_sr_1 = pd_sr.cat.as_unordered(inplace=inplace)
cd_sr_1 = cd_sr.cat.as_unordered(inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert cd_sr_1.cat.ordered is False
assert cd_sr_1.cat.ordered == pd_sr_1.cat.ordered
Expand All @@ -401,8 +414,9 @@ def test_categorical_as_unordered(pd_str_cat, inplace):
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/43232"
marks=pytest.mark.skipif(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This xfail can be replaced by a skipif because the underlying bug pandas-dev/pandas#43232 was resolved in pandas 1.3.4.

not PANDAS_GE_134,
reason="https://github.com/pandas-dev/pandas/issues/43232",
),
),
False,
Expand All @@ -421,10 +435,14 @@ def test_categorical_reorder_categories(

kwargs = dict(ordered=to_ordered, inplace=inplace)

pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"reorder_categories"
):
pd_sr_1 = pd_sr.cat.reorder_categories(list("cba"), **kwargs)
cd_sr_1 = cd_sr.cat.reorder_categories(list("cba"), **kwargs)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert_eq(pd_sr_1, cd_sr_1)

Expand All @@ -436,8 +454,9 @@ def test_categorical_reorder_categories(
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/43232"
marks=pytest.mark.skipif(
not PANDAS_GE_134,
reason="https://github.com/pandas-dev/pandas/issues/43232",
),
),
False,
Expand All @@ -452,10 +471,14 @@ def test_categorical_add_categories(pd_str_cat, inplace):

assert str(pd_sr) == str(cd_sr)

pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"add_categories"
):
pd_sr_1 = pd_sr.cat.add_categories(["d"], inplace=inplace)
cd_sr_1 = cd_sr.cat.add_categories(["d"], inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert "d" in pd_sr_1.cat.categories.to_list()
assert "d" in cd_sr_1.cat.categories.to_pandas().to_list()
Expand All @@ -468,8 +491,9 @@ def test_categorical_add_categories(pd_str_cat, inplace):
[
pytest.param(
True,
marks=pytest.mark.xfail(
reason="https://github.com/pandas-dev/pandas/issues/43232"
marks=pytest.mark.skipif(
not PANDAS_GE_134,
reason="https://github.com/pandas-dev/pandas/issues/43232",
),
),
False,
Expand All @@ -484,24 +508,31 @@ def test_categorical_remove_categories(pd_str_cat, inplace):

assert str(pd_sr) == str(cd_sr)

pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"remove_categories"
):
pd_sr_1 = pd_sr.cat.remove_categories(["a"], inplace=inplace)
cd_sr_1 = cd_sr.cat.remove_categories(["a"], inplace=inplace)
pd_sr_1 = pd_sr if pd_sr_1 is None else pd_sr_1
cd_sr_1 = cd_sr if cd_sr_1 is None else cd_sr_1
if inplace:
pd_sr_1 = pd_sr
cd_sr_1 = cd_sr

assert "a" not in pd_sr_1.cat.categories.to_list()
assert "a" not in cd_sr_1.cat.categories.to_pandas().to_list()

assert_eq(pd_sr_1, cd_sr_1)

# test using ordered operators
assert_exceptions_equal(
lfunc=cd_sr.to_pandas().cat.remove_categories,
rfunc=cd_sr.cat.remove_categories,
lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
expected_error_message="removals must all be in old categories",
)
with _hide_deprecated_pandas_categorical_inplace_warnings(
"remove_categories"
):
assert_exceptions_equal(
lfunc=cd_sr.to_pandas().cat.remove_categories,
rfunc=cd_sr.cat.remove_categories,
lfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
rfunc_args_and_kwargs=([["a", "d"]], {"inplace": inplace}),
expected_error_message="removals must all be in old categories",
)


def test_categorical_dataframe_slice_copy():
Expand Down Expand Up @@ -583,19 +614,21 @@ def test_categorical_set_categories_categoricals(data, new_categories):
pd_data = data.copy().astype("category")
gd_data = cudf.from_pandas(pd_data)

assert_eq(
pd_data.cat.set_categories(new_categories=new_categories),
gd_data.cat.set_categories(new_categories=new_categories),
)
expected = pd_data.cat.set_categories(new_categories=new_categories)
with _hide_cudf_safe_casting_warning():
actual = gd_data.cat.set_categories(new_categories=new_categories)

assert_eq(
pd_data.cat.set_categories(
new_categories=pd.Series(new_categories, dtype="category")
),
gd_data.cat.set_categories(
new_categories=cudf.Series(new_categories, dtype="category")
),
assert_eq(expected, actual)

expected = pd_data.cat.set_categories(
new_categories=pd.Series(new_categories, dtype="category")
)
with _hide_cudf_safe_casting_warning():
actual = gd_data.cat.set_categories(
new_categories=cudf.Series(new_categories, dtype="category")
)

assert_eq(expected, actual)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -703,7 +736,9 @@ def test_add_categories(data, add):
gds = cudf.Series(data, dtype="category")

expected = pds.cat.add_categories(add)
actual = gds.cat.add_categories(add)
with _hide_cudf_safe_casting_warning():
actual = gds.cat.add_categories(add)

assert_eq(
expected.cat.codes, actual.cat.codes.astype(expected.cat.codes.dtype)
)
Expand Down