From 45b3dff3b8b7b9022a6a633fbc094dfc7286eac4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 13:59:45 -0800 Subject: [PATCH 01/12] Move array ufunc index definitions to proper class. --- python/cudf/cudf/core/_base_index.py | 8 -------- python/cudf/cudf/core/frame.py | 4 +++- python/cudf/cudf/core/index.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 7a9a17631a9..387cd54d7d3 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -41,14 +41,6 @@ class BaseIndex(Serializable): _accessors: Set[Any] = set() _data: ColumnAccessor - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - - if method == "__call__" and hasattr(cudf, ufunc.__name__): - func = getattr(cudf, ufunc.__name__) - return func(*inputs) - else: - return NotImplemented - @cached_property def _values(self) -> ColumnBase: raise NotImplementedError diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index d478baf2d69..c8714388d28 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3496,7 +3496,9 @@ def _binaryop( Frame A new instance containing the result of the operation. """ - raise NotImplementedError + raise NotImplementedError( + f"Binary operations are not supported for {self.__class__}" + ) @classmethod @annotate("FRAME_COLWISE_BINOP", color="green", domain="cudf_python") diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 5b60e8dbd1c..202b9baa80c 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -520,6 +520,11 @@ def _as_int64(self): # that are not defined directly on RangeIndex. return Int64Index._from_data(self._data) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + return self._as_int64().__array_ufunc__( + ufunc, method, *inputs, **kwargs + ) + def __getattr__(self, key): # For methods that are not defined for RangeIndex we attempt to operate # on the corresponding integer index if possible. @@ -773,6 +778,14 @@ def __init__(self, data, **kwargs): name = kwargs.get("name") super().__init__({name: data}) + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + + if method == "__call__" and hasattr(cudf, ufunc.__name__): + func = getattr(cudf, ufunc.__name__) + return func(*inputs) + else: + return NotImplemented + def _binaryop( self, other: T, From 13a83d973fd6dc7380f158929fe1d67b690d4dd2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 15:27:11 -0800 Subject: [PATCH 02/12] Unify binops further between Series and DataFrame. --- python/cudf/cudf/core/dataframe.py | 24 +--------------- python/cudf/cudf/core/indexed_frame.py | 37 +++++++++++++++++++++++- python/cudf/cudf/core/series.py | 40 +++++--------------------- 3 files changed, 44 insertions(+), 57 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1c672aacd86..486690fa91b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1898,7 +1898,7 @@ def _get_columns_by_label(self, labels, downcast=False): ) return out - def _prep_for_binop( + def _make_operands_and_index_for_binop( self, other: Any, fn: str, @@ -1986,28 +1986,6 @@ def _prep_for_binop( return operands, lhs._index - @annotate("DATAFRAME_BINARYOP", color="blue", domain="cudf_python") - def _binaryop( - self, - other: Any, - fn: str, - fill_value: Any = None, - reflect: bool = False, - can_reindex: bool = False, - *args, - **kwargs, - ): - operands, out_index = self._prep_for_binop( - other, fn, fill_value, reflect, can_reindex - ) - if operands is NotImplemented: - return NotImplemented - - return self._from_data( - ColumnAccessor(type(self)._colwise_binop(operands, fn)), - index=out_index, - ) - @annotate("DATAFRAME_UPDATE", color="blue", domain="cudf_python") def update( self, diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index bc7337d0a42..51cbf4fba30 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -7,7 +7,7 @@ import warnings from collections import Counter, abc from functools import cached_property -from typing import Callable, Type, TypeVar +from typing import Any, Callable, Type, TypeVar from uuid import uuid4 import cupy as cp @@ -1695,6 +1695,41 @@ def last(self, offset): slice_func=lambda i: self.iloc[i:], ) + def _binaryop( + self, + other: Any, + fn: str, + fill_value: Any = None, + reflect: bool = False, + can_reindex: bool = False, + *args, + **kwargs, + ): + operands, out_index = self._make_operands_and_index_for_binop( + other, fn, fill_value, reflect, can_reindex + ) + if operands is NotImplemented: + return NotImplemented + + return self._from_data( + ColumnAccessor(type(self)._colwise_binop(operands, fn)), + index=out_index, + ) + + def _make_operands_and_index_for_binop( + self, + other: Any, + fn: str, + fill_value: Any = None, + reflect: bool = False, + can_reindex: bool = False, + *args, + **kwargs, + ): + raise NotImplementedError( + "Binary operations are not supported for {self.__class__}" + ) + # For more detail on this function and how it should work, see # https://numpy.org/doc/stable/reference/ufuncs.html def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 8574a152c44..b286f716e7f 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1206,7 +1206,7 @@ def __repr__(self): lines.append(category_memory) return "\n".join(lines) - def _prep_for_binop( + def _make_operands_and_index_for_binop( self, other: Any, fn: str, @@ -1217,20 +1217,15 @@ def _prep_for_binop( **kwargs, ): # Specialize binops to align indices. - if isinstance(other, SingleColumnFrame): + if isinstance(other, Series): if ( - # TODO: The can_reindex logic also needs to be applied for - # DataFrame (the methods that need it just don't exist yet). not can_reindex and fn in cudf.utils.utils._EQUALITY_OPS - and ( - isinstance(other, Series) - # TODO: mypy doesn't like this line because the index - # property is not defined on SingleColumnFrame (or Index, - # for that matter). Ignoring is the easy solution for now, - # a cleaner fix requires reworking the type hierarchy. - and not self.index.equals(other.index) # type: ignore - ) + # TODO: mypy doesn't like this line because the index property + # is not defined on SingleColumnFrame (or Index, for that + # matter). Ignoring is the easy solution for now, a cleaner fix + # requires reworking the type hierarchy. + and not self.index.equals(other.index) # type: ignore ): raise ValueError( "Can only compare identically-labeled Series objects" @@ -1242,27 +1237,6 @@ def _prep_for_binop( operands = lhs._make_operands_for_binop(other, fill_value, reflect) return operands, lhs._index - def _binaryop( - self, - other: Frame, - fn: str, - fill_value: Any = None, - reflect: bool = False, - can_reindex: bool = False, - *args, - **kwargs, - ): - operands, out_index = self._prep_for_binop( - other, fn, fill_value, reflect, can_reindex - ) - return ( - self._from_data( - data=self._colwise_binop(operands, fn), index=out_index, - ) - if operands is not NotImplemented - else NotImplemented - ) - def logical_and(self, other): warnings.warn( "Series.logical_and is deprecated and will be removed.", From 531bc934e585d5eee93e805a38804f1439c0af4d Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 16:57:00 -0800 Subject: [PATCH 03/12] Move most array_ufunc logic up to Frame, separating out just enough to make it work for Index as well as IndexedFrame. --- python/cudf/cudf/core/frame.py | 129 ++++++++++++++++++++++ python/cudf/cudf/core/index.py | 38 +++++-- python/cudf/cudf/core/indexed_frame.py | 143 ++++--------------------- 3 files changed, 175 insertions(+), 135 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index c8714388d28..1a112b95e05 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3660,6 +3660,135 @@ def _colwise_binop( return output + # For more detail on this function and how it should work, see + # https://numpy.org/doc/stable/reference/ufuncs.html + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # We don't currently support reduction, accumulation, etc. We also + # don't support any special kwargs or higher arity ufuncs than binary. + if method != "__call__" or kwargs or ufunc.nin > 2: + return NotImplemented + + # Binary operations + binary_operations = { + # Arithmetic binary operations. + "add": "add", + "subtract": "sub", + "multiply": "mul", + "matmul": "matmul", + "divide": "truediv", + "true_divide": "truediv", + "floor_divide": "floordiv", + "power": "pow", + "float_power": "pow", + "remainder": "mod", + "mod": "mod", + "fmod": "mod", + # Bitwise binary operations. + "bitwise_and": "and", + "bitwise_or": "or", + "bitwise_xor": "xor", + # Comparison binary operators + "greater": "gt", + "greater_equal": "ge", + "less": "lt", + "less_equal": "le", + "not_equal": "ne", + "equal": "eq", + } + + # First look for methods of the class. + fname = ufunc.__name__ + if fname in binary_operations: + reflect = self is not inputs[0] + other = inputs[0] if reflect else inputs[1] + + # These operators need to be mapped to their inverses when + # performing a reflected operation because no reflected version of + # the operators themselves exist. + ops_without_reflection = { + "gt": "lt", + "ge": "le", + "lt": "gt", + "le": "ge", + # ne and eq are symmetric, so they are their own inverse op + "ne": "ne", + "eq": "eq", + } + + op = binary_operations[fname] + if reflect and op in ops_without_reflection: + op = ops_without_reflection[op] + reflect = False + op = f"__{'r' if reflect else ''}{op}__" + + # pandas bitwise operations return bools if indexes are misaligned. + # TODO: This needs special handling for IndexedFrames. + # if ( + # "bitwise" in fname + # and isinstance(other, IndexedFrame) + # and not self.index.equals(other.index) + # ): + # return getattr(self, op)(other).astype(bool) + # Float_power returns float irrespective of the input type. + if fname == "float_power": + return getattr(self, op)(other).astype(float) + return getattr(self, op)(other) + + # Special handling for unary operations. + if fname == "negative": + return self * -1 + if fname == "positive": + return self.copy(deep=True) + if fname == "invert": + return ~self + if fname == "absolute": + return self.abs() + if fname == "fabs": + return self.abs().astype(np.float64) + + return None + + def _apply_cupy_ufunc_to_operands( + self, ufunc, cupy_func, operands, **kwargs + ): + # Note: There are some operations that may be supported by libcudf but + # are not supported by pandas APIs. In particular, libcudf binary + # operations support logical and/or operations, but those operations + # are not defined on pd.Series/DataFrame. For now those operations will + # dispatch to cupy, but if ufuncs are ever a bottleneck we could add + # special handling to dispatch those (or any other) functions that we + # could implement without cupy. + + mask = None + data = [{} for _ in range(ufunc.nout)] + for name, (left, right, _, _) in operands.items(): + cupy_inputs = [] + for inp in (left, right) if ufunc.nin == 2 else (left,): + if ( + isinstance(inp, cudf.core.column.ColumnBase) + and inp.has_nulls() + ): + new_mask = cudf.core.column.as_column(inp.nullmask) + + # TODO: This is a hackish way to perform a bitwise and + # of bitmasks. Once we expose + # cudf::detail::bitwise_and, then we can use that + # instead. + mask = new_mask if mask is None else (mask & new_mask) + + # Arbitrarily fill with zeros. For ufuncs, we assume + # that the end result propagates nulls via a bitwise + # and, so these elements are irrelevant. + inp = inp.fillna(0) + cupy_inputs.append(cupy.asarray(inp)) + + cp_output = cupy_func(*cupy_inputs, **kwargs) + if ufunc.nout == 1: + cp_output = (cp_output,) + for i, out in enumerate(cp_output): + data[i][name] = cudf.core.column.as_column(out).set_mask(mask) + return data + @annotate("FRAME_DOT", color="green", domain="cudf_python") def dot(self, other, reflect=False): """ diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 202b9baa80c..242b0adc70f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -779,12 +779,32 @@ def __init__(self, data, **kwargs): super().__init__({name: data}) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) + fname = ufunc.__name__ + + if ret is not None: + return ret + + # Attempt to dispatch all other functions to cupy. + cupy_func = getattr(cupy, fname) + if cupy_func: + if ufunc.nin == 2: + other = inputs[self is inputs[0]] + inputs = self._make_operands_for_binop(other) + else: + inputs = { + name: (col, None, False, None) + for name, col in self._data.items() + } - if method == "__call__" and hasattr(cudf, ufunc.__name__): - func = getattr(cudf, ufunc.__name__) - return func(*inputs) - else: - return NotImplemented + data = self._apply_cupy_ufunc_to_operands( + ufunc, cupy_func, inputs, **kwargs + ) + + out = tuple(_index_from_data(out) for out in data) + return out[0] if ufunc.nout == 1 else out + + return NotImplemented def _binaryop( self, @@ -797,11 +817,9 @@ def _binaryop( ) -> SingleColumnFrame: # Specialize binops to generate the appropriate output index type. operands = self._make_operands_for_binop(other, fill_value, reflect) - return ( - _index_from_data(data=self._colwise_binop(operands, fn),) - if operands is not NotImplemented - else NotImplemented - ) + if operands is NotImplemented: + return NotImplemented + return _index_from_data(self._colwise_binop(operands, fn)) def _copy_type_metadata( self, other: Frame, include_index: bool = True diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 51cbf4fba30..7c8c8460303 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1730,146 +1730,39 @@ def _make_operands_and_index_for_binop( "Binary operations are not supported for {self.__class__}" ) - # For more detail on this function and how it should work, see - # https://numpy.org/doc/stable/reference/ufuncs.html def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): - # We don't currently support reduction, accumulation, etc. We also - # don't support any special kwargs or higher arity ufuncs than binary. - if method != "__call__" or kwargs or ufunc.nin > 2: - return NotImplemented - - # Binary operations - binary_operations = { - # Arithmetic binary operations. - "add": "add", - "subtract": "sub", - "multiply": "mul", - "matmul": "matmul", - "divide": "truediv", - "true_divide": "truediv", - "floor_divide": "floordiv", - "power": "pow", - "float_power": "pow", - "remainder": "mod", - "mod": "mod", - "fmod": "mod", - # Bitwise binary operations. - "bitwise_and": "and", - "bitwise_or": "or", - "bitwise_xor": "xor", - # Comparison binary operators - "greater": "gt", - "greater_equal": "ge", - "less": "lt", - "less_equal": "le", - "not_equal": "ne", - "equal": "eq", - } - - # First look for methods of the class. + ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) fname = ufunc.__name__ - if fname in binary_operations: - reflect = self is not inputs[0] - other = inputs[0] if reflect else inputs[1] - - # These operators need to be mapped to their inverses when - # performing a reflected operation because no reflected version of - # the operators themselves exist. - ops_without_reflection = { - "gt": "lt", - "ge": "le", - "lt": "gt", - "le": "ge", - # ne and eq are symmetric, so they are their own inverse op - "ne": "ne", - "eq": "eq", - } - - op = binary_operations[fname] - if reflect and op in ops_without_reflection: - op = ops_without_reflection[op] - reflect = False - op = f"__{'r' if reflect else ''}{op}__" - - # pandas bitwise operations return bools if indexes are misaligned. - if ( - "bitwise" in fname - and isinstance(other, IndexedFrame) - and not self.index.equals(other.index) - ): - return getattr(self, op)(other).astype(bool) - # Float_power returns float irrespective of the input type. - if fname == "float_power": - return getattr(self, op)(other).astype(float) - return getattr(self, op)(other) - - # Special handling for unary operations. - if fname == "negative": - return self * -1 - if fname == "positive": - return self.copy(deep=True) - if fname == "invert": - return ~self - if fname == "absolute": - return self.abs() - if fname == "fabs": - return self.abs().astype(np.float64) - - # Note: There are some operations that may be supported by libcudf but - # are not supported by pandas APIs. In particular, libcudf binary - # operations support logical and/or operations, but those operations - # are not defined on pd.Series/DataFrame. For now those operations will - # dispatch to cupy, but if ufuncs are ever a bottleneck we could add - # special handling to dispatch those (or any other) functions that we - # could implement without cupy. + + if ret is not None: + if "bitwise" in fname: + reflect = self is not inputs[0] + other = inputs[0] if reflect else inputs[1] + if isinstance(other, IndexedFrame) and not self.index.equals( + other.index + ): + ret = ret.astype(bool) + return ret # Attempt to dispatch all other functions to cupy. cupy_func = getattr(cp, fname) if cupy_func: - # Indices must be aligned before converting to arrays. if ufunc.nin == 2: other = inputs[self is inputs[0]] - inputs, index = self._prep_for_binop(other, fname) + inputs, index = self._make_operands_and_index_for_binop( + other, fname + ) else: + # This works for Index too inputs = { name: (col, None, False, None) for name, col in self._data.items() } index = self._index - mask = None - data = [{} for _ in range(ufunc.nout)] - for name, (left, right, _, _) in inputs.items(): - cupy_inputs = [] - # TODO: I'm jumping through multiple hoops to get the unary - # behavior to match up with the binary. I should see if there - # are better patterns to employ here. - for inp in (left, right) if ufunc.nin == 2 else (left,): - if ( - isinstance(inp, cudf.core.column.ColumnBase) - and inp.has_nulls() - ): - new_mask = cudf.core.column.as_column(inp.nullmask) - - # TODO: This is a hackish way to perform a bitwise and - # of bitmasks. Once we expose - # cudf::detail::bitwise_and, then we can use that - # instead. - mask = new_mask if mask is None else (mask & new_mask) - - # Arbitrarily fill with zeros. For ufuncs, we assume - # that the end result propagates nulls via a bitwise - # and, so these elements are irrelevant. - inp = inp.fillna(0) - cupy_inputs.append(cp.asarray(inp)) - - cp_output = cupy_func(*cupy_inputs, **kwargs) - if ufunc.nout == 1: - cp_output = (cp_output,) - for i, out in enumerate(cp_output): - data[i][name] = cudf.core.column.as_column(out).set_mask( - mask - ) + data = self._apply_cupy_ufunc_to_operands( + ufunc, cupy_func, inputs, **kwargs + ) out = tuple( self.__class__._from_data(out, index=index) for out in data From 97b12b02cda611ff8650ac145eac5cb16fd0e796 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 16:58:29 -0800 Subject: [PATCH 04/12] Add tests for Index ufuncs, which pass except for a previously undiscovered bug in binops. --- python/cudf/cudf/tests/test_array_ufunc.py | 56 +++++++++++++++++++--- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index e4b4d5020ea..9ab0280377e 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -50,6 +50,50 @@ def _hide_ufunc_warnings(ufunc): yield +@pytest.mark.parametrize("ufunc", _UFUNCS) +@pytest.mark.parametrize("has_nulls", [True, False]) +def test_ufunc_index(ufunc, has_nulls): + # Note: This test assumes that all ufuncs are unary or binary. + fname = ufunc.__name__ + if has_nulls and fname == "matmul": + pytest.xfail("Frame.dot currently does not support indexes or nulls") + + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + pandas_args = args = [ + cudf.Index(cp.random.randint(low=1, high=10, size=N),) + for _ in range(ufunc.nin) + ] + + try: + got = ufunc(*args) + except AttributeError as e: + # We xfail if we don't have an explicit dispatch and cupy doesn't have + # the method so that we can easily identify these methods. As of this + # writing, the only missing methods are isnat and heaviside. + if "module 'cupy' has no attribute" in str(e): + pytest.xfail(reason="Operation not supported by cupy") + raise + + expect = ufunc(*(arg.to_pandas() for arg in pandas_args)) + + try: + if ufunc.nout > 1: + for g, e in zip(got, expect): + assert_eq(g, e, check_exact=False) + else: + assert_eq(got, expect, check_exact=False) + except AssertionError: + # TODO: This branch can be removed when + # https://github.com/rapidsai/cudf/issues/10178 is resolved + if fname in ("power", "float_power"): + if (got - expect).abs().max() == 1: + pytest.xfail("https://github.com/rapidsai/cudf/issues/10178") + raise + + @pytest.mark.parametrize("ufunc", _UFUNCS) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("indexed", [True, False]) @@ -117,11 +161,11 @@ def test_ufunc_series(ufunc, has_nulls, indexed): for g, e in zip(got, expect): if has_nulls: e[mask] = np.nan - assert_eq(g, e) + assert_eq(g, e, check_exact=False) else: if has_nulls: expect[mask] = np.nan - assert_eq(got, expect) + assert_eq(got, expect, check_exact=False) except AssertionError: # TODO: This branch can be removed when # https://github.com/rapidsai/cudf/issues/10178 is resolved @@ -195,14 +239,14 @@ def test_binary_ufunc_series_array(ufunc, has_nulls, indexed, type_, reflect): if type_ == "cupy" and reflect: assert (cp.asnumpy(g) == e).all() else: - assert_eq(g, e) + assert_eq(g, e, check_exact=False) else: if has_nulls: expect[mask] = np.nan if type_ == "cupy" and reflect: assert (cp.asnumpy(got) == expect).all() else: - assert_eq(got, expect) + assert_eq(got, expect, check_exact=False) @pytest.mark.parametrize( @@ -298,11 +342,11 @@ def test_ufunc_dataframe(ufunc, has_nulls, indexed): for g, e in zip(got, expect): if has_nulls: e[mask] = np.nan - assert_eq(g, e) + assert_eq(g, e, check_exact=False) else: if has_nulls: expect[mask] = np.nan - assert_eq(got, expect) + assert_eq(got, expect, check_exact=False) except AssertionError: # TODO: This branch can be removed when # https://github.com/rapidsai/cudf/issues/10178 is resolved From a563a3d978cc952c8d33cf368ef6f5fcd6a7b67e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 17:13:03 -0800 Subject: [PATCH 05/12] Fix handling of binops and binary ufuncs that return booleans. --- python/cudf/cudf/core/index.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 242b0adc70f..ba6b1d25c2d 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -801,8 +801,16 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ufunc, cupy_func, inputs, **kwargs ) - out = tuple(_index_from_data(out) for out in data) - return out[0] if ufunc.nout == 1 else out + out = [_index_from_data(out) for out in data] + + # pandas returns numpy arrays when the outputs are boolean. + for i, o in enumerate(out): + # We explicitly _do not_ use isinstance here: we want only + # boolean GenericIndexes, not dtype-specific subclasses. + if type(o) is GenericIndex and o.dtype.kind == "b": + out[i] = o.values + + return out[0] if ufunc.nout == 1 else tuple(out) return NotImplemented @@ -819,7 +827,14 @@ def _binaryop( operands = self._make_operands_for_binop(other, fill_value, reflect) if operands is NotImplemented: return NotImplemented - return _index_from_data(self._colwise_binop(operands, fn)) + ret = _index_from_data(self._colwise_binop(operands, fn)) + + # pandas returns numpy arrays when the outputs are boolean. We + # explicitly _do not_ use isinstance here: we want only boolean + # GenericIndexes, not dtype-specific subclasses. + if type(ret) is GenericIndex and ret.dtype.kind == "b": + return ret.values + return ret def _copy_type_metadata( self, other: Frame, include_index: bool = True From 99871b20e805f73a929caf429d4f3953a2f3b420 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 17:19:44 -0800 Subject: [PATCH 06/12] Deprecate now unnecessary APIs. --- python/cudf/cudf/core/frame.py | 45 +++++++++++++++++++ python/cudf/cudf/core/ops.py | 82 +++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 1a112b95e05..b8fef0e863a 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -2752,6 +2752,11 @@ def sin(self): 0.8011526357338306, 0.8939966636005579], dtype='float64') """ + warnings.warn( + "sin is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("sin") @annotate("FRAME_COS", color="green", domain="cudf_python") @@ -2814,6 +2819,11 @@ def cos(self): -0.5984600690578581, -0.4480736161291701], dtype='float64') """ + warnings.warn( + "cos is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("cos") @annotate("FRAME_TAN", color="green", domain="cudf_python") @@ -2876,6 +2886,11 @@ def tan(self): -1.3386902103511544, -1.995200412208242], dtype='float64') """ + warnings.warn( + "tan is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("tan") @annotate("FRAME_ASIN", color="green", domain="cudf_python") @@ -2927,6 +2942,11 @@ def asin(self): 1.5707963267948966, 0.3046926540153975], dtype='float64') """ + warnings.warn( + "asin is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("asin") @annotate("FRAME_ACOS", color="green", domain="cudf_python") @@ -2978,6 +2998,11 @@ def acos(self): 1.5707963267948966, 1.266103672779499], dtype='float64') """ + warnings.warn( + "acos is deprecated and will be removed in the future", + FutureWarning, + ) + result = self.copy(deep=False) for col in result._data: min_float_dtype = cudf.utils.dtypes.get_min_float_dtype( @@ -3047,6 +3072,11 @@ def atan(self): 0.2914567944778671], dtype='float64') """ + warnings.warn( + "atan is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("atan") @annotate("FRAME_EXP", color="green", domain="cudf_python") @@ -3110,6 +3140,11 @@ def exp(self): 2.718281828459045, 1.0, 1.3498588075760032], dtype='float64') """ + warnings.warn( + "exp is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("exp") @annotate("FRAME_LOG", color="green", domain="cudf_python") @@ -3172,6 +3207,11 @@ def log(self): Float64Index([2.302585092994046, 2.3978952727983707, 6.214608098422191], dtype='float64') """ + warnings.warn( + "log is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("log") @annotate("FRAME_SQRT", color="green", domain="cudf_python") @@ -3228,6 +3268,11 @@ def sqrt(self): >>> index.sqrt() Float64Index([nan, 10.0, 25.0], dtype='float64') """ + warnings.warn( + "sqrt is deprecated and will be removed in the future", + FutureWarning, + ) + return self._unaryop("sqrt") @annotate("FRAME_ABS", color="green", domain="cudf_python") diff --git a/python/cudf/cudf/core/ops.py b/python/cudf/cudf/core/ops.py index fe9e012f406..c2a8c0e72fb 100644 --- a/python/cudf/cudf/core/ops.py +++ b/python/cudf/cudf/core/ops.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +import warnings from numbers import Number import numpy as np @@ -10,6 +11,10 @@ def sin(arbitrary): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.sin(arbitrary) else: @@ -17,6 +22,10 @@ def sin(arbitrary): def cos(arbitrary): + warnings.warn( + "cos is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.cos(arbitrary) else: @@ -24,6 +33,10 @@ def cos(arbitrary): def tan(arbitrary): + warnings.warn( + "tan is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.tan(arbitrary) else: @@ -31,6 +44,11 @@ def tan(arbitrary): def arcsin(arbitrary): + warnings.warn( + "arcsin is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.arcsin(arbitrary) else: @@ -38,6 +56,11 @@ def arcsin(arbitrary): def arccos(arbitrary): + warnings.warn( + "arcsin is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.arccos(arbitrary) else: @@ -45,6 +68,11 @@ def arccos(arbitrary): def arctan(arbitrary): + warnings.warn( + "arctan is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.arctan(arbitrary) else: @@ -52,6 +80,10 @@ def arctan(arbitrary): def exp(arbitrary): + warnings.warn( + "exp is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.exp(arbitrary) else: @@ -59,6 +91,10 @@ def exp(arbitrary): def log(arbitrary): + warnings.warn( + "log is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.log(arbitrary) else: @@ -66,6 +102,10 @@ def log(arbitrary): def sqrt(arbitrary): + warnings.warn( + "sqrt is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(arbitrary, Number): return np.sqrt(arbitrary) else: @@ -73,6 +113,11 @@ def sqrt(arbitrary): def logical_not(arbitrary): + warnings.warn( + "logical_not is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(arbitrary, Number): return np.logical_not(arbitrary) else: @@ -80,6 +125,11 @@ def logical_not(arbitrary): def logical_and(lhs, rhs): + warnings.warn( + "logical_and is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.logical_and(lhs, rhs) else: @@ -87,6 +137,11 @@ def logical_and(lhs, rhs): def logical_or(lhs, rhs): + warnings.warn( + "logical_or is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.logical_or(lhs, rhs) else: @@ -94,6 +149,11 @@ def logical_or(lhs, rhs): def remainder(lhs, rhs): + warnings.warn( + "remainder is deprecated and will be removed in the future", + FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.mod(lhs, rhs) elif isinstance(lhs, Frame): @@ -103,6 +163,10 @@ def remainder(lhs, rhs): def floor_divide(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.floor_divide(lhs, rhs) elif isinstance(lhs, Frame): @@ -112,6 +176,10 @@ def floor_divide(lhs, rhs): def subtract(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.subtract(lhs, rhs) elif isinstance(lhs, Frame): @@ -121,6 +189,10 @@ def subtract(lhs, rhs): def add(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.add(lhs, rhs) elif isinstance(rhs, Frame): @@ -130,6 +202,10 @@ def add(lhs, rhs): def true_divide(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.true_divide(lhs, rhs) elif isinstance(rhs, Frame): @@ -139,6 +215,10 @@ def true_divide(lhs, rhs): def multiply(lhs, rhs): + warnings.warn( + "sin is deprecated and will be removed in the future", FutureWarning, + ) + if isinstance(lhs, Number) and isinstance(rhs, Number): return np.multiply(lhs, rhs) elif isinstance(rhs, Frame): From 392376253b06c5c5f2860ef4f015b3b2706628c8 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 17:22:19 -0800 Subject: [PATCH 07/12] Add tests of Index ufuncs against other types of objects. --- python/cudf/cudf/tests/test_array_ufunc.py | 41 +++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index 9ab0280377e..9d762f26ebd 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -51,12 +51,9 @@ def _hide_ufunc_warnings(ufunc): @pytest.mark.parametrize("ufunc", _UFUNCS) -@pytest.mark.parametrize("has_nulls", [True, False]) -def test_ufunc_index(ufunc, has_nulls): +def test_ufunc_index(ufunc): # Note: This test assumes that all ufuncs are unary or binary. fname = ufunc.__name__ - if has_nulls and fname == "matmul": - pytest.xfail("Frame.dot currently does not support indexes or nulls") N = 100 # Avoid zeros in either array to skip division by 0 errors. Also limit the @@ -94,6 +91,42 @@ def test_ufunc_index(ufunc, has_nulls): raise +@pytest.mark.parametrize( + "ufunc", [np.add, np.greater, np.greater_equal, np.logical_and] +) +@pytest.mark.parametrize("type_", ["cupy", "numpy", "list"]) +@pytest.mark.parametrize("reflect", [True, False]) +def test_binary_ufunc_index_array(ufunc, type_, reflect): + N = 100 + # Avoid zeros in either array to skip division by 0 errors. Also limit the + # scale to avoid issues with overflow, etc. We use ints because some + # operations (like bitwise ops) are not defined for floats. + args = [cudf.Index(cp.random.rand(N)) for _ in range(ufunc.nin)] + + arg1 = args[1].to_cupy() if type_ == "cupy" else args[1].to_numpy() + if type_ == "list": + arg1 = arg1.tolist() + + if reflect: + got = ufunc(arg1, args[0]) + expect = ufunc(args[1].to_numpy(), args[0].to_pandas()) + else: + got = ufunc(args[0], arg1) + expect = ufunc(args[0].to_pandas(), args[1].to_numpy()) + + if ufunc.nout > 1: + for g, e in zip(got, expect): + if type_ == "cupy" and reflect: + assert (cp.asnumpy(g) == e).all() + else: + assert_eq(g, e, check_exact=False) + else: + if type_ == "cupy" and reflect: + assert (cp.asnumpy(got) == expect).all() + else: + assert_eq(got, expect, check_exact=False) + + @pytest.mark.parametrize("ufunc", _UFUNCS) @pytest.mark.parametrize("has_nulls", [True, False]) @pytest.mark.parametrize("indexed", [True, False]) From d40bef88a93580d680aa19da7f5cf92965927ad0 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 17:44:22 -0800 Subject: [PATCH 08/12] Some cleanup and add type annotations. --- python/cudf/cudf/core/dataframe.py | 31 +++++++++++++------ python/cudf/cudf/core/frame.py | 32 +++++++------------- python/cudf/cudf/core/index.py | 3 +- python/cudf/cudf/core/indexed_frame.py | 20 ++++++++---- python/cudf/cudf/core/series.py | 15 ++++++--- python/cudf/cudf/core/single_column_frame.py | 16 ++++++++-- 6 files changed, 73 insertions(+), 44 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 486690fa91b..45bf1c27bab 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -11,7 +11,17 @@ import warnings from collections import defaultdict from collections.abc import Iterable, Sequence -from typing import Any, MutableMapping, Optional, Set, TypeVar +from typing import ( + Any, + Dict, + MutableMapping, + Optional, + Set, + Tuple, + Type, + TypeVar, + Union, +) import cupy import numpy as np @@ -44,6 +54,7 @@ from cudf.core.abc import Serializable from cudf.core.column import ( CategoricalColumn, + ColumnBase, as_column, build_categorical_column, build_column, @@ -1907,7 +1918,13 @@ def _make_operands_and_index_for_binop( can_reindex: bool = False, *args, **kwargs, - ): + ) -> Tuple[ + Union[ + Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + Type[NotImplemented], + ], + Optional[BaseIndex], + ]: lhs, rhs = self, other if _is_scalar_or_zero_d_array(rhs): @@ -2142,9 +2159,7 @@ def columns(self, columns): columns = pd.Index(range(len(self._data.columns))) is_multiindex = isinstance(columns, pd.MultiIndex) - if isinstance( - columns, (Series, cudf.Index, cudf.core.column.ColumnBase) - ): + if isinstance(columns, (Series, cudf.Index, ColumnBase)): columns = pd.Index(columns.to_numpy(), tupleize_cols=is_multiindex) elif not isinstance(columns, pd.Index): columns = pd.Index(columns, tupleize_cols=is_multiindex) @@ -6585,7 +6600,7 @@ def _setitem_with_dataframe( input_df: DataFrame, replace_df: DataFrame, input_cols: Any = None, - mask: Optional[cudf.core.column.ColumnBase] = None, + mask: Optional[ColumnBase] = None, ignore_index: bool = False, ): """ @@ -6676,9 +6691,7 @@ def _get_union_of_series_names(series_list): def _get_host_unique(array): - if isinstance( - array, (cudf.Series, cudf.Index, cudf.core.column.ColumnBase) - ): + if isinstance(array, (cudf.Series, cudf.Index, ColumnBase)): return array.unique.to_pandas() elif isinstance(array, (str, numbers.Number)): return [array] diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index b8fef0e863a..8264d046745 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3741,7 +3741,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): "equal": "eq", } - # First look for methods of the class. fname = ufunc.__name__ if fname in binary_operations: reflect = self is not inputs[0] @@ -3766,20 +3765,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): reflect = False op = f"__{'r' if reflect else ''}{op}__" - # pandas bitwise operations return bools if indexes are misaligned. - # TODO: This needs special handling for IndexedFrames. - # if ( - # "bitwise" in fname - # and isinstance(other, IndexedFrame) - # and not self.index.equals(other.index) - # ): - # return getattr(self, op)(other).astype(bool) # Float_power returns float irrespective of the input type. if fname == "float_power": return getattr(self, op)(other).astype(float) return getattr(self, op)(other) - # Special handling for unary operations. + # Special handling for various unary operations. if fname == "negative": return self * -1 if fname == "positive": @@ -3791,6 +3782,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if fname == "fabs": return self.abs().astype(np.float64) + # None is a sentinel used by subclasses to trigger cupy dispatch. return None def _apply_cupy_ufunc_to_operands( @@ -3798,22 +3790,20 @@ def _apply_cupy_ufunc_to_operands( ): # Note: There are some operations that may be supported by libcudf but # are not supported by pandas APIs. In particular, libcudf binary - # operations support logical and/or operations, but those operations - # are not defined on pd.Series/DataFrame. For now those operations will - # dispatch to cupy, but if ufuncs are ever a bottleneck we could add - # special handling to dispatch those (or any other) functions that we - # could implement without cupy. + # operations support logical and/or operations as well as + # trigonometric, but those operations are not defined on + # pd.Series/DataFrame. For now those operations will dispatch to cupy, + # but if ufuncs are ever a bottleneck we could add special handling to + # dispatch those (or any other) functions that we could implement + # without cupy. mask = None data = [{} for _ in range(ufunc.nout)] for name, (left, right, _, _) in operands.items(): cupy_inputs = [] for inp in (left, right) if ufunc.nin == 2 else (left,): - if ( - isinstance(inp, cudf.core.column.ColumnBase) - and inp.has_nulls() - ): - new_mask = cudf.core.column.as_column(inp.nullmask) + if isinstance(inp, ColumnBase) and inp.has_nulls(): + new_mask = as_column(inp.nullmask) # TODO: This is a hackish way to perform a bitwise and # of bitmasks. Once we expose @@ -3831,7 +3821,7 @@ def _apply_cupy_ufunc_to_operands( if ufunc.nout == 1: cp_output = (cp_output,) for i, out in enumerate(cp_output): - data[i][name] = cudf.core.column.as_column(out).set_mask(mask) + data[i][name] = as_column(out).set_mask(mask) return data @annotate("FRAME_DOT", color="green", domain="cudf_python") diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index ba6b1d25c2d..5aab834d452 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -780,13 +780,12 @@ def __init__(self, data, **kwargs): def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs) - fname = ufunc.__name__ if ret is not None: return ret # Attempt to dispatch all other functions to cupy. - cupy_func = getattr(cupy, fname) + cupy_func = getattr(cupy, ufunc.__name__) if cupy_func: if ufunc.nin == 2: other = inputs[self is inputs[0]] diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 7c8c8460303..0226ee4888f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -7,7 +7,7 @@ import warnings from collections import Counter, abc from functools import cached_property -from typing import Any, Callable, Type, TypeVar +from typing import Any, Callable, Dict, Optional, Tuple, Type, TypeVar, Union from uuid import uuid4 import cupy as cp @@ -1725,7 +1725,16 @@ def _make_operands_and_index_for_binop( can_reindex: bool = False, *args, **kwargs, - ): + ) -> Tuple[ + Union[ + Dict[ + Optional[str], + Tuple[cudf.core.column.ColumnBase, Any, bool, Any], + ], + Type[NotImplemented], + ], + Optional[cudf.BaseIndex], + ]: raise NotImplementedError( "Binary operations are not supported for {self.__class__}" ) @@ -1735,10 +1744,11 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): fname = ufunc.__name__ if ret is not None: + # pandas bitwise operations return bools if indexes are misaligned. if "bitwise" in fname: reflect = self is not inputs[0] other = inputs[0] if reflect else inputs[1] - if isinstance(other, IndexedFrame) and not self.index.equals( + if isinstance(other, self.__class__) and not self.index.equals( other.index ): ret = ret.astype(bool) @@ -1764,9 +1774,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ufunc, cupy_func, inputs, **kwargs ) - out = tuple( - self.__class__._from_data(out, index=index) for out in data - ) + out = tuple(self._from_data(out, index=index) for out in data) return out[0] if ufunc.nout == 1 else out return NotImplemented diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index b286f716e7f..0568601b9a5 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -9,7 +9,7 @@ from collections import abc as abc from numbers import Number from shutil import get_terminal_size -from typing import Any, MutableMapping, Optional, Set, Union +from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union import cupy import numpy as np @@ -39,6 +39,7 @@ ) from cudf.core.abc import Serializable from cudf.core.column import ( + ColumnBase, DatetimeColumn, TimeDeltaColumn, arange, @@ -435,7 +436,7 @@ def __init__( else: data = {} - if not isinstance(data, column.ColumnBase): + if not isinstance(data, ColumnBase): data = column.as_column(data, nan_as_null=nan_as_null, dtype=dtype) else: if dtype is not None: @@ -444,7 +445,7 @@ def __init__( if index is not None and not isinstance(index, BaseIndex): index = as_index(index) - assert isinstance(data, column.ColumnBase) + assert isinstance(data, ColumnBase) super().__init__({name: data}) self._index = RangeIndex(len(data)) if index is None else index @@ -1215,7 +1216,13 @@ def _make_operands_and_index_for_binop( can_reindex: bool = False, *args, **kwargs, - ): + ) -> Tuple[ + Union[ + Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + Type[NotImplemented], + ], + Optional[BaseIndex], + ]: # Specialize binops to align indices. if isinstance(other, Series): if ( diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 50b206d3388..f02e3b9f959 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -4,7 +4,16 @@ from __future__ import annotations import builtins -from typing import Any, Dict, MutableMapping, Optional, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + MutableMapping, + Optional, + Tuple, + Type, + TypeVar, + Union, +) import cupy import numpy as np @@ -279,7 +288,10 @@ def _make_operands_for_binop( reflect: bool = False, *args, **kwargs, - ) -> Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]]: + ) -> Union[ + Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]], + Type[NotImplemented], + ]: """Generate the dictionary of operands used for a binary operation. Parameters From d24088c5d89a1ee0910ea4e6c22de2e2fc07c48b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 17:45:49 -0800 Subject: [PATCH 09/12] Remove redundant test. --- python/cudf/cudf/tests/test_dataframe.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index acd9e28c661..cab3ed8a30f 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3342,14 +3342,6 @@ def test_select_dtype_datetime_with_frequency(): ) -def test_array_ufunc(): - gdf = cudf.DataFrame({"x": [2, 3, 4.0], "y": [9.0, 2.5, 1.1]}) - pdf = gdf.to_pandas() - - assert_eq(np.sqrt(gdf), np.sqrt(pdf)) - assert_eq(np.sqrt(gdf.x), np.sqrt(pdf.x)) - - def test_dataframe_describe_exclude(): np.random.seed(12) data_length = 10000 From 710e2c7d4cd1249cbe4afedd9b3b5f2a6fd6dfdc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 22 Feb 2022 17:59:15 -0800 Subject: [PATCH 10/12] Include reference to numpy functions in deprecation warning. --- python/cudf/cudf/core/frame.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 8264d046745..df9a3c4932b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -2753,7 +2753,7 @@ def sin(self): dtype='float64') """ warnings.warn( - "sin is deprecated and will be removed in the future", + "sin is deprecated and will be removed. Use numpy.sin instead", FutureWarning, ) @@ -2820,7 +2820,7 @@ def cos(self): dtype='float64') """ warnings.warn( - "cos is deprecated and will be removed in the future", + "cos is deprecated and will be removed. Use numpy.cos instead", FutureWarning, ) @@ -2887,7 +2887,7 @@ def tan(self): dtype='float64') """ warnings.warn( - "tan is deprecated and will be removed in the future", + "tan is deprecated and will be removed. Use numpy.tan instead", FutureWarning, ) @@ -2999,7 +2999,7 @@ def acos(self): dtype='float64') """ warnings.warn( - "acos is deprecated and will be removed in the future", + "acos is deprecated and will be removed. Use numpy.acos instead", FutureWarning, ) @@ -3073,7 +3073,7 @@ def atan(self): dtype='float64') """ warnings.warn( - "atan is deprecated and will be removed in the future", + "atan is deprecated and will be removed. Use numpy.atan instead", FutureWarning, ) @@ -3141,7 +3141,7 @@ def exp(self): dtype='float64') """ warnings.warn( - "exp is deprecated and will be removed in the future", + "exp is deprecated and will be removed. Use numpy.exp instead", FutureWarning, ) @@ -3208,7 +3208,7 @@ def log(self): 6.214608098422191], dtype='float64') """ warnings.warn( - "log is deprecated and will be removed in the future", + "log is deprecated and will be removed. Use numpy.log instead", FutureWarning, ) @@ -3269,7 +3269,7 @@ def sqrt(self): Float64Index([nan, 10.0, 25.0], dtype='float64') """ warnings.warn( - "sqrt is deprecated and will be removed in the future", + "sqrt is deprecated and will be removed. Use numpy.sqrt instead", FutureWarning, ) From 0f65f1448ad22bbecde3810beeb02fc61d920c46 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 23 Feb 2022 14:38:06 -0800 Subject: [PATCH 11/12] Update tests that assumed the wrong output type. --- python/cudf/cudf/tests/test_binops.py | 6 +----- python/cudf/cudf/tests/test_pickling.py | 6 ++---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 02ca7a0cd58..c90eb75b761 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1744,10 +1744,6 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype): expected = val == data.to_pandas() got = val == data - # In case of index, expected would be a numpy array - if isinstance(data, cudf.BaseIndex): - expected = pd.Index(expected) - utils.assert_eq(expected, got) @@ -2945,7 +2941,7 @@ def test_binops_non_cudf_types(obj_class, binop, other_type): data = range(1, 100) lhs = obj_class(data) rhs = other_type(data) - assert cp.all((binop(lhs, rhs) == binop(lhs, lhs)).values) + assert (binop(lhs, rhs) == binop(lhs, lhs)).all() @pytest.mark.parametrize("binop", _binops + _binops_compare) diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index 8d504edd669..57b297004bf 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import sys @@ -90,9 +90,7 @@ def test_pickle_index(): idx = GenericIndex(np.arange(nelem), name="a") pickled = pickle.dumps(idx) out = pickle.loads(pickled) - # TODO: Once operations like `all` are supported on Index objects, we can - # just use that without calling values first. - assert (idx == out).values.all() + assert (idx == out).all() def test_pickle_buffer(): From 9cca3b50c2955cb298fb860ab3ee10682b5fef96 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 24 Feb 2022 17:25:51 -0800 Subject: [PATCH 12/12] Address PR comments. --- python/cudf/cudf/core/frame.py | 92 +++++++++++++++++---------------- python/cudf/cudf/core/series.py | 6 +-- 2 files changed, 48 insertions(+), 50 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index df9a3c4932b..4a5a222e81e 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -54,6 +54,49 @@ T = TypeVar("T", bound="Frame") +# Mapping from ufuncs to the corresponding binary operators. +_ufunc_binary_operations = { + # Arithmetic binary operations. + "add": "add", + "subtract": "sub", + "multiply": "mul", + "matmul": "matmul", + "divide": "truediv", + "true_divide": "truediv", + "floor_divide": "floordiv", + "power": "pow", + "float_power": "pow", + "remainder": "mod", + "mod": "mod", + "fmod": "mod", + # Bitwise binary operations. + "bitwise_and": "and", + "bitwise_or": "or", + "bitwise_xor": "xor", + # Comparison binary operators + "greater": "gt", + "greater_equal": "ge", + "less": "lt", + "less_equal": "le", + "not_equal": "ne", + "equal": "eq", +} + +# These operators need to be mapped to their inverses when performing a +# reflected ufunc operation because no reflected version of the operators +# themselves exist. When these operators are invoked directly (not via +# __array_ufunc__) Python takes care of calling the inverse operation. +_ops_without_reflection = { + "gt": "lt", + "ge": "le", + "lt": "gt", + "le": "ge", + # ne and eq are symmetric, so they are their own inverse op + "ne": "ne", + "eq": "eq", +} + + class Frame: """A collection of Column objects with an optional index. @@ -3713,55 +3756,14 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if method != "__call__" or kwargs or ufunc.nin > 2: return NotImplemented - # Binary operations - binary_operations = { - # Arithmetic binary operations. - "add": "add", - "subtract": "sub", - "multiply": "mul", - "matmul": "matmul", - "divide": "truediv", - "true_divide": "truediv", - "floor_divide": "floordiv", - "power": "pow", - "float_power": "pow", - "remainder": "mod", - "mod": "mod", - "fmod": "mod", - # Bitwise binary operations. - "bitwise_and": "and", - "bitwise_or": "or", - "bitwise_xor": "xor", - # Comparison binary operators - "greater": "gt", - "greater_equal": "ge", - "less": "lt", - "less_equal": "le", - "not_equal": "ne", - "equal": "eq", - } - fname = ufunc.__name__ - if fname in binary_operations: + if fname in _ufunc_binary_operations: reflect = self is not inputs[0] other = inputs[0] if reflect else inputs[1] - # These operators need to be mapped to their inverses when - # performing a reflected operation because no reflected version of - # the operators themselves exist. - ops_without_reflection = { - "gt": "lt", - "ge": "le", - "lt": "gt", - "le": "ge", - # ne and eq are symmetric, so they are their own inverse op - "ne": "ne", - "eq": "eq", - } - - op = binary_operations[fname] - if reflect and op in ops_without_reflection: - op = ops_without_reflection[op] + op = _ufunc_binary_operations[fname] + if reflect and op in _ops_without_reflection: + op = _ops_without_reflection[op] reflect = False op = f"__{'r' if reflect else ''}{op}__" diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 0568601b9a5..45a44016449 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1228,11 +1228,7 @@ def _make_operands_and_index_for_binop( if ( not can_reindex and fn in cudf.utils.utils._EQUALITY_OPS - # TODO: mypy doesn't like this line because the index property - # is not defined on SingleColumnFrame (or Index, for that - # matter). Ignoring is the easy solution for now, a cleaner fix - # requires reworking the type hierarchy. - and not self.index.equals(other.index) # type: ignore + and not self.index.equals(other.index) ): raise ValueError( "Can only compare identically-labeled Series objects"