rapidsai · rapids-bot · Feb 25, 2022 · Feb 22, 2022 · Feb 22, 2022 · Feb 23, 2022
@@ -41,14 +41,6 @@ class BaseIndex(Serializable):
     _accessors: Set[Any] = set()
     _data: ColumnAccessor
 
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-
-        if method == "__call__" and hasattr(cudf, ufunc.__name__):
-            func = getattr(cudf, ufunc.__name__)
-            return func(*inputs)
-        else:
-            return NotImplemented
-
     @cached_property
     def _values(self) -> ColumnBase:
         raise NotImplementedError

@@ -11,7 +11,17 @@
 import warnings
 from collections import defaultdict
 from collections.abc import Iterable, Sequence
-from typing import Any, MutableMapping, Optional, Set, TypeVar
+from typing import (
+    Any,
+    Dict,
+    MutableMapping,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 
 import cupy
 import numpy as np
@@ -44,6 +54,7 @@
 from cudf.core.abc import Serializable
 from cudf.core.column import (
     CategoricalColumn,
+    ColumnBase,
     as_column,
     build_categorical_column,
     build_column,
@@ -1898,7 +1909,7 @@ def _get_columns_by_label(self, labels, downcast=False):
         )
         return out
 
-    def _prep_for_binop(
+    def _make_operands_and_index_for_binop(
         self,
         other: Any,
         fn: str,
@@ -1907,7 +1918,13 @@ def _prep_for_binop(
         can_reindex: bool = False,
         *args,
         **kwargs,
-    ):
+    ) -> Tuple[
+        Union[
+            Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
+            Type[NotImplemented],
+        ],
+        Optional[BaseIndex],
+    ]:
         lhs, rhs = self, other
 
         if _is_scalar_or_zero_d_array(rhs):
@@ -1986,28 +2003,6 @@ def _prep_for_binop(
 
         return operands, lhs._index
 
-    @annotate("DATAFRAME_BINARYOP", color="blue", domain="cudf_python")
-    def _binaryop(
-        self,
-        other: Any,
-        fn: str,
-        fill_value: Any = None,
-        reflect: bool = False,
-        can_reindex: bool = False,
-        *args,
-        **kwargs,
-    ):
-        operands, out_index = self._prep_for_binop(
-            other, fn, fill_value, reflect, can_reindex
-        )
-        if operands is NotImplemented:
-            return NotImplemented
-
-        return self._from_data(
-            ColumnAccessor(type(self)._colwise_binop(operands, fn)),
-            index=out_index,
-        )
-
     @annotate("DATAFRAME_UPDATE", color="blue", domain="cudf_python")
     def update(
         self,
@@ -2164,9 +2159,7 @@ def columns(self, columns):
             columns = pd.Index(range(len(self._data.columns)))
         is_multiindex = isinstance(columns, pd.MultiIndex)
 
-        if isinstance(
-            columns, (Series, cudf.Index, cudf.core.column.ColumnBase)
-        ):
+        if isinstance(columns, (Series, cudf.Index, ColumnBase)):
             columns = pd.Index(columns.to_numpy(), tupleize_cols=is_multiindex)
         elif not isinstance(columns, pd.Index):
             columns = pd.Index(columns, tupleize_cols=is_multiindex)
@@ -6607,7 +6600,7 @@ def _setitem_with_dataframe(
     input_df: DataFrame,
     replace_df: DataFrame,
     input_cols: Any = None,
-    mask: Optional[cudf.core.column.ColumnBase] = None,
+    mask: Optional[ColumnBase] = None,
     ignore_index: bool = False,
 ):
     """
@@ -6698,9 +6691,7 @@ def _get_union_of_series_names(series_list):
 
 
 def _get_host_unique(array):
-    if isinstance(
-        array, (cudf.Series, cudf.Index, cudf.core.column.ColumnBase)
-    ):
+    if isinstance(array, (cudf.Series, cudf.Index, ColumnBase)):
         return array.unique.to_pandas()
     elif isinstance(array, (str, numbers.Number)):
         return [array]

@@ -2752,6 +2752,11 @@ def sin(self):
                     0.8011526357338306, 0.8939966636005579],
                     dtype='float64')
         """
+        warnings.warn(
+            "sin is deprecated and will be removed. Use numpy.sin instead",
+            FutureWarning,
+        )
+
         return self._unaryop("sin")
 
     @annotate("FRAME_COS", color="green", domain="cudf_python")
@@ -2814,6 +2819,11 @@ def cos(self):
                     -0.5984600690578581, -0.4480736161291701],
                     dtype='float64')
         """
+        warnings.warn(
+            "cos is deprecated and will be removed. Use numpy.cos instead",
+            FutureWarning,
+        )
+
         return self._unaryop("cos")
 
     @annotate("FRAME_TAN", color="green", domain="cudf_python")
@@ -2876,6 +2886,11 @@ def tan(self):
                     -1.3386902103511544, -1.995200412208242],
                     dtype='float64')
         """
+        warnings.warn(
+            "tan is deprecated and will be removed. Use numpy.tan instead",
+            FutureWarning,
+        )
+
         return self._unaryop("tan")
 
     @annotate("FRAME_ASIN", color="green", domain="cudf_python")
@@ -2927,6 +2942,11 @@ def asin(self):
                     1.5707963267948966, 0.3046926540153975],
                     dtype='float64')
         """
+        # Name the replacement explicitly. NumPy spells the inverse sine
+        # ``arcsin`` (the ``asin`` alias only exists from NumPy 2.0 on).
+        warnings.warn(
+            "asin is deprecated and will be removed. Use numpy.arcsin instead",
+            FutureWarning,
+        )
+
         return self._unaryop("asin")
 
     @annotate("FRAME_ACOS", color="green", domain="cudf_python")
@@ -2978,6 +2998,11 @@ def acos(self):
                     1.5707963267948966,  1.266103672779499],
                     dtype='float64')
         """
+        # NumPy spells the inverse cosine ``arccos``; ``numpy.acos`` is only
+        # available from NumPy 2.0 on, so recommend the portable name.
+        warnings.warn(
+            "acos is deprecated and will be removed. Use numpy.arccos instead",
+            FutureWarning,
+        )
+
         result = self.copy(deep=False)
         for col in result._data:
             min_float_dtype = cudf.utils.dtypes.get_min_float_dtype(
@@ -3047,6 +3072,11 @@ def atan(self):
                                     0.2914567944778671],
                     dtype='float64')
         """
+        # NumPy spells the inverse tangent ``arctan``; ``numpy.atan`` is only
+        # available from NumPy 2.0 on, so recommend the portable name.
+        warnings.warn(
+            "atan is deprecated and will be removed. Use numpy.arctan instead",
+            FutureWarning,
+        )
+
         return self._unaryop("atan")
 
     @annotate("FRAME_EXP", color="green", domain="cudf_python")
@@ -3110,6 +3140,11 @@ def exp(self):
                       2.718281828459045, 1.0,  1.3498588075760032],
                     dtype='float64')
         """
+        warnings.warn(
+            "exp is deprecated and will be removed. Use numpy.exp instead",
+            FutureWarning,
+        )
+
         return self._unaryop("exp")
 
     @annotate("FRAME_LOG", color="green", domain="cudf_python")
@@ -3172,6 +3207,11 @@ def log(self):
         Float64Index([2.302585092994046, 2.3978952727983707,
                     6.214608098422191], dtype='float64')
         """
+        warnings.warn(
+            "log is deprecated and will be removed. Use numpy.log instead",
+            FutureWarning,
+        )
+
         return self._unaryop("log")
 
     @annotate("FRAME_SQRT", color="green", domain="cudf_python")
@@ -3228,6 +3268,11 @@ def sqrt(self):
         >>> index.sqrt()
         Float64Index([nan, 10.0, 25.0], dtype='float64')
         """
+        warnings.warn(
+            "sqrt is deprecated and will be removed. Use numpy.sqrt instead",
+            FutureWarning,
+        )
+
         return self._unaryop("sqrt")
 
     @annotate("FRAME_ABS", color="green", domain="cudf_python")
@@ -3496,7 +3541,9 @@ def _binaryop(
         Frame
             A new instance containing the result of the operation.
         """
-        raise NotImplementedError
+        raise NotImplementedError(
+            f"Binary operations are not supported for {self.__class__}"
+        )
 
     @classmethod
     @annotate("FRAME_COLWISE_BINOP", color="green", domain="cudf_python")
@@ -3658,6 +3705,125 @@ def _colwise_binop(
 
         return output
 
+    # Background on the ufunc override protocol implemented here:
+    # https://numpy.org/doc/stable/reference/ufuncs.html
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        """Route a NumPy ufunc call to the equivalent operation on ``self``.
+
+        Parameters
+        ----------
+        ufunc : numpy.ufunc
+            The ufunc that was invoked; its ``__name__`` and ``nin`` are
+            used to pick the operation.
+        method : str
+            Name of the ufunc method used. Only ``"__call__"`` is handled.
+        *inputs
+            Positional operands passed to the ufunc.
+        **kwargs
+            Keyword arguments passed to the ufunc. None are supported.
+
+        Returns
+        -------
+        object
+            ``NotImplemented`` when ``method`` is not ``"__call__"``, when
+            any keyword argument is present, or when the ufunc takes more
+            than two inputs. Otherwise the result of the matching operator
+            method or unary helper, or ``None`` when the ufunc name is not
+            one of those handled here.
+        """
+        # Only plain calls are handled: reductions, accumulations and the
+        # like are unsupported, as are keyword arguments and ufuncs that
+        # take more than two inputs.
+        if method != "__call__" or kwargs or ufunc.nin > 2:
+            return NotImplemented
+
+        ufunc_name = ufunc.__name__
+
+        # Binary ufunc name -> stem of the operator dunder implementing it.
+        operator_stems = {
+            # Arithmetic
+            "add": "add",
+            "subtract": "sub",
+            "multiply": "mul",
+            "matmul": "matmul",
+            "divide": "truediv",
+            "true_divide": "truediv",
+            "floor_divide": "floordiv",
+            "power": "pow",
+            "float_power": "pow",
+            "remainder": "mod",
+            "mod": "mod",
+            "fmod": "mod",
+            # Bitwise
+            "bitwise_and": "and",
+            "bitwise_or": "or",
+            "bitwise_xor": "xor",
+            # Comparison
+            "greater": "gt",
+            "greater_equal": "ge",
+            "less": "lt",
+            "less_equal": "le",
+            "not_equal": "ne",
+            "equal": "eq",
+        }
+
+        # Comparisons have no reflected dunder, so a reflected comparison is
+        # rewritten as the mirrored, non-reflected one (eq and ne are
+        # symmetric and therefore map to themselves).
+        mirrored_comparisons = {
+            "gt": "lt",
+            "ge": "le",
+            "lt": "gt",
+            "le": "ge",
+            "ne": "ne",
+            "eq": "eq",
+        }
+
+        stem = operator_stems.get(ufunc_name)
+        if stem is not None:
+            # The operation is reflected when self is not the first operand.
+            is_reflected = inputs[0] is not self
+            operand = inputs[0] if is_reflected else inputs[1]
+
+            if is_reflected and stem in mirrored_comparisons:
+                stem = mirrored_comparisons[stem]
+                is_reflected = False
+            prefix = "r" if is_reflected else ""
+            result = getattr(self, f"__{prefix}{stem}__")(operand)
+
+            # float_power yields floats regardless of the input types.
+            if ufunc_name == "float_power":
+                result = result.astype(float)
+            return result
+
+        # Unary ufuncs with a direct equivalent; each entry is evaluated
+        # only when its name is the one requested.
+        unary_handlers = {
+            "negative": lambda: self * -1,
+            "positive": lambda: self.copy(deep=True),
+            "invert": lambda: ~self,
+            "absolute": lambda: self.abs(),
+            "fabs": lambda: self.abs().astype(np.float64),
+        }
+        handler = unary_handlers.get(ufunc_name)
+        if handler is not None:
+            return handler()
+
+        # None is a sentinel used by subclasses to trigger cupy dispatch.
+        return None
+
+    def _apply_cupy_ufunc_to_operands(
+        self, ufunc, cupy_func, operands, **kwargs
+    ):
+        """Apply a cupy function to each column's operands.
+
+        Parameters
+        ----------
+        ufunc : numpy.ufunc
+            The ufunc being emulated; only ``nin`` and ``nout`` are read.
+        cupy_func : callable
+            Called once per column with that column's cupy inputs and
+            ``**kwargs``.
+        operands : dict
+            Maps a column name to a 4-tuple whose first two entries are the
+            left and right inputs; the last two entries are ignored here.
+        **kwargs
+            Forwarded unchanged to ``cupy_func``.
+
+        Returns
+        -------
+        list of dict
+            One dict per ufunc output (``ufunc.nout`` in total), each mapping
+            column name to the resulting column.
+        """
+        # Note: There are some operations that may be supported by libcudf but
+        # are not supported by pandas APIs. In particular, libcudf binary
+        # operations support logical and/or operations as well as
+        # trigonometric, but those operations are not defined on
+        # pd.Series/DataFrame. For now those operations will dispatch to cupy,
+        # but if ufuncs are ever a bottleneck we could add special handling to
+        # dispatch those (or any other) functions that we could implement
+        # without cupy.
+
+        data = [{} for _ in range(ufunc.nout)]
+        for name, (left, right, _, _) in operands.items():
+            # The null mask is specific to one output column, so start from
+            # scratch for every column. Carrying it over would mark rows of
+            # this column null because an earlier column had nulls there.
+            mask = None
+            cupy_inputs = []
+            for inp in (left, right) if ufunc.nin == 2 else (left,):
+                if isinstance(inp, ColumnBase) and inp.has_nulls():
+                    new_mask = as_column(inp.nullmask)
+
+                    # TODO: This is a hackish way to perform a bitwise and
+                    # of bitmasks. Once we expose
+                    # cudf::detail::bitwise_and, then we can use that
+                    # instead.
+                    mask = new_mask if mask is None else (mask & new_mask)
+
+                    # Arbitrarily fill with zeros. For ufuncs, we assume
+                    # that the end result propagates nulls via a bitwise
+                    # and, so these elements are irrelevant.
+                    inp = inp.fillna(0)
+                cupy_inputs.append(cupy.asarray(inp))
+
+            cp_output = cupy_func(*cupy_inputs, **kwargs)
+            # Single-output ufuncs return a bare array; wrap it so both
+            # cases can be iterated the same way.
+            if ufunc.nout == 1:
+                cp_output = (cp_output,)
+            for i, out in enumerate(cp_output):
+                data[i][name] = as_column(out).set_mask(mask)
+        return data
+
     @annotate("FRAME_DOT", color="green", domain="cudf_python")
     def dot(self, other, reflect=False):
         """