Support Unary Operations in Masked UDF (#9409)

This PR adds support for several unary operations in masked UDFs, including the trigonometric functions `sin`, `cos`, and `tan`; the rounding functions `ceil` and `floor`; the sign function `neg`; and the logical function `not`. Closes #9405. Authors: - Michael Wang (https://github.com/isVoid) Approvers: - https://github.com/brandon-b-miller - Graham Markall (https://github.com/gmarkall) URL: #9409
rapidsai · Oct 19, 2021 · 5e2aaf9 · 5e2aaf9
1 parent a19bd23
commit 5e2aaf9
Show file tree

Hide file tree

Showing 4 changed files with 166 additions and 34 deletions.
diff --git a/python/cudf/cudf/core/udf/_ops.py b/python/cudf/cudf/core/udf/_ops.py
@@ -1,3 +1,4 @@
+import math
 import operator
 
 arith_ops = [
@@ -10,6 +11,40 @@
     operator.pow,
 ]
 
+# Unary (single-argument) operations that get masked-scalar typing and
+# lowering registered for them: functions from `math` first, followed by
+# the unary operators from `operator`.
+unary_ops = [
+    math.acos,
+    math.acosh,
+    math.asin,
+    math.asinh,
+    math.atan,
+    math.atanh,
+    math.ceil,
+    math.cos,
+    math.degrees,
+    math.erf,
+    math.erfc,
+    math.exp,
+    math.expm1,
+    math.fabs,
+    math.floor,
+    math.gamma,
+    math.lgamma,
+    math.log,
+    math.log10,
+    math.log1p,
+    math.log2,
+    math.radians,
+    math.sin,
+    math.sinh,
+    math.sqrt,
+    math.tan,
+    math.tanh,
+    operator.pos,
+    operator.neg,
+    operator.not_,
+    operator.invert,
+]
+
 comparison_ops = [
     operator.eq,
     operator.ne,

diff --git a/python/cudf/cudf/core/udf/lowering.py b/python/cudf/cudf/core/udf/lowering.py
@@ -10,7 +10,7 @@
 from numba.extending import lower_builtin, types
 
 from cudf.core.udf import api
-from cudf.core.udf._ops import arith_ops, comparison_ops
+from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
 from cudf.core.udf.typing import MaskedType, NAType
 
 
@@ -78,6 +78,49 @@ def masked_scalar_op_impl(context, builder, sig, args):
     return masked_scalar_op_impl
 
 
+def make_unary_op(op):
+    """
+    Make closures that implement unary operations. See register_unary_op for
+    details.
+
+    `op` is the unary callable captured by the returned lowering function;
+    the returned function has the (context, builder, sig, args) signature
+    of a lowering implementation.
+    """
+
+    def masked_scalar_unary_op_impl(context, builder, sig, args):
+        """
+        Implement <op> `MaskedType`
+
+        Builds and returns a masked struct whose validity is copied from
+        the operand and whose value is `op` applied to the operand's
+        value.
+        """
+        # MaskedType(...) of the single operand
+        masked_type_1 = sig.args[0]
+        # MaskedType(...) of the result
+        masked_return_type = sig.return_type
+
+        # view the incoming operand as a struct with .value / .valid
+        m1 = cgutils.create_struct_proxy(masked_type_1)(
+            context, builder, value=args[0]
+        )
+
+        # we will return an output struct
+        result = cgutils.create_struct_proxy(masked_return_type)(
+            context, builder
+        )
+
+        # compute output validity: the result is valid exactly when the
+        # operand is valid
+        result.valid = m1.valid
+        # result.value is only assigned when the operand is valid
+        with builder.if_then(m1.valid):
+            # Let numba handle generating the IR needed to perform the
+            # operation (including any conversion between the operand's
+            # value type and the result's value type), by compiling the
+            # core op on the primitive value as a separate function and
+            # calling it
+            result.value = context.compile_internal(
+                builder,
+                lambda x: op(x),
+                nb_signature(
+                    masked_return_type.value_type, masked_type_1.value_type,
+                ),
+                (m1.value,),
+            )
+        return result._getvalue()
+
+    return masked_scalar_unary_op_impl
+
+
 def register_arithmetic_op(op):
     """
     Register a lowering implementation for the
@@ -95,6 +138,23 @@ def register_arithmetic_op(op):
     cuda_lower(op, MaskedType, MaskedType)(to_lower_op)
 
 
+def register_unary_op(op):
+    """
+    Register a lowering implementation for the
+    unary op `op`.
+
+    Because the lowering implementations compile the final
+    op separately using a lambda and compile_internal, `op`
+    needs to be tied to each lowering implementation using
+    a closure.
+
+    This function makes one closure for `op` and registers it as the
+    lowering of `op` applied to a single `MaskedType` argument.
+
+    """
+    to_lower_op = make_unary_op(op)
+    # unary ops take exactly one MaskedType operand
+    cuda_lower(op, MaskedType)(to_lower_op)
+
+
 def masked_scalar_null_op_impl(context, builder, sig, args):
     """
     Implement `MaskedType` <op> `NAType`
@@ -158,12 +218,16 @@ def register_const_op(op):
 
 
 # register all lowering at init
-for op in arith_ops + comparison_ops:
-    register_arithmetic_op(op)
-    register_const_op(op)
+for binary_op in arith_ops + comparison_ops:
+    register_arithmetic_op(binary_op)
+    register_const_op(binary_op)
     # null op impl can be shared between all ops
-    cuda_lower(op, MaskedType, NAType)(masked_scalar_null_op_impl)
-    cuda_lower(op, NAType, MaskedType)(masked_scalar_null_op_impl)
+    cuda_lower(binary_op, MaskedType, NAType)(masked_scalar_null_op_impl)
+    cuda_lower(binary_op, NAType, MaskedType)(masked_scalar_null_op_impl)
+
+# register the lowering of every unary op at init; each op gets its own
+# closure via register_unary_op
+for unary_op in unary_ops:
+    register_unary_op(unary_op)
 
 
 @cuda_lower(operator.is_, MaskedType, NAType)

diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py
@@ -18,7 +18,7 @@
 from pandas._libs.missing import NAType as _NAType
 
 from cudf.core.udf import api
-from cudf.core.udf._ops import arith_ops, comparison_ops
+from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
 
 
 class MaskedType(types.Type):
@@ -223,6 +223,15 @@ def generic(self, args, kws):
             return nb_signature(MaskedType(return_type), args[0], args[1])
 
 
+class MaskedScalarUnaryOp(AbstractTemplate):
+    """
+    Typing template for <op> `MaskedType`, shared by every unary op.
+    """
+
+    def generic(self, args, kws):
+        """
+        Type a unary op applied to a single `MaskedType` argument.
+
+        The return type is found by resolving the op (`self.key`) against
+        the operand's underlying `value_type`, and is then wrapped in a
+        `MaskedType`. For any other argument list no signature is
+        returned (the method falls through and returns None).
+        """
+        if len(args) == 1 and isinstance(args[0], MaskedType):
+            # ask the typing context what <op> returns for the
+            # unmasked value type
+            return_type = self.context.resolve_function_type(
+                self.key, (args[0].value_type,), kws
+            ).return_type
+            return nb_signature(MaskedType(return_type), args[0])
+
+
 class MaskedScalarNullOp(AbstractTemplate):
     def generic(self, args, kws):
         """
@@ -303,8 +312,11 @@ def generic(self, args, kws):
             return nb_signature(return_type, args[0])
 
 
-for op in arith_ops + comparison_ops:
+for binary_op in arith_ops + comparison_ops:
     # Every op shares the same typing class
-    cuda_decl_registry.register_global(op)(MaskedScalarArithOp)
-    cuda_decl_registry.register_global(op)(MaskedScalarNullOp)
-    cuda_decl_registry.register_global(op)(MaskedScalarScalarOp)
+    cuda_decl_registry.register_global(binary_op)(MaskedScalarArithOp)
+    cuda_decl_registry.register_global(binary_op)(MaskedScalarNullOp)
+    cuda_decl_registry.register_global(binary_op)(MaskedScalarScalarOp)
+
+# Every unary op shares the same typing class
+for unary_op in unary_ops:
+    cuda_decl_registry.register_global(unary_op)(MaskedScalarUnaryOp)
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -1,31 +1,14 @@
+import math
 import operator
 
 import pandas as pd
 import pytest
 from numba import cuda
 
 import cudf
+from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
 from cudf.testing._utils import NUMERIC_TYPES, assert_eq
 
-arith_ops = [
-    operator.add,
-    operator.sub,
-    operator.mul,
-    operator.truediv,
-    operator.floordiv,
-    operator.mod,
-    operator.pow,
-]
-
-comparison_ops = [
-    operator.eq,
-    operator.ne,
-    operator.lt,
-    operator.le,
-    operator.gt,
-    operator.ge,
-]
-
 
 def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs):
     gdf = data
@@ -175,6 +158,35 @@ def func_gdf(row):
     run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False)
 
 
+@pytest.mark.parametrize("op", unary_ops)
+def test_unary_masked(op):
+    # Exercises the typing and lowering of each unary op in `unary_ops`
+    # by comparing the pandas and cudf results of the same UDF on a
+    # column that contains a null.
+    def func_pdf(row):
+        x = row["a"]
+        return op(x) if x is not pd.NA else pd.NA
+
+    def func_gdf(row):
+        x = row["a"]
+        return op(x) if x is not cudf.NA else cudf.NA
+
+    # Pick input data by op name. The special cases below presumably keep
+    # the inputs inside each function's domain -- TODO confirm; all other
+    # ops use the mixed-sign default in the final branch.
+    if "log" in op.__name__:
+        # substring match: covers log, log10, log1p and log2;
+        # all inputs are strictly positive
+        gdf = cudf.DataFrame({"a": [0.1, 1.0, None, 3.5, 1e8]})
+    elif op.__name__ in {"asin", "acos"}:
+        # inputs lie in [0, 1]
+        gdf = cudf.DataFrame({"a": [0.0, 0.5, None, 1.0]})
+    elif op.__name__ in {"atanh"}:
+        # inputs lie strictly between -1 and 1
+        gdf = cudf.DataFrame({"a": [0.0, -0.5, None, 0.8]})
+    elif op.__name__ in {"acosh", "sqrt", "lgamma"}:
+        # inputs are all >= 1
+        gdf = cudf.DataFrame({"a": [1.0, 2.0, None, 11.0]})
+    elif op.__name__ in {"gamma"}:
+        # small positive inputs
+        gdf = cudf.DataFrame({"a": [0.1, 2, None, 4]})
+    elif op.__name__ in {"invert"}:
+        # the only case that requests an explicit integer dtype
+        gdf = cudf.DataFrame({"a": [-100, 128, None, 0]}, dtype="int64")
+    else:
+        gdf = cudf.DataFrame({"a": [-125.60, 395.2, 0.0, None]})
+    # dtypes are not compared, only values
+    run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False)
+
+
 def test_masked_is_null_conditional():
     def func_pdf(row):
         x = row["a"]
@@ -318,6 +330,13 @@ def func_pdf(row):
             return z / x
         elif x + y is pd.NA:
             return 2.5
+        elif w > 100:
+            return (
+                math.sin(x)
+                + math.sqrt(y)
+                - (-z)
+                + math.lgamma(x) * math.fabs(-0.8) / math.radians(3.14)
+            )
         else:
             return y > 2
 
@@ -334,15 +353,17 @@ def func_gdf(row):
             return z / x
         elif x + y is cudf.NA:
             return 2.5
+        elif w > 100:
+            return math.sin(x) + math.sqrt(y) - operator.neg(z)
         else:
             return y > 2
 
     gdf = cudf.DataFrame(
         {
-            "a": [1, 3, 6, 0, None, 5, None],
-            "b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0],
-            "c": [2, 3, 6, 0, None, 5, None],
-            "d": [4, None, 6, 0, None, 5, None],
+            "a": [1, 3, 6, 0, None, 5, None, 101],
+            "b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0, 1.0],
+            "c": [2, 3, 6, 0, None, 5, None, 6],
+            "d": [4, None, 6, 0, None, 5, None, 7.5],
         }
     )
     run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False)