Skip to content

Commit

Permalink
Support Unary Operations in Masked UDF (#9409)
Browse files Browse the repository at this point in the history
This PR adds support for several unary operations in masked UDFs, including the trigonometric functions `sin`, `cos`, and `tan`; the rounding functions `ceil` and `floor`; the sign function `neg`; and the logical function `not`.

closes #9405

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - https://github.com/brandon-b-miller
  - Graham Markall (https://github.com/gmarkall)

URL: #9409
  • Loading branch information
isVoid authored Oct 19, 2021
1 parent a19bd23 commit 5e2aaf9
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 34 deletions.
35 changes: 35 additions & 0 deletions python/cudf/cudf/core/udf/_ops.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import operator

arith_ops = [
Expand All @@ -10,6 +11,40 @@
operator.pow,
]

# Unary operations that masked UDFs support. The list is the single
# source of truth shared by the typing, lowering and test modules, so
# the order below (math functions first, then operators) is kept stable.
unary_ops = [
    # single-argument functions from the `math` module
    math.acos,
    math.acosh,
    math.asin,
    math.asinh,
    math.atan,
    math.atanh,
    math.ceil,
    math.cos,
    math.degrees,
    math.erf,
    math.erfc,
    math.exp,
    math.expm1,
    math.fabs,
    math.floor,
    math.gamma,
    math.lgamma,
    math.log,
    math.log10,
    math.log1p,
    math.log2,
    math.radians,
    math.sin,
    math.sinh,
    math.sqrt,
    math.tan,
    math.tanh,
] + [
    # unary operators from the `operator` module
    operator.pos,
    operator.neg,
    operator.not_,
    operator.invert,
]

comparison_ops = [
operator.eq,
operator.ne,
Expand Down
76 changes: 70 additions & 6 deletions python/cudf/cudf/core/udf/lowering.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from numba.extending import lower_builtin, types

from cudf.core.udf import api
from cudf.core.udf._ops import arith_ops, comparison_ops
from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
from cudf.core.udf.typing import MaskedType, NAType


Expand Down Expand Up @@ -78,6 +78,49 @@ def masked_scalar_op_impl(context, builder, sig, args):
return masked_scalar_op_impl


def make_unary_op(op):
    """
    Build the lowering implementation for the unary operation `op`
    applied to a single `MaskedType` operand. The returned closure keeps
    its own reference to `op`. See register_unary_op for details.
    """

    def masked_scalar_unary_op_impl(context, builder, sig, args):
        """
        Implement <op> `MaskedType`
        """
        # Both the operand and the result are MaskedType(...)
        operand_type = sig.args[0]
        return_type = sig.return_type

        # Struct view over the incoming masked operand
        operand = cgutils.create_struct_proxy(operand_type)(
            context, builder, value=args[0]
        )

        # Fresh struct that will carry the masked result
        out = cgutils.create_struct_proxy(return_type)(context, builder)

        # The result is valid exactly when the operand is valid
        out.valid = operand.valid

        # Signature of the core op on the underlying primitive value
        core_sig = nb_signature(
            return_type.value_type, operand_type.value_type
        )

        # Only compute the value when the operand is valid
        with builder.if_then(operand.valid):
            # Let numba generate the IR for the op on the primitive value
            # by compiling it as a separate function and calling it
            out.value = context.compile_internal(
                builder, lambda x: op(x), core_sig, (operand.value,)
            )
        return out._getvalue()

    return masked_scalar_unary_op_impl


def register_arithmetic_op(op):
"""
Register a lowering implementation for the
Expand All @@ -95,6 +138,23 @@ def register_arithmetic_op(op):
cuda_lower(op, MaskedType, MaskedType)(to_lower_op)


def register_unary_op(op):
    """
    Register a lowering implementation for the unary op `op`
    applied to a `MaskedType`.

    The lowering implementation compiles the final op separately
    through a lambda and compile_internal, so every implementation
    has to capture its own `op` in a closure. make_unary_op builds
    that closure for one op, and it is registered here for the
    signature `op(MaskedType)`.
    """
    cuda_lower(op, MaskedType)(make_unary_op(op))


def masked_scalar_null_op_impl(context, builder, sig, args):
"""
Implement `MaskedType` <op> `NAType`
Expand Down Expand Up @@ -158,12 +218,16 @@ def register_const_op(op):


# register all lowering at init
for op in arith_ops + comparison_ops:
register_arithmetic_op(op)
register_const_op(op)
for binary_op in arith_ops + comparison_ops:
register_arithmetic_op(binary_op)
register_const_op(binary_op)
# null op impl can be shared between all ops
cuda_lower(op, MaskedType, NAType)(masked_scalar_null_op_impl)
cuda_lower(op, NAType, MaskedType)(masked_scalar_null_op_impl)
cuda_lower(binary_op, MaskedType, NAType)(masked_scalar_null_op_impl)
cuda_lower(binary_op, NAType, MaskedType)(masked_scalar_null_op_impl)

# register the lowering for every unary op at init
for unary_op in unary_ops:
register_unary_op(unary_op)


@cuda_lower(operator.is_, MaskedType, NAType)
Expand Down
22 changes: 17 additions & 5 deletions python/cudf/cudf/core/udf/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pandas._libs.missing import NAType as _NAType

from cudf.core.udf import api
from cudf.core.udf._ops import arith_ops, comparison_ops
from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops


class MaskedType(types.Type):
Expand Down Expand Up @@ -223,6 +223,15 @@ def generic(self, args, kws):
return nb_signature(MaskedType(return_type), args[0], args[1])


class MaskedScalarUnaryOp(AbstractTemplate):
    def generic(self, args, kws):
        """
        Typing for <op> `MaskedType`

        Resolves the op against the operand's underlying value type and
        wraps the resulting return type in a `MaskedType`. Any other
        argument list is left unmatched (returns None).
        """
        if len(args) != 1 or not isinstance(args[0], MaskedType):
            return None

        operand = args[0]
        # Ask the typing context what the op returns for the bare value
        value_sig = self.context.resolve_function_type(
            self.key, (operand.value_type,), kws
        )
        return nb_signature(MaskedType(value_sig.return_type), operand)


class MaskedScalarNullOp(AbstractTemplate):
def generic(self, args, kws):
"""
Expand Down Expand Up @@ -303,8 +312,11 @@ def generic(self, args, kws):
return nb_signature(return_type, args[0])


for op in arith_ops + comparison_ops:
for binary_op in arith_ops + comparison_ops:
# Every op shares the same typing class
cuda_decl_registry.register_global(op)(MaskedScalarArithOp)
cuda_decl_registry.register_global(op)(MaskedScalarNullOp)
cuda_decl_registry.register_global(op)(MaskedScalarScalarOp)
cuda_decl_registry.register_global(binary_op)(MaskedScalarArithOp)
cuda_decl_registry.register_global(binary_op)(MaskedScalarNullOp)
cuda_decl_registry.register_global(binary_op)(MaskedScalarScalarOp)

for unary_op in unary_ops:
cuda_decl_registry.register_global(unary_op)(MaskedScalarUnaryOp)
67 changes: 44 additions & 23 deletions python/cudf/cudf/tests/test_udf_masked_ops.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,14 @@
import math
import operator

import pandas as pd
import pytest
from numba import cuda

import cudf
from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
from cudf.testing._utils import NUMERIC_TYPES, assert_eq

arith_ops = [
operator.add,
operator.sub,
operator.mul,
operator.truediv,
operator.floordiv,
operator.mod,
operator.pow,
]

comparison_ops = [
operator.eq,
operator.ne,
operator.lt,
operator.le,
operator.gt,
operator.ge,
]


def run_masked_udf_test(func_pdf, func_gdf, data, **kwargs):
gdf = data
Expand Down Expand Up @@ -175,6 +158,35 @@ def func_gdf(row):
run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False)


@pytest.mark.parametrize("op", unary_ops)
def test_unary_masked(op):
    """
    Exercise the typing and lowering of every unary op on a masked
    column, comparing the pandas result with the cudf result.
    """

    def func_pdf(row):
        x = row["a"]
        return op(x) if x is not pd.NA else pd.NA

    def func_gdf(row):
        x = row["a"]
        return op(x) if x is not cudf.NA else cudf.NA

    # Choose inputs that the op accepts; every data set includes one null
    # so the masked path is covered as well.
    op_name = op.__name__
    frame_kwargs = {}
    if "log" in op_name:
        values = [0.1, 1.0, None, 3.5, 1e8]
    elif op_name in ("asin", "acos"):
        values = [0.0, 0.5, None, 1.0]
    elif op_name == "atanh":
        values = [0.0, -0.5, None, 0.8]
    elif op_name in ("acosh", "sqrt", "lgamma"):
        values = [1.0, 2.0, None, 11.0]
    elif op_name == "gamma":
        values = [0.1, 2, None, 4]
    elif op_name == "invert":
        # the integer column is requested explicitly via dtype
        values = [-100, 128, None, 0]
        frame_kwargs["dtype"] = "int64"
    else:
        values = [-125.60, 395.2, 0.0, None]

    gdf = cudf.DataFrame({"a": values}, **frame_kwargs)
    run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False)


def test_masked_is_null_conditional():
def func_pdf(row):
x = row["a"]
Expand Down Expand Up @@ -318,6 +330,13 @@ def func_pdf(row):
return z / x
elif x + y is pd.NA:
return 2.5
elif w > 100:
return (
math.sin(x)
+ math.sqrt(y)
- (-z)
+ math.lgamma(x) * math.fabs(-0.8) / math.radians(3.14)
)
else:
return y > 2

Expand All @@ -334,15 +353,17 @@ def func_gdf(row):
return z / x
elif x + y is cudf.NA:
return 2.5
elif w > 100:
return math.sin(x) + math.sqrt(y) - operator.neg(z)
else:
return y > 2

gdf = cudf.DataFrame(
{
"a": [1, 3, 6, 0, None, 5, None],
"b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0],
"c": [2, 3, 6, 0, None, 5, None],
"d": [4, None, 6, 0, None, 5, None],
"a": [1, 3, 6, 0, None, 5, None, 101],
"b": [3.0, 2.5, None, 5.0, 1.0, 5.0, 11.0, 1.0],
"c": [2, 3, 6, 0, None, 5, None, 6],
"d": [4, None, 6, 0, None, 5, None, 7.5],
}
)
run_masked_udf_test(func_pdf, func_gdf, gdf, check_dtype=False)
Expand Down

0 comments on commit 5e2aaf9

Please sign in to comment.