Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] Offer a User Configurable Option to Limit the Output Precision of Binary Ops #11167

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions python/cudf/cudf/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Process-wide cuDF options and their current values.
_CUDF_CONFIG = {"binary_operation_result_type": "PROMOTE"}

# Values each option may take. An option that is absent from this table is
# not value-checked by ``set_config``.
_CUDF_CONFIG_ALLOWED_VALUES = {
    "binary_operation_result_type": ("PROMOTE", "LARGEST"),
}


def get_config(key):
    """Return the current value of the option named ``key``.

    Raises
    ------
    KeyError
        If ``key`` is not a known option.
    """
    return _CUDF_CONFIG[key]


def set_config(key, val):
    """Set the option named ``key`` to ``val``.

    Raises
    ------
    ValueError
        If ``key`` is not a known option, or if ``val`` is not one of the
        values accepted for that option.
    """
    if key not in _CUDF_CONFIG:
        raise ValueError(f"Unrecognized key for cudf configs: {key}")
    # Consumers compare option values against exact strings, so a misspelt
    # value would otherwise be stored and silently ignored.
    allowed = _CUDF_CONFIG_ALLOWED_VALUES.get(key)
    if allowed is not None and val not in allowed:
        raise ValueError(
            f"Invalid value {val!r} for cudf config {key!r}; "
            f"expected one of {allowed}"
        )
    _CUDF_CONFIG[key] = val
10 changes: 9 additions & 1 deletion python/cudf/cudf/core/column/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@
np_dtypes_to_pandas_dtypes,
numeric_normalize_types,
to_cudf_compatible_scalar,
_limit_to_max_precision
)
from cudf.config import get_config

from .numerical_base import NumericalBaseColumn

Expand Down Expand Up @@ -183,6 +185,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
out_dtype = self.dtype
if other is not None:
out_dtype = np.result_type(self.dtype, other.dtype)
if get_config("binary_operation_result_type") == "LARGEST":
out_dtype = _limit_to_max_precision(self.dtype, other.dtype, out_dtype)
if op in {"__mod__", "__floordiv__"}:
tmp = self if reflect else other
# Guard against division by zero for integers.
Expand Down Expand Up @@ -246,7 +250,11 @@ def normalize_binop_value(
if other_dtype.kind in {"b", "i", "u", "f"}:
if isinstance(other, cudf.Scalar):
return other
other_dtype = np.promote_types(self.dtype, other_dtype)
promoted_dtype = np.promote_types(self.dtype, other_dtype)
if get_config("binary_operation_result_type") == "LARGEST":
other_dtype = _limit_to_max_precision(self.dtype, other_dtype, promoted_dtype)
else:
other_dtype = promoted_dtype
if other_dtype == np.dtype("float16"):
other_dtype = cudf.dtype("float32")
other = other_dtype.type(other)
Expand Down
66 changes: 66 additions & 0 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
NUMERIC_TYPES,
TIMEDELTA_TYPES,
)
from cudf.config import set_config

STRING_TYPES = {"str"}

Expand Down Expand Up @@ -2962,3 +2963,68 @@ def test_binops_dot(df, other):
got = df @ other

utils.assert_eq(expected, got)


@pytest.fixture
def limit_maximum_binop_precisions():
    """Run the requesting test with binary-op results set to "LARGEST".

    The value the option held before the test is restored afterwards, so a
    session that was already using a non-default setting is left untouched.
    """
    # Imported locally because this module only imports ``set_config``.
    from cudf.config import get_config

    option = "binary_operation_result_type"
    previous = get_config(option)
    set_config(option, "LARGEST")
    yield
    set_config(option, previous)

@pytest.mark.parametrize("ldtype", NUMERIC_TYPES)
@pytest.mark.parametrize("rdtype", NUMERIC_TYPES)
@pytest.mark.parametrize(
    "op",
    [
        binop
        for binop in _binops
        if binop not in (operator.truediv, operator.mod, operator.floordiv)
    ],
)
def test_limiting_maximum_binop_precisions_series(
    ldtype, rdtype, op, limit_maximum_binop_precisions
):
    """Series-with-Series binops must not exceed the wider operand's width.

    Runs with the ``limit_maximum_binop_precisions`` fixture active and
    checks only the item size of the result dtype.
    """
    base = cudf.Series([1, 2, 3, 4, 5])
    left = base.astype(ldtype)
    right = base.astype(rdtype)
    widest_input = max(np.dtype(name).itemsize for name in (ldtype, rdtype))

    assert op(left, right).dtype.itemsize <= widest_input

@pytest.mark.parametrize("ldtype", NUMERIC_TYPES)
@pytest.mark.parametrize("rdtype", NUMERIC_TYPES)
@pytest.mark.parametrize(
    "op",
    [
        binop
        for binop in _binops
        if binop not in (operator.truediv, operator.mod, operator.floordiv)
    ],
)
@pytest.mark.parametrize("reflected", [True, False])
def test_limiting_maximum_binop_precisions_series_scalar(
    ldtype, rdtype, op, reflected, limit_maximum_binop_precisions
):
    """Series-with-``cudf.Scalar`` binops must not exceed the wider operand.

    ``reflected`` swaps the operands so the scalar is on the left-hand side.
    Only the item size of the result dtype is checked.
    """
    column = cudf.Series([1, 2, 3, 4, 5], dtype=ldtype)
    scalar = cudf.Scalar(1, dtype=rdtype)
    operands = (scalar, column) if reflected else (column, scalar)
    widest_input = max(np.dtype(name).itemsize for name in (ldtype, rdtype))

    assert op(*operands).dtype.itemsize <= widest_input


@pytest.mark.parametrize("dtype", NUMERIC_TYPES)
@pytest.mark.parametrize(
    "op",
    [
        binop
        for binop in _binops
        if binop not in (operator.truediv, operator.mod, operator.floordiv)
    ],
)
@pytest.mark.parametrize("reflected", [True, False])
def test_limiting_maximum_binop_precisions_series_literal(
    dtype, op, reflected, limit_maximum_binop_precisions
):
    """Series-with-Python-int binops must not exceed the Series' own width.

    ``reflected`` swaps the operands so the literal is on the left-hand side.
    Only the item size of the result dtype is checked.
    """
    column = cudf.Series([1, 2, 3, 4, 5], dtype=dtype)
    literal = 1
    operands = (literal, column) if reflected else (column, literal)

    assert op(*operands).dtype.itemsize <= np.dtype(dtype).itemsize
7 changes: 7 additions & 0 deletions python/cudf/cudf/utils/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,13 @@ def _can_cast(from_dtype, to_dtype):
else:
return np.can_cast(from_dtype, to_dtype)

def _limit_to_max_precision(
    self_dtype: np.dtype, other_dtype: np.dtype, promoted_dtype: np.dtype
) -> np.dtype:
    """Cap ``promoted_dtype`` at the item size of the wider input dtype.

    Parameters
    ----------
    self_dtype, other_dtype
        Dtypes of the two operands.
    promoted_dtype
        Dtype inferred for the result of combining the operands.

    Returns
    -------
    np.dtype
        ``promoted_dtype`` itself when it is no wider than the wider operand;
        otherwise a dtype with the same byte-order and kind characters as
        ``promoted_dtype`` and the item size of the wider operand.
    """
    max_itemsize = max(self_dtype.itemsize, other_dtype.itemsize)
    # Already within the limit: return it as-is. Rebuilding from the string
    # form would widen a narrower dtype up to ``max_itemsize`` (or fail
    # outright, e.g. "|b8" is not a valid dtype).
    if promoted_dtype.itemsize <= max_itemsize:
        return promoted_dtype
    # Use the data type "kind" from the inferred promoted_dtype, but limit its
    # output bitwidth to no larger than the maximum bit width of input type.
    return np.dtype(f"{promoted_dtype.str[:2]}{max_itemsize}")

# Type dispatch loops similar to what are found in `np.add.types`
# In NumPy, whether or not an op can be performed between two
Expand Down