From 815859974a7200fe080dc82c13eb936665088521 Mon Sep 17 00:00:00 2001
From: Michael Wang
Date: Tue, 28 Jun 2022 18:17:32 -0700
Subject: [PATCH] Initial

---
Reviewer notes (commentary only; this section is not part of the commit):
  * Leading whitespace and blank context lines were rebuilt from a copy of
    this patch that had lost its line breaks. Confirm them against the
    tree, or apply with `git apply --ignore-whitespace`.
  * The post-image blob ids on the `index` lines predate the review edits;
    regenerate the patch with `git format-patch` once it applies.

 python/cudf/cudf/config.py                | 32 ++++++++++++
 python/cudf/cudf/core/column/numerical.py | 10 +++-
 python/cudf/cudf/tests/test_binops.py     | 68 +++++++++++++++++++++++
 python/cudf/cudf/utils/dtypes.py          | 17 +++++++
 4 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100644 python/cudf/cudf/config.py

diff --git a/python/cudf/cudf/config.py b/python/cudf/cudf/config.py
new file mode 100644
index 00000000000..e692686726a
--- /dev/null
+++ b/python/cudf/cudf/config.py
@@ -0,0 +1,32 @@
+_CUDF_CONFIG = {"binary_operation_result_type": "PROMOTE"}
+
+# Values accepted by each option, so that a typo such as "largest" is
+# rejected instead of being stored and then silently ignored.
+_CUDF_CONFIG_CHOICES = {
+    "binary_operation_result_type": ("PROMOTE", "LARGEST"),
+}
+
+
+def get_config(key):
+    """Return the current value of the cudf config option ``key``.
+
+    Raises ``KeyError`` if ``key`` is not a known option.
+    """
+    return _CUDF_CONFIG[key]
+
+
+def set_config(key, val):
+    """Set the cudf config option ``key`` to ``val``.
+
+    Raises ``ValueError`` if ``key`` is not a known option or if ``val``
+    is not one of the values accepted by that option.
+    """
+    if key not in _CUDF_CONFIG:
+        raise ValueError(f"Unrecognized key for cudf configs: {key}")
+    choices = _CUDF_CONFIG_CHOICES[key]
+    if val not in choices:
+        raise ValueError(
+            f"Invalid value for cudf config {key}: {val!r}; "
+            f"expected one of {choices}"
+        )
+    _CUDF_CONFIG[key] = val
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index d30026e8bfa..c3e7ddf4130 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -53,7 +53,9 @@
     np_dtypes_to_pandas_dtypes,
     numeric_normalize_types,
     to_cudf_compatible_scalar,
+    _limit_to_max_precision
 )
+from cudf.config import get_config
 
 from .numerical_base import NumericalBaseColumn
 
@@ -183,6 +185,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         out_dtype = self.dtype
         if other is not None:
             out_dtype = np.result_type(self.dtype, other.dtype)
+            if get_config("binary_operation_result_type") == "LARGEST":
+                out_dtype = _limit_to_max_precision(self.dtype, other.dtype, out_dtype)
             if op in {"__mod__", "__floordiv__"}:
                 tmp = self if reflect else other
                 # Guard against division by zero for integers.
@@ -246,7 +250,11 @@ def normalize_binop_value(
         if other_dtype.kind in {"b", "i", "u", "f"}:
             if isinstance(other, cudf.Scalar):
                 return other
-            other_dtype = np.promote_types(self.dtype, other_dtype)
+            promoted_dtype = np.promote_types(self.dtype, other_dtype)
+            if get_config("binary_operation_result_type") == "LARGEST":
+                other_dtype = _limit_to_max_precision(self.dtype, other_dtype, promoted_dtype)
+            else:
+                other_dtype = promoted_dtype
             if other_dtype == np.dtype("float16"):
                 other_dtype = cudf.dtype("float32")
             other = other_dtype.type(other)
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 0d1bac6aead..5f6ae6b40f3 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -23,6 +23,7 @@
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
 )
+from cudf.config import get_config, set_config
 
 STRING_TYPES = {"str"}
 
@@ -2962,3 +2963,70 @@ def test_binops_dot(df, other):
     got = df @ other
 
     utils.assert_eq(expected, got)
+
+
+@pytest.fixture
+def limit_maximum_binop_precisions():
+    """Run a test with binop results limited to the widest input type.
+
+    The setting in effect before the test is restored afterwards.
+    """
+    previous = get_config("binary_operation_result_type")
+    set_config("binary_operation_result_type", "LARGEST")
+    yield
+    set_config("binary_operation_result_type", previous)
+
+
+# Operators exercised by the precision-limit tests: every entry of `_binops`
+# except true division, modulo and floor division.
+_precision_limited_binops = [
+    op
+    for op in _binops
+    if op not in (operator.truediv, operator.mod, operator.floordiv)
+]
+
+
+@pytest.mark.parametrize("ldtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("rdtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("op", _precision_limited_binops)
+def test_limiting_maximum_binop_precisions_series(
+    ldtype, rdtype, op, limit_maximum_binop_precisions
+):
+    s = cudf.Series([1, 2, 3, 4, 5])
+    lhs, rhs = s.astype(ldtype), s.astype(rdtype)
+    max_itemsize = max(np.dtype(ldtype).itemsize, np.dtype(rdtype).itemsize)
+    result_dtype = op(lhs, rhs).dtype
+
+    assert result_dtype.itemsize <= max_itemsize
+
+
+@pytest.mark.parametrize("ldtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("rdtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("op", _precision_limited_binops)
+@pytest.mark.parametrize("reflected", [True, False])
+def test_limiting_maximum_binop_precisions_series_scalar(
+    ldtype, rdtype, op, reflected, limit_maximum_binop_precisions
+):
+    lhs = cudf.Series([1, 2, 3, 4, 5], dtype=ldtype)
+    rhs = cudf.Scalar(1, dtype=rdtype)
+    if reflected:
+        lhs, rhs = rhs, lhs
+    max_itemsize = max(np.dtype(ldtype).itemsize, np.dtype(rdtype).itemsize)
+    result_dtype = op(lhs, rhs).dtype
+
+    assert result_dtype.itemsize <= max_itemsize
+
+
+@pytest.mark.parametrize("dtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("op", _precision_limited_binops)
+@pytest.mark.parametrize("reflected", [True, False])
+def test_limiting_maximum_binop_precisions_series_literal(
+    dtype, op, reflected, limit_maximum_binop_precisions
+):
+    lhs = cudf.Series([1, 2, 3, 4, 5], dtype=dtype)
+    rhs = 1
+    if reflected:
+        lhs, rhs = rhs, lhs
+    result_dtype = op(lhs, rhs).dtype
+
+    assert result_dtype.itemsize <= np.dtype(dtype).itemsize
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index c2d9a57b72f..804d190c8d0 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -639,6 +639,23 @@ def _can_cast(from_dtype, to_dtype):
     else:
         return np.can_cast(from_dtype, to_dtype)
 
+
+def _limit_to_max_precision(
+    self_dtype: np.dtype, other_dtype: np.dtype, promoted_dtype: np.dtype
+) -> np.dtype:
+    """Cap ``promoted_dtype`` at the width of the wider of the two inputs.
+
+    The result keeps the byte order and kind of ``promoted_dtype`` but its
+    item size is at most ``max(self_dtype.itemsize, other_dtype.itemsize)``.
+    """
+    max_itemsize = max(self_dtype.itemsize, other_dtype.itemsize)
+    if promoted_dtype.itemsize <= max_itemsize:
+        # Already within the limit; nothing to narrow.
+        return promoted_dtype
+    # Use the data type "kind" from the inferred promoted_dtype, but limit its
+    # output bitwidth to no larger than the maximum bit width of input type.
+    return np.dtype(f"{promoted_dtype.str[:2]}{max_itemsize}")
+
 
 # Type dispatch loops similar to what are found in `np.add.types`
 # In NumPy, whether or not an op can be performed between two