rapidsai · isVoid · Jun 29, 2022
@@ -0,0 +1,9 @@
+# Module-level table of cudf options, keyed by option name.
+# "binary_operation_result_type" starts as "PROMOTE"; the numerical column
+# binary-op code compares it against "LARGEST" to decide whether the
+# promoted result dtype is capped with ``_limit_to_max_precision``.
+_CUDF_CONFIG = {"binary_operation_result_type": "PROMOTE"}
+
+def get_config(key):
+    """Return the current value of the cudf option ``key``.
+
+    Parameters
+    ----------
+    key : hashable
+        Name of an option present in ``_CUDF_CONFIG``.
+
+    Returns
+    -------
+    object
+        Whatever value is currently stored for ``key``.
+
+    Raises
+    ------
+    KeyError
+        If ``key`` is not a recognized option.
+    """
+    try:
+        return _CUDF_CONFIG[key]
+    except KeyError:
+        # Reuse the wording ``set_config`` uses for an unknown name so the
+        # two accessors report the problem the same way. The exception
+        # type stays ``KeyError`` for callers that already catch it.
+        raise KeyError(
+            f"Unrecognized key for cudf configs: {key}"
+        ) from None
+
+def set_config(key, val):
+    """Store ``val`` as the new value of the existing cudf option ``key``.
+
+    Only names already present in ``_CUDF_CONFIG`` can be assigned; this
+    function never creates a new option.
+
+    Parameters
+    ----------
+    key : hashable
+        Name of the option to update.
+    val : object
+        Value to store for ``key``.
+
+    Raises
+    ------
+    ValueError
+        If ``key`` is not a recognized option.
+    """
+    if key in _CUDF_CONFIG:
+        # NOTE(review): ``val`` is stored exactly as given; nothing here
+        # checks it against the values the readers compare with.
+        _CUDF_CONFIG[key] = val
+        return
+    raise ValueError(f"Unrecognized key for cudf configs: {key}")
@@ -53,7 +53,9 @@
     np_dtypes_to_pandas_dtypes,
     numeric_normalize_types,
     to_cudf_compatible_scalar,
+    _limit_to_max_precision
 )
+from cudf.config import get_config
 
 from .numerical_base import NumericalBaseColumn
 
@@ -183,6 +185,8 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         out_dtype = self.dtype
         if other is not None:
             out_dtype = np.result_type(self.dtype, other.dtype)
+            if get_config("binary_operation_result_type") == "LARGEST":
+                out_dtype = _limit_to_max_precision(self.dtype, other.dtype, out_dtype)
             if op in {"__mod__", "__floordiv__"}:
                 tmp = self if reflect else other
                 # Guard against division by zero for integers.
@@ -246,7 +250,11 @@ def normalize_binop_value(
         if other_dtype.kind in {"b", "i", "u", "f"}:
             if isinstance(other, cudf.Scalar):
                 return other
-            other_dtype = np.promote_types(self.dtype, other_dtype)
+            promoted_dtype = np.promote_types(self.dtype, other_dtype)
+            if get_config("binary_operation_result_type") == "LARGEST":
+                other_dtype = _limit_to_max_precision(self.dtype, other_dtype, promoted_dtype)
+            else:
+                other_dtype = promoted_dtype
             if other_dtype == np.dtype("float16"):
                 other_dtype = cudf.dtype("float32")
                 other = other_dtype.type(other)

@@ -23,6 +23,7 @@
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
 )
+from cudf.config import set_config
 
 STRING_TYPES = {"str"}
 
@@ -2962,3 +2963,68 @@ def test_binops_dot(df, other):
     got = df @ other
 
     utils.assert_eq(expected, got)
+
+
+@pytest.fixture
+def limit_maximum_binop_precisions():
+    """Run the requesting test with the "LARGEST" result-type option.
+
+    The option is switched to "LARGEST" before the ``yield`` and set to
+    "PROMOTE" when the fixture is resumed afterwards.
+    """
+    option = "binary_operation_result_type"
+    set_config(option, "LARGEST")
+    yield
+    # "PROMOTE" is written back unconditionally, whatever the option held
+    # before this fixture ran.
+    set_config(option, "PROMOTE")
+
+@pytest.mark.parametrize("ldtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("rdtype", NUMERIC_TYPES)
+@pytest.mark.parametrize(
+    "op",
+    [
+        binop
+        for binop in _binops
+        if binop
+        not in (operator.truediv, operator.mod, operator.floordiv)
+    ],
+)
+def test_limiting_maximum_binop_precisions_series(
+    ldtype, rdtype, op, limit_maximum_binop_precisions
+):
+    """Series-with-Series binops must not outgrow the wider operand.
+
+    Runs under the ``limit_maximum_binop_precisions`` fixture and checks
+    that the result dtype's itemsize is at most the larger of the two
+    operand itemsizes. True division, modulo and floor division are
+    left out of the parametrization.
+    """
+    base = cudf.Series([1, 2, 3, 4, 5])
+    left = base.astype(ldtype)
+    right = base.astype(rdtype)
+    widest_input = max(np.dtype(t).itemsize for t in (ldtype, rdtype))
+    out_dtype = op(left, right).dtype
+
+    assert out_dtype.itemsize <= widest_input
+
+
+@pytest.mark.parametrize("ldtype", NUMERIC_TYPES)
+@pytest.mark.parametrize("rdtype", NUMERIC_TYPES)
+@pytest.mark.parametrize(
+    "op",
+    [
+        binop
+        for binop in _binops
+        if binop
+        not in (operator.truediv, operator.mod, operator.floordiv)
+    ],
+)
+@pytest.mark.parametrize("reflected", [True, False])
+def test_limiting_maximum_binop_precisions_series_scalar(
+    ldtype, rdtype, op, reflected, limit_maximum_binop_precisions
+):
+    """Series-with-``cudf.Scalar`` binops must not outgrow the operands.
+
+    ``ldtype`` is the Series dtype and ``rdtype`` the Scalar dtype; with
+    ``reflected`` the Scalar is passed as the left operand instead. The
+    result dtype's itemsize must be at most the larger of the two.
+    """
+    series = cudf.Series([1, 2, 3, 4, 5], dtype=ldtype)
+    scalar = cudf.Scalar(1, dtype=rdtype)
+    operands = (scalar, series) if reflected else (series, scalar)
+    widest_input = max(np.dtype(t).itemsize for t in (ldtype, rdtype))
+    out_dtype = op(*operands).dtype
+
+    assert out_dtype.itemsize <= widest_input
+
+
+
+@pytest.mark.parametrize("dtype", NUMERIC_TYPES)
+@pytest.mark.parametrize(
+    "op",
+    [
+        binop
+        for binop in _binops
+        if binop
+        not in (operator.truediv, operator.mod, operator.floordiv)
+    ],
+)
+@pytest.mark.parametrize("reflected", [True, False])
+def test_limiting_maximum_binop_precisions_series_literal(
+    dtype, op, reflected, limit_maximum_binop_precisions
+):
+    """Series-with-Python-literal binops must keep the Series width.
+
+    The other operand is the plain integer ``1``; with ``reflected`` it
+    is passed as the left operand. The result dtype's itemsize must be
+    at most that of the Series dtype.
+    """
+    series = cudf.Series([1, 2, 3, 4, 5], dtype=dtype)
+    literal = 1
+    operands = (literal, series) if reflected else (series, literal)
+    out_dtype = op(*operands).dtype
+
+    assert out_dtype.itemsize <= np.dtype(dtype).itemsize
@@ -639,6 +639,13 @@ def _can_cast(from_dtype, to_dtype):
     else:
         return np.can_cast(from_dtype, to_dtype)
 
+def _limit_to_max_precision(
+    self_dtype: np.dtype, other_dtype: np.dtype, promoted_dtype: np.dtype
+) -> np.dtype:
+    """Cap ``promoted_dtype`` at the width of the wider input dtype.
+
+    Parameters
+    ----------
+    self_dtype, other_dtype : np.dtype
+        Dtypes of the two operands.
+    promoted_dtype : np.dtype
+        Dtype inferred for the result of combining the operands.
+
+    Returns
+    -------
+    np.dtype
+        ``promoted_dtype`` itself when its itemsize already equals the
+        larger operand itemsize; otherwise a dtype with the byte order
+        and kind of ``promoted_dtype`` and that larger itemsize.
+    """
+    max_itemsize = max(self_dtype.itemsize, other_dtype.itemsize)
+    if promoted_dtype.itemsize == max_itemsize:
+        # Nothing to resize: return the promoted dtype untouched rather
+        # than rebuilding it from a type string, which would drop
+        # anything the two-character prefix cannot carry (for example
+        # the unit of a datetime64).
+        return promoted_dtype
+    # ``dtype.str`` begins with the byte-order and kind characters (e.g.
+    # "<i"); keep those from the promoted dtype but use the width of the
+    # widest input.
+    return np.dtype(f"{promoted_dtype.str[:2]}{max_itemsize}")
 
 # Type dispatch loops similar to what are found in `np.add.types`
 # In NumPy, whether or not an op can be performed between two