From ec38b1e95e345140e119dfcf54d0f62133a86158 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Thu, 5 Sep 2024 09:48:03 -0400
Subject: [PATCH] Make isinstance check pass for proxy ndarrays (#16601)

Closes #14537.

Authors:
  - Matthew Murray (https://github.com/Matt711)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16601
---
Review notes (below the three-dash line, so not part of the commit message):

  * What the patch does: the `ndarray` proxy type gains ProxyNDarrayBase, a
    numpy.ndarray subclass, as a base class, and `_fsproxy_wrap` builds the
    proxy through that base's `__new__` when it appears in the proxy's MRO.
    A new `__array_ufunc__` routes ufunc calls through
    `_fast_slow_function_call`.
  * `ndarray__array_ufunc__`: for a tuple result only `result[0]` is
    inspected. If it is a proxy wrapping a numpy.ndarray, every element is
    passed through `numpy.asarray`; otherwise the tuple is returned as is.
  * `_fsproxy_wrap`: only `object` and `ProxyNDarrayBase` are handled; any
    other class added to PROXY_BASE_CLASSES reaches the TypeError branch.
    The message text reads "an proxy instance" (typo for "a proxy instance").
  * `ProxyNDarrayBase.__new__` replaces a cupy.ndarray argument with
    `arr.get()` before viewing the data as `cls`.
  * The new tests take an `array` fixture that is not defined in these
    hunks; presumably it already exists in test_cudf_pandas.py (verify).
  * Indentation of pre-existing context lines is not recoverable with
    certainty from a whitespace-collapsed copy. Hunk line counts match the
    @@ headers; if a context line fails to match, retry with
    `git apply --ignore-whitespace`.

 python/cudf/cudf/pandas/_wrappers/numpy.py    | 23 +++++++++
 python/cudf/cudf/pandas/fast_slow_proxy.py    | 26 +++++++++-
 python/cudf/cudf/pandas/proxy_base.py         | 22 ++++++++
 .../cudf_pandas_tests/test_cudf_pandas.py     | 50 ++++++++++++++++++-
 4 files changed, 119 insertions(+), 2 deletions(-)
 create mode 100644 python/cudf/cudf/pandas/proxy_base.py

diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py
index 90ac5198270..d5e669cb58f 100644
--- a/python/cudf/cudf/pandas/_wrappers/numpy.py
+++ b/python/cudf/cudf/pandas/_wrappers/numpy.py
@@ -10,10 +10,13 @@
 from packaging import version
 
 from ..fast_slow_proxy import (
+    _fast_slow_function_call,
     _FastSlowAttribute,
+    is_proxy_object,
     make_final_proxy_type,
     make_intermediate_proxy_type,
 )
+from ..proxy_base import ProxyNDarrayBase
 from .common import (
     array_interface,
     array_method,
@@ -105,18 +108,38 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor):
         return super(cls, cls)._fsproxy_wrap(arr, constructor)
 
 
+def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+    result, _ = _fast_slow_function_call(
+        getattr(ufunc, method),
+        *inputs,
+        **kwargs,
+    )
+    if isinstance(result, tuple):
+        if is_proxy_object(result[0]) and isinstance(
+            result[0]._fsproxy_wrapped, numpy.ndarray
+        ):
+            return tuple(numpy.asarray(x) for x in result)
+    elif is_proxy_object(result) and isinstance(
+        result._fsproxy_wrapped, numpy.ndarray
+    ):
+        return numpy.asarray(result)
+    return result
+
+
 ndarray = make_final_proxy_type(
     "ndarray",
     cupy.ndarray,
     numpy.ndarray,
     fast_to_slow=cupy.ndarray.get,
     slow_to_fast=cupy.asarray,
+    bases=(ProxyNDarrayBase,),
     additional_attributes={
         "__array__": array_method,
         # So that pa.array(wrapped-numpy-array) works
         "__arrow_array__": arrow_array_method,
         "__cuda_array_interface__": cuda_array_interface,
         "__array_interface__": array_interface,
+        "__array_ufunc__": ndarray__array_ufunc__,
         # ndarrays are unhashable
         "__hash__": None,
         # iter(cupy-array) produces an iterable of zero-dim device
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 4b0fd9a5b36..afa1ce5f86c 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -19,6 +19,7 @@
 from ..options import _env_get_bool
 from ..testing import assert_eq
 from .annotation import nvtx
+from .proxy_base import ProxyNDarrayBase
 
 
 def call_operator(fn, args, kwargs):
@@ -564,7 +565,17 @@ def _fsproxy_wrap(cls, value, func):
         _FinalProxy subclasses can override this classmethod if they
         need particular behaviour when wrapped up.
         """
-        proxy = object.__new__(cls)
+        # TODO: Replace the if-elif-else using singledispatch helper function
+        base_class = _get_proxy_base_class(cls)
+        if base_class is object:
+            proxy = base_class.__new__(cls)
+        elif base_class is ProxyNDarrayBase:
+            proxy = base_class.__new__(cls, value)
+        else:
+            raise TypeError(
+                f"Cannot create an proxy instance of {cls.__name__} using base class {base_class.__name__}. "
+                f"Expected either 'object' or another type in 'PROXY_BASE_CLASSES'"
+            )
         proxy._fsproxy_wrapped = value
         return proxy
 
@@ -1193,6 +1204,19 @@ def is_proxy_object(obj: Any) -> bool:
     return False
 
 
+def _get_proxy_base_class(cls):
+    """Returns the proxy base class if one exists"""
+    for proxy_class in PROXY_BASE_CLASSES:
+        if proxy_class in cls.__mro__:
+            return proxy_class
+    return object
+
+
+PROXY_BASE_CLASSES: set[type] = {
+    ProxyNDarrayBase,
+}
+
+
 NUMPY_TYPES: set[str] = set(np.sctypeDict.values())
 
 
diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py
new file mode 100644
index 00000000000..6f732834e94
--- /dev/null
+++ b/python/cudf/cudf/pandas/proxy_base.py
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import cupy as cp
+import numpy as np
+
+
+class ProxyNDarrayBase(np.ndarray):
+    def __new__(cls, arr):
+        if isinstance(arr, cp.ndarray):
+            arr = arr.get()
+        if not isinstance(arr, np.ndarray):
+            raise TypeError(
+                "Unsupported array type. Must be numpy.ndarray or cupy.ndarray"
+            )
+        return np.asarray(arr, dtype=arr.dtype).view(cls)
+
+    def __array_finalize__(self, obj):
+        if obj is None:
+            return
+        self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", obj)
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index d10c531d757..c4ab4b0a853 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -14,18 +14,20 @@
 import types
 from io import BytesIO, StringIO
 
+import cupy as cp
 import jupyter_client
 import nbformat
 import numpy as np
 import pyarrow as pa
 import pytest
 from nbconvert.preprocessors import ExecutePreprocessor
-from numba import NumbaDeprecationWarning
+from numba import NumbaDeprecationWarning, vectorize
 from pytz import utc
 
 from cudf.core._compat import PANDAS_GE_220
 from cudf.pandas import LOADED, Profiler
 from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
+from cudf.testing import assert_eq
 
 if not LOADED:
     raise ImportError("These tests must be run with cudf.pandas loaded")
@@ -1690,3 +1692,49 @@ def test_notebook_slow_repr():
         assert (
             string in html_result
         ), f"Expected string {string} not found in the output"
+
+
+def test_numpy_ndarray_isinstancecheck(array):
+    arr1, arr2 = array
+    assert isinstance(arr1, np.ndarray)
+    assert isinstance(arr2, np.ndarray)
+
+
+def test_numpy_ndarray_np_ufunc(array):
+    arr1, arr2 = array
+
+    @np.vectorize
+    def add_one_ufunc(arr):
+        return arr + 1
+
+    assert_eq(add_one_ufunc(arr1), add_one_ufunc(arr2))
+
+
+def test_numpy_ndarray_cp_ufunc(array):
+    arr1, arr2 = array
+
+    @cp.vectorize
+    def add_one_ufunc(arr):
+        return arr + 1
+
+    assert_eq(add_one_ufunc(cp.asarray(arr1)), add_one_ufunc(arr2))
+
+
+def test_numpy_ndarray_numba_ufunc(array):
+    arr1, arr2 = array
+
+    @vectorize
+    def add_one_ufunc(arr):
+        return arr + 1
+
+    assert_eq(add_one_ufunc(arr1), add_one_ufunc(arr2))
+
+
+def test_numpy_ndarray_numba_cuda_ufunc(array):
+    arr1, arr2 = array
+
+    @vectorize(["int64(int64)"], target="cuda")
+    def add_one_ufunc(a):
+        return a + 1
+
+    assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2)))