Skip to content

Commit

Permalink
Removing the FrameProxyObject workaround (#751)
Browse files Browse the repository at this point in the history
Adding testing of cudf dataframe copy, which fails prior to rapidsai/cudf#9397

#### UPDATE: This PR now handles the jit-unspill failures we have been seeing with cudf v21.12.

Some background, when we introduced `ProxyObject`, cudf was using `cudf._lib.table.Table` as the base class for all frame-like objects (dataframes, series, etc). `Table` was implemented in cython so our proxy object had to derive from `Table` in order to support cython functions. The result was to use the class `FrameProxyObject` as a workaround:
```python
    # In order to support the cuDF API implemented in Cython, we inherit from
    # `cudf._lib.table.Table`, which is the base class of Index, Series, and
    # Dataframes in cuDF.
    # Notice, the order of base classes matters. Since ProxyObject is the first
    # base class, ProxyObject.__init__() is called on creation, which doesn't
    # define the Table._data and Table._index attributes. Thus, accessing
    # FrameProxyObject._data and FrameProxyObject._index is pass-through to
    # ProxyObejct.__getattr__(), which is what we want.
    class FrameProxyObject(ProxyObject, cudf._lib.table.Table):
        pass
```
Now that cudf uses `Frame`, a pure Python class, we don't need this workaround anymore. In fact, using `FrameProxyObject` triggers some infinite recursion errors when used together with v21.12+

cc. @randerzander

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Peter Andreas Entschev (https://github.com/pentschev)

URL: #751
  • Loading branch information
madsbk authored Oct 11, 2021
1 parent 18cad78 commit bf4e00f
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 23 deletions.
23 changes: 2 additions & 21 deletions dask_cuda/proxify_device_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,38 +225,19 @@ def proxify_device_object_python_dict(
def _register_cudf():
import cudf

try:
# In v21.12, cuDF removed Table in favor of Frame as base class
from cudf._lib.table import Table as Frame
except ImportError:
from cudf.core.frame import Frame

# In order to support the cuDF API implemented in Cython, we inherit from
# `cudf.core.frame.Frame`, which is the base class of Index, Series, and
# Dataframes in cuDF.
# Notice, the order of base classes matters. Since ProxyObject is the first
# base class, ProxyObject.__init__() is called on creation, which doesn't
# define the Frame._data and Frame._index attributes. Thus, accessing
# FrameProxyObject._data and FrameProxyObject._index is pass-through to
# ProxyObejct.__getattr__(), which is what we want.
class FrameProxyObject(ProxyObject, Frame):
pass

@dispatch.register(cudf.DataFrame)
@dispatch.register(cudf.Series)
@dispatch.register(cudf.BaseIndex)
def proxify_device_object_cudf_dataframe(
obj, proxied_id_to_proxy, found_proxies, excl_proxies
):
return proxify(
obj, proxied_id_to_proxy, found_proxies, subclass=FrameProxyObject
)
return proxify(obj, proxied_id_to_proxy, found_proxies)

try:
from dask.array.dispatch import percentile_lookup

from dask_cudf.backends import percentile_cudf

percentile_lookup.register(FrameProxyObject, percentile_cudf)
percentile_lookup.register(ProxyObject, percentile_cudf)
except ImportError:
pass
2 changes: 1 addition & 1 deletion dask_cuda/tests/test_proxify_host_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def task(x):
assert isinstance(x, cudf.DataFrame)
if jit_unspill:
# Check that `x` is a proxy object and the proxied DataFrame is serialized
assert "FrameProxyObject" in str(type(x))
assert "ProxyObject" in str(type(x))
assert x._obj_pxy["serializer"] == "dask"
else:
assert type(x) == cudf.DataFrame
Expand Down
17 changes: 16 additions & 1 deletion dask_cuda/tests/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def task(x):
assert isinstance(x, cudf.DataFrame)
if jit_unspill:
# Check that `x` is a proxy object and the proxied DataFrame is serialized
assert "FrameProxyObject" in str(type(x))
assert "ProxyObject" in str(type(x))
assert x._obj_pxy["serializer"] == "dask"
else:
assert type(x) == cudf.DataFrame
Expand Down Expand Up @@ -569,3 +569,18 @@ def test_array_ufucn_proxified_object(np_func):
actual = np_func(proxy_obj, np_array)

assert_series_equal(expected.to_pandas(), actual.to_pandas())


def test_cudf_copy():
cudf = pytest.importorskip("cudf")
df = cudf.DataFrame({"A": range(10)})
df = proxify_device_objects(df)
cpy = df.copy()
assert_frame_equal(cpy.to_pandas(), df.to_pandas())


def test_cudf_fillna():
cudf = pytest.importorskip("cudf")
df = cudf.DataFrame({"A": range(10)})
df = proxify_device_objects(df)
df = df.fillna(0)

0 comments on commit bf4e00f

Please sign in to comment.