From 5867f052ea38ec2d387f810cc51258e691edb9a8 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Tue, 30 Jul 2024 14:57:29 -0700 Subject: [PATCH 1/5] register read_parquet function to CudfDXBackendEntrypoint --- python/dask_cudf/dask_cudf/backends.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 4bdb5d921ec..ac5ef865288 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -666,6 +666,16 @@ def from_dict( constructor=constructor, ) + @classmethod + def read_parquet(cls, *args, engine=None, **kwargs): + import dask_expr as dx + + from dask_cudf.io.parquet import CudfEngine + + return _default_backend( + dx.read_parquet, *args, engine=CudfEngine, **kwargs + ) + @staticmethod def read_json(*args, **kwargs): from dask_cudf.io.json import read_json as read_json_impl From 17be6b0c784774c27726657ee2ebfa9a626123a3 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Mon, 12 Aug 2024 12:10:13 -0700 Subject: [PATCH 2/5] add read_csv def --- python/dask_cudf/dask_cudf/backends.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index d2f677035fd..65b26758cee 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -677,6 +677,32 @@ def read_parquet(cls, *args, engine=None, **kwargs): dx.read_parquet, *args, engine=CudfEngine, **kwargs ) + @staticmethod + def read_csv( + path, + *args, + header="infer", + dtype_backend=None, + storage_options=None, + **kwargs, + ): + from fsspec.utils import stringify_path + from dask_expr._collection import new_collection + from dask_expr.io.csv import ReadCSV + + if not isinstance(path, str): + path = stringify_path(path) + return new_collection( + ReadCSV( + path, + dtype_backend=dtype_backend, + storage_options=storage_options, + kwargs=kwargs, + header=header, + dataframe_backend="cudf", + ) + ) + @staticmethod def read_json(*args, **kwargs): from dask_cudf.io.json import read_json as read_json_impl From 04aa983ac3050af932c3a27f73d2f407bd439abe Mon Sep 17 00:00:00 2001 From: rjzamora Date: Mon, 12 Aug 2024 12:12:43 -0700 Subject: [PATCH 3/5] formatting --- python/dask_cudf/dask_cudf/backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 65b26758cee..5b86108dbd4 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -686,9 +686,9 @@ def read_csv( storage_options=None, **kwargs, ): - from fsspec.utils import stringify_path from dask_expr._collection import new_collection from dask_expr.io.csv import ReadCSV + from fsspec.utils import stringify_path if not isinstance(path, str): path = stringify_path(path) From 5d8c80d864cfc768db9fe3438ffede7f0c8b7295 Mon Sep 17 00:00:00 2001 From: rjzamora Date: Mon, 12 Aug 2024 14:25:40 -0700 Subject: [PATCH 4/5] simplify imports --- python/dask_cudf/dask_cudf/backends.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 5b86108dbd4..50548972cb0 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -686,14 +686,13 @@ def read_csv( storage_options=None, **kwargs, ): - from dask_expr._collection import new_collection - from dask_expr.io.csv import ReadCSV + import dask_expr as dx from fsspec.utils import stringify_path if not isinstance(path, str): path = stringify_path(path) - return new_collection( - ReadCSV( + return dx.new_collection( + dx.io.csv.ReadCSV( path, dtype_backend=dtype_backend, storage_options=storage_options, From ffcc137aada3c2d283570f791e2b1ba8b71ee44f Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 13 Aug 2024 10:22:41 -0500 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Mads R. B. Kristensen --- python/dask_cudf/dask_cudf/backends.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 50548972cb0..01bab30190a 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -667,8 +667,8 @@ def from_dict( constructor=constructor, ) - @classmethod - def read_parquet(cls, *args, engine=None, **kwargs): + @staticmethod + def read_parquet(*args, engine=None, **kwargs): import dask_expr as dx from dask_cudf.io.parquet import CudfEngine