Add a to_pandas override for struct columns, plus tests.

Summary of what this patch contains:

  * python/cudf/cudf/core/column/struct.py
      - imports pandas as pd;
      - adds a to_pandas(self, index=None, **kwargs) method that builds an
        object-dtype pandas Series from self.to_arrow().tolist() and, when
        an index is passed, assigns it to the result. The in-code comment
        gives the reason for not using Arrow's own to_pandas:
        https://issues.apache.org/jira/browse/ARROW-12680

  * python/cudf/cudf/tests/test_struct.py
      - imports DATETIME_TYPES and TIMEDELTA_TYPES next to assert_eq;
      - adds test_struct_with_datetime_and_timedelta, parametrized over
        DATETIME_TYPES + TIMEDELTA_TYPES, comparing series.to_pandas()
        with a pandas Series of per-row dicts;
      - adds test_struct_int_values, asserting that integer fields come
        back as int and a null field comes back as None.

NOTE(review): this patch was restored from a copy whose line breaks and
indentation had been collapsed. The hunk line counts below agree with
the @@ headers, but the exact indentation and the position of blank
context lines were inferred from the surrounding Python code. Run
`git apply --check` against the target tree before relying on it.

diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 7167918d14d..f0d02a706e2 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 from __future__ import annotations
 
+import pandas as pd
 import pyarrow as pa
 
 import cudf
@@ -80,6 +81,16 @@ def to_arrow(self):
             pa_type, len(self), buffers, children=children
         )
 
+    def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
+        # We cannot go via Arrow's `to_pandas` because of the following issue:
+        # https://issues.apache.org/jira/browse/ARROW-12680
+
+        pd_series = pd.Series(self.to_arrow().tolist(), dtype="object")
+
+        if index is not None:
+            pd_series.index = index
+        return pd_series
+
     def __getitem__(self, args):
         result = super().__getitem__(args)
         if isinstance(result, dict):
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index 4e5e9c96146..d9558cb5041 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -7,7 +7,7 @@
 
 import cudf
 from cudf.core.dtypes import StructDtype
-from cudf.testing._utils import assert_eq
+from cudf.testing._utils import DATETIME_TYPES, TIMEDELTA_TYPES, assert_eq
 
 
 @pytest.mark.parametrize(
@@ -292,3 +292,35 @@ def test_struct_field_errors(data):
 
     with pytest.raises(IndexError):
         got.struct.field(100)
+
+
+@pytest.mark.parametrize("dtype", DATETIME_TYPES + TIMEDELTA_TYPES)
+def test_struct_with_datetime_and_timedelta(dtype):
+    df = cudf.DataFrame(
+        {
+            "a": [12, 232, 2334],
+            "datetime": cudf.Series([23432, 3432423, 324324], dtype=dtype),
+        }
+    )
+    series = df.to_struct()
+    a_array = np.array([12, 232, 2334])
+    datetime_array = np.array([23432, 3432423, 324324]).astype(dtype)
+
+    actual = series.to_pandas()
+    values_list = []
+    for i, val in enumerate(a_array):
+        values_list.append({"a": val, "datetime": datetime_array[i]})
+
+    expected = pd.Series(values_list)
+    assert_eq(expected, actual)
+
+
+def test_struct_int_values():
+    series = cudf.Series(
+        [{"a": 1, "b": 2}, {"a": 10, "b": None}, {"a": 5, "b": 6}]
+    )
+    actual_series = series.to_pandas()
+
+    assert isinstance(actual_series[0]["b"], int)
+    assert isinstance(actual_series[1]["b"], type(None))
+    assert isinstance(actual_series[2]["b"], int)