Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use _from_data instead of _from_columns for initialzing Frame #14755

Merged
merged 3 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions python/cudf/cudf/core/_internals/timezones.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

import os
import zoneinfo
Expand Down Expand Up @@ -89,8 +89,10 @@ def _read_tzfile_as_frame(tzdir, zone_name):
[np.timedelta64(0, "s")]
)

return DataFrame._from_columns(
transition_times_and_offsets, ["transition_times", "offsets"]
return DataFrame._from_data(
dict(
zip(["transition_times", "offsets"], transition_times_and_offsets)
)
)


Expand Down
14 changes: 2 additions & 12 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,6 @@ def _from_data(cls, data: MutableMapping):
def _from_data_like_self(self, data: MutableMapping):
return self._from_data(data)

@classmethod
@_cudf_nvtx_annotate
def _from_columns(
cls,
columns: List[ColumnBase],
column_names: abc.Iterable[str],
):
"""Construct a `Frame` object from a list of columns."""
data = {name: columns[i] for i, name in enumerate(column_names)}
return cls._from_data(data)

@_cudf_nvtx_annotate
def _from_columns_like_self(
self,
Expand All @@ -169,7 +158,8 @@ def _from_columns_like_self(
"""
if column_names is None:
column_names = self._column_names
frame = self.__class__._from_columns(columns, column_names)
data = dict(zip(column_names, columns))
frame = self.__class__._from_data(data)
return frame._copy_type_metadata(self, override_dtypes=override_dtypes)

@_cudf_nvtx_annotate
Expand Down
22 changes: 16 additions & 6 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2111,9 +2111,13 @@ def diff(self, periods=1, axis=0):
def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries:
"""Internal implementation for `ffill` and `bfill`"""
values = self.grouping.values
result = self.obj._from_columns(
self._groupby.replace_nulls([*values._columns], method),
values._column_names,
result = self.obj._from_data(
dict(
zip(
values._column_names,
self._groupby.replace_nulls([*values._columns], method),
)
)
)
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)
Expand Down Expand Up @@ -2305,9 +2309,15 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
else:
fill_value = [fill_value] * len(values._data)

result = self.obj.__class__._from_columns(
self._groupby.shift([*values._columns], periods, fill_value)[0],
values._column_names,
result = self.obj.__class__._from_data(
dict(
zip(
values._column_names,
self._groupby.shift(
[*values._columns], periods, fill_value
)[0],
)
)
)
result = self._mimic_pandas_order(result)
return result._copy_type_metadata(values)
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,22 +800,22 @@ def sort_values(
@_cudf_nvtx_annotate
def _gather(self, gather_map, nullify=False, check_bounds=True):
gather_map = cudf.core.column.as_column(gather_map)
return _dtype_to_index[self.dtype.type]._from_columns(
[self._values.take(gather_map, nullify, check_bounds)], [self.name]
return _dtype_to_index[self.dtype.type]._from_data(
{self.name: self._values.take(gather_map, nullify, check_bounds)}
)

@_cudf_nvtx_annotate
def _apply_boolean_mask(self, boolean_mask):
return _dtype_to_index[self.dtype.type]._from_columns(
[self._values.apply_boolean_mask(boolean_mask)], [self.name]
return _dtype_to_index[self.dtype.type]._from_data(
{self.name: self._values.apply_boolean_mask(boolean_mask)}
)

def repeat(self, repeats, axis=None):
return self._as_int_index().repeat(repeats, axis)

def _split(self, splits):
return _dtype_to_index[self.dtype.type]._from_columns(
[self._as_int_index()._split(splits)], [self.name]
return _dtype_to_index[self.dtype.type]._from_data(
{self.name: self._as_int_index()._split(splits)}
)

def _binaryop(self, other, op: str):
Expand Down
51 changes: 17 additions & 34 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,19 +291,27 @@ def _from_data_like_self(self, data: MutableMapping):
out._data._level_names = self._data._level_names
return out

@classmethod
@_cudf_nvtx_annotate
def _from_columns(
cls,
def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: List[str],
column_names: Optional[abc.Iterable[str]] = None,
index_names: Optional[List[str]] = None,
):
"""Construct a `Frame` object from a list of columns.
*,
override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
) -> Self:
"""Construct a `Frame` from a list of columns with metadata from self.

If `index_names` is set, the first `len(index_names)` columns are
used to construct the index of the frame.

If override_dtypes is provided then any non-None entry will be
used for the dtype of the matching column in preference to the
dtype of the column in self.
"""
if column_names is None:
column_names = self._column_names

data_columns = columns
index = None

Expand All @@ -316,36 +324,11 @@ def _from_columns(
else:
index.name = index_names[0]

out = super()._from_columns(data_columns, column_names)
data = dict(zip(column_names, data_columns))
frame = self.__class__._from_data(data)

if index is not None:
out._index = index

return out

@_cudf_nvtx_annotate
def _from_columns_like_self(
self,
columns: List[ColumnBase],
column_names: Optional[abc.Iterable[str]] = None,
index_names: Optional[List[str]] = None,
*,
override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
) -> Self:
"""Construct a `Frame` from a list of columns with metadata from self.

If `index_names` is set, the first `len(index_names)` columns are
used to construct the index of the frame.

If override_dtypes is provided then any non-None entry will be
used for the dtype of the matching column in preference to the
dtype of the column in self.
"""
if column_names is None:
column_names = self._column_names
frame = self.__class__._from_columns(
columns, column_names, index_names
)
frame._index = index
return frame._copy_type_metadata(
self,
include_index=bool(index_names),
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/io/dlpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def from_dlpack(pycapsule_obj):
"""

columns = libdlpack.from_dlpack(pycapsule_obj)
column_names = range(len(columns))
data = dict(enumerate(columns))

if len(columns) == 1:
return cudf.Series._from_columns(columns, column_names=column_names)
return cudf.Series._from_data(data)
else:
return cudf.DataFrame._from_columns(columns, column_names=column_names)
return cudf.DataFrame._from_data(data)


@ioutils.doc_to_dlpack()
Expand Down