diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py index 67043d3fbb3..053425fff8d 100644 --- a/python/cudf/cudf/core/_internals/timezones.py +++ b/python/cudf/cudf/core/_internals/timezones.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. import os import zoneinfo @@ -89,8 +89,10 @@ def _read_tzfile_as_frame(tzdir, zone_name): [np.timedelta64(0, "s")] ) - return DataFrame._from_columns( - transition_times_and_offsets, ["transition_times", "offsets"] + return DataFrame._from_data( + dict( + zip(["transition_times", "offsets"], transition_times_and_offsets) + ) ) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5f7a86e86d8..05104a3ef05 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -144,17 +144,6 @@ def _from_data(cls, data: MutableMapping): def _from_data_like_self(self, data: MutableMapping): return self._from_data(data) - @classmethod - @_cudf_nvtx_annotate - def _from_columns( - cls, - columns: List[ColumnBase], - column_names: abc.Iterable[str], - ): - """Construct a `Frame` object from a list of columns.""" - data = {name: columns[i] for i, name in enumerate(column_names)} - return cls._from_data(data) - @_cudf_nvtx_annotate def _from_columns_like_self( self, @@ -169,7 +158,8 @@ def _from_columns_like_self( """ if column_names is None: column_names = self._column_names - frame = self.__class__._from_columns(columns, column_names) + data = dict(zip(column_names, columns)) + frame = self.__class__._from_data(data) return frame._copy_type_metadata(self, override_dtypes=override_dtypes) @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 4e8947652ff..6c83bcd9efb 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -2111,9 +2111,13 @@ def diff(self, periods=1, axis=0): def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries: """Internal implementation for `ffill` and `bfill`""" values = self.grouping.values - result = self.obj._from_columns( - self._groupby.replace_nulls([*values._columns], method), - values._column_names, + result = self.obj._from_data( + dict( + zip( + values._column_names, + self._groupby.replace_nulls([*values._columns], method), + ) + ) ) result = self._mimic_pandas_order(result) return result._copy_type_metadata(values) @@ -2305,9 +2309,15 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): else: fill_value = [fill_value] * len(values._data) - result = self.obj.__class__._from_columns( - self._groupby.shift([*values._columns], periods, fill_value)[0], - values._column_names, + result = self.obj.__class__._from_data( + dict( + zip( + values._column_names, + self._groupby.shift( + [*values._columns], periods, fill_value + )[0], + ) + ) ) result = self._mimic_pandas_order(result) return result._copy_type_metadata(values) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index e012d8e7140..3e8f6bc2ccb 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -800,22 +800,22 @@ def sort_values( @_cudf_nvtx_annotate def _gather(self, gather_map, nullify=False, check_bounds=True): gather_map = cudf.core.column.as_column(gather_map) - return _dtype_to_index[self.dtype.type]._from_columns( - [self._values.take(gather_map, nullify, check_bounds)], [self.name] + return _dtype_to_index[self.dtype.type]._from_data( + {self.name: self._values.take(gather_map, nullify, check_bounds)} ) @_cudf_nvtx_annotate def _apply_boolean_mask(self, boolean_mask): - return _dtype_to_index[self.dtype.type]._from_columns( - [self._values.apply_boolean_mask(boolean_mask)], [self.name] + return _dtype_to_index[self.dtype.type]._from_data( + {self.name: self._values.apply_boolean_mask(boolean_mask)} ) def repeat(self, repeats, axis=None): return self._as_int_index().repeat(repeats, axis) def _split(self, splits): - return _dtype_to_index[self.dtype.type]._from_columns( - [self._as_int_index()._split(splits)], [self.name] + return _dtype_to_index[self.dtype.type]._from_data( + {self.name: self._as_int_index()._split(splits)} ) def _binaryop(self, other, op: str): diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 2a35ac0f959..70be5c3ad0f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -291,19 +291,27 @@ def _from_data_like_self(self, data: MutableMapping): out._data._level_names = self._data._level_names return out - @classmethod @_cudf_nvtx_annotate - def _from_columns( - cls, + def _from_columns_like_self( + self, columns: List[ColumnBase], - column_names: List[str], + column_names: Optional[abc.Iterable[str]] = None, index_names: Optional[List[str]] = None, - ): - """Construct a `Frame` object from a list of columns. + *, + override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, + ) -> Self: + """Construct a `Frame` from a list of columns with metadata from self. If `index_names` is set, the first `len(index_names)` columns are used to construct the index of the frame. + + If override_dtypes is provided then any non-None entry will be + used for the dtype of the matching column in preference to the + dtype of the column in self. """ + if column_names is None: + column_names = self._column_names + data_columns = columns index = None @@ -316,36 +324,11 @@ def _from_columns( else: index.name = index_names[0] - out = super()._from_columns(data_columns, column_names) + data = dict(zip(column_names, data_columns)) + frame = self.__class__._from_data(data) if index is not None: - out._index = index - - return out - - @_cudf_nvtx_annotate - def _from_columns_like_self( - self, - columns: List[ColumnBase], - column_names: Optional[abc.Iterable[str]] = None, - index_names: Optional[List[str]] = None, - *, - override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None, - ) -> Self: - """Construct a `Frame` from a list of columns with metadata from self. - - If `index_names` is set, the first `len(index_names)` columns are - used to construct the index of the frame. - - If override_dtypes is provided then any non-None entry will be - used for the dtype of the matching column in preference to the - dtype of the column in self. - """ - if column_names is None: - column_names = self._column_names - frame = self.__class__._from_columns( - columns, column_names, index_names - ) + frame._index = index return frame._copy_type_metadata( self, include_index=bool(index_names), diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index bed376e4a79..d3d99aab0cd 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -35,12 +35,12 @@ def from_dlpack(pycapsule_obj): """ columns = libdlpack.from_dlpack(pycapsule_obj) - column_names = range(len(columns)) + data = dict(enumerate(columns)) if len(columns) == 1: - return cudf.Series._from_columns(columns, column_names=column_names) + return cudf.Series._from_data(data) else: - return cudf.DataFrame._from_columns(columns, column_names=column_names) + return cudf.DataFrame._from_data(data) @ioutils.doc_to_dlpack()