Use _from_data instead of _from_columns for initialzing Frame (#14755)

In the spirit of reducing redundant methods, `_from_columns` just calls `_from_data` (hoping to rename to `_from_mapping` or similar) so removing the need for `_from_columns`. Hoping to do the same for the `_from_columns_like_self` in a follow up. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Michael Wang (https://github.com/isVoid) URL: #14755
rapidsai · Jan 18, 2024 · 70cdeec · 70cdeec
1 parent 734ca75
commit 70cdeec
Show file tree

Hide file tree

Showing 6 changed files with 49 additions and 64 deletions.
diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 import os
 import zoneinfo
@@ -89,8 +89,10 @@ def _read_tzfile_as_frame(tzdir, zone_name):
             [np.timedelta64(0, "s")]
         )
 
-    return DataFrame._from_columns(
-        transition_times_and_offsets, ["transition_times", "offsets"]
+    return DataFrame._from_data(
+        dict(
+            zip(["transition_times", "offsets"], transition_times_and_offsets)
+        )
     )
 
 

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
@@ -144,17 +144,6 @@ def _from_data(cls, data: MutableMapping):
     def _from_data_like_self(self, data: MutableMapping):
         return self._from_data(data)
 
-    @classmethod
-    @_cudf_nvtx_annotate
-    def _from_columns(
-        cls,
-        columns: List[ColumnBase],
-        column_names: abc.Iterable[str],
-    ):
-        """Construct a `Frame` object from a list of columns."""
-        data = {name: columns[i] for i, name in enumerate(column_names)}
-        return cls._from_data(data)
-
     @_cudf_nvtx_annotate
     def _from_columns_like_self(
         self,
@@ -169,7 +158,8 @@ def _from_columns_like_self(
         """
         if column_names is None:
             column_names = self._column_names
-        frame = self.__class__._from_columns(columns, column_names)
+        data = dict(zip(column_names, columns))
+        frame = self.__class__._from_data(data)
         return frame._copy_type_metadata(self, override_dtypes=override_dtypes)
 
     @_cudf_nvtx_annotate

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
@@ -2111,9 +2111,13 @@ def diff(self, periods=1, axis=0):
     def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries:
         """Internal implementation for `ffill` and `bfill`"""
         values = self.grouping.values
-        result = self.obj._from_columns(
-            self._groupby.replace_nulls([*values._columns], method),
-            values._column_names,
+        result = self.obj._from_data(
+            dict(
+                zip(
+                    values._column_names,
+                    self._groupby.replace_nulls([*values._columns], method),
+                )
+            )
         )
         result = self._mimic_pandas_order(result)
         return result._copy_type_metadata(values)
@@ -2305,9 +2309,15 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         else:
             fill_value = [fill_value] * len(values._data)
 
-        result = self.obj.__class__._from_columns(
-            self._groupby.shift([*values._columns], periods, fill_value)[0],
-            values._column_names,
+        result = self.obj.__class__._from_data(
+            dict(
+                zip(
+                    values._column_names,
+                    self._groupby.shift(
+                        [*values._columns], periods, fill_value
+                    )[0],
+                )
+            )
         )
         result = self._mimic_pandas_order(result)
         return result._copy_type_metadata(values)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
@@ -800,22 +800,22 @@ def sort_values(
     @_cudf_nvtx_annotate
     def _gather(self, gather_map, nullify=False, check_bounds=True):
         gather_map = cudf.core.column.as_column(gather_map)
-        return _dtype_to_index[self.dtype.type]._from_columns(
-            [self._values.take(gather_map, nullify, check_bounds)], [self.name]
+        return _dtype_to_index[self.dtype.type]._from_data(
+            {self.name: self._values.take(gather_map, nullify, check_bounds)}
         )
 
     @_cudf_nvtx_annotate
     def _apply_boolean_mask(self, boolean_mask):
-        return _dtype_to_index[self.dtype.type]._from_columns(
-            [self._values.apply_boolean_mask(boolean_mask)], [self.name]
+        return _dtype_to_index[self.dtype.type]._from_data(
+            {self.name: self._values.apply_boolean_mask(boolean_mask)}
         )
 
     def repeat(self, repeats, axis=None):
         return self._as_int_index().repeat(repeats, axis)
 
     def _split(self, splits):
-        return _dtype_to_index[self.dtype.type]._from_columns(
-            [self._as_int_index()._split(splits)], [self.name]
+        return _dtype_to_index[self.dtype.type]._from_data(
+            {self.name: self._as_int_index()._split(splits)}
         )
 
     def _binaryop(self, other, op: str):

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
@@ -291,19 +291,27 @@ def _from_data_like_self(self, data: MutableMapping):
         out._data._level_names = self._data._level_names
         return out
 
-    @classmethod
     @_cudf_nvtx_annotate
-    def _from_columns(
-        cls,
+    def _from_columns_like_self(
+        self,
         columns: List[ColumnBase],
-        column_names: List[str],
+        column_names: Optional[abc.Iterable[str]] = None,
         index_names: Optional[List[str]] = None,
-    ):
-        """Construct a `Frame` object from a list of columns.
+        *,
+        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
+    ) -> Self:
+        """Construct a `Frame` from a list of columns with metadata from self.
 
         If `index_names` is set, the first `len(index_names)` columns are
         used to construct the index of the frame.
+
+        If override_dtypes is provided then any non-None entry will be
+        used for the dtype of the matching column in preference to the
+        dtype of the column in self.
         """
+        if column_names is None:
+            column_names = self._column_names
+
         data_columns = columns
         index = None
 
@@ -316,36 +324,11 @@ def _from_columns(
             else:
                 index.name = index_names[0]
 
-        out = super()._from_columns(data_columns, column_names)
+        data = dict(zip(column_names, data_columns))
+        frame = self.__class__._from_data(data)
 
         if index is not None:
-            out._index = index
-
-        return out
-
-    @_cudf_nvtx_annotate
-    def _from_columns_like_self(
-        self,
-        columns: List[ColumnBase],
-        column_names: Optional[abc.Iterable[str]] = None,
-        index_names: Optional[List[str]] = None,
-        *,
-        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
-    ) -> Self:
-        """Construct a `Frame` from a list of columns with metadata from self.
-
-        If `index_names` is set, the first `len(index_names)` columns are
-        used to construct the index of the frame.
-
-        If override_dtypes is provided then any non-None entry will be
-        used for the dtype of the matching column in preference to the
-        dtype of the column in self.
-        """
-        if column_names is None:
-            column_names = self._column_names
-        frame = self.__class__._from_columns(
-            columns, column_names, index_names
-        )
+            frame._index = index
         return frame._copy_type_metadata(
             self,
             include_index=bool(index_names),

diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py
@@ -35,12 +35,12 @@ def from_dlpack(pycapsule_obj):
     """
 
     columns = libdlpack.from_dlpack(pycapsule_obj)
-    column_names = range(len(columns))
+    data = dict(enumerate(columns))
 
     if len(columns) == 1:
-        return cudf.Series._from_columns(columns, column_names=column_names)
+        return cudf.Series._from_data(data)
     else:
-        return cudf.DataFrame._from_columns(columns, column_names=column_names)
+        return cudf.DataFrame._from_data(data)
 
 
 @ioutils.doc_to_dlpack()