Consolidate Several Series and DataFrame Methods (#9059)

Partly addresses #9038. This change consolidates several (trivial) functions from `Series` and `DataFrame` into `Frame`. `__invert__` was consolidated into a shared (more efficient) code path using factory methods. `deserialize` was not consolidated because we have to provide backward compatibility with older classes, but a factory method is now used for faster class construction. Authors: - Michael Wang (https://github.com/isVoid) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Sheilah Kirui (https://github.com/skirui-source) URL: #9059
rapidsai · Aug 30, 2021 · b2423ac · b2423ac
1 parent 4945198
commit b2423ac
Show file tree

Hide file tree

Showing 3 changed files with 262 additions and 319 deletions.
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -35,7 +35,6 @@
 from cudf.core.index import BaseIndex, RangeIndex, as_index
 from cudf.core.indexing import _DataFrameIlocIndexer, _DataFrameLocIndexer
 from cudf.core.series import Series
-from cudf.core.window import Rolling
 from cudf.utils import applyutils, docutils, ioutils, queryutils, utils
 from cudf.utils.docutils import copy_docstring
 from cudf.utils.dtypes import (
@@ -526,11 +525,12 @@ def serialize(self):
 
         # Use the column directly to avoid duplicating the index
         # need to pickle column names to handle numpy integer columns
-        header["column_names"] = pickle.dumps(tuple(self._data.names))
-        column_header, column_frames = column.serialize_columns(self._columns)
-        header["columns"] = column_header
+        header["columns"], column_frames = column.serialize_columns(
+            self._columns
+        )
         frames.extend(column_frames)
 
+        header["column_names"] = pickle.dumps(tuple(self._data.names))
         return header, frames
 
     @classmethod
@@ -547,7 +547,7 @@ def deserialize(cls, header, frames):
         column_names = pickle.loads(header["column_names"])
         columns = column.deserialize_columns(header["columns"], column_frames)
 
-        return cls(dict(zip(column_names, columns)), index=index)
+        return cls._from_data(dict(zip(column_names, columns)), index=index,)
 
     @property
     def dtypes(self):
@@ -1029,68 +1029,6 @@ def assign(self, **kwargs):
             new[k] = v
         return new
 
-    def head(self, n=5):
-        """
-        Returns the first n rows as a new DataFrame
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame()
-        >>> df['key'] = [0, 1, 2, 3, 4]
-        >>> df['val'] = [float(i + 10) for i in range(5)]  # insert column
-        >>> df.head(2)
-           key   val
-        0    0  10.0
-        1    1  11.0
-        """
-        return self.iloc[:n]
-
-    def tail(self, n=5):
-        """
-        Returns the last n rows as a new DataFrame
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame()
-        >>> df['key'] = [0, 1, 2, 3, 4]
-        >>> df['val'] = [float(i + 10) for i in range(5)]  # insert column
-        >>> df.tail(2)
-           key   val
-        3    3  13.0
-        4    4  14.0
-        """
-        if n == 0:
-            return self.iloc[0:0]
-
-        return self.iloc[-n:]
-
-    def to_string(self):
-        """
-        Convert to string
-
-        cuDF uses Pandas internals for efficient string formatting.
-        Set formatting options using pandas string formatting options and
-        cuDF objects will print identically to Pandas objects.
-
-        cuDF supports `null/None` as a value in any column type, which
-        is transparently supported during this output process.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> df = cudf.DataFrame()
-        >>> df['key'] = [0, 1, 2]
-        >>> df['val'] = [float(i + 10) for i in range(3)]
-        >>> df.to_string()
-        '   key   val\\n0    0  10.0\\n1    1  11.0\\n2    2  12.0'
-        """
-        return self.__repr__()
-
-    def __str__(self):
-        return self.to_string()
-
     def astype(self, dtype, copy=False, errors="raise", **kwargs):
         """
         Cast the DataFrame to the given dtype
@@ -1644,14 +1582,6 @@ def update(
 
         self._mimic_inplace(source_df, inplace=True)
 
-    def __invert__(self):
-        # Defer logic to Series since pandas semantics dictate different
-        # behaviors for different types that requires too much special casing
-        # of the standard _unaryop.
-        return DataFrame(
-            data={col: ~self[col] for col in self}, index=self.index
-        )
-
     def radd(self, other, axis=1, level=None, fill_value=None):
         """
         Get Addition of dataframe and other, element-wise (binary
@@ -3505,15 +3435,6 @@ def rename(
         else:
             return out.copy(deep=copy)
 
-    def nans_to_nulls(self):
-        """
-        Convert nans (if any) to nulls.
-        """
-        df = self.copy()
-        for col in df.columns:
-            df[col] = df[col].nans_to_nulls()
-        return df
-
     def as_gpu_matrix(self, columns=None, order="F"):
         """Convert to a matrix in device memory.
 
@@ -4506,19 +4427,6 @@ def groupby(
             sort=sort,
         )
 
-    @copy_docstring(Rolling)
-    def rolling(
-        self, window, min_periods=None, center=False, axis=0, win_type=None
-    ):
-        return Rolling(
-            self,
-            window,
-            min_periods=min_periods,
-            center=center,
-            axis=axis,
-            win_type=win_type,
-        )
-
     def query(self, expr, local_dict=None):
         """
         Query with a boolean expression using Numba to compile a GPU kernel.
@@ -6732,27 +6640,6 @@ def to_feather(self, path, *args, **kwargs):
 
         feather.to_feather(self, path, *args, **kwargs)
 
-    @ioutils.doc_to_json()
-    def to_json(self, path_or_buf=None, *args, **kwargs):
-        """{docstring}"""
-        from cudf.io import json as json
-
-        return json.to_json(self, path_or_buf=path_or_buf, *args, **kwargs)
-
-    @ioutils.doc_to_hdf()
-    def to_hdf(self, path_or_buf, key, *args, **kwargs):
-        """{docstring}"""
-        from cudf.io import hdf as hdf
-
-        hdf.to_hdf(path_or_buf, key, self, *args, **kwargs)
-
-    @ioutils.doc_to_dlpack()
-    def to_dlpack(self):
-        """{docstring}"""
-        from cudf.io import dlpack as dlpack
-
-        return dlpack.to_dlpack(self)
-
     @ioutils.doc_dataframe_to_csv()
     def to_csv(
         self,