Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate Several Series and Dataframe Methods #9059

Merged
merged 10 commits into from
Aug 30, 2021
123 changes: 5 additions & 118 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from cudf.core.index import BaseIndex, RangeIndex, as_index
from cudf.core.indexing import _DataFrameIlocIndexer, _DataFrameLocIndexer
from cudf.core.series import Series
from cudf.core.window import Rolling
from cudf.utils import applyutils, docutils, ioutils, queryutils, utils
from cudf.utils.docutils import copy_docstring
from cudf.utils.dtypes import (
Expand Down Expand Up @@ -526,11 +525,12 @@ def serialize(self):

# Use the column directly to avoid duplicating the index
# need to pickle column names to handle numpy integer columns
header["column_names"] = pickle.dumps(tuple(self._data.names))
column_header, column_frames = column.serialize_columns(self._columns)
header["columns"] = column_header
header["columns"], column_frames = column.serialize_columns(
self._columns
)
frames.extend(column_frames)

header["column_names"] = pickle.dumps(tuple(self._data.names))
return header, frames

@classmethod
Expand All @@ -547,7 +547,7 @@ def deserialize(cls, header, frames):
column_names = pickle.loads(header["column_names"])
columns = column.deserialize_columns(header["columns"], column_frames)

return cls(dict(zip(column_names, columns)), index=index)
return cls._from_data(dict(zip(column_names, columns)), index=index,)

@property
def dtypes(self):
Expand Down Expand Up @@ -1029,68 +1029,6 @@ def assign(self, **kwargs):
new[k] = v
return new

def head(self, n=5):
    """
    Return the leading ``n`` rows of this DataFrame.

    The selection is a positional slice, ``self.iloc[:n]``, so ``n``
    follows ordinary Python slice-stop semantics.

    Parameters
    ----------
    n : int, default 5
        Stop position of the positional slice.

    Returns
    -------
    The result of ``self.iloc[:n]``.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame()
    >>> df['key'] = [0, 1, 2, 3, 4]
    >>> df['val'] = [float(i + 10) for i in range(5)]  # insert column
    >>> df.head(2)
       key   val
    0    0  10.0
    1    1  11.0
    """
    leading_rows = slice(None, n)
    return self.iloc[leading_rows]

def tail(self, n=5):
    """
    Return the trailing ``n`` rows of this DataFrame.

    The selection is a positional slice, ``self.iloc[-n:]``, except for
    ``n == 0`` which yields the empty slice ``self.iloc[0:0]``.

    Parameters
    ----------
    n : int, default 5
        Number of rows counted back from the end.

    Returns
    -------
    The result of the positional slice described above.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame()
    >>> df['key'] = [0, 1, 2, 3, 4]
    >>> df['val'] = [float(i + 10) for i in range(5)]  # insert column
    >>> df.tail(2)
       key   val
    3    3  13.0
    4    4  14.0
    """
    rows = self.iloc
    # ``-0`` is ``0``, so ``rows[-0:]`` would select everything; zero
    # therefore needs its own explicitly empty slice.
    return rows[0:0] if n == 0 else rows[-n:]

def to_string(self):
    """
    Render this object as a string.

    The text returned is exactly what ``self.__repr__()`` produces.

    cuDF uses Pandas internals for efficient string formatting.
    Set formatting options using pandas string formatting options and
    cuDF objects will print identically to Pandas objects.

    cuDF supports `null/None` as a value in any column type, which
    is transparently supported during this output process.

    Returns
    -------
    The value of ``self.__repr__()``.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame()
    >>> df['key'] = [0, 1, 2]
    >>> df['val'] = [float(i + 10) for i in range(3)]
    >>> df.to_string()
    '   key   val\\n0    0  10.0\\n1    1  11.0\\n2    2  12.0'
    """
    rendered = self.__repr__()
    return rendered

def __str__(self):
return self.to_string()

def astype(self, dtype, copy=False, errors="raise", **kwargs):
"""
Cast the DataFrame to the given dtype
Expand Down Expand Up @@ -1644,14 +1582,6 @@ def update(

self._mimic_inplace(source_df, inplace=True)

def __invert__(self):
    """Return a new DataFrame holding ``~column`` for every column.

    The result keeps this object's index.
    """
    # Inversion is delegated column by column to the per-column ``~``
    # operator: pandas semantics dictate different behaviors for different
    # types, which would need too much special casing in the standard
    # _unaryop.
    inverted = {}
    for label in self:
        inverted[label] = ~self[label]
    return DataFrame(data=inverted, index=self.index)

def radd(self, other, axis=1, level=None, fill_value=None):
"""
Get Addition of dataframe and other, element-wise (binary
Expand Down Expand Up @@ -3505,15 +3435,6 @@ def rename(
else:
return out.copy(deep=copy)

def nans_to_nulls(self):
    """
    Convert nans (if any) to nulls.

    Works on a copy obtained from ``self.copy()``: each column of the copy
    is replaced by the result of that column's own ``nans_to_nulls()``.

    Returns
    -------
    The converted copy.
    """
    result = self.copy()
    for label in result.columns:
        column = result[label]
        result[label] = column.nans_to_nulls()
    return result

def as_gpu_matrix(self, columns=None, order="F"):
"""Convert to a matrix in device memory.

Expand Down Expand Up @@ -4506,19 +4427,6 @@ def groupby(
sort=sort,
)

@copy_docstring(Rolling)
def rolling(
    self, window, min_periods=None, center=False, axis=0, win_type=None
):
    # No docstring is written here: the decorator is applied with
    # ``Rolling`` (presumably to reuse its documentation -- confirm
    # against cudf.utils.docutils).
    # Every option is forwarded unchanged to the Rolling constructor,
    # with this object as the data being windowed.
    options = {
        "min_periods": min_periods,
        "center": center,
        "axis": axis,
        "win_type": win_type,
    }
    return Rolling(self, window, **options)

def query(self, expr, local_dict=None):
"""
Query with a boolean expression using Numba to compile a GPU kernel.
Expand Down Expand Up @@ -6732,27 +6640,6 @@ def to_feather(self, path, *args, **kwargs):

feather.to_feather(self, path, *args, **kwargs)

@ioutils.doc_to_json()
def to_json(self, path_or_buf=None, *args, **kwargs):
    """{docstring}"""
    # The docstring above is a template placeholder and must stay verbatim
    # (presumably filled in by the ``ioutils.doc_to_json`` decorator --
    # confirm against cudf.utils.ioutils).
    # Imported lazily inside the method, as in the original code.
    from cudf.io import json as cudf_json

    # ``path_or_buf`` is forwarded positionally. Passing it as a keyword
    # ahead of ``*args`` would make any extra positional argument bind to
    # the callee's second positional parameter and then collide with the
    # keyword, raising ``TypeError: got multiple values for argument``.
    # NOTE(review): assumes ``cudf.io.json.to_json`` takes ``path_or_buf``
    # as its second positional parameter -- confirm.
    return cudf_json.to_json(self, path_or_buf, *args, **kwargs)

@ioutils.doc_to_hdf()
def to_hdf(self, path_or_buf, key, *args, **kwargs):
    """{docstring}"""
    # The docstring above is a template placeholder and must stay verbatim
    # (presumably filled in by the ``ioutils.doc_to_hdf`` decorator --
    # confirm against cudf.utils.ioutils).
    # Imported lazily inside the method, as in the original code.
    from cudf.io import hdf as hdf_io

    # The writer takes the destination and key first and this object
    # third; its result is discarded, so this method returns None.
    hdf_io.to_hdf(path_or_buf, key, self, *args, **kwargs)

@ioutils.doc_to_dlpack()
def to_dlpack(self):
    """{docstring}"""
    # The docstring above is a template placeholder and must stay verbatim
    # (presumably filled in by the ``ioutils.doc_to_dlpack`` decorator --
    # confirm against cudf.utils.ioutils).
    # Imported lazily inside the method, as in the original code.
    from cudf.io import dlpack as dlpack_io

    capsule = dlpack_io.to_dlpack(self)
    return capsule

@ioutils.doc_dataframe_to_csv()
def to_csv(
self,
Expand Down
Loading