Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Frame reductions #8944

Merged
merged 25 commits into from
Aug 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0b373d9
Move logic for any/all down from Series into Column.
vyasr Jul 21, 2021
3a0e11e
Simplify _apply_support_method for improved performance.
vyasr Jul 21, 2021
e6a1e2a
Alias DataFrame.product as DataFrame.prod.
vyasr Jul 22, 2021
a70eebd
Revert "Simplify _apply_support_method for improved performance."
vyasr Jul 22, 2021
3d19fec
Standardize reduction operation argument handling.
vyasr Jul 22, 2021
b437f50
Replace asserts with exception handlers.
vyasr Jul 22, 2021
ad025b6
Split axis 0 and axis 1 support methods into separate functions.
vyasr Jul 22, 2021
823f558
Switch reductions to use a new helper method.
vyasr Jul 26, 2021
5442189
Reimplement axis 0 reductions as column rather than Series ops.
vyasr Jul 26, 2021
30781d8
Move most reductions up to the Frame level.
vyasr Jul 26, 2021
e2e37c1
Fix behavior of bool_only to not fail for bool_only=False for DataFra…
vyasr Jul 26, 2021
f9d68f4
Move any and all into Frame.
vyasr Jul 26, 2021
8b51959
Optimize DataFrame-Series binop.
vyasr Jul 30, 2021
25f1233
Move sum_of_squares to Frame.
vyasr Jul 30, 2021
57b4d0c
Move median to Frame and add tests of both median and sum_of_squares …
vyasr Jul 30, 2021
7ed91c7
Revert "Optimize DataFrame-Series binop."
vyasr Jul 30, 2021
933f578
Fix axis lookup logic.
vyasr Aug 3, 2021
3cd7903
Disable reductions for index types.
vyasr Aug 4, 2021
8336fb5
Enable reductions for Index types.
vyasr Aug 5, 2021
76cdc13
Use f-string for exception.
vyasr Aug 5, 2021
dd756a7
Fix assertion.
vyasr Aug 5, 2021
8ab25b5
Fix NaN handling for ColumnBase's any and all.
vyasr Aug 5, 2021
6a52830
Merge remote-tracking branch 'origin/branch-21.10' into refactor/fram…
vyasr Aug 5, 2021
d1cdb9d
Merge remote-tracking branch 'origin/branch-21.10' into refactor/fram…
vyasr Aug 6, 2021
50f92c0
Add tests for more index reductions.
vyasr Aug 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,31 @@ def equals(self, other: ColumnBase, check_dtypes: bool = False) -> bool:
def _null_equals(self, other: ColumnBase) -> ColumnBase:
    """Compare this column with ``other`` using the "NULL_EQUALS" operator.

    The comparison is delegated entirely to ``self.binary_operator``; the
    value it produces is returned unchanged.
    """
    comparison = self.binary_operator("NULL_EQUALS", other)
    return comparison

def all(self, skipna: bool = True) -> bool:
    """Return whether all elements of the column reduce to True.

    Parameters
    ----------
    skipna : bool, default True
        When True the reduction runs on ``self.nans_to_nulls()``
        (presumably NaN entries become nulls -- confirm against that
        helper); when False it runs on the column as-is.

    Returns
    -------
    bool
        True if every entry is null (this includes the empty column),
        otherwise the result of the libcudf "all" reduction.
    """
    # If all entries are null the result is True, including when the column
    # is empty.
    result_col = self.nans_to_nulls() if skipna else self

    if result_col.null_count == result_col.size:
        return True

    if isinstance(result_col, ColumnBase):
        # Coerce so callers get a builtin bool, as the annotation promises,
        # rather than whatever scalar type the reduction hands back.
        return bool(libcudf.reduce.reduce("all", result_col, dtype=np.bool_))
    else:
        # NOTE(review): only reached if nans_to_nulls() yields something that
        # is not a ColumnBase; the value is passed through untouched.
        # Confirm whether this branch is reachable.
        return result_col

def any(self, skipna: bool = True) -> bool:
    """Return whether any element of the column reduces to True.

    Parameters
    ----------
    skipna : bool, default True
        When True the reduction runs on ``self.nans_to_nulls()``
        (presumably NaN entries become nulls -- confirm against that
        helper); when False it runs on the column as-is.

    Returns
    -------
    bool
        True when ``skipna`` is False and the column has nulls; False when
        ``skipna`` is True and every entry is null (this includes the empty
        column); otherwise the result of the libcudf "any" reduction.
    """
    # Early exit for fast cases.
    result_col = self.nans_to_nulls() if skipna else self
    if not skipna and result_col.has_nulls:
        return True
    elif skipna and result_col.null_count == result_col.size:
        return False

    if isinstance(result_col, ColumnBase):
        # Coerce so callers get a builtin bool, as the annotation promises,
        # rather than whatever scalar type the reduction hands back.
        return bool(libcudf.reduce.reduce("any", result_col, dtype=np.bool_))
    else:
        # NOTE(review): only reached if nans_to_nulls() yields something that
        # is not a ColumnBase; the value is passed through untouched.
        # Confirm whether this branch is reachable.
        return result_col

def __sizeof__(self) -> int:
n = 0
Expand Down Expand Up @@ -911,9 +931,9 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
return self.as_interval_column(dtype, **kwargs)
elif is_decimal_dtype(dtype):
return self.as_decimal_column(dtype, **kwargs)
elif np.issubdtype(dtype, np.datetime64):
elif np.issubdtype(cast(Any, dtype), np.datetime64):
return self.as_datetime_column(dtype, **kwargs)
elif np.issubdtype(dtype, np.timedelta64):
elif np.issubdtype(cast(Any, dtype), np.timedelta64):
return self.as_timedelta_column(dtype, **kwargs)
else:
return self.as_numerical_column(dtype, **kwargs)
Expand Down
Loading