Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expunge as_frame conversions in Column algorithms #14491

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,14 +943,14 @@ def is_unique(self) -> bool:

@property
def is_monotonic_increasing(self) -> bool:
return not self.has_nulls() and self.as_frame()._is_sorted(
ascending=None, null_position=None
return not self.has_nulls() and libcudf.sort.is_sorted(
[self], [True], None
)

@property
def is_monotonic_decreasing(self) -> bool:
return not self.has_nulls() and self.as_frame()._is_sorted(
ascending=[False], null_position=None
return not self.has_nulls() and libcudf.sort.is_sorted(
[self], [False], None
)

def sort_values(
Expand Down Expand Up @@ -1134,8 +1134,8 @@ def apply_boolean_mask(self, mask) -> ColumnBase:
def argsort(
self, ascending: bool = True, na_position: str = "last"
) -> "cudf.core.column.NumericalColumn":
return self.as_frame()._get_sorted_inds(
ascending=ascending, na_position=na_position
return libcudf.sort.order_by(
[self], [ascending], na_position, stable=True
)

def __arrow_array__(self, type=None):
Expand All @@ -1161,10 +1161,17 @@ def searchsorted(
side: str = "left",
ascending: bool = True,
na_position: str = "last",
):
values = as_column(value).as_frame()
return self.as_frame().searchsorted(
values, side, ascending=ascending, na_position=na_position
) -> Self:
if not isinstance(value, ColumnBase) or value.dtype != self.dtype:
raise ValueError(
"Column searchsorted expects values to be column of same dtype"
)
return libcudf.search.search_sorted(
[self],
[value],
side=side,
ascending=ascending,
na_position=na_position,
)

def unique(self) -> ColumnBase:
Expand Down
14 changes: 5 additions & 9 deletions python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import cudf
from cudf import _lib as libcudf
from cudf._lib.quantiles import quantile as cpp_quantile
from cudf._lib.strings.convert.convert_fixed_point import (
from_decimal as cpp_from_decimal,
)
Expand Down Expand Up @@ -192,15 +191,12 @@ def _decimal_quantile(
) -> ColumnBase:
quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q
# get sorted indices and exclude nulls
sorted_indices = self.as_frame()._get_sorted_inds(
ascending=True, na_position="first"
indices = libcudf.sort.order_by(
[self], [True], "first", stable=True
).slice(self.null_count, len(self))
result = libcudf.quantiles.quantile(
self, quant, interpolation, indices, exact
)
sorted_indices = sorted_indices[self.null_count :]

result = cpp_quantile(
self, quant, interpolation, sorted_indices, exact
)

return result._with_type_metadata(self.dtype)

def as_numerical_column(
Expand Down
11 changes: 4 additions & 7 deletions python/cudf/cudf/core/column/numerical_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,15 +182,12 @@ def _numeric_quantile(
self, q: np.ndarray, interpolation: str, exact: bool
) -> NumericalBaseColumn:
# get sorted indices and exclude nulls
sorted_indices = self.as_frame()._get_sorted_inds(
ascending=True, na_position="first"
)
sorted_indices = sorted_indices.slice(
self.null_count, len(sorted_indices)
)
indices = libcudf.sort.order_by(
[self], [True], "first", stable=True
).slice(self.null_count, len(self))

return libcudf.quantiles.quantile(
self, q, interpolation, sorted_indices, exact
self, q, interpolation, indices, exact
)

def cov(self, other: NumericalBaseColumn) -> float:
Expand Down
5 changes: 4 additions & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3260,8 +3260,11 @@ def _first_or_last(
# is on the end of the offset. See pandas gh29623 for detail.
to_search = to_search - pd_offset.base
return self.loc[:to_search]
needle = as_column(to_search, dtype=self._index.dtype)
end_point = int(
self._index._column.searchsorted(to_search, side=side)[0]
self._index._column.searchsorted(
needle, side=side
).element_indexing(0)
)
return slice_func(end_point)

Expand Down
5 changes: 4 additions & 1 deletion python/cudf/cudf/tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ def test_searchsorted(side, obj_class, vals_class):
pvals = vals.to_pandas()

expect = psr.searchsorted(pvals, side)
got = sr.searchsorted(vals, side)
if obj_class == "column":
got = sr.searchsorted(vals._column, side)
else:
got = sr.searchsorted(vals, side)

assert_eq(expect, cupy.asnumpy(got))

Expand Down