Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use cudf::lists::distinct in Python binding #11234

Merged
merged 38 commits into branch-22.08 from use_lists_distinct_in_python
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
1d7e8e0
Add new implementation and test files
ttnghia Jun 24, 2022
51b80db
Fix compile error
ttnghia Jun 24, 2022
08a76ad
Rename function
ttnghia Jun 27, 2022
16101f7
Implement `cudf::detail::stable_distinct` and `lists::distinct`
ttnghia Jun 27, 2022
5ec13d6
Rewrite doxygen
ttnghia Jun 27, 2022
6c5b738
Rename variable
ttnghia Jun 27, 2022
5b70eee
Rewrite comment
ttnghia Jun 27, 2022
238248d
Rename files
ttnghia Jun 27, 2022
ba6bf6b
Implement float tests
ttnghia Jun 27, 2022
3845c95
Implement string tests
ttnghia Jun 27, 2022
507c82d
Implement tests for `ListDistinctTypedTest`
ttnghia Jun 28, 2022
2cb8347
Complete the remaining tests
ttnghia Jun 28, 2022
7efdea0
Merge branch 'branch-22.08' into add_lists_distinct
ttnghia Jun 28, 2022
4388637
Rewrite doxygen
ttnghia Jun 28, 2022
4dd5e74
Misc
ttnghia Jun 28, 2022
3b0760c
Misc
ttnghia Jun 28, 2022
9730b70
Rewrite test
ttnghia Jun 28, 2022
9bd9b6f
Fix doxygen
ttnghia Jun 28, 2022
790a482
Fix header
ttnghia Jun 28, 2022
1c58baa
Rewrite doxygen
ttnghia Jun 28, 2022
d493c4f
Rewrite doxygen and fix headers
ttnghia Jun 28, 2022
d090d2a
Fix iterator type
ttnghia Jun 30, 2022
ee51822
Rewrite doxygen
ttnghia Jun 30, 2022
ccdd6f0
Add empty lines
ttnghia Jun 30, 2022
034ee2a
Merge branch 'branch-22.08' into add_lists_distinct
ttnghia Jun 30, 2022
b1231a2
Update default stream
ttnghia Jun 30, 2022
af91b80
Merge branch 'branch-22.08' into add_lists_distinct
ttnghia Jul 5, 2022
86c9ba8
Merge branch 'branch-22.08' into add_lists_distinct
ttnghia Jul 8, 2022
99d70b1
Handle empty input
ttnghia Jul 8, 2022
d1fa26c
Merge branch 'add_lists_distinct' into use_lists_distinct_in_python
ttnghia Jul 8, 2022
717118e
Use `lists::distinct`
ttnghia Jul 8, 2022
24b7d0f
Fix style
ttnghia Jul 10, 2022
cfc7b9a
Update copyright year
ttnghia Jul 10, 2022
e957613
Fix import
ttnghia Jul 11, 2022
1f41fa5
Fix style
ttnghia Jul 11, 2022
a0440ec
Rename function
ttnghia Jul 11, 2022
7323122
Fix style
ttnghia Jul 11, 2022
29b642f
Merge branch 'branch-22.08' into use_lists_distinct_in_python
ttnghia Jul 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
# Copyright (c) 2021-2022, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr

Expand All @@ -7,9 +7,9 @@ from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
from cudf._lib.cpp.types cimport nan_equality, null_equality


cdef extern from "cudf/lists/drop_list_duplicates.hpp" \
cdef extern from "cudf/lists/stream_compaction.hpp" \
namespace "cudf::lists" nogil:
cdef unique_ptr[column] drop_list_duplicates(
cdef unique_ptr[column] distinct(
const lists_column_view lists_column,
null_equality nulls_equal,
nan_equality nans_equal
Expand Down
16 changes: 7 additions & 9 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@ from cudf._lib.cpp.lists.combine cimport (
from cudf._lib.cpp.lists.count_elements cimport (
count_elements as cpp_count_elements,
)
from cudf._lib.cpp.lists.drop_list_duplicates cimport (
drop_list_duplicates as cpp_drop_list_duplicates,
)
from cudf._lib.cpp.lists.explode cimport explode_outer as cpp_explode_outer
from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
from cudf._lib.cpp.lists.sorting cimport sort_lists as cpp_sort_lists
from cudf._lib.cpp.lists.stream_compaction cimport distinct as cpp_distinct
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
Expand Down Expand Up @@ -75,12 +73,12 @@ def explode_outer(
return columns_from_unique_ptr(move(c_result))


def drop_list_duplicates(Column col, bool nulls_equal, bool nans_all_equal):
def distinct(Column col, bool nulls_equal, bool nans_all_equal):
"""
nans_all_equal == True indicates that libcudf should treat any two elements
from {+nan, -nan} as equal, and as unequal otherwise.
nulls_equal == True indicates that libcudf should treat any two nulls as
equal, and as unequal otherwise.
nans_all_equal == True indicates that libcudf should treat any two
elements from {-nan, +nan} as equal, and as unequal otherwise.
"""
cdef shared_ptr[lists_column_view] list_view = (
make_shared[lists_column_view](col.view())
Expand All @@ -96,9 +94,9 @@ def drop_list_duplicates(Column col, bool nulls_equal, bool nans_all_equal):

with nogil:
c_result = move(
cpp_drop_list_duplicates(list_view.get()[0],
c_nulls_equal,
c_nans_equal)
cpp_distinct(list_view.get()[0],
c_nulls_equal,
c_nans_equal)
)
return Column.from_unique_ptr(move(c_result))

Expand Down
6 changes: 2 additions & 4 deletions python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
concatenate_rows,
contains_scalar,
count_elements,
drop_list_duplicates,
distinct,
extract_element_column,
extract_element_scalar,
index_of_column,
Expand Down Expand Up @@ -603,9 +603,7 @@ def unique(self) -> ParentType:
raise NotImplementedError("Nested lists unique is not supported.")

return self._return_or_inplace(
drop_list_duplicates(
self._column, nulls_equal=True, nans_all_equal=True
)
distinct(self._column, nulls_equal=True, nans_all_equal=True)
)

def sort_values(
Expand Down