Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Struct binary comparison op functionality for spark rapids #11153

Merged
merged 140 commits into from
Jul 8, 2022
Merged
Show file tree
Hide file tree
Changes from 131 commits
Commits
Show all changes
140 commits
Select commit Hold shift + click to select a range
262c2a2
struct binop first pass
rwlee Sep 30, 2021
a865fe0
vector-vector nested struct comparison
rwlee Oct 14, 2021
ce4440c
cleanup and simplify core code
rwlee Oct 16, 2021
1472c5f
remove type dispatch and other code cleanup
rwlee Oct 20, 2021
4a31fb6
move struct comparison to compiled binops code
rwlee Oct 20, 2021
ce2d727
improved testing, type checks, and skipped null value calculations
rwlee Oct 21, 2021
10c95f9
cleanup
rwlee Oct 21, 2021
12cd09e
Merge branch 'branch-21.12' into rwlee/struct_col_compare
rwlee Oct 21, 2021
b6fa590
fix upmerge issues
rwlee Oct 21, 2021
d64c1f9
fix logic and improve documentation
rwlee Oct 22, 2021
57900da
clean up logic for nulls
rwlee Oct 22, 2021
f170149
remove unecessary call to superimpose parent nulls
rwlee Oct 22, 2021
de129a1
PR fixes
rwlee Oct 27, 2021
5e84e89
Merge branch 'branch-21.12' into rwlee/struct_col_compare
rwlee Oct 27, 2021
b632192
pr fixes
rwlee Oct 27, 2021
4266f8c
Merge branch 'branch-21.12' into rwlee/struct_col_compare
rwlee Oct 27, 2021
367ec07
restructure struct binop code and other pr fixes
rwlee Nov 3, 2021
5a1f016
Merge branch 'branch-21.12' into rwlee/struct_col_compare
rwlee Nov 3, 2021
1f29168
Merge branch 'branch-21.12' into rwlee/struct_col_compare
rwlee Nov 3, 2021
1d6263e
full paths for includes
rwlee Nov 9, 2021
97bd5e1
Merge branch 'branch-21.12' into rwlee/struct_col_compare
rwlee Nov 9, 2021
48d0355
move to new TU and remove common code
rwlee Nov 11, 2021
6cf0e16
fix logic errors and push down struct branching
rwlee Nov 11, 2021
9ec2acf
remove deleted file from CMakeLists
rwlee Nov 11, 2021
3016abf
Naming and comment fixes
rwlee Nov 11, 2021
b2a7973
naming
rwlee Nov 11, 2021
191da69
style formatting
rwlee Nov 11, 2021
2cf2b28
merge apply_binary_op and _impl implementation
rwlee Nov 12, 2021
2b634c4
all apply_binary_op calls call apply_binary_op_impl
rwlee Nov 13, 2021
19f1afb
common code path
rwlee Nov 23, 2021
f316a0a
explicit instantiation of struct_compare
rwlee Nov 23, 2021
c684ef1
Merge branch 'branch-22.02' into rwlee/struct_col_compare
rwlee Nov 23, 2021
7f36241
streamline explicit instantiation
rwlee Nov 29, 2021
8cf0660
Merge remote-tracking branch 'pub/branch-22.02' into rwlee/struct_col…
rwlee Nov 29, 2021
2abefd5
remove op argument
rwlee Dec 5, 2021
8cc05e2
documentation
rwlee Dec 6, 2021
1bde152
Merge branch 'branch-22.02' into rwlee/struct_col_compare
rwlee Dec 6, 2021
470acfe
Fix upmerge errors
rwlee Dec 6, 2021
ce21d90
Merge remote-tracking branch 'pub/branch-22.02' into rwlee/struct_col…
rwlee Dec 16, 2021
83fa370
Merge remote-tracking branch 'pub/branch-22.04' into rwlee/struct_col…
rwlee Feb 1, 2022
2b77739
fix new ops from upmerge
rwlee Feb 4, 2022
de09cec
Fix floating point nan handling in struct comparison binops
rwlee Feb 8, 2022
8ad9545
Merge remote-tracking branch 'pub/branch-22.04' into rwlee/struct_col…
rwlee Feb 8, 2022
251d607
fix formatting
rwlee Feb 9, 2022
703aaf8
fix copyright
rwlee Feb 14, 2022
43e451b
fix accidently deletd function
rwlee Feb 14, 2022
9ec4a41
style fix
rwlee Feb 15, 2022
201a89b
copyright fix
rwlee Feb 15, 2022
cc164d6
Merge remote-tracking branch 'pub/branch-22.04' into rwlee/struct_col…
rwlee Feb 15, 2022
1bb1534
fix cmake style
rwlee Feb 15, 2022
6c6c8ab
re-add missing function name
rwlee Feb 16, 2022
42e58ae
style fix
rwlee Feb 16, 2022
475c896
Fix struct equality binop comparisons
rwlee Feb 19, 2022
1dae04a
PR reviews
rwlee Mar 7, 2022
62224cf
Merge remote-tracking branch 'pub/branch-22.04' into rwlee/struct_col…
rwlee Mar 8, 2022
a35600d
refactor row comparison operators into common spaceship operator
rwlee Mar 22, 2022
b6f0397
Merge remote-tracking branch 'pub/branch-22.04' into rwlee/struct_col…
rwlee Mar 22, 2022
fcc1dd2
first pass, test failures
rwlee Mar 29, 2022
5abf2a8
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/struct_col…
rwlee Mar 29, 2022
9d50ac0
Refactor struct binop comparison to use experimental ops
rwlee Apr 16, 2022
8628c24
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/struct_col…
rwlee Apr 16, 2022
a836a96
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/struct_col…
rwlee Apr 18, 2022
a537805
fix performance regression and code cleanup
rwlee May 2, 2022
f7af41f
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/struct_col…
rwlee May 2, 2022
1af4643
fix merge errors
rwlee May 3, 2022
4d929d9
Merge remote-tracking branch 'upstream/branch-22.06' into rwlee/struc…
bdice May 3, 2022
2298988
Revert include changes.
bdice May 3, 2022
bf1c6ee
split off weak ordering row operator changes
rwlee May 4, 2022
5d87db2
device_row_comparator private with friend class
rwlee May 4, 2022
fd716b9
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/row_op_split
rwlee May 4, 2022
2dd2045
device_less conversion to templated struct
rwlee May 6, 2022
7ba960e
fold parameter pack
rwlee May 9, 2022
84833e7
Apply suggestions from code review
rwlee May 10, 2022
a944b4f
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/struct_col…
rwlee May 10, 2022
4d197ea
fix code style
rwlee May 10, 2022
08092fe
Merge branch 'rwlee/row_op_split' of github.com:rwlee/cudf into rwlee…
rwlee May 10, 2022
d8986c5
Merge branch 'rwlee/row_op_split' into rwlee/struct_col_compare
rwlee May 10, 2022
548dcf1
fix code format
rwlee May 10, 2022
1dd1159
Merge remote-tracking branch 'upstream/branch-22.06' into rwlee/struc…
bdice May 11, 2022
eaffdea
NaN handling in device_row_comparators
rwlee May 17, 2022
a657b14
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/struct_col…
rwlee May 17, 2022
3d2a475
Merge remote-tracking branch 'pub/branch-22.06' into rwlee/nanconfig
rwlee May 18, 2022
e518668
template the comparator
rwlee May 20, 2022
c99e3c5
partial fix to performance regression
rwlee May 23, 2022
bb00193
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/nanconfig
rwlee May 24, 2022
798d6c5
Template NaN config lexicographic and equality
rwlee May 31, 2022
c5f9961
Add experimental row operator tests
rwlee May 31, 2022
0d4e798
switch to CUDF_ENABLE_IF
rwlee Jun 1, 2022
f35d9e3
Naming and add equality tests
rwlee Jun 1, 2022
d7fca8c
pr fixes, split off struct op
rwlee Jun 1, 2022
e4cee95
reorder cmake test file
rwlee Jun 1, 2022
58d2663
fix cmake formatting
rwlee Jun 1, 2022
3a6cd68
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/nanconfig
rwlee Jun 1, 2022
6d57678
Merge branch 'rwlee/nanconfig' into rwlee/struct_col_compare
rwlee Jun 1, 2022
49be087
Apply suggestions from code review
rwlee Jun 2, 2022
d0f64f2
ctad refactor + split device_comaparator call
rwlee Jun 2, 2022
0576e6c
Merge branch 'rwlee/nanconfig' of github.com:rwlee/cudf into rwlee/na…
rwlee Jun 2, 2022
1bd5405
rename experimental op test file
rwlee Jun 2, 2022
70da3b4
comment cleanup and pr fixes
rwlee Jun 2, 2022
e4a7029
physical comparator clarification docs
rwlee Jun 2, 2022
1240c85
fix whitespace
rwlee Jun 2, 2022
3304615
fix formatting
rwlee Jun 2, 2022
97c116e
update binary op functionality for nanconfig comparators
rwlee Jun 2, 2022
b1bc702
Merge branch 'rwlee/nanconfig' into rwlee/struct_col_compare
rwlee Jun 2, 2022
4f80b1c
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/nanconfig
rwlee Jun 2, 2022
9ca2d29
fix copyright
rwlee Jun 2, 2022
933b25d
Merge remote-tracking branch 'origin/rwlee/nanconfig' into rwlee/stru…
rwlee Jun 2, 2022
51938fd
Functors as arguments, enabling CTAD
rwlee Jun 2, 2022
3f545fe
Docs, cleanup, and renaming
rwlee Jun 3, 2022
fe4fc60
update struct ops for new nan configurations
rwlee Jun 3, 2022
d0c58a6
Merge remote-tracking branch 'origin/rwlee/nanconfig' into rwlee/stru…
rwlee Jun 3, 2022
2eb8103
device_comparator --> equal_to
rwlee Jun 3, 2022
2e1c9a3
Merge remote-tracking branch 'origin/rwlee/nanconfig' into rwlee/stru…
rwlee Jun 3, 2022
576fab0
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/struct_col…
rwlee Jun 3, 2022
44ac4e0
fix nullate logic in binop
rwlee Jun 7, 2022
1138c5a
JNI work and restructuring for comparison ops
rwlee Jun 16, 2022
fc5f339
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/spark_stru…
rwlee Jun 16, 2022
944e05c
restore files that should match main
rwlee Jun 16, 2022
feede52
code cleanup, strip out struct binop utilities
rwlee Jun 17, 2022
b9337df
JNI tests, checks, fixes, and cleanup for struct binop compare
rwlee Jun 24, 2022
7239675
fix C++ code style
rwlee Jun 24, 2022
20d76e7
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/spark_stru…
rwlee Jun 24, 2022
8bdac0d
Update copyright.
bdice Jun 27, 2022
6e6a476
Blank lines around namespaces.
bdice Jun 27, 2022
f32aba5
Make fallthrough explicit, enable clang-format.
bdice Jun 27, 2022
7fe1fb5
Merge branch 'rwlee/spark_structbinops' of github.com:rwlee/cudf into…
rwlee Jun 27, 2022
aec885f
code cleanup and PR fixes
rwlee Jun 27, 2022
1ada1be
Revert chnages to scalar_to_column_view
rwlee Jun 28, 2022
b828d37
cleanup jni code and fix error
rwlee Jun 29, 2022
5424aa1
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/spark_stru…
rwlee Jun 29, 2022
fd3ae69
cleanup java tests
rwlee Jul 1, 2022
589abe4
pr fixes and cleanup
rwlee Jul 6, 2022
8116a23
Merge remote-tracking branch 'pub/branch-22.08' into rwlee/spark_stru…
rwlee Jul 6, 2022
f9c2c5c
naming and sfinae formatting
rwlee Jul 7, 2022
d7cd266
change argument ordering
rwlee Jul 7, 2022
9767653
Revert "change argument ordering"
rwlee Jul 7, 2022
f8a2910
review fixes
rwlee Jul 8, 2022
b18a277
Apply suggestions from code review
rwlee Jul 8, 2022
a27732c
change templating and cleanup
rwlee Jul 8, 2022
5f9e565
comment and remove extra ;
rwlee Jul 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions cpp/include/cudf/binaryop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,5 +214,47 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op,
cudf::data_type const& lhs,
cudf::data_type const& rhs);

namespace binops {
ttnghia marked this conversation as resolved.
Show resolved Hide resolved

/**
 * @brief Computes output valid mask for op between a column and a scalar
 *
 * @param col Column to compute the valid mask from
 * @param s Scalar to compute the valid mask from
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned valid mask
 * @return Computed validity mask
 */
rmm::device_buffer scalar_col_valid_mask_and(
  column_view const& col,
  scalar const& s,
  rmm::cuda_stream_view stream        = cudf::default_stream_value,
  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

namespace compiled {
namespace detail {

/**
* @brief struct binary operation using `NaN` aware sorting physical element comparators
*
* Dispatches on @p op:
* - `EQUAL` and `NOT_EQUAL` are evaluated with the `nan_equal_physical_equality_comparator`.
* - `LESS`, `GREATER`, `LESS_EQUAL` and `GREATER_EQUAL` are evaluated with the lexicographic
*   `sorting_physical_element_comparator`.
* - Any other operator is rejected with `CUDF_FAIL`.
*
* Both @p lhs and @p rhs must be `STRUCT` columns and neither may be, or contain, a list column;
* both conditions are enforced with `CUDF_EXPECTS` before any comparison runs.
*
* @param out mutable view of output column; results are written as `bool` elements, and rows
* that are null in @p out are written as `false`
* @param lhs view of left operand column
* @param rhs view of right operand column
* @param is_lhs_scalar true if @p lhs is a single element column representing a scalar
* (row 0 of @p lhs is then compared against every row)
* @param is_rhs_scalar true if @p rhs is a single element column representing a scalar
* (row 0 of @p rhs is then compared against every row)
* @param op binary operator identifier
* @param stream CUDA stream used for device memory operations
*/
void apply_sorting_struct_binary_op(mutable_column_view& out,
column_view const& lhs,
column_view const& rhs,
bool is_lhs_scalar,
bool is_rhs_scalar,
binary_operator op,
rmm::cuda_stream_view stream = cudf::default_stream_value);
} // namespace detail
} // namespace compiled
} // namespace binops

/** @} */ // end of group
} // namespace cudf
87 changes: 85 additions & 2 deletions cpp/src/binaryop/compiled/binary_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

#include "binary_ops.hpp"
#include "operation.cuh"
#include "struct_binary_ops.cuh"

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/structs/utilities.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/detail/utilities.cuh>

Expand All @@ -44,15 +46,23 @@ namespace {
*/
struct scalar_as_column_view {
using return_type = typename std::pair<column_view, std::unique_ptr<column>>;
template <typename T, std::enable_if_t<(is_fixed_width<T>())>* = nullptr>
template <typename T, CUDF_ENABLE_IF(is_fixed_width<T>())>
return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
{
auto& h_scalar_type_view = static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(s));
auto col_v =
column_view(s.type(), 1, h_scalar_type_view.data(), (bitmask_type const*)s.validity_data());
return std::pair{col_v, std::unique_ptr<column>(nullptr)};
}
template <typename T, std::enable_if_t<(!is_fixed_width<T>())>* = nullptr>
template <typename T, CUDF_ENABLE_IF(is_struct<T>())>
rwlee marked this conversation as resolved.
Show resolved Hide resolved
return_type operator()(scalar const& s,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto col = make_column_from_scalar(s, 1, stream, mr);
return std::pair{col->view(), std::move(col)};
}
template <typename T, CUDF_ENABLE_IF(!is_fixed_width<T>() and !is_struct<T>())>
return_type operator()(scalar const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
{
CUDF_FAIL("Unsupported type");
Expand Down Expand Up @@ -375,6 +385,79 @@ void binary_operation(mutable_column_view& out,
auto [rhsv, aux] = scalar_to_column_view(rhs, stream);
operator_dispatcher(out, lhs, rhsv, false, true, op, stream);
}

namespace detail {
/**
 * @brief Struct comparison entry point that selects the NaN-aware physical comparators.
 *
 * Validates that both operands are struct columns without nested lists, then forwards to
 * `apply_struct_equality_op` (for `EQUAL` / `NOT_EQUAL`) or to `apply_struct_binary_op`
 * instantiated with the matching `ops::` functor (for `LESS`, `GREATER`, `LESS_EQUAL`,
 * `GREATER_EQUAL`). Every other operator ends in `CUDF_FAIL`.
 *
 * @param out mutable view of output column
 * @param lhs view of left operand column
 * @param rhs view of right operand column
 * @param is_lhs_scalar true if @p lhs is a single element column representing a scalar
 * @param is_rhs_scalar true if @p rhs is a single element column representing a scalar
 * @param op binary operator identifier
 * @param stream CUDA stream used for device memory operations
 */
void apply_sorting_struct_binary_op(mutable_column_view& out,
                                    column_view const& lhs,
                                    column_view const& rhs,
                                    bool is_lhs_scalar,
                                    bool is_rhs_scalar,
                                    binary_operator op,
                                    rmm::cuda_stream_view stream)
{
  CUDF_EXPECTS(lhs.type().id() == type_id::STRUCT && rhs.type().id() == type_id::STRUCT,
               "Both columns must be struct columns");
  CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(lhs) and
                 !cudf::structs::detail::is_or_has_nested_lists(rhs),
               "Lists not supported");

  // All four ordering operators share the same element comparator; build it once.
  auto const sorting_comparator =
    cudf::experimental::row::lexicographic::sorting_physical_element_comparator{};

  // Struct child column type and structure mismatches are caught within the two_table_comparator
  switch (op) {
    case binary_operator::EQUAL: [[fallthrough]];
    case binary_operator::NOT_EQUAL:
      detail::apply_struct_equality_op(
        out,
        lhs,
        rhs,
        is_lhs_scalar,
        is_rhs_scalar,
        op,
        cudf::experimental::row::equality::nan_equal_physical_equality_comparator{},
        stream);
      break;
    case binary_operator::LESS:
      detail::apply_struct_binary_op<ops::Less>(
        out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, sorting_comparator, stream);
      break;
    case binary_operator::GREATER:
      detail::apply_struct_binary_op<ops::Greater>(
        out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, sorting_comparator, stream);
      break;
    case binary_operator::LESS_EQUAL:
      detail::apply_struct_binary_op<ops::LessEqual>(
        out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, sorting_comparator, stream);
      break;
    case binary_operator::GREATER_EQUAL:
      detail::apply_struct_binary_op<ops::GreaterEqual>(
        out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, sorting_comparator, stream);
      break;
    default: CUDF_FAIL("Unsupported operator for structs");
  }
}
} // namespace detail
} // namespace compiled
} // namespace binops
} // namespace cudf
5 changes: 1 addition & 4 deletions cpp/src/binaryop/compiled/binary_ops.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,7 @@ void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f)
const int grid_size = util::div_rounding_up_safe(size, 2 * block_size);
for_each_kernel<<<grid_size, block_size, 0, stream.value()>>>(size, std::forward<Functor&&>(f));
}
namespace detail {
/// `true` when `T` is exactly one of the types in `Ts...`; `false` for an empty `Ts`.
template <class T, class... Ts>
inline constexpr bool is_any_v = (std::is_same_v<T, Ts> || ...);
}  // namespace detail

template <class BinaryOperator>
void apply_binary_op(mutable_column_view& out,
column_view const& lhs,
Expand Down
127 changes: 127 additions & 0 deletions cpp/src/binaryop/compiled/struct_binary_ops.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "binary_ops.hpp"
#include "operation.cuh"

#include <cudf/binaryop.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_view.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/table/experimental/row_operators.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

namespace cudf::binops::compiled::detail {
/// `true` when `T` is exactly one of the types in `Ts...`; `false` for an empty `Ts`.
template <class T, class... Ts>
inline constexpr bool is_any_v = (std::is_same_v<T, Ts> || ...);

template <class BinaryOperator,
typename PhysicalElementComparator =
cudf::experimental::row::lexicographic::sorting_physical_element_comparator>
void apply_struct_binary_op(mutable_column_view& out,
column_view const& lhs,
column_view const& rhs,
bool is_lhs_scalar,
bool is_rhs_scalar,
PhysicalElementComparator c = {},
rwlee marked this conversation as resolved.
Show resolved Hide resolved
rmm::cuda_stream_view stream = cudf::default_stream_value)
{
auto compare_orders = std::vector<order>(lhs.size(),
is_any_v<BinaryOperator, ops::Greater, ops::GreaterEqual>
? order::DESCENDING
: order::ASCENDING);
auto tlhs = table_view{{lhs}};
auto trhs = table_view{{rhs}};
auto table_comparator = cudf::experimental::row::lexicographic::two_table_comparator{
tlhs, trhs, compare_orders, {}, stream};
rwlee marked this conversation as resolved.
Show resolved Hide resolved
auto outd = column_device_view::create(out, stream);
auto optional_iter =
cudf::detail::make_optional_iterator<bool>(*outd, nullate::DYNAMIC{out.has_nulls()});

if (is_any_v<BinaryOperator, ops::LessEqual, ops::GreaterEqual>) {
rwlee marked this conversation as resolved.
Show resolved Hide resolved
auto device_comparator = table_comparator.less_equivalent(
nullate::DYNAMIC{nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}}, c);
thrust::tabulate(
rmm::exec_policy(stream),
out.begin<bool>(),
out.end<bool>(),
[optional_iter, is_lhs_scalar, is_rhs_scalar, device_comparator] __device__(size_type i) {
return optional_iter[i].has_value() &&
device_comparator(cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i},
cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i});
});

} else {
auto device_comparator = table_comparator.less(
nullate::DYNAMIC{nullate::DYNAMIC{has_nested_nulls(tlhs) || has_nested_nulls(trhs)}}, c);
thrust::tabulate(
rmm::exec_policy(stream),
out.begin<bool>(),
out.end<bool>(),
[optional_iter, is_lhs_scalar, is_rhs_scalar, device_comparator] __device__(size_type i) {
return optional_iter[i].has_value() &&
device_comparator(cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i},
cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i});
});
}
return;
rwlee marked this conversation as resolved.
Show resolved Hide resolved
}

/**
 * @brief Row-wise `==` / `!=` between two struct columns.
 *
 * Each operand is wrapped in a single-column table and compared with an equality
 * `two_table_comparator` using `null_equality::EQUAL`. Rows that are null in @p out are written
 * as `false`; the comparator is not invoked for them.
 *
 * @tparam PhysicalEqualityComparator element comparator forwarded to the row comparator
 * @param out mutable view of output column, written through `out.begin<bool>()`
 * @param lhs view of left operand column
 * @param rhs view of right operand column
 * @param is_lhs_scalar true if row 0 of @p lhs should be used for every output row
 * @param is_rhs_scalar true if row 0 of @p rhs should be used for every output row
 * @param op must be `binary_operator::EQUAL` or `binary_operator::NOT_EQUAL`
 * @param c element comparator instance
 * @param stream CUDA stream used for device memory operations and kernel launches
 */
template <typename PhysicalEqualityComparator =
            cudf::experimental::row::equality::physical_equality_comparator>
void apply_struct_equality_op(mutable_column_view& out,
                              column_view const& lhs,
                              column_view const& rhs,
                              bool is_lhs_scalar,
                              bool is_rhs_scalar,
                              binary_operator op,
                              PhysicalEqualityComparator c = {},
                              rmm::cuda_stream_view stream = cudf::default_stream_value)
{
  CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL,
               "Unsupported operator for these types");

  auto const left_table  = table_view{{lhs}};
  auto const right_table = table_view{{rhs}};

  auto row_comparator =
    cudf::experimental::row::equality::two_table_comparator{left_table, right_table, stream};

  // Null-aware comparison is only requested when either operand has nulls at any nesting depth.
  auto const any_nulls =
    nullate::DYNAMIC{has_nested_nulls(left_table) || has_nested_nulls(right_table)};
  auto rows_equal = row_comparator.equal_to(any_nulls, null_equality::EQUAL, c);

  auto out_device = column_device_view::create(out, stream);
  auto out_validity =
    cudf::detail::make_optional_iterator<bool>(*out_device, nullate::DYNAMIC{out.has_nulls()});

  thrust::tabulate(
    rmm::exec_policy(stream),
    out.begin<bool>(),
    out.end<bool>(),
    [out_validity,
     is_lhs_scalar,
     is_rhs_scalar,
     // `op` was validated above, so "not NOT_EQUAL" is the same as "EQUAL".
     expect_equal = (op == binary_operator::EQUAL),
     rows_equal] __device__(size_type i) {
      // A null output row is always written as false, without comparing the operands.
      if (not out_validity[i].has_value()) { return false; }
      auto const left_row  = cudf::experimental::row::lhs_index_type{is_lhs_scalar ? 0 : i};
      auto const right_row = cudf::experimental::row::rhs_index_type{is_rhs_scalar ? 0 : i};
      return rows_equal(left_row, right_row) == expect_equal;
    });
}
} // namespace cudf::binops::compiled::detail
rwlee marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion java/src/main/native/include/maps_column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ class maps_column_view {
*/

std::unique_ptr<column>
contains(column_view const &key, rmm::cuda_stream_view stream = rmm::cuda_stream_default,
contains(column_view const &key, rmm::cuda_stream_view stream = cudf::default_stream_value,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const;

private:
Expand Down
25 changes: 23 additions & 2 deletions java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/concatenate.hpp>
#include <cudf/datetime.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/filling.hpp>
#include <cudf/hashing.hpp>
#include <cudf/lists/contains.hpp>
Expand Down Expand Up @@ -1289,9 +1290,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVV(JNIEnv *env, j
cudf::jni::auto_set_device(env);
auto lhs = reinterpret_cast<cudf::column_view *>(lhs_view);
auto rhs = reinterpret_cast<cudf::column_view *>(rhs_view);

cudf::data_type n_data_type = cudf::jni::make_data_type(out_dtype, scale);
cudf::binary_operator op = static_cast<cudf::binary_operator>(int_op);

if (lhs->type().id() == cudf::type_id::STRUCT) {
auto [new_mask, null_count] = cudf::bitmask_and(cudf::table_view{{*lhs, *rhs}});
auto out = make_fixed_width_column(n_data_type, lhs->size(), std::move(new_mask), null_count);
auto out_view = out->mutable_view();
cudf::binops::compiled::detail::apply_sorting_struct_binary_op(out_view, *lhs, *rhs, false,
false, op);
return release_as_jlong(out);
}

return release_as_jlong(cudf::binary_operation(*lhs, *rhs, op, n_data_type));
}
CATCH_STD(env, 0);
Expand Down Expand Up @@ -1320,8 +1330,19 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j
auto lhs = reinterpret_cast<cudf::column_view *>(lhs_view);
cudf::scalar *rhs = reinterpret_cast<cudf::scalar *>(rhs_ptr);
cudf::data_type n_data_type = cudf::jni::make_data_type(out_dtype, scale);

cudf::binary_operator op = static_cast<cudf::binary_operator>(int_op);

if (lhs->type().id() == cudf::type_id::STRUCT) {
auto new_mask = cudf::binops::scalar_col_valid_mask_and(*lhs, *rhs);
auto out = make_fixed_width_column(n_data_type, lhs->size(), std::move(new_mask),
cudf::UNKNOWN_NULL_COUNT);
auto rhsv = cudf::make_column_from_scalar(*rhs, 1);
auto out_view = out->mutable_view();
cudf::binops::compiled::detail::apply_sorting_struct_binary_op(out_view, *lhs, rhsv->view(),
false, true, op);
return release_as_jlong(out);
}

return release_as_jlong(cudf::binary_operation(*lhs, *rhs, op, n_data_type));
}
CATCH_STD(env, 0);
Expand Down
Loading