Skip to content

Commit

Permalink
Make AST operators nullable (#9096)
Browse files Browse the repository at this point in the history
Contributes to #8980 and #8145.

This PR templates every `operator_functor` on the potential nullability of its inputs, allowing per-operator customization of null handling with minimal performance penalty. In the nullable case, the default specialization propagates nulls and otherwise falls through to the non-nullable specialization, which matches standard libcudf semantics; any specific operator's null handling can be customized by explicitly specializing the `operator_functor` template for that operator.

This PR implements `NULL_EQUAL`, `NULL_LOGICAL_AND`, and `NULL_LOGICAL_OR` operators that behave like their non-nullable counterparts when no nulls are present but can return non-null results from null inputs. The `NULL_EQUAL` operator replaces the `compare_nulls` parameter for conditional joins.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Bradley Dice (https://github.com/bdice)
  - nvdbaranec (https://github.com/nvdbaranec)

URL: #9096
  • Loading branch information
vyasr authored Sep 9, 2021
1 parent 794734c commit 8a78196
Show file tree
Hide file tree
Showing 16 changed files with 849 additions and 693 deletions.
14 changes: 9 additions & 5 deletions cpp/benchmarks/join/conditional_join_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ template <typename key_type, typename payload_type>
class ConditionalJoin : public cudf::benchmark {
};

// For compatibility with the shared logic for equality (hash) joins, all of
// the join lambdas defined by these macros accept a null_equality parameter
// but ignore it (don't forward it to the underlying join implementation)
// because conditional joins do not use this parameter.
#define CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(name, key_type, payload_type, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(ConditionalJoin, name, key_type, payload_type) \
(::benchmark::State & st) \
Expand All @@ -28,7 +32,7 @@ class ConditionalJoin : public cudf::benchmark {
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_inner_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_inner_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand All @@ -47,7 +51,7 @@ CONDITIONAL_INNER_JOIN_BENCHMARK_DEFINE(conditional_inner_join_64bit_nulls, int6
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_left_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_left_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand All @@ -66,7 +70,7 @@ CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit_nulls, int64_
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_inner_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_inner_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand All @@ -85,7 +89,7 @@ CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE(conditional_full_join_64bit_nulls, int64_
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_left_anti_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_left_anti_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand Down Expand Up @@ -116,7 +120,7 @@ CONDITIONAL_LEFT_ANTI_JOIN_BENCHMARK_DEFINE(conditional_left_anti_join_64bit_nul
cudf::table_view const& right, \
cudf::ast::operation binary_pred, \
cudf::null_equality compare_nulls) { \
return cudf::conditional_left_semi_join(left, right, binary_pred, compare_nulls); \
return cudf::conditional_left_semi_join(left, right, binary_pred); \
}; \
constexpr bool is_conditional = true; \
BM_join<key_type, payload_type, nullable, is_conditional>(st, join); \
Expand Down
100 changes: 33 additions & 67 deletions cpp/include/cudf/ast/detail/expression_evaluator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -235,19 +235,13 @@ struct expression_evaluator {
* @param plan The collection of device references representing the expression to evaluate.
* @param thread_intermediate_storage Pointer to this thread's portion of shared memory for
* storing intermediates.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
*/
__device__ expression_evaluator(table_device_view const& left,
table_device_view const& right,
expression_device_view const& plan,
IntermediateDataType<has_nulls>* thread_intermediate_storage,
null_equality compare_nulls = null_equality::EQUAL)
: left(left),
right(right),
plan(plan),
thread_intermediate_storage(thread_intermediate_storage),
compare_nulls(compare_nulls)
IntermediateDataType<has_nulls>* thread_intermediate_storage)
: left(left), right(right), plan(plan), thread_intermediate_storage(thread_intermediate_storage)
{
}

Expand All @@ -258,17 +252,14 @@ struct expression_evaluator {
* @param plan The collection of device references representing the expression to evaluate.
* @param thread_intermediate_storage Pointer to this thread's portion of shared memory for
* storing intermediates.
* @param compare_nulls Whether the equality operator returns true or false for two nulls.
*/
__device__ expression_evaluator(table_device_view const& table,
expression_device_view const& plan,
IntermediateDataType<has_nulls>* thread_intermediate_storage,
null_equality compare_nulls = null_equality::EQUAL)
IntermediateDataType<has_nulls>* thread_intermediate_storage)
: left(table),
right(table),
plan(plan),
thread_intermediate_storage(thread_intermediate_storage),
compare_nulls(compare_nulls)
thread_intermediate_storage(thread_intermediate_storage)
{
}

Expand Down Expand Up @@ -603,32 +594,28 @@ struct expression_evaluator {
* @param input Input to the operation.
* @param output Output data reference.
*/
template <
ast_operator op,
typename OutputType,
std::enable_if_t<detail::is_valid_unary_op<detail::operator_functor<op>, Input>>* = nullptr>
template <ast_operator op,
typename OutputType,
std::enable_if_t<
detail::is_valid_unary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<Input, has_nulls>>>* = nullptr>
__device__ void operator()(OutputType& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<Input, has_nulls> const input,
detail::device_data_reference const output) const
{
using OperatorFunctor = detail::operator_functor<op>;
using Out = cuda::std::invoke_result_t<OperatorFunctor, Input>;
if constexpr (has_nulls) {
auto const result = input.has_value()
? possibly_null_value_t<Out, has_nulls>(OperatorFunctor{}(*input))
: possibly_null_value_t<Out, has_nulls>();
this->template resolve_output<Out>(output_object, output, output_row_index, result);
} else {
this->template resolve_output<Out>(
output_object, output, output_row_index, OperatorFunctor{}(input));
}
// The output data type is the same whether or not nulls are present, so
// pull from the non-nullable operator.
using Out = cuda::std::invoke_result_t<detail::operator_functor<op, false>, Input>;
this->template resolve_output<Out>(
output_object, output, output_row_index, detail::operator_functor<op, has_nulls>{}(input));
}

template <
ast_operator op,
typename OutputType,
std::enable_if_t<!detail::is_valid_unary_op<detail::operator_functor<op>, Input>>* = nullptr>
template <ast_operator op,
typename OutputType,
std::enable_if_t<
!detail::is_valid_unary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<Input, has_nulls>>>* = nullptr>
__device__ void operator()(OutputType& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<Input, has_nulls> const input,
Expand Down Expand Up @@ -665,50 +652,31 @@ struct expression_evaluator {
*/
template <ast_operator op,
typename OutputType,
std::enable_if_t<
detail::is_valid_binary_op<detail::operator_functor<op>, LHS, RHS>>* = nullptr>
std::enable_if_t<detail::is_valid_binary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<LHS, has_nulls>,
possibly_null_value_t<RHS, has_nulls>>>* =
nullptr>
__device__ void operator()(OutputType& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<LHS, has_nulls> const lhs,
possibly_null_value_t<RHS, has_nulls> const rhs,
detail::device_data_reference const output) const
{
using OperatorFunctor = detail::operator_functor<op>;
using Out = cuda::std::invoke_result_t<OperatorFunctor, LHS, RHS>;
if constexpr (has_nulls) {
if constexpr (op == ast_operator::EQUAL) {
// Special handling of the equality operator based on what kind
// of null handling was requested.
possibly_null_value_t<Out, has_nulls> result;
if (!lhs.has_value() && !rhs.has_value()) {
// Case 1: Both null, so the output is based on compare_nulls.
result = possibly_null_value_t<Out, has_nulls>(this->evaluator.compare_nulls ==
null_equality::EQUAL);
} else if (lhs.has_value() && rhs.has_value()) {
// Case 2: Neither is null, so the output is given by the operation.
result = possibly_null_value_t<Out, has_nulls>(OperatorFunctor{}(*lhs, *rhs));
} else {
// Case 3: One value is null, while the other is not, so we simply propagate nulls.
result = possibly_null_value_t<Out, has_nulls>();
}
this->template resolve_output<Out>(output_object, output, output_row_index, result);
} else {
// Default behavior for all other operators is to propagate nulls.
auto result = (lhs.has_value() && rhs.has_value())
? possibly_null_value_t<Out, has_nulls>(OperatorFunctor{}(*lhs, *rhs))
: possibly_null_value_t<Out, has_nulls>();
this->template resolve_output<Out>(output_object, output, output_row_index, result);
}
} else {
this->template resolve_output<Out>(
output_object, output, output_row_index, OperatorFunctor{}(lhs, rhs));
}
// The output data type is the same whether or not nulls are present, so
// pull from the non-nullable operator.
using Out = cuda::std::invoke_result_t<detail::operator_functor<op, false>, LHS, RHS>;
this->template resolve_output<Out>(output_object,
output,
output_row_index,
detail::operator_functor<op, has_nulls>{}(lhs, rhs));
}

template <ast_operator op,
typename OutputType,
std::enable_if_t<
!detail::is_valid_binary_op<detail::operator_functor<op>, LHS, RHS>>* = nullptr>
!detail::is_valid_binary_op<detail::operator_functor<op, has_nulls>,
possibly_null_value_t<LHS, has_nulls>,
possibly_null_value_t<RHS, has_nulls>>>* = nullptr>
__device__ void operator()(OutputType& output_object,
cudf::size_type const output_row_index,
possibly_null_value_t<LHS, has_nulls> const lhs,
Expand All @@ -726,8 +694,6 @@ struct expression_evaluator {
IntermediateDataType<has_nulls>*
thread_intermediate_storage; ///< The shared memory store of intermediates produced during
///< evaluation.
null_equality
compare_nulls; ///< Whether the equality operator returns true or false for two nulls.
};

} // namespace detail
Expand Down
Loading

0 comments on commit 8a78196

Please sign in to comment.