Skip to content

Commit

Permalink
perf: Added optimizer rules for is_null().all() and similar expressions to use `null_count()` (#18359)
Browse files Browse the repository at this point in the history
  • Loading branch information
barak1412 authored Aug 30, 2024
1 parent ae3c647 commit 8b65fe3
Show file tree
Hide file tree
Showing 5 changed files with 380 additions and 0 deletions.
13 changes: 13 additions & 0 deletions crates/polars-plan/src/plans/aexpr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,19 @@ impl AExpr {
pub(crate) fn is_leaf(&self) -> bool {
matches!(self, AExpr::Column(_) | AExpr::Literal(_) | AExpr::Len)
}
pub(crate) fn new_null_count(input: &[ExprIR]) -> Self {
AExpr::Function {
input: input.to_vec(),
function: FunctionExpr::NullCount,
options: FunctionOptions {
collect_groups: ApplyOptions::GroupWise,
fmt_str: "",
cast_to_supertypes: None,
check_lengths: UnsafeBool::default(),
flags: FunctionFlags::ALLOW_GROUP_AWARE | FunctionFlags::RETURNS_SCALAR,
},
}
}
}

impl IRAggExpr {
Expand Down
11 changes: 11 additions & 0 deletions crates/polars-plan/src/plans/lit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,17 @@ impl LiteralValue {
LiteralValue::StrCat(_) => DataType::Unknown(UnknownKind::Str),
}
}

pub(crate) fn new_idxsize(value: IdxSize) -> Self {
#[cfg(feature = "bigidx")]
{
LiteralValue::UInt64(value)
}
#[cfg(not(feature = "bigidx"))]
{
LiteralValue::UInt32(value)
}
}
}

pub trait Literal {
Expand Down
69 changes: 69 additions & 0 deletions crates/polars-plan/src/plans/optimizer/simplify_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,75 @@ impl OptimizationRule for SimplifyExprRule {
let expr = expr_arena.get(expr_node).clone();

let out = match &expr {
// Rewrite a count over `drop_nulls()` into arithmetic on `null_count()`:
//   drop_nulls().len()   -> len() - null_count()
//   drop_nulls().count() -> len() - null_count()
// The second field of `Count` is ignored by the pattern, so both forms take this arm.
AExpr::Agg(IRAggExpr::Count(input, _)) => {
let input_expr = expr_arena.get(*input);
match input_expr {
// NOTE: this `input` (the inputs of `drop_nulls()`) shadows the outer `input` node.
AExpr::Function {
input,
function: FunctionExpr::DropNulls,
options: _,
} => {
// Only rewrite when `drop_nulls()` is applied directly to a single column.
// The rewritten expression references the input in both operands, so with CSE
// disabled any other input could cause a performance regression.
if input.len() == 1 {
let drop_nulls_input_node = input[0].node();
match expr_arena.get(drop_nulls_input_node) {
AExpr::Column(_) => Some(AExpr::BinaryExpr {
op: Operator::Minus,
// Right operand: `null_count()` over the same input as `drop_nulls()`.
right: expr_arena.add(AExpr::new_null_count(input)),
// Left operand: the `len()` of the rewrite above, built as a count over the
// column. NOTE(review): presumably `true` makes the count include nulls — confirm.
left: expr_arena.add(AExpr::Agg(IRAggExpr::Count(
drop_nulls_input_node,
true,
))),
}),
// Input is not a plain column: leave the expression unchanged.
_ => None,
}
} else {
None
}
},
// Count over anything other than `drop_nulls()`: nothing to simplify here.
_ => None,
}
},
// Rewrite a sum over a null-check into `null_count()`:
//   is_null().sum()     -> null_count()
//   is_not_null().sum() -> len() - null_count()
AExpr::Agg(IRAggExpr::Sum(input)) => {
let input_expr = expr_arena.get(*input);
match input_expr {
// `is_null().sum()` is replaced unconditionally: the result references the
// inputs of `is_null()` only once.
AExpr::Function {
input,
function: FunctionExpr::Boolean(BooleanFunction::IsNull),
options: _,
} => Some(AExpr::new_null_count(input)),
AExpr::Function {
input,
function: FunctionExpr::Boolean(BooleanFunction::IsNotNull),
options: _,
} => {
// Only rewrite when `is_not_null()` is applied directly to a single column.
// The rewritten expression references the input in both operands, so with CSE
// disabled any other input could cause a performance regression.
if input.len() == 1 {
let is_not_null_input_node = input[0].node();
match expr_arena.get(is_not_null_input_node) {
AExpr::Column(_) => Some(AExpr::BinaryExpr {
op: Operator::Minus,
// Right operand: `null_count()` over the same input as `is_not_null()`.
right: expr_arena.add(AExpr::new_null_count(input)),
// Left operand: the `len()` of the rewrite above, built as a count over the
// column. NOTE(review): presumably `true` makes the count include nulls — confirm.
left: expr_arena.add(AExpr::Agg(IRAggExpr::Count(
is_not_null_input_node,
true,
))),
}),
// Input is not a plain column: leave the expression unchanged.
_ => None,
}
} else {
None
}
},
// Sum over anything else: nothing to simplify here.
_ => None,
}
},
// lit(left) + lit(right) => lit(left + right)
// and null propagation
AExpr::BinaryExpr { left, op, right } => {
Expand Down
81 changes: 81 additions & 0 deletions crates/polars-plan/src/plans/optimizer/simplify_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,87 @@ pub(super) fn optimize_functions(
expr_arena: &mut Arena<AExpr>,
) -> PolarsResult<Option<AExpr>> {
let out = match function {
// Rewrite `any()` over a null-check into a comparison on `null_count()`:
//   is_null().any()     -> null_count() > 0
//   is_not_null().any() -> null_count() < len()
// CORRECTNESS: `ignore_nulls` can be ignored because is_null/is_not_null never produce nulls.
FunctionExpr::Boolean(BooleanFunction::Any { ignore_nulls: _ }) => {
let input_node = expr_arena.get(input[0].node());
match input_node {
// `is_null().any()` is replaced unconditionally: the result references the
// inputs of `is_null()` only once and compares against a literal zero.
AExpr::Function {
input,
function: FunctionExpr::Boolean(BooleanFunction::IsNull),
options: _,
} => Some(AExpr::BinaryExpr {
left: expr_arena.add(AExpr::new_null_count(input)),
op: Operator::Gt,
right: expr_arena.add(AExpr::Literal(LiteralValue::new_idxsize(0))),
}),
AExpr::Function {
input,
function: FunctionExpr::Boolean(BooleanFunction::IsNotNull),
options: _,
} => {
// Only rewrite when `is_not_null()` is applied directly to a single column.
// The rewritten expression references the input in both operands, so with CSE
// disabled any other input could cause a performance regression.
if input.len() == 1 {
let is_not_null_input_node = input[0].node();
match expr_arena.get(is_not_null_input_node) {
AExpr::Column(_) => Some(AExpr::BinaryExpr {
op: Operator::Lt,
// Left operand: `null_count()` over the same input as `is_not_null()`.
left: expr_arena.add(AExpr::new_null_count(input)),
// Right operand: the `len()` of the rewrite above, built as a count over the
// column. NOTE(review): presumably `true` makes the count include nulls — confirm.
right: expr_arena.add(AExpr::Agg(IRAggExpr::Count(
is_not_null_input_node,
true,
))),
}),
// Input is not a plain column: leave the expression unchanged.
_ => None,
}
} else {
None
}
},
// `any()` over anything else: nothing to simplify here.
_ => None,
}
},
// Rewrite `all()` over a null-check into a comparison on `null_count()`:
//   is_null().all()     -> null_count() == len()
//   is_not_null().all() -> null_count() == 0
// `ignore_nulls` is not inspected by this arm.
FunctionExpr::Boolean(BooleanFunction::All { ignore_nulls: _ }) => {
let input_node = expr_arena.get(input[0].node());
match input_node {
AExpr::Function {
input,
function: FunctionExpr::Boolean(BooleanFunction::IsNull),
options: _,
} => {
// Only rewrite when `is_null()` is applied directly to a single column.
// The rewritten expression references the input in both operands, so with CSE
// disabled any other input could cause a performance regression.
if input.len() == 1 {
let is_null_input_node = input[0].node();
match expr_arena.get(is_null_input_node) {
AExpr::Column(_) => Some(AExpr::BinaryExpr {
op: Operator::Eq,
// Right operand: `null_count()` over the same input as `is_null()`.
right: expr_arena.add(AExpr::new_null_count(input)),
// Left operand: the `len()` of the rewrite above, built as a count over the
// column. NOTE(review): presumably `true` makes the count include nulls — confirm.
left: expr_arena
.add(AExpr::Agg(IRAggExpr::Count(is_null_input_node, true))),
}),
// Input is not a plain column: leave the expression unchanged.
_ => None,
}
} else {
None
}
},
// `is_not_null().all()` is replaced unconditionally: the result references the
// inputs of `is_not_null()` only once and compares against a literal zero.
AExpr::Function {
input,
function: FunctionExpr::Boolean(BooleanFunction::IsNotNull),
options: _,
} => Some(AExpr::BinaryExpr {
left: expr_arena.add(AExpr::new_null_count(input)),
op: Operator::Eq,
right: expr_arena.add(AExpr::Literal(LiteralValue::new_idxsize(0))),
}),
// `all()` over anything else: nothing to simplify here.
_ => None,
}
},
// sort().reverse() -> sort(reverse)
// sort_by().reverse() -> sort_by(reverse)
FunctionExpr::Reverse => {
Expand Down
Loading

0 comments on commit 8b65fe3

Please sign in to comment.