Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(rust, python): raise error on ambiguous filter predicates #7265

Merged
merged 1 commit into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions polars/polars-lazy/polars-plan/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ use crate::logical_plan::projection::{is_regex_projection, rewrite_projections};
use crate::logical_plan::schema::{det_join_schema, FileInfo};
use crate::prelude::*;
use crate::utils;
use crate::utils::{combine_predicates_expr, has_expr};

pub(crate) fn prepare_projection(
exprs: Vec<Expr>,
Expand Down Expand Up @@ -428,18 +427,43 @@ impl LogicalPlanBuilder {
_ => false,
}) {
let schema = try_delayed!(self.0.schema(), &self.0, into);
let rewritten = try_delayed!(
let mut rewritten = try_delayed!(
rewrite_projections(vec![predicate], &schema, &[]),
&self.0,
into
);
if rewritten.is_empty() {
let msg = "The predicate expanded to zero expressions. \
match rewritten.len() {
1 => {
// all good
rewritten.pop().unwrap()
}
0 => {
let msg = "The predicate expanded to zero expressions. \
This may for example be caused by a regex not matching column names or \
a column dtype match not hitting any dtypes in the DataFrame";
return raise_err!(PolarsError::ComputeError(msg.into()), &self.0, into);
return raise_err!(PolarsError::ComputeError(msg.into()), &self.0, into);
}
_ => {
let mut expanded = String::new();
for e in rewritten.iter().take(5) {
expanded.push_str(&format!("\t{e},\n"))
}
// pop latest comma
expanded.pop();
if rewritten.len() > 5 {
expanded.push_str("\t...\n")
}

let msg = if cfg!(feature = "python") {
format!("The predicate passed to 'filter' expanded to multiple expressions: \n\n{expanded}\n\
This is ambiguous. Try to combine the predicates with the 'all' or `any' expression.")
} else {
format!("The predicate passed to 'filter' expanded to multiple expressions: \n\n{expanded}\n\
This is ambiguous. Try to combine the predicates with the 'all_exprs' or `any_exprs' expression.")
};
return raise_err!(PolarsError::ComputeError(msg.into()), &self.0, into);
}
}
combine_predicates_expr(rewritten.into_iter())
} else {
predicate
};
Expand Down
12 changes: 8 additions & 4 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ use polars_plan::global::FETCH_ROWS;
#[cfg(any(feature = "ipc", feature = "parquet", feature = "csv-file"))]
use polars_plan::logical_plan::collect_fingerprints;
use polars_plan::logical_plan::optimize;
use polars_plan::utils::{combine_predicates_expr, expr_to_leaf_column_names};
use polars_plan::utils::expr_to_leaf_column_names;

use crate::physical_plan::executors::Executor;
use crate::physical_plan::planner::create_physical_plan;
Expand Down Expand Up @@ -1012,10 +1012,14 @@ impl LazyFrame {
/// Equal to `LazyFrame::filter(col("*").is_not_null())`
pub fn drop_nulls(self, subset: Option<Vec<Expr>>) -> LazyFrame {
match subset {
None => self.filter(col("*").is_not_null()),
None => self.filter(all_exprs([col("*").is_not_null()])),
Some(subset) => {
let it = subset.into_iter().map(|e| e.is_not_null());
let predicate = combine_predicates_expr(it);
let predicate = all_exprs(
subset
.into_iter()
.map(|e| e.is_not_null())
.collect::<Vec<_>>(),
);
self.filter(predicate)
}
}
Expand Down
26 changes: 0 additions & 26 deletions polars/tests/it/lazy/predicate_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,32 +44,6 @@ fn filter_true_lit() -> PolarsResult<()> {
Ok(())
}

/// Filters a two-column frame with one predicate built from a multi-column
/// selector and checks that only the last row is kept.
#[test]
fn test_combine_columns_in_filter() -> PolarsResult<()> {
    let input = df![
        "a" => [1, 2, 3],
        "b" => [None, Some("a"), Some("b")]
    ]?;

    // A single expression over both columns: cast to Utf8, then compare with "2".
    let selected = cols(vec!["a".to_string(), "b".to_string()]);
    let predicate = selected.cast(DataType::Utf8).gt(lit("2"));

    let filtered = input.lazy().filter(predicate).collect()?;

    let expected = df![
        "a" => [3],
        "b" => ["b"],
    ]?;

    // "b" > "2" == true, so the row (3, "b") is the one expected to remain.
    assert!(filtered.frame_equal(&expected));
    Ok(())
}

fn create_n_filters(col_name: &str, num_filters: usize) -> Vec<Expr> {
(0..num_filters)
.into_iter()
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,12 @@ def test_file_path_truncate_err() -> None:
match=r"\.\.\.42jfdksl32jfdksl22jfdksl12jfdksl02jfdksl91jfdksl81jfdksl71jfdksl61jfdksl51jfdksl41jfdksl",
):
pl.read_csv(content)


def test_ambiguous_filter_err() -> None:
    """Filtering with a multi-column selector must raise ``ComputeError``."""
    frame = pl.DataFrame({"a": [None, "2", "3"], "b": [None, None, "z"]})
    expected_msg = r"The predicate passed to 'filter' expanded to multiple expressions"

    # ``pl.col(["a", "b"])`` names two columns, so the predicate is not a single expression.
    with pytest.raises(pl.ComputeError, match=expected_msg):
        frame.filter(pl.col(["a", "b"]).is_null())