Skip to content

Commit

Permalink
fix: fix project pushdown for double projection contains count (#11843)
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa authored Oct 20, 2023
1 parent a7fdbee commit 0b8be40
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,31 @@ fn is_count(node: Node, expr_arena: &Arena<AExpr>) -> bool {
}
}

/// In this function we check a double projection case
/// df
/// .select(col("foo").alias("bar"))
/// .select(col("bar")
///
/// In this query, bar cannot pass this projection, as it would not exist in DF.
/// THE ORDER IS IMPORTANT HERE!
/// this removes projection names, so any checks to upstream names should
/// be done before this branch.
fn check_double_projection(
expr: &Node,
expr_arena: &mut Arena<AExpr>,
acc_projections: &mut Vec<Node>,
projected_names: &mut PlHashSet<Arc<str>>,
) {
for (_, ae) in (&*expr_arena).iter(*expr) {
if let AExpr::Alias(_, name) = ae {
if projected_names.remove(name) {
acc_projections
.retain(|expr| !aexpr_to_leaf_names(*expr, expr_arena).contains(name));
}
}
}
}

#[allow(clippy::too_many_arguments)]
pub(super) fn process_projection(
proj_pd: &mut ProjectionPushDown,
Expand All @@ -29,6 +54,14 @@ pub(super) fn process_projection(
// simply select the first column
let (first_name, _) = input_schema.try_get_at_index(0)?;
let expr = expr_arena.add(AExpr::Column(Arc::from(first_name.as_str())));
if !acc_projections.is_empty() {
check_double_projection(
&exprs[0],
expr_arena,
&mut acc_projections,
&mut projected_names,
);
}
add_expr_to_accumulated(expr, &mut acc_projections, &mut projected_names, expr_arena);
local_projection.push(exprs[0]);
} else {
Expand All @@ -48,24 +81,7 @@ pub(super) fn process_projection(
continue;
}

// in this branch we check a double projection case
// df
// .select(col("foo").alias("bar"))
// .select(col("bar")
//
// In this query, bar cannot pass this projection, as it would not exist in DF.
// THE ORDER IS IMPORTANT HERE!
// this removes projection names, so any checks to upstream names should
// be done before this branch.
for (_, ae) in (&*expr_arena).iter(*e) {
if let AExpr::Alias(_, name) = ae {
if projected_names.remove(name) {
acc_projections.retain(|expr| {
!aexpr_to_leaf_names(*expr, expr_arena).contains(name)
});
}
}
}
check_double_projection(e, expr_arena, &mut acc_projections, &mut projected_names);
}
// do local as we still need the effect of the projection
// e.g. a projection is more than selecting a column, it can
Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/unit/test_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,3 +320,9 @@ def test_projection_rename_10595() -> None:
assert lf.select("a", "b").rename({"b": "a", "a": "b"}).select(
"a"
).collect().schema == {"a": pl.Float32}


def test_projection_count_11841() -> None:
pl.LazyFrame({"x": 1}).select(records=pl.count()).select(
pl.lit(1).alias("x"), pl.all()
).collect()

0 comments on commit 0b8be40

Please sign in to comment.