Skip to content

Commit

Permalink
Minor: Add tests for using FilterExec when parquet was pushed down
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Sep 6, 2024
1 parent cad4146 commit 782509b
Showing 1 changed file with 59 additions and 0 deletions.
59 changes: 59 additions & 0 deletions datafusion/sqllogictest/test_files/parquet.slt
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,65 @@ SELECT id, CAST(string_col AS varchar) FROM alltypes_plain
0 0
1 1

# Query with filtering on a column (filter pushdown not enabled).
# `rowsort` makes the comparison independent of the order in which rows are
# returned: the query has no ORDER BY, and the physical plan for it
# repartitions the scan output (RoundRobinBatch), so row order is not
# guaranteed.
query I rowsort
select id from alltypes_plain where id > 3;
----
4
5
6
7

# When filter pushdown is not enabled, ParquetExec only filters based on
# metadata (see the `pruning_predicate` on the ParquetExec line below), so a
# FilterExec is still required above the scan to filter the rows output by
# the `ParquetExec`. The expected plan therefore contains both a FilterExec
# and a ParquetExec carrying the same `id@0 > 3` predicate.
query TT
EXPLAIN SELECT id FROM alltypes_plain WHERE id > 3;
----
logical_plan
01)Filter: alltypes_plain.id > Int32(3)
02)--TableScan: alltypes_plain projection=[id], partial_filters=[alltypes_plain.id > Int32(3)]
physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: id@0 > 3
03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id], predicate=id@0 > 3, pruning_predicate=CASE WHEN id_null_count@1 = id_row_count@2 THEN false ELSE id_max@0 > 3 END, required_guarantees=[]


# Enable predicate pushdown into the parquet scan for the statements that
# follow (turned back off further down in this file)
statement ok
set datafusion.execution.parquet.pushdown_filters = true;

# Query with filtering on a column, now with filter pushdown enabled.
# The result must be the same rows as without pushdown.
# `rowsort` makes the comparison independent of the order in which rows are
# returned: the query has no ORDER BY, and the physical plan for it
# repartitions the scan output (RoundRobinBatch), so row order is not
# guaranteed.
query I rowsort
select id from alltypes_plain where id > 3;
----
4
5
6
7

# When filter pushdown *is* enabled, ParquetExec can filter exactly,
# not just on metadata, so the FilterExec should no longer be needed.
# The expected plan below nevertheless still contains a FilterExec; we expect
# it to disappear once https://github.com/apache/datafusion/issues/4028 is fixed.

query TT
EXPLAIN SELECT id FROM alltypes_plain WHERE id > 3;
----
logical_plan
01)Filter: alltypes_plain.id > Int32(3)
02)--TableScan: alltypes_plain projection=[id], partial_filters=[alltypes_plain.id > Int32(3)]
physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: id@0 > 3
03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
04)------ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/parquet-testing/data/alltypes_plain.parquet]]}, projection=[id], predicate=id@0 > 3, pruning_predicate=CASE WHEN id_null_count@1 = id_row_count@2 THEN false ELSE id_max@0 > 3 END, required_guarantees=[]

# Reset pushdown: turn filter pushdown back off so that the statements that
# follow are not affected by the setting enabled above
statement ok
set datafusion.execution.parquet.pushdown_filters = false;


# Clean up: drop the `alltypes_plain` table used by the queries above
statement ok
DROP TABLE alltypes_plain;
Expand Down

0 comments on commit 782509b

Please sign in to comment.