From 83b475e6ee8db1d9dc7a75a2e0ecb46f6352135e Mon Sep 17 00:00:00 2001 From: Valentin Lorentz Date: Mon, 10 Jun 2024 22:18:08 +0200 Subject: [PATCH] Add a test for Bloom Filters written at the end --- parquet/examples/write_parquet.rs | 1 + parquet/src/arrow/arrow_writer/mod.rs | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/parquet/examples/write_parquet.rs b/parquet/examples/write_parquet.rs index 06259ee5ad04..22f9028ba532 100644 --- a/parquet/examples/write_parquet.rs +++ b/parquet/examples/write_parquet.rs @@ -34,6 +34,7 @@ fn main() -> Result<()> { let properties = WriterProperties::builder() .set_column_bloom_filter_enabled("id".into(), true) .set_column_encoding("id".into(), Encoding::DELTA_BINARY_PACKED) + .set_bloom_filter_position(BloomFilterPosition::End) .build(); let schema = Arc::new(Schema::new(vec![Field::new("id", UInt64, false)])); // Create parquet file that will be read. diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 74186dcaca6b..da24b5e24022 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -1689,6 +1689,7 @@ mod tests { values: ArrayRef, schema: SchemaRef, bloom_filter: bool, + bloom_filter_position: BloomFilterPosition, } impl RoundTripOptions { @@ -1699,6 +1700,7 @@ mod tests { values, schema: Arc::new(schema), bloom_filter: false, + bloom_filter_position: BloomFilterPosition::AfterRowGroup, } } } @@ -1718,6 +1720,7 @@ mod tests { values, schema, bloom_filter, + bloom_filter_position, } = options; let encodings = match values.data_type() { @@ -1758,7 +1761,7 @@ mod tests { .set_dictionary_page_size_limit(dictionary_size.max(1)) .set_encoding(*encoding) .set_bloom_filter_enabled(bloom_filter) - .set_bloom_filter_position(BloomFilterPosition::AfterRowGroup) + .set_bloom_filter_position(bloom_filter_position) .build(); files.push(roundtrip_opts(&expected_batch, props)) @@ -2106,6 +2109,22 @@ mod tests { values_required::(many_vecs_iter); } + #[test] + fn i32_column_bloom_filter_at_end() { + let array = Arc::new(Int32Array::from_iter(0..SMALL_SIZE as i32)); + let mut options = RoundTripOptions::new(array, false); + options.bloom_filter = true; + options.bloom_filter_position = BloomFilterPosition::End; + + let files = one_column_roundtrip_with_options(options); + check_bloom_filter( + files, + "col".to_string(), + (0..SMALL_SIZE as i32).collect(), + (SMALL_SIZE as i32 + 1..SMALL_SIZE as i32 + 10).collect(), + ); + } + #[test] fn i32_column_bloom_filter() { let array = Arc::new(Int32Array::from_iter(0..SMALL_SIZE as i32));