From fa809afcf693e0c272af28f2b6c91e5dac1e9079 Mon Sep 17 00:00:00 2001 From: coastalwhite Date: Sat, 3 Aug 2024 15:34:04 +0200 Subject: [PATCH] fix: integer overflow --- .../src/arrow/write/dictionary.rs | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/crates/polars-parquet/src/arrow/write/dictionary.rs b/crates/polars-parquet/src/arrow/write/dictionary.rs index 2b30161bceba6..6e2dec223b62c 100644 --- a/crates/polars-parquet/src/arrow/write/dictionary.rs +++ b/crates/polars-parquet/src/arrow/write/dictionary.rs @@ -30,9 +30,8 @@ use crate::parquet::statistics::ParquetStatistics; use crate::parquet::CowBuffer; use crate::write::DynIter; -trait MinMaxThreshold: std::cmp::Ord { +trait MinMaxThreshold { const DELTA_THRESHOLD: Self; - fn clamp(self, min: Self, max: Self) -> Self; } macro_rules! minmaxthreshold_impls { @@ -40,10 +39,6 @@ macro_rules! minmaxthreshold_impls { $( impl MinMaxThreshold for $t { const DELTA_THRESHOLD: Self = $threshold; - #[inline(always)] - fn clamp(self, min: Self, max: Self) -> Self { - std::cmp::Ord::clamp(self, min, max + 1) - } } )+ }; @@ -65,7 +60,12 @@ fn min_max_integer_encode_as_dictionary_optional<'a, E, T>( ) -> Option> where E: std::fmt::Debug, - T: NativeType + MinMaxThreshold + TryInto + num_traits::WrappingSub, + T: NativeType + + MinMaxThreshold + + std::cmp::Ord + + TryInto + + std::ops::Sub + + num_traits::CheckedSub, std::ops::RangeInclusive: Iterator, PrimitiveArray: MinMaxKernel = T>, { @@ -74,8 +74,8 @@ where array.as_any().downcast_ref().unwrap(), )?; - debug_assert!(max >= min); - if max.wrapping_sub(&min) > T::DELTA_THRESHOLD { + debug_assert!(max >= min, "{max} >= {min}"); + if !max.checked_sub(&min).is_some_and(|v| v <= T::DELTA_THRESHOLD) { return None; } @@ -84,7 +84,6 @@ where let values = PrimitiveArray::new(DT::from(T::PRIMITIVE), (min..=max).collect(), None); let values = Box::new(values); - let delta = max.wrapping_sub(&min); let keys: Buffer = array .as_any() .downcast_ref::>() @@ -97,7 +96,7 @@ where // clamp the values to between the min and max value. This way, they will still // be valid dictionary keys. This is mostly to make the // unwrap_unchecked_release not produce any unsafety. - MinMaxThreshold::clamp(v.wrapping_sub(&min), T::zeroed(), delta) + (*v.clamp(&min, &max) - min) .try_into() .unwrap_unchecked_release() })