Skip to content

Commit

Permalink
fix: integer overflow
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Aug 3, 2024
1 parent 8262bb1 commit fa809af
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions crates/polars-parquet/src/arrow/write/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,15 @@ use crate::parquet::statistics::ParquetStatistics;
use crate::parquet::CowBuffer;
use crate::write::DynIter;

trait MinMaxThreshold: std::cmp::Ord {
trait MinMaxThreshold {
const DELTA_THRESHOLD: Self;
fn clamp(self, min: Self, max: Self) -> Self;
}

macro_rules! minmaxthreshold_impls {
($($t:ty => $threshold:literal,)+) => {
$(
impl MinMaxThreshold for $t {
const DELTA_THRESHOLD: Self = $threshold;
#[inline(always)]
fn clamp(self, min: Self, max: Self) -> Self {
std::cmp::Ord::clamp(self, min, max + 1)
}
}
)+
};
Expand All @@ -65,7 +60,12 @@ fn min_max_integer_encode_as_dictionary_optional<'a, E, T>(
) -> Option<DictionaryArray<u32>>
where
E: std::fmt::Debug,
T: NativeType + MinMaxThreshold + TryInto<u32, Error = E> + num_traits::WrappingSub,
T: NativeType
+ MinMaxThreshold
+ std::cmp::Ord
+ TryInto<u32, Error = E>
+ std::ops::Sub<T, Output = T>
+ num_traits::CheckedSub,
std::ops::RangeInclusive<T>: Iterator<Item = T>,
PrimitiveArray<T>: MinMaxKernel<Scalar<'a> = T>,
{
Expand All @@ -74,8 +74,8 @@ where
array.as_any().downcast_ref().unwrap(),
)?;

debug_assert!(max >= min);
if max.wrapping_sub(&min) > T::DELTA_THRESHOLD {
debug_assert!(max >= min, "{max} >= {min}");
if !max.checked_sub(&min).is_some_and(|v| v <= T::DELTA_THRESHOLD) {
return None;
}

Expand All @@ -84,7 +84,6 @@ where
let values = PrimitiveArray::new(DT::from(T::PRIMITIVE), (min..=max).collect(), None);
let values = Box::new(values);

let delta = max.wrapping_sub(&min);
let keys: Buffer<u32> = array
.as_any()
.downcast_ref::<PrimitiveArray<T>>()
Expand All @@ -97,7 +96,7 @@ where
// clamp the values to between the min and max value. This way, they will still
// be valid dictionary keys. This is mostly to make the
// unwrap_unchecked_release not produce any unsafety.
MinMaxThreshold::clamp(v.wrapping_sub(&min), T::zeroed(), delta)
(*v.clamp(&min, &max) - min)
.try_into()
.unwrap_unchecked_release()
})
Expand Down

0 comments on commit fa809af

Please sign in to comment.