Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Throw bigidx error for Parquet row-count #18154

Merged
merged 3 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/arithmetic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ pub(crate) mod test {
let mut a1 = Int32Chunked::new("a", &[1, 2, 3]);
let a2 = Int32Chunked::new("a", &[4, 5, 6]);
let a3 = Int32Chunked::new("a", &[1, 2, 3, 4, 5, 6]);
a1.append(&a2);
a1.append(&a2).unwrap();
(a1, a3)
}

Expand Down
6 changes: 4 additions & 2 deletions crates/polars-core/src/chunked_array/comparison/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ mod test {
let mut a1 = Int32Chunked::new("a", &[1, 2, 3]);
let a2 = Int32Chunked::new("a", &[4, 5, 6]);
let a3 = Int32Chunked::new("a", &[1, 2, 3, 4, 5, 6]);
a1.append(&a2);
a1.append(&a2).unwrap();
(a1, a3)
}

Expand Down Expand Up @@ -1049,7 +1049,9 @@ mod test {
let a2: Int32Chunked = [Some(1), Some(2), Some(3)].iter().copied().collect();

let mut a2_2chunks: Int32Chunked = [Some(1), Some(2)].iter().copied().collect();
a2_2chunks.append(&[Some(3)].iter().copied().collect());
a2_2chunks
.append(&[Some(3)].iter().copied().collect())
.unwrap();

assert_eq!(
a1.equal(&a2).into_iter().collect::<Vec<_>>(),
Expand Down
16 changes: 8 additions & 8 deletions crates/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ mod test {
fn out_of_bounds() {
let mut a = UInt32Chunked::from_slice("a", &[1, 2, 3]);
let b = UInt32Chunked::from_slice("a", &[1, 2, 3]);
a.append(&b);
a.append(&b).unwrap();

let v = a.into_iter().collect::<Vec<_>>();
assert_eq!(
Expand Down Expand Up @@ -624,7 +624,7 @@ mod test {
fn $test_name() {
let mut a = <$ca_type>::from_slice("test", &[$first_val, $second_val]);
let a_b = <$ca_type>::from_slice("", &[$third_val]);
a.append(&a_b);
a.append(&a_b).unwrap();

// normal iterator
let mut it = a.into_iter();
Expand Down Expand Up @@ -687,7 +687,7 @@ mod test {
fn $test_name() {
let mut a = <$ca_type>::new("test", &[$first_val, $second_val]);
let a_b = <$ca_type>::new("", &[$third_val]);
a.append(&a_b);
a.append(&a_b).unwrap();

// normal iterator
let mut it = a.into_iter();
Expand Down Expand Up @@ -841,7 +841,7 @@ mod test {
fn $test_name() {
let mut a = <$ca_type>::from_slice("test", &[$first_val, $second_val]);
let a_b = <$ca_type>::from_slice("", &[$third_val]);
a.append(&a_b);
a.append(&a_b).unwrap();

// normal iterator
let mut it = a.into_no_null_iter();
Expand Down Expand Up @@ -960,14 +960,14 @@ mod test {
impl_test_iter_skip!(utf8_iter_many_chunk_skip, 18, Some("0"), Some("9"), {
let mut a = StringChunked::from_slice("test", &generate_utf8_vec(SKIP_ITERATOR_SIZE));
let a_b = StringChunked::from_slice("test", &generate_utf8_vec(SKIP_ITERATOR_SIZE));
a.append(&a_b);
a.append(&a_b).unwrap();
a
});

impl_test_iter_skip!(utf8_iter_many_chunk_null_check_skip, 18, Some("0"), None, {
let mut a = StringChunked::new("test", &generate_opt_utf8_vec(SKIP_ITERATOR_SIZE));
let a_b = StringChunked::new("test", &generate_opt_utf8_vec(SKIP_ITERATOR_SIZE));
a.append(&a_b);
a.append(&a_b).unwrap();
a
});

Expand Down Expand Up @@ -997,14 +997,14 @@ mod test {
impl_test_iter_skip!(bool_iter_many_chunk_skip, 18, Some(true), Some(false), {
let mut a = BooleanChunked::from_slice("test", &generate_boolean_vec(SKIP_ITERATOR_SIZE));
let a_b = BooleanChunked::from_slice("test", &generate_boolean_vec(SKIP_ITERATOR_SIZE));
a.append(&a_b);
a.append(&a_b).unwrap();
a
});

impl_test_iter_skip!(bool_iter_many_chunk_null_check_skip, 18, None, None, {
let mut a = BooleanChunked::new("test", &generate_opt_boolean_vec(SKIP_ITERATOR_SIZE));
let a_b = BooleanChunked::new("test", &generate_opt_boolean_vec(SKIP_ITERATOR_SIZE));
a.append(&a_b);
a.append(&a_b).unwrap();
a
});
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ struct CategoricalAppend;
impl CategoricalMergeOperation for CategoricalAppend {
fn finish(self, lhs: &UInt32Chunked, rhs: &UInt32Chunked) -> PolarsResult<UInt32Chunked> {
let mut lhs_mut = lhs.clone();
lhs_mut.append(rhs);
lhs_mut.append(rhs)?;
Ok(lhs_mut)
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,7 @@ pub(crate) mod test {
fn slice() {
let mut first = UInt32Chunked::new("first", &[0, 1, 2]);
let second = UInt32Chunked::new("second", &[3, 4, 5]);
first.append(&second);
first.append(&second).unwrap();
assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
Expand Down
33 changes: 26 additions & 7 deletions crates/polars-core/src/chunked_array/ops/append.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use polars_error::constants::LENGTH_LIMIT_MSG;

use crate::prelude::*;
use crate::series::IsSorted;

Expand Down Expand Up @@ -136,12 +138,16 @@ where
/// Append in place. This is done by adding the chunks of `other` to this [`ChunkedArray`].
///
/// See also [`extend`](Self::extend) for appends to the underlying memory
pub fn append(&mut self, other: &Self) {
pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
update_sorted_flag_before_append::<T>(self, other);
let len = self.len();
self.length += other.length;
self.length = self
.length
.checked_add(other.length)
.ok_or(polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
self.null_count += other.null_count;
new_chunks(&mut self.chunks, &other.chunks, len);
Ok(())
}
}

Expand All @@ -152,7 +158,10 @@ impl ListChunked {
self.field = Arc::new(Field::new(self.name(), dtype));

let len = self.len();
self.length += other.length;
self.length = self
.length
.checked_add(other.length)
.ok_or(polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
self.null_count += other.null_count;
new_chunks(&mut self.chunks, &other.chunks, len);
self.set_sorted_flag(IsSorted::Not);
Expand All @@ -172,7 +181,10 @@ impl ArrayChunked {

let len = self.len();

self.length += other.length;
self.length = self
.length
.checked_add(other.length)
.ok_or(polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
self.null_count += other.null_count;

new_chunks(&mut self.chunks, &other.chunks, len);
Expand All @@ -190,7 +202,10 @@ impl StructChunked {

let len = self.len();

self.length += other.length;
self.length = self
.length
.checked_add(other.length)
.ok_or(polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
self.null_count += other.null_count;

new_chunks(&mut self.chunks, &other.chunks, len);
Expand All @@ -202,11 +217,15 @@ impl StructChunked {
#[cfg(feature = "object")]
#[doc(hidden)]
impl<T: PolarsObject> ObjectChunked<T> {
pub fn append(&mut self, other: &Self) {
pub fn append(&mut self, other: &Self) -> PolarsResult<()> {
let len = self.len();
self.length += other.length;
self.length = self
.length
.checked_add(other.length)
.ok_or(polars_err!(ComputeError: LENGTH_LIMIT_MSG))?;
self.null_count += other.null_count;
self.set_sorted_flag(IsSorted::Not);
new_chunks(&mut self.chunks, &other.chunks, len);
Ok(())
}
}
42 changes: 25 additions & 17 deletions crates/polars-core/src/chunked_array/ops/extend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ where
/// Prefer `append` over `extend` when you want to append many times before doing a query. For instance
/// when you read in multiple files and when to store them in a single `DataFrame`.
/// In the latter case finish the sequence of `append` operations with a [`rechunk`](Self::rechunk).
pub fn extend(&mut self, other: &Self) {
pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
update_sorted_flag_before_append::<T>(self, other);
// all to a single chunk
if self.chunks.len() > 1 {
self.append(other);
self.append(other)?;
*self = self.rechunk();
return;
return Ok(());
}
// Depending on the state of the underlying arrow array we
// might be able to get a `MutablePrimitiveArray`
Expand Down Expand Up @@ -84,42 +84,43 @@ where
}
}
self.compute_len();
Ok(())
}
}

#[doc(hidden)]
impl StringChunked {
pub fn extend(&mut self, other: &Self) {
pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
self.set_sorted_flag(IsSorted::Not);
self.append(other)
}
}

#[doc(hidden)]
impl BinaryChunked {
pub fn extend(&mut self, other: &Self) {
pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
self.set_sorted_flag(IsSorted::Not);
self.append(other)
}
}

#[doc(hidden)]
impl BinaryOffsetChunked {
pub fn extend(&mut self, other: &Self) {
pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
self.set_sorted_flag(IsSorted::Not);
self.append(other)
}
}

#[doc(hidden)]
impl BooleanChunked {
pub fn extend(&mut self, other: &Self) {
pub fn extend(&mut self, other: &Self) -> PolarsResult<()> {
update_sorted_flag_before_append::<BooleanType>(self, other);
// make sure that we are a single chunk already
if self.chunks.len() > 1 {
self.append(other);
self.append(other)?;
*self = self.rechunk();
return;
return Ok(());
}
let arr = self.downcast_iter().next().unwrap();

Expand Down Expand Up @@ -148,6 +149,7 @@ impl BooleanChunked {
}
self.compute_len();
self.set_sorted_flag(IsSorted::Not);
Ok(())
}
}

Expand Down Expand Up @@ -189,7 +191,7 @@ mod test {

#[test]
#[allow(clippy::redundant_clone)]
fn test_extend_primitive() {
fn test_extend_primitive() -> PolarsResult<()> {
// create a vec with overcapacity, so that we do not trigger a realloc
// this allows us to test if the mutation was successful

Expand All @@ -199,38 +201,44 @@ mod test {
let location = ca.cont_slice().unwrap().as_ptr() as usize;
let to_append = Int32Chunked::new("a", &[4, 5, 6]);

ca.extend(&to_append);
ca.extend(&to_append)?;
let location2 = ca.cont_slice().unwrap().as_ptr() as usize;
assert_eq!(location, location2);
assert_eq!(ca.cont_slice().unwrap(), [1, 2, 3, 4, 5, 6]);

// now check if it succeeds if we cannot do this with a mutable.
let _temp = ca.chunks.clone();
ca.extend(&to_append);
ca.extend(&to_append)?;
let location2 = ca.cont_slice().unwrap().as_ptr() as usize;
assert_ne!(location, location2);
assert_eq!(ca.cont_slice().unwrap(), [1, 2, 3, 4, 5, 6, 4, 5, 6]);

Ok(())
}

#[test]
fn test_extend_string() {
fn test_extend_string() -> PolarsResult<()> {
let mut ca = StringChunked::new("a", &["a", "b", "c"]);
let to_append = StringChunked::new("a", &["a", "b", "e"]);

ca.extend(&to_append);
ca.extend(&to_append)?;
assert_eq!(ca.len(), 6);
let vals = ca.into_no_null_iter().collect::<Vec<_>>();
assert_eq!(vals, ["a", "b", "c", "a", "b", "e"])
assert_eq!(vals, ["a", "b", "c", "a", "b", "e"]);

Ok(())
}

#[test]
fn test_extend_bool() {
fn test_extend_bool() -> PolarsResult<()> {
let mut ca = BooleanChunked::new("a", [true, false]);
let to_append = BooleanChunked::new("a", &[false, false]);

ca.extend(&to_append);
ca.extend(&to_append)?;
assert_eq!(ca.len(), 4);
let vals = ca.into_no_null_iter().collect::<Vec<_>>();
assert_eq!(vals, [true, false, false, false]);

Ok(())
}
}
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/ops/shift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ macro_rules! impl_shift_fill {
};

if $periods < 0 {
slice.append(&fill);
slice.append(&fill).unwrap();
slice
} else {
fill.append(&slice);
fill.append(&slice).unwrap();
fill
}
}};
Expand Down
3 changes: 1 addition & 2 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2391,7 +2391,6 @@ impl DataFrame {
#[must_use]
pub fn shift(&self, periods: i64) -> Self {
let col = self._apply_columns_par(&|s| s.shift(periods));

unsafe { DataFrame::new_no_checks(col) }
}

Expand Down Expand Up @@ -2816,7 +2815,7 @@ impl DataFrame {
let mut iter = cas.into_iter();
let mut acc_ca = iter.next().unwrap();
for ca in iter {
acc_ca.append(&ca);
acc_ca.append(&ca)?;
}
Ok(acc_ca.rechunk())
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/series/implementations/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,13 @@ impl SeriesTrait for SeriesWrap<BinaryChunked> {
fn append(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), append);
// todo! add object
self.0.append(other.as_ref().as_ref());
self.0.append(other.as_ref().as_ref())?;
Ok(())
}

fn extend(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), extend);
self.0.extend(other.as_ref().as_ref());
self.0.extend(other.as_ref().as_ref())?;
Ok(())
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ impl SeriesTrait for SeriesWrap<BinaryOffsetChunked> {
fn append(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), append);
// todo! add object
self.0.append(other.as_ref().as_ref());
self.0.append(other.as_ref().as_ref())?;
Ok(())
}

fn extend(&mut self, other: &Series) -> PolarsResult<()> {
polars_ensure!(self.0.dtype() == other.dtype(), extend);
self.0.extend(other.as_ref().as_ref());
self.0.extend(other.as_ref().as_ref())?;
Ok(())
}

Expand Down
Loading