Skip to content

Commit

Permalink
Auto merge of rust-lang#81126 - oxalica:retain-early-drop, r=m-ou-se
Browse files Browse the repository at this point in the history
Optimize Vec::retain

Use `copy_nonoverlapping` instead of `swap` to reduce memory writes, as was already done in rust-lang#44355 and `String::retain`.
rust-lang#48065 already attempted this optimization, but it was reverted in rust-lang#67300 due to bad codegen of `DrainFilter::drop`.

This PR re-implements the drop-then-move approach. I ran a [benchmark](https://gist.github.com/oxalica/3360eec9376f22533fcecff02798b698) on small-no-drop, small-need-drop, and large-no-drop elements with different predicate functions. It turns out that the new implementation is >20% faster on average in almost all cases. Only 2/24 cases are slower, by 3% and 5%. See the link above for more detail.

I think the regression in the may-panic cases is due to the drop guard preventing some optimizations. If it were permitted to leak elements when the predicate function or an element's `drop` panics, the new implementation should almost always be faster than the current one.
I'm not sure whether we should leak on panic, since there is indeed an issue (rust-lang#52267) that complained about it before.
  • Loading branch information
bors committed Feb 11, 2021
2 parents 9ce7268 + 2a11c57 commit 1efd804
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 11 deletions.
75 changes: 64 additions & 11 deletions library/alloc/src/vec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1399,22 +1399,75 @@ impl<T, A: Allocator> Vec<T, A> {
where
F: FnMut(&T) -> bool,
{
let len = self.len();
let mut del = 0;
{
let v = &mut **self;
let original_len = self.len();
// Avoid double drop if the drop guard is not executed,
// since we may make some holes during the process.
unsafe { self.set_len(0) };

// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
// |<- processed len ->| ^- next to check
// |<- deleted cnt ->|
// |<- original_len ->|
// Kept: Elements which predicate returns true on.
// Hole: Moved or dropped element slot.
// Unchecked: Unchecked valid elements.
//
// This drop guard will be invoked when predicate or `drop` of element panicked.
// It shifts unchecked elements to cover holes and `set_len` to the correct length.
// In cases when predicate and `drop` never panic, it will be optimized out.
struct BackshiftOnDrop<'a, T, A: Allocator> {
v: &'a mut Vec<T, A>,
processed_len: usize,
deleted_cnt: usize,
original_len: usize,
}

for i in 0..len {
if !f(&v[i]) {
del += 1;
} else if del > 0 {
v.swap(i - del, i);
impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
fn drop(&mut self) {
if self.deleted_cnt > 0 {
// SAFETY: Trailing unchecked items must be valid since we never touch them.
unsafe {
ptr::copy(
self.v.as_ptr().add(self.processed_len),
self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
self.original_len - self.processed_len,
);
}
}
// SAFETY: After filling holes, all items are in contiguous memory.
unsafe {
self.v.set_len(self.original_len - self.deleted_cnt);
}
}
}
if del > 0 {
self.truncate(len - del);

let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };

while g.processed_len < original_len {
// SAFETY: Unchecked element must be valid.
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
if !f(cur) {
// Advance early to avoid double drop if `drop_in_place` panicked.
g.processed_len += 1;
g.deleted_cnt += 1;
// SAFETY: We never touch this element again after dropped.
unsafe { ptr::drop_in_place(cur) };
// We already advanced the counter.
continue;
}
if g.deleted_cnt > 0 {
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
// We use copy for move, and never touch this element again.
unsafe {
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
ptr::copy_nonoverlapping(cur, hole_slot, 1);
}
}
g.processed_len += 1;
}

// All items are processed. This can be optimized to `set_len` by LLVM.
drop(g);
}

/// Removes all but the first of consecutive elements in the vector that resolve to the same
Expand Down
61 changes: 61 additions & 0 deletions library/alloc/tests/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,67 @@ fn test_retain() {
assert_eq!(vec, [2, 4]);
}

#[test]
fn test_retain_pred_panic_with_hole() {
    // Scenario: the predicate panics on element 3, after element 1 has already
    // been rejected, i.e. after `retain` has opened a hole in the buffer.
    let shared: Vec<_> = (0..5).map(Rc::new).collect();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        // Operate on a clone so each `Rc` has a second owner while `retain` runs.
        let mut cloned = shared.clone();
        cloned.retain(|rc| match **rc {
            0 | 2 => true,
            1 => false,
            _ => panic!(),
        });
    }));
    // The closure must have unwound.
    outcome.unwrap_err();
    // No clone may survive the panic: `shared` has to be the sole owner again.
    assert!(shared.iter().all(|rc| Rc::strong_count(rc) == 1));
}

#[test]
fn test_retain_pred_panic_no_hole() {
    // Scenario: the predicate keeps elements 0, 1 and 2 and then panics on
    // element 3, so no element has been removed when unwinding starts.
    let shared: Vec<_> = (0..5).map(Rc::new).collect();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        // Operate on a clone so each `Rc` has a second owner while `retain` runs.
        let mut cloned = shared.clone();
        cloned.retain(|rc| match **rc {
            0..=2 => true,
            _ => panic!(),
        });
    }));
    // The closure must have unwound.
    outcome.unwrap_err();
    // No clone may survive the panic: `shared` has to be the sole owner again.
    assert!(shared.iter().all(|rc| Rc::strong_count(rc) == 1));
}

#[test]
fn test_retain_drop_panic() {
    // Wrapper whose destructor panics when the wrapped value is 3.
    struct Wrap(Rc<i32>);

    impl Drop for Wrap {
        fn drop(&mut self) {
            if *self.0 == 3 {
                panic!();
            }
        }
    }

    let shared: Vec<_> = (0..5).map(Rc::new).collect();
    let outcome = catch_unwind(AssertUnwindSafe(|| {
        let mut wrapped: Vec<_> = shared.iter().cloned().map(Wrap).collect();
        // Reject elements 1 and 3 and keep the rest; dropping the rejected
        // element 3 is what raises the panic.
        wrapped.retain(|w| !matches!(*w.0, 1 | 3));
    }));
    // The closure must have unwound.
    outcome.unwrap_err();
    // Every wrapper, including the one whose `drop` panicked, must have
    // released its `Rc`, leaving `shared` as the sole owner of each value.
    assert!(shared.iter().all(|rc| Rc::strong_count(rc) == 1));
}

#[test]
fn test_dedup() {
fn case(a: Vec<i32>, b: Vec<i32>) {
Expand Down

0 comments on commit 1efd804

Please sign in to comment.