From 86bf96291d8222dbec9e07461404ecb189be0a98 Mon Sep 17 00:00:00 2001 From: Pyry Kontio Date: Mon, 16 Dec 2019 04:05:25 +0900 Subject: [PATCH 1/2] Implement split_inclusive for slice and str, an splitting iterator that includes the matched part in the iterated substrings as a terminator. --- src/liballoc/tests/lib.rs | 1 + src/liballoc/tests/slice.rs | 20 +++ src/liballoc/tests/str.rs | 18 +++ src/libcore/slice/mod.rs | 261 +++++++++++++++++++++++++++++++++++- src/libcore/str/mod.rs | 123 +++++++++++++++++ 5 files changed, 422 insertions(+), 1 deletion(-) diff --git a/src/liballoc/tests/lib.rs b/src/liballoc/tests/lib.rs index c1ae67a1a339f..ea75f8903c368 100644 --- a/src/liballoc/tests/lib.rs +++ b/src/liballoc/tests/lib.rs @@ -12,6 +12,7 @@ #![feature(binary_heap_into_iter_sorted)] #![feature(binary_heap_drain_sorted)] #![feature(vec_remove_item)] +#![feature(split_inclusive)] use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 51ddb5e7a4ec6..9e903b16a2621 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -851,6 +851,26 @@ fn test_splitator() { assert_eq!(xs.split(|x| *x == 5).collect::>(), splits); } +#[test] +fn test_splitator_inclusive() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5], &[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 10).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5], &[]]; + assert_eq!(xs.split_inclusive(|_| true).collect::>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + 
assert_eq!(xs.split_inclusive(|x| *x == 5).collect::>(), splits); +} + #[test] fn test_splitnator() { let xs = &[1, 2, 3, 4, 5]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index d3c72615696d5..fc2fcb6e56939 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1247,6 +1247,24 @@ fn test_split_char_iterator_no_trailing() { assert_eq!(split, ["", "Märy häd ä little lämb", "Little lämb"]); } +#[test] +fn test_split_char_iterator_inclusive() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').collect(); + assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n", ""]); + + let uppercase_separated = "SheePSharKTurtlECaT"; + let mut first_char = true; + let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| { + let split = !first_char && c.is_uppercase(); + first_char = split; + split + }).collect(); + assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT", ""]); +} + + #[test] fn test_rsplit() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs index 9b4d201573238..202ffc390e4c3 100644 --- a/src/libcore/slice/mod.rs +++ b/src/libcore/slice/mod.rs @@ -1155,6 +1155,72 @@ impl [T] { SplitMut { v: self, pred, finished: false } } + /// Returns an iterator over subslices separated by elements that match + /// `pred`. The matched element is contained in the end of the previous + /// subslice as a terminator. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let slice = [10, 40, 33, 20]; + /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); + /// + /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); + /// assert_eq!(iter.next().unwrap(), &[20]); + /// assert!(iter.next().is_none()); + /// ``` + /// + /// If the first element is matched, an empty slice will be the first item + /// returned by the iterator. 
Similarly, if the last element in the slice + /// is matched, an empty slice will be the last item returned by the + /// iterator: + /// + /// ``` + /// #![feature(split_inclusive)] + /// let slice = [10, 40, 33]; + /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); + /// + /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); + /// assert_eq!(iter.next().unwrap(), &[]); + /// assert!(iter.next().is_none()); + /// ``` + #[unstable(feature = "split_inclusive", issue = "0")] + #[inline] + pub fn split_inclusive(&self, pred: F) -> SplitInclusive<'_, T, F> + where F: FnMut(&T) -> bool + { + SplitInclusive { + v: self, + pred, + finished: false + } + } + + /// Returns an iterator over mutable subslices separated by elements that + /// match `pred`. The matched element is contained in the previous + /// subslice as a terminator. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let mut v = [10, 40, 30, 20, 60, 50]; + /// + /// for group in v.split_inclusive_mut(|num| *num % 3 == 0) { + /// let terminator_idx = group.len()-1; + /// group[terminator_idx] = 1; + /// } + /// assert_eq!(v, [10, 40, 1, 20, 1, 1]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "0")] + #[inline] + pub fn split_inclusive_mut(&mut self, pred: F) -> SplitInclusiveMut<'_, T, F> + where F: FnMut(&T) -> bool + { + SplitInclusiveMut { v: self, pred, finished: false } + } + /// Returns an iterator over subslices separated by elements that match /// `pred`, starting at the end of the slice and working backwards. /// The matched element is not contained in the subslices. @@ -3675,7 +3741,100 @@ where #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for Split<'_, T, P> where P: FnMut(&T) -> bool {} -/// An iterator over the subslices of the vector which are separated +/// An iterator over subslices separated by elements that match a predicate +/// function. 
Unlike `Split`, it contains the matched part as a terminator +/// of the subslice. +/// +/// This struct is created by the [`split_inclusive`] method on [slices]. +/// +/// [`split_inclusive`]: ../../std/primitive.slice.html#method.split_inclusive +/// [slices]: ../../std/primitive.slice.html +#[unstable(feature = "split_inclusive", issue = "0")] +pub struct SplitInclusive<'a, T:'a, P> where P: FnMut(&T) -> bool { + v: &'a [T], + pred: P, + finished: bool +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl fmt::Debug for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusive") + .field("v", &self.v) + .field("finished", &self.finished) + .finish() + } +} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_inclusive", issue = "0")] +impl Clone for SplitInclusive<'_, T, P> where P: Clone + FnMut(&T) -> bool { + fn clone(&self) -> Self { + SplitInclusive { + v: self.v, + pred: self.pred.clone(), + finished: self.finished, + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> Iterator for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { + type Item = &'a [T]; + + #[inline] + fn next(&mut self) -> Option<&'a [T]> { + if self.finished { return None; } + + match self.v.iter().position(|x| (self.pred)(x)) { + None => self.finish(), + Some(idx) => { + let ret = Some(&self.v[..idx + 1]); + self.v = &self.v[idx + 1..]; + ret + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + if self.finished { + (0, Some(0)) + } else { + (1, Some(self.v.len() + 1)) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { + #[inline] + fn next_back(&mut self) -> Option<&'a [T]> { + if self.finished { return None; } + + match self.v.iter().rposition(|x| (self.pred)(x)) { + None => self.finish(), + 
Some(idx) => { + let ret = Some(&self.v[idx + 1..]); + self.v = &self.v[..idx]; + ret + } + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> SplitIter for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { + #[inline] + fn finish(&mut self) -> Option<&'a [T]> { + if self.finished { None } else { self.finished = true; Some(self.v) } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl FusedIterator for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool {} + +/// An iterator over the mutable subslices of the vector which are separated /// by elements that match `pred`. /// /// This struct is created by the [`split_mut`] method on [slices]. @@ -3789,6 +3948,106 @@ where #[stable(feature = "fused", since = "1.26.0")] impl FusedIterator for SplitMut<'_, T, P> where P: FnMut(&T) -> bool {} +/// An iterator over the mutable subslices of the vector which are separated +/// by elements that match `pred`. Unlike `SplitMut`, it contains the matched +/// parts in the ends of the subslices. +/// +/// This struct is created by the [`split_inclusive_mut`] method on [slices]. 
+/// +/// [`split_inclusive_mut`]: ../../std/primitive.slice.html#method.split_inclusive_mut +/// [slices]: ../../std/primitive.slice.html +#[unstable(feature = "split_inclusive", issue = "0")] +pub struct SplitInclusiveMut<'a, T:'a, P> where P: FnMut(&T) -> bool { + v: &'a mut [T], + pred: P, + finished: bool +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl fmt::Debug for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusiveMut") + .field("v", &self.v) + .field("finished", &self.finished) + .finish() + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> SplitIter for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bool { + #[inline] + fn finish(&mut self) -> Option<&'a mut [T]> { + if self.finished { + None + } else { + self.finished = true; + Some(mem::replace(&mut self.v, &mut [])) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> Iterator for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bool { + type Item = &'a mut [T]; + + #[inline] + fn next(&mut self) -> Option<&'a mut [T]> { + if self.finished { return None; } + + let idx_opt = { // work around borrowck limitations + let pred = &mut self.pred; + self.v.iter().position(|x| (*pred)(x)) + }; + match idx_opt { + None => self.finish(), + Some(idx) => { + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx+1); + self.v = tail; + Some(head) + } + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + if self.finished { + (0, Some(0)) + } else { + // if the predicate doesn't match anything, we yield one slice; + // if it matches every element, we yield at most len+1 slices.
+ (1, Some(self.v.len() + 1)) + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusiveMut<'a, T, P> where + P: FnMut(&T) -> bool, +{ + #[inline] + fn next_back(&mut self) -> Option<&'a mut [T]> { + if self.finished { return None; } + + let idx_opt = { // work around borrowck limitations + let pred = &mut self.pred; + self.v.iter().rposition(|x| (*pred)(x)) + }; + match idx_opt { + None => self.finish(), + Some(idx) => { + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx+1); + self.v = head; + Some(tail) + } + } + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl FusedIterator for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool {} + /// An iterator over subslices separated by elements that match a predicate /// function, starting from the end of the slice. /// diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 5a7cddd4041d5..3f7defa912bb1 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1132,6 +1132,21 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { } } + #[inline] + fn next_inclusive(&mut self) -> Option<&'a str> { + if self.finished { return None } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match() { + Some((_, b)) => unsafe { + let elt = haystack.get_unchecked(self.start..b); + self.start = b; + Some(elt) + }, + None => self.get_end(), + } + } + #[inline] fn next_back(&mut self) -> Option<&'a str> where @@ -1168,6 +1183,34 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { }, } } + + #[inline] + fn next_back_inclusive(&mut self) -> Option<&'a str> + where P::Searcher: ReverseSearcher<'a> + { + if self.finished { return None } + + if !self.allow_trailing_empty { + self.allow_trailing_empty = true; + match self.next_back() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => if self.finished { return None } + } + } + + let haystack = self.matcher.haystack(); + 
match self.matcher.next_match_back() { + Some((_, b)) => unsafe { + let elt = haystack.get_unchecked(b..self.end); + self.end = b; + Some(elt) + }, + None => unsafe { + self.finished = true; + Some(haystack.get_unchecked(self.start..self.end)) + }, + } + } } generate_pattern_iterators! { @@ -3212,6 +3255,31 @@ impl str { }) } + /// An iterator over substrings of this string slice, separated by + /// characters matched by a pattern. Differs from the iterator produced by + /// `split` in that `split_inclusive` leaves the matched part as the + /// terminator of the substring. + /// + /// # Examples + /// + /// ``` + /// #![feature(split_inclusive)] + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb." + /// .split_inclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "0")] + #[inline] + pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> { + SplitInclusive(SplitInternal { + start: 0, + end: self.len(), + matcher: pat.into_searcher(self), + allow_trailing_empty: true, + finished: false, + }) + } + /// An iterator over substrings of the given string slice, separated by /// characters matched by a pattern and yielded in reverse order. /// @@ -4405,6 +4473,19 @@ pub struct SplitAsciiWhitespace<'a> { inner: Map, BytesIsNotEmpty>, UnsafeBytesToStr>, } +/// An iterator over the substrings of a string, +/// each terminated by a substring matching a pattern. +/// Unlike `Split`, it contains the matched part as a terminator +/// of the substring. +/// +/// This struct is created by the [`split_inclusive`] method on [`str`]. +/// See its documentation for more.
+/// +/// [`split_inclusive`]: ../../std/primitive.str.html#method.split_inclusive +/// [`str`]: ../../std/primitive.str.html +#[unstable(feature = "split_inclusive", issue = "0")] +pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>); + impl_fn_for_zst! { #[derive(Clone)] struct IsWhitespace impl Fn = |c: char| -> bool { @@ -4433,6 +4514,8 @@ impl_fn_for_zst! { }; } + + #[stable(feature = "split_whitespace", since = "1.1.0")] impl<'a> Iterator for SplitWhitespace<'a> { type Item = &'a str; @@ -4495,6 +4578,46 @@ impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> { #[stable(feature = "split_ascii_whitespace", since = "1.34.0")] impl FusedIterator for SplitAsciiWhitespace<'_> {} +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { + self.0.next_inclusive() + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SplitInclusive") + .field("0", &self.0) + .finish() + } +} + +// FIXME(#26925) Remove in favor of `#[derive(Clone)]` +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> { + fn clone(&self) -> Self { + SplitInclusive(self.0.clone()) + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator + for SplitInclusive<'a, P> +{ + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + self.0.next_back_inclusive() + } +} + +#[unstable(feature = "split_inclusive", issue = "0")] +impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {} + /// An iterator of [`u16`] over the string encoded as UTF-16. 
/// /// [`u16`]: ../../std/primitive.u16.html From 5c9dc57cb587761561e85574c821a6f9c0c7cc67 Mon Sep 17 00:00:00 2001 From: Pyry Kontio Date: Sat, 25 Jan 2020 17:47:46 +0900 Subject: [PATCH 2/2] Don't return empty slice on last iteration with matched terminator. Test reverse iteration. --- src/liballoc/tests/slice.rs | 64 ++++++++++- src/liballoc/tests/str.rs | 39 +++++-- src/libcore/slice/mod.rs | 221 +++++++++++++++++++----------------- src/libcore/str/mod.rs | 63 +++++++--- 4 files changed, 255 insertions(+), 132 deletions(-) diff --git a/src/liballoc/tests/slice.rs b/src/liballoc/tests/slice.rs index 9e903b16a2621..3d6b4bff5e060 100644 --- a/src/liballoc/tests/slice.rs +++ b/src/liballoc/tests/slice.rs @@ -859,11 +859,11 @@ fn test_splitator_inclusive() { assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).collect::>(), splits); let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; assert_eq!(xs.split_inclusive(|x| *x == 1).collect::>(), splits); - let splits: &[&[_]] = &[&[1, 2, 3, 4, 5], &[]]; + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; assert_eq!(xs.split_inclusive(|x| *x == 5).collect::>(), splits); let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; assert_eq!(xs.split_inclusive(|x| *x == 10).collect::>(), splits); - let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5], &[]]; + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]]; assert_eq!(xs.split_inclusive(|_| true).collect::>(), splits); let xs: &[i32] = &[]; @@ -871,6 +871,66 @@ fn test_splitator_inclusive() { assert_eq!(xs.split_inclusive(|x| *x == 5).collect::>(), splits); } +#[test] +fn test_splitator_inclusive_reverse() { + let xs = &[1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]]; + assert_eq!(xs.split_inclusive(|x| *x % 2 == 0).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]]; + assert_eq!(xs.split_inclusive(|x| *x == 1).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::>(), splits); + 
let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive(|x| *x == 10).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]]; + assert_eq!(xs.split_inclusive(|_| true).rev().collect::>(), splits); + + let xs: &[i32] = &[]; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive(|x| *x == 5).rev().collect::>(), splits); +} + +#[test] +fn test_splitator_mut_inclusive() { + let xs = &mut [1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[1, 2], &[3, 4], &[5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 1).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 10).collect::>(), splits); + let splits: &[&[_]] = &[&[1], &[2], &[3], &[4], &[5]]; + assert_eq!(xs.split_inclusive_mut(|_| true).collect::>(), splits); + + let xs: &mut [i32] = &mut []; + let splits: &[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).collect::>(), splits); +} + +#[test] +fn test_splitator_mut_inclusive_reverse() { + let xs = &mut [1, 2, 3, 4, 5]; + + let splits: &[&[_]] = &[&[5], &[3, 4], &[1, 2]]; + assert_eq!(xs.split_inclusive_mut(|x| *x % 2 == 0).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[2, 3, 4, 5], &[1]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 1).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[1, 2, 3, 4, 5]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 10).rev().collect::>(), splits); + let splits: &[&[_]] = &[&[5], &[4], &[3], &[2], &[1]]; + assert_eq!(xs.split_inclusive_mut(|_| true).rev().collect::>(), splits); + + let xs: &mut [i32] = &mut []; + let splits: 
&[&[i32]] = &[&[]]; + assert_eq!(xs.split_inclusive_mut(|x| *x == 5).rev().collect::>(), splits); +} + #[test] fn test_splitnator() { let xs = &[1, 2, 3, 4, 5]; diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index fc2fcb6e56939..b703df6f3cb7d 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -1252,18 +1252,43 @@ fn test_split_char_iterator_inclusive() { let data = "\nMäry häd ä little lämb\nLittle lämb\n"; let split: Vec<&str> = data.split_inclusive('\n').collect(); - assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n", ""]); + assert_eq!(split, ["\n", "Märy häd ä little lämb\n", "Little lämb\n"]); let uppercase_separated = "SheePSharKTurtlECaT"; let mut first_char = true; - let split: Vec<&str> = uppercase_separated.split_inclusive(|c: char| { - let split = !first_char && c.is_uppercase(); - first_char = split; - split - }).collect(); - assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT", ""]); + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = !first_char && c.is_uppercase(); + first_char = split; + split + }) + .collect(); + assert_eq!(split, ["SheeP", "SharK", "TurtlE", "CaT"]); } +#[test] +fn test_split_char_iterator_inclusive_rev() { + let data = "\nMäry häd ä little lämb\nLittle lämb\n"; + + let split: Vec<&str> = data.split_inclusive('\n').rev().collect(); + assert_eq!(split, ["Little lämb\n", "Märy häd ä little lämb\n", "\n"]); + + // Note that the predicate is stateful and thus dependent + // on the iteration order. + // (A different predicate is needed for reverse iterator vs normal iterator.) + // Not sure if anything can be done though. 
+ let uppercase_separated = "SheePSharKTurtlECaT"; + let mut term_char = true; + let split: Vec<&str> = uppercase_separated + .split_inclusive(|c: char| { + let split = term_char && c.is_uppercase(); + term_char = c.is_uppercase(); + split + }) + .rev() + .collect(); + assert_eq!(split, ["CaT", "TurtlE", "SharK", "SheeP"]); +} #[test] fn test_rsplit() { diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs index 202ffc390e4c3..2f70dcff2f343 100644 --- a/src/libcore/slice/mod.rs +++ b/src/libcore/slice/mod.rs @@ -1171,30 +1171,26 @@ impl [T] { /// assert!(iter.next().is_none()); /// ``` /// - /// If the first element is matched, an empty slice will be the first item - /// returned by the iterator. Similarly, if the last element in the slice - /// is matched, an empty slice will be the last item returned by the - /// iterator: + /// If the last element of the slice is matched, + /// that element will be considered the terminator of the preceding slice. + /// That slice will be the last item returned by the iterator. 
/// /// ``` /// #![feature(split_inclusive)] - /// let slice = [10, 40, 33]; + /// let slice = [3, 10, 40, 33]; /// let mut iter = slice.split_inclusive(|num| num % 3 == 0); /// + /// assert_eq!(iter.next().unwrap(), &[3]); /// assert_eq!(iter.next().unwrap(), &[10, 40, 33]); - /// assert_eq!(iter.next().unwrap(), &[]); /// assert!(iter.next().is_none()); /// ``` - #[unstable(feature = "split_inclusive", issue = "0")] + #[unstable(feature = "split_inclusive", issue = "none")] #[inline] pub fn split_inclusive(&self, pred: F) -> SplitInclusive<'_, T, F> - where F: FnMut(&T) -> bool + where + F: FnMut(&T) -> bool, { - SplitInclusive { - v: self, - pred, - finished: false - } + SplitInclusive { v: self, pred, finished: false } } /// Returns an iterator over mutable subslices separated by elements that @@ -1213,10 +1209,11 @@ impl [T] { /// } /// assert_eq!(v, [10, 40, 1, 20, 1, 1]); /// ``` - #[unstable(feature = "split_inclusive", issue = "0")] + #[unstable(feature = "split_inclusive", issue = "none")] #[inline] pub fn split_inclusive_mut(&mut self, pred: F) -> SplitInclusiveMut<'_, T, F> - where F: FnMut(&T) -> bool + where + F: FnMut(&T) -> bool, { SplitInclusiveMut { v: self, pred, finished: false } } @@ -3749,15 +3746,21 @@ impl FusedIterator for Split<'_, T, P> where P: FnMut(&T) -> bool {} /// /// [`split_inclusive`]: ../../std/primitive.slice.html#method.split_inclusive /// [slices]: ../../std/primitive.slice.html -#[unstable(feature = "split_inclusive", issue = "0")] -pub struct SplitInclusive<'a, T:'a, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +pub struct SplitInclusive<'a, T: 'a, P> +where + P: FnMut(&T) -> bool, +{ v: &'a [T], pred: P, - finished: bool + finished: bool, } -#[unstable(feature = "split_inclusive", issue = "0")] -impl fmt::Debug for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +impl fmt::Debug for SplitInclusive<'_, T, P> +where + 
P: FnMut(&T) -> bool, +{ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitInclusive") .field("v", &self.v) @@ -3767,71 +3770,71 @@ impl fmt::Debug for SplitInclusive<'_, T, P> where P: FnMut(&T } // FIXME(#26925) Remove in favor of `#[derive(Clone)]` -#[unstable(feature = "split_inclusive", issue = "0")] -impl Clone for SplitInclusive<'_, T, P> where P: Clone + FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +impl Clone for SplitInclusive<'_, T, P> +where + P: Clone + FnMut(&T) -> bool, +{ fn clone(&self) -> Self { - SplitInclusive { - v: self.v, - pred: self.pred.clone(), - finished: self.finished, - } + SplitInclusive { v: self.v, pred: self.pred.clone(), finished: self.finished } } } -#[unstable(feature = "split_inclusive", issue = "0")] -impl<'a, T, P> Iterator for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> Iterator for SplitInclusive<'a, T, P> +where + P: FnMut(&T) -> bool, +{ type Item = &'a [T]; #[inline] fn next(&mut self) -> Option<&'a [T]> { - if self.finished { return None; } + if self.finished { + return None; + } - match self.v.iter().position(|x| (self.pred)(x)) { - None => self.finish(), - Some(idx) => { - let ret = Some(&self.v[..idx + 1]); - self.v = &self.v[idx + 1..]; - ret - } + let idx = + self.v.iter().position(|x| (self.pred)(x)).map(|idx| idx + 1).unwrap_or(self.v.len()); + if idx == self.v.len() { + self.finished = true; } + let ret = Some(&self.v[..idx]); + self.v = &self.v[idx..]; + ret } #[inline] fn size_hint(&self) -> (usize, Option) { - if self.finished { - (0, Some(0)) - } else { - (1, Some(self.v.len() + 1)) - } + if self.finished { (0, Some(0)) } else { (1, Some(self.v.len() + 1)) } } } -#[unstable(feature = "split_inclusive", issue = "0")] -impl<'a, T, P> DoubleEndedIterator for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = 
"none")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusive<'a, T, P> +where + P: FnMut(&T) -> bool, +{ #[inline] fn next_back(&mut self) -> Option<&'a [T]> { - if self.finished { return None; } - - match self.v.iter().rposition(|x| (self.pred)(x)) { - None => self.finish(), - Some(idx) => { - let ret = Some(&self.v[idx + 1..]); - self.v = &self.v[..idx]; - ret - } + if self.finished { + return None; } - } -} -#[unstable(feature = "split_inclusive", issue = "0")] -impl<'a, T, P> SplitIter for SplitInclusive<'a, T, P> where P: FnMut(&T) -> bool { - #[inline] - fn finish(&mut self) -> Option<&'a [T]> { - if self.finished { None } else { self.finished = true; Some(self.v) } + // The last index of self.v is already checked and found to match + // by the last iteration, so we start searching a new match + // one index to the left. + let remainder = if self.v.len() == 0 { &[] } else { &self.v[..(self.v.len() - 1)] }; + let idx = remainder.iter().rposition(|x| (self.pred)(x)).map(|idx| idx + 1).unwrap_or(0); + if idx == 0 { + self.finished = true; + } + let ret = Some(&self.v[idx..]); + self.v = &self.v[..idx]; + ret } } -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl FusedIterator for SplitInclusive<'_, T, P> where P: FnMut(&T) -> bool {} /// An iterator over the mutable subslices of the vector which are separated @@ -3956,15 +3959,21 @@ impl FusedIterator for SplitMut<'_, T, P> where P: FnMut(&T) -> bool {} /// /// [`split_inclusive_mut`]: ../../std/primitive.slice.html#method.split_inclusive_mut /// [slices]: ../../std/primitive.slice.html -#[unstable(feature = "split_inclusive", issue = "0")] -pub struct SplitInclusiveMut<'a, T:'a, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +pub struct SplitInclusiveMut<'a, T: 'a, P> +where + P: FnMut(&T) -> bool, +{ v: &'a mut [T], pred: P, - finished: bool + finished: bool, } -#[unstable(feature = 
"split_inclusive", issue = "0")] -impl fmt::Debug for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +impl fmt::Debug for SplitInclusiveMut<'_, T, P> +where + P: FnMut(&T) -> bool, +{ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("SplitInclusiveMut") .field("v", &self.v) @@ -3973,40 +3982,32 @@ impl fmt::Debug for SplitInclusiveMut<'_, T, P> where P: FnMut } } -#[unstable(feature = "split_inclusive", issue = "0")] -impl<'a, T, P> SplitIter for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bool { - #[inline] - fn finish(&mut self) -> Option<&'a mut [T]> { - if self.finished { - None - } else { - self.finished = true; - Some(mem::replace(&mut self.v, &mut [])) - } - } -} - -#[unstable(feature = "split_inclusive", issue = "0")] -impl<'a, T, P> Iterator for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bool { +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> Iterator for SplitInclusiveMut<'a, T, P> +where + P: FnMut(&T) -> bool, +{ type Item = &'a mut [T]; #[inline] fn next(&mut self) -> Option<&'a mut [T]> { - if self.finished { return None; } + if self.finished { + return None; + } - let idx_opt = { // work around borrowck limitations + let idx_opt = { + // work around borrowck limitations let pred = &mut self.pred; self.v.iter().position(|x| (*pred)(x)) }; - match idx_opt { - None => self.finish(), - Some(idx) => { - let tmp = mem::replace(&mut self.v, &mut []); - let (head, tail) = tmp.split_at_mut(idx+1); - self.v = tail; - Some(head) - } + let idx = idx_opt.map(|idx| idx + 1).unwrap_or(self.v.len()); + if idx == self.v.len() { + self.finished = true; } + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx); + self.v = tail; + Some(head) } #[inline] @@ -4021,31 +4022,41 @@ impl<'a, T, P> Iterator for SplitInclusiveMut<'a, T, P> where P: FnMut(&T) -> bo } } -#[unstable(feature = "split_inclusive", issue 
= "0")] -impl<'a, T, P> DoubleEndedIterator for SplitInclusiveMut<'a, T, P> where +#[unstable(feature = "split_inclusive", issue = "none")] +impl<'a, T, P> DoubleEndedIterator for SplitInclusiveMut<'a, T, P> +where P: FnMut(&T) -> bool, { #[inline] fn next_back(&mut self) -> Option<&'a mut [T]> { - if self.finished { return None; } + if self.finished { + return None; + } - let idx_opt = { // work around borrowck limitations + let idx_opt = if self.v.len() == 0 { + None + } else { + // work around borrowck limitations let pred = &mut self.pred; - self.v.iter().rposition(|x| (*pred)(x)) + + // The last index of self.v is already checked and found to match + // by the last iteration, so we start searching a new match + // one index to the left. + let remainder = &self.v[..(self.v.len() - 1)]; + remainder.iter().rposition(|x| (*pred)(x)) }; - match idx_opt { - None => self.finish(), - Some(idx) => { - let tmp = mem::replace(&mut self.v, &mut []); - let (head, tail) = tmp.split_at_mut(idx+1); - self.v = head; - Some(tail) - } + let idx = idx_opt.map(|idx| idx + 1).unwrap_or(0); + if idx == 0 { + self.finished = true; } + let tmp = mem::replace(&mut self.v, &mut []); + let (head, tail) = tmp.split_at_mut(idx); + self.v = head; + Some(tail) } } -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl FusedIterator for SplitInclusiveMut<'_, T, P> where P: FnMut(&T) -> bool {} /// An iterator over subslices separated by elements that match a predicate diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs index 3f7defa912bb1..efe6d885662e4 100644 --- a/src/libcore/str/mod.rs +++ b/src/libcore/str/mod.rs @@ -1134,10 +1134,15 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { #[inline] fn next_inclusive(&mut self) -> Option<&'a str> { - if self.finished { return None } + if self.finished { + return None; + } let haystack = self.matcher.haystack(); match self.matcher.next_match() { + // SAFETY: `Searcher` 
guarantees that `b` lies on unicode boundary, + // and self.start is either the start of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. Some((_, b)) => unsafe { let elt = haystack.get_unchecked(self.start..b); self.start = b; @@ -1186,25 +1191,40 @@ impl<'a, P: Pattern<'a>> SplitInternal<'a, P> { #[inline] fn next_back_inclusive(&mut self) -> Option<&'a str> - where P::Searcher: ReverseSearcher<'a> + where + P::Searcher: ReverseSearcher<'a>, { - if self.finished { return None } + if self.finished { + return None; + } if !self.allow_trailing_empty { self.allow_trailing_empty = true; - match self.next_back() { + match self.next_back_inclusive() { Some(elt) if !elt.is_empty() => return Some(elt), - _ => if self.finished { return None } + _ => { + if self.finished { + return None; + } + } } } let haystack = self.matcher.haystack(); match self.matcher.next_match_back() { + // SAFETY: `Searcher` guarantees that `b` lies on unicode boundary, + // and self.end is either the end of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. Some((_, b)) => unsafe { let elt = haystack.get_unchecked(b..self.end); self.end = b; Some(elt) }, + // SAFETY: self.start is either the start of the original string, + // or start of a substring that represents the part of the string that hasn't + // iterated yet. Either way, it is guaranteed to lie on unicode boundary. + // self.end is either the end of the original string, + // or `b` was assigned to it, so it also lies on unicode boundary. 
None => unsafe { self.finished = true; Some(haystack.get_unchecked(self.start..self.end)) @@ -3268,14 +3288,25 @@ impl str { /// .split_inclusive('\n').collect(); /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]); /// ``` - #[unstable(feature = "split_inclusive", issue = "0")] + /// + /// If the last element of the string is matched, + /// that element will be considered the terminator of the preceding substring. + /// That substring will be the last item returned by the iterator. + /// + /// ``` + /// #![feature(split_inclusive)] + /// let v: Vec<&str> = "Mary had a little lamb\nlittle lamb\nlittle lamb.\n" + /// .split_inclusive('\n').collect(); + /// assert_eq!(v, ["Mary had a little lamb\n", "little lamb\n", "little lamb.\n"]); + /// ``` + #[unstable(feature = "split_inclusive", issue = "none")] #[inline] pub fn split_inclusive<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitInclusive<'a, P> { SplitInclusive(SplitInternal { start: 0, end: self.len(), matcher: pat.into_searcher(self), - allow_trailing_empty: true, + allow_trailing_empty: false, finished: false, }) } @@ -4483,7 +4514,7 @@ pub struct SplitAsciiWhitespace<'a> { /// /// [`split_inclusive`]: ../../std/primitive.str.html#method.split_inclusive /// [`str`]: ../../std/primitive.str.html -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] pub struct SplitInclusive<'a, P: Pattern<'a>>(SplitInternal<'a, P>); impl_fn_for_zst! { @@ -4514,8 +4545,6 @@ impl_fn_for_zst! 
{ }; } - - #[stable(feature = "split_whitespace", since = "1.1.0")] impl<'a> Iterator for SplitWhitespace<'a> { type Item = &'a str; @@ -4578,7 +4607,7 @@ impl<'a> DoubleEndedIterator for SplitAsciiWhitespace<'a> { #[stable(feature = "split_ascii_whitespace", since = "1.34.0")] impl FusedIterator for SplitAsciiWhitespace<'_> {} -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { type Item = &'a str; @@ -4588,24 +4617,22 @@ impl<'a, P: Pattern<'a>> Iterator for SplitInclusive<'a, P> { } } -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: fmt::Debug>> fmt::Debug for SplitInclusive<'a, P> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("SplitInclusive") - .field("0", &self.0) - .finish() + f.debug_struct("SplitInclusive").field("0", &self.0).finish() } } // FIXME(#26925) Remove in favor of `#[derive(Clone)]` -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: Clone>> Clone for SplitInclusive<'a, P> { fn clone(&self) -> Self { SplitInclusive(self.0.clone()) } } -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator for SplitInclusive<'a, P> { @@ -4615,7 +4642,7 @@ impl<'a, P: Pattern<'a, Searcher: ReverseSearcher<'a>>> DoubleEndedIterator } } -#[unstable(feature = "split_inclusive", issue = "0")] +#[unstable(feature = "split_inclusive", issue = "none")] impl<'a, P: Pattern<'a>> FusedIterator for SplitInclusive<'a, P> {} /// An iterator of [`u16`] over the string encoded as UTF-16.