Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update edition from 2018 to 2021 #84

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
[package]
authors = ["Andre Bogus <bogusandre@gmail.de>", "Joshua Landau <joshua@landau.ws>"]
authors = [
"Andre Bogus <bogusandre@gmail.de>",
"Joshua Landau <joshua@landau.ws>",
]
description = "count occurrences of a given byte, or the number of UTF-8 code points, in a byte slice, fast"
edition = "2018"
edition = "2021"
name = "bytecount"
version = "0.6.3"
license = "Apache-2.0/MIT"
Expand All @@ -28,7 +31,7 @@ packed_simd = { version = "0.3.8", package = "packed_simd_2", optional = true }
[dev-dependencies]
quickcheck = "1.0"
rand = "0.8"
criterion = { version = "0.4", default-features = false }
criterion = { version = "0.5.1", default-features = false }

[[bench]]
name = "bench"
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ To use bytecount in your crate, if you have [cargo-edit](https://github.com/kill
`Cargo.toml` to add `bytecount = "0.6.3"` to your `[dependencies]` section.

In your crate root (`lib.rs` or `main.rs`, depending on whether you are writing a
library or application), add `extern crate bytecount;`. Now you can simply use
library or application), add `use bytecount;`. Now you can simply use
`bytecount::count` as follows:

```Rust
extern crate bytecount;
use bytecount;

fn main() {
let mytext = "some potentially large text, perhaps read from disk?";
Expand All @@ -31,7 +31,7 @@ fn main() {
bytecount supports two features to make use of modern CPU features to speed up counting considerably. To allow your
users to use them, add the following to your `Cargo.toml`:

```
```toml
[features]
runtime-dispatch-simd = ["bytecount/runtime-dispatch-simd"]
generic-simd = ["bytecount/generic-simd"]
Expand Down
9 changes: 2 additions & 7 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
#[macro_use]
extern crate criterion;
extern crate bytecount;
extern crate rand;

use criterion::{Bencher, BenchmarkId, Criterion};
use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion};
use rand::RngCore;
use std::env;
use std::time::Duration;

use bytecount::{count, naive_count, naive_count_32, naive_num_chars, num_chars};
use bytecount::{self, count, naive_count, naive_count_32, naive_num_chars, num_chars};

fn random_bytes(len: usize) -> Vec<u8> {
let mut result = vec![0; len];
Expand Down
20 changes: 15 additions & 5 deletions src/integer_simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ unsafe fn usize_load_unchecked(bytes: &[u8], offset: usize) -> usize {
ptr::copy_nonoverlapping(
bytes.as_ptr().add(offset),
&mut output as *mut usize as *mut u8,
mem::size_of::<usize>()
mem::size_of::<usize>(),
);
output
}
Expand Down Expand Up @@ -65,11 +65,17 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
// 8
let mut counts = 0;
for i in 0..(haystack.len() - offset) / chunksize {
counts += bytewise_equal(usize_load_unchecked(haystack, offset + i * chunksize), needles);
counts += bytewise_equal(
usize_load_unchecked(haystack, offset + i * chunksize),
needles,
);
}
if haystack.len() % 8 != 0 {
let mask = usize::from_le(!(!0 >> ((haystack.len() % chunksize) * 8)));
counts += bytewise_equal(usize_load_unchecked(haystack, haystack.len() - chunksize), needles) & mask;
counts += bytewise_equal(
usize_load_unchecked(haystack, haystack.len() - chunksize),
needles,
) & mask;
}
count += sum_usize(counts);

Expand Down Expand Up @@ -98,11 +104,15 @@ pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
// 8
let mut counts = 0;
for i in 0..(utf8_chars.len() - offset) / chunksize {
counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize));
counts +=
is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize));
}
if utf8_chars.len() % 8 != 0 {
let mask = usize::from_le(!(!0 >> ((utf8_chars.len() % chunksize) * 8)));
counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, utf8_chars.len() - chunksize)) & mask;
counts += is_leading_utf8_byte(usize_load_unchecked(
utf8_chars,
utf8_chars.len() - chunksize,
)) & mask;
}
count += sum_usize(counts);

Expand Down
22 changes: 16 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
//! still on small strings.

#![deny(missing_docs)]

#![cfg_attr(not(feature = "runtime-dispatch-simd"), no_std)]

#[cfg(not(feature = "runtime-dispatch-simd"))]
Expand All @@ -45,7 +44,10 @@ pub use naive::*;
mod integer_simd;

#[cfg(any(
all(feature = "runtime-dispatch-simd", any(target_arch = "x86", target_arch = "x86_64")),
all(
feature = "runtime-dispatch-simd",
any(target_arch = "x86", target_arch = "x86_64")
),
feature = "generic-simd"
))]
mod simd;
Expand All @@ -64,7 +66,9 @@ pub fn count(haystack: &[u8], needle: u8) -> usize {
#[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("avx2") {
unsafe { return simd::x86_avx2::chunk_count(haystack, needle); }
unsafe {
return simd::x86_avx2::chunk_count(haystack, needle);
}
}
}

Expand All @@ -80,7 +84,9 @@ pub fn count(haystack: &[u8], needle: u8) -> usize {
))]
{
if is_x86_feature_detected!("sse2") {
unsafe { return simd::x86_sse2::chunk_count(haystack, needle); }
unsafe {
return simd::x86_sse2::chunk_count(haystack, needle);
}
}
}
}
Expand Down Expand Up @@ -109,7 +115,9 @@ pub fn num_chars(utf8_chars: &[u8]) -> usize {
#[cfg(all(feature = "runtime-dispatch-simd", target_arch = "x86_64"))]
{
if is_x86_feature_detected!("avx2") {
unsafe { return simd::x86_avx2::chunk_num_chars(utf8_chars); }
unsafe {
return simd::x86_avx2::chunk_num_chars(utf8_chars);
}
}
}

Expand All @@ -125,7 +133,9 @@ pub fn num_chars(utf8_chars: &[u8]) -> usize {
))]
{
if is_x86_feature_detected!("sse2") {
unsafe { return simd::x86_sse2::chunk_num_chars(utf8_chars); }
unsafe {
return simd::x86_sse2::chunk_num_chars(utf8_chars);
}
}
}
}
Expand Down
9 changes: 7 additions & 2 deletions src/naive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ pub fn naive_count_32(haystack: &[u8], needle: u8) -> usize {
/// assert_eq!(number_of_spaces, 6);
/// ```
pub fn naive_count(utf8_chars: &[u8], needle: u8) -> usize {
utf8_chars.iter().fold(0, |n, c| n + (*c == needle) as usize)
utf8_chars
.iter()
.fold(0, |n, c| n + (*c == needle) as usize)
}

/// Count the number of UTF-8 encoded Unicode codepoints in a slice of bytes, simple
Expand All @@ -38,5 +40,8 @@ pub fn naive_count(utf8_chars: &[u8], needle: u8) -> usize {
/// assert_eq!(char_count, 4);
/// ```
pub fn naive_num_chars(utf8_chars: &[u8]) -> usize {
utf8_chars.iter().filter(|&&byte| (byte >> 6) != 0b10).count()
utf8_chars
.iter()
.filter(|&&byte| (byte >> 6) != 0b10)
.count()
}
22 changes: 12 additions & 10 deletions src/simd/generic.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
extern crate packed_simd;
use packed_simd;

#[cfg(not(feature = "runtime-dispatch-simd"))]
use core::mem;
Expand All @@ -8,10 +8,9 @@ use std::mem;
use self::packed_simd::{u8x32, u8x64, FromCast};

const MASK: [u8; 64] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
];

unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 {
Expand Down Expand Up @@ -66,15 +65,17 @@ pub fn chunk_count(haystack: &[u8], needle: u8) -> usize {
// 32
let mut counts = u8x32::splat(0);
for i in 0..(haystack.len() - offset) / 32 {
counts -= u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32));
counts -=
u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32));
}
count += sum_x32(&counts);

// Straggler; need to reset counts because prior loop can run 255 times
counts = u8x32::splat(0);
if haystack.len() % 32 != 0 {
counts -= u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) &
u8x32_from_offset(&MASK, haystack.len() % 32);
counts -=
u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32))
& u8x32_from_offset(&MASK, haystack.len() % 32);
}
count += sum_x32(&counts);

Expand Down Expand Up @@ -127,8 +128,9 @@ pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
// Straggler; need to reset counts because prior loop can run 255 times
counts = u8x32::splat(0);
if utf8_chars.len() % 32 != 0 {
counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32)) &
u8x32_from_offset(&MASK, utf8_chars.len() % 32);
counts -=
is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32))
& u8x32_from_offset(&MASK, utf8_chars.len() % 32);
}
count += sum_x32(&counts);

Expand Down
52 changes: 23 additions & 29 deletions src/simd/x86_avx2.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
use std::arch::x86_64::{
__m256i,
_mm256_and_si256,
_mm256_cmpeq_epi8,
_mm256_extract_epi64,
_mm256_loadu_si256,
_mm256_sad_epu8,
_mm256_set1_epi8,
_mm256_setzero_si256,
_mm256_sub_epi8,
_mm256_xor_si256,
__m256i, _mm256_and_si256, _mm256_cmpeq_epi8, _mm256_extract_epi64, _mm256_loadu_si256,
_mm256_sad_epu8, _mm256_set1_epi8, _mm256_setzero_si256, _mm256_sub_epi8, _mm256_xor_si256,
};

#[target_feature(enable = "avx2")]
Expand All @@ -22,10 +14,9 @@ pub unsafe fn mm256_cmpneq_epi8(a: __m256i, b: __m256i) -> __m256i {
}

const MASK: [u8; 64] = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
];

#[target_feature(enable = "avx2")]
Expand All @@ -36,10 +27,10 @@ unsafe fn mm256_from_offset(slice: &[u8], offset: usize) -> __m256i {
#[target_feature(enable = "avx2")]
unsafe fn sum(u8s: &__m256i) -> usize {
let sums = _mm256_sad_epu8(*u8s, _mm256_setzero_si256());
(
_mm256_extract_epi64(sums, 0) + _mm256_extract_epi64(sums, 1) +
_mm256_extract_epi64(sums, 2) + _mm256_extract_epi64(sums, 3)
) as usize
(_mm256_extract_epi64(sums, 0)
+ _mm256_extract_epi64(sums, 1)
+ _mm256_extract_epi64(sums, 2)
+ _mm256_extract_epi64(sums, 3)) as usize
}

#[target_feature(enable = "avx2")]
Expand All @@ -57,7 +48,7 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
for _ in 0..255 {
counts = _mm256_sub_epi8(
counts,
_mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles)
_mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles),
);
offset += 32;
}
Expand All @@ -70,7 +61,7 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
for _ in 0..128 {
counts = _mm256_sub_epi8(
counts,
_mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles)
_mm256_cmpeq_epi8(mm256_from_offset(haystack, offset), needles),
);
offset += 32;
}
Expand All @@ -82,16 +73,16 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {
for i in 0..(haystack.len() - offset) / 32 {
counts = _mm256_sub_epi8(
counts,
_mm256_cmpeq_epi8(mm256_from_offset(haystack, offset + i * 32), needles)
_mm256_cmpeq_epi8(mm256_from_offset(haystack, offset + i * 32), needles),
);
}
if haystack.len() % 32 != 0 {
counts = _mm256_sub_epi8(
counts,
_mm256_and_si256(
_mm256_cmpeq_epi8(mm256_from_offset(haystack, haystack.len() - 32), needles),
mm256_from_offset(&MASK, haystack.len() % 32)
)
mm256_from_offset(&MASK, haystack.len() % 32),
),
);
}
count += sum(&counts);
Expand All @@ -101,7 +92,10 @@ pub unsafe fn chunk_count(haystack: &[u8], needle: u8) -> usize {

#[target_feature(enable = "avx2")]
unsafe fn is_leading_utf8_byte(u8s: __m256i) -> __m256i {
mm256_cmpneq_epi8(_mm256_and_si256(u8s, _mm256_set1_epu8(0b1100_0000)), _mm256_set1_epu8(0b1000_0000))
mm256_cmpneq_epi8(
_mm256_and_si256(u8s, _mm256_set1_epu8(0b1100_0000)),
_mm256_set1_epu8(0b1000_0000),
)
}

#[target_feature(enable = "avx2")]
Expand All @@ -118,7 +112,7 @@ pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
for _ in 0..255 {
counts = _mm256_sub_epi8(
counts,
is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset))
is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset)),
);
offset += 32;
}
Expand All @@ -131,7 +125,7 @@ pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
for _ in 0..128 {
counts = _mm256_sub_epi8(
counts,
is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset))
is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset)),
);
offset += 32;
}
Expand All @@ -143,16 +137,16 @@ pub unsafe fn chunk_num_chars(utf8_chars: &[u8]) -> usize {
for i in 0..(utf8_chars.len() - offset) / 32 {
counts = _mm256_sub_epi8(
counts,
is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset + i * 32))
is_leading_utf8_byte(mm256_from_offset(utf8_chars, offset + i * 32)),
);
}
if utf8_chars.len() % 32 != 0 {
counts = _mm256_sub_epi8(
counts,
_mm256_and_si256(
is_leading_utf8_byte(mm256_from_offset(utf8_chars, utf8_chars.len() - 32)),
mm256_from_offset(&MASK, utf8_chars.len() % 32)
)
mm256_from_offset(&MASK, utf8_chars.len() % 32),
),
);
}
count += sum(&counts);
Expand Down
Loading
Loading