Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Remove unneeded escaping in strings if possible #253

Merged
merged 1 commit into from
May 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
fix: remove unneeded escaping in strings if possible
  • Loading branch information
pamburus committed May 9, 2024
commit e56da8eb9b90d31fa6eaebcc92398f36c37f7067
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ members = [".", "crate/encstr"]
[workspace.package]
repository = "https://github.com/pamburus/hl"
authors = ["Pavel Ivanov <mr.pavel.ivanov@gmail.com>"]
version = "0.29.3-alpha.1"
version = "0.29.3"
edition = "2021"
license = "MIT"

Expand Down Expand Up @@ -135,6 +135,10 @@ harness = false
name = "json"
harness = false

[[bench]]
name = "mem"
harness = false

[[bench]]
name = "encstr"
path = "benches/encstr/benches.rs"
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ install-versioned: contrib-build

## Run tests
test: contrib-build
@cargo test
@cargo test --workspace
.PHONY: test

## Run benchmarks
bench: contrib-build
@cargo bench
@cargo bench --workspace
.PHONY: bench

## Show usage of the binary
Expand Down
60 changes: 60 additions & 0 deletions benches/mem.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// std imports
use std::alloc::System;

// third-party imports
use criterion::{criterion_group, criterion_main, Criterion};
use stats_alloc::{StatsAlloc, INSTRUMENTED_SYSTEM};
use std::hint::black_box;

// Install stats_alloc's instrumented wrapper around the system allocator as the
// global allocator for this benchmark binary.
// NOTE(review): nothing in this file reads the collected statistics — confirm
// whether the instrumentation is intentionally part of what is being measured.
#[global_allocator]
static GLOBAL: &StatsAlloc<System> = &INSTRUMENTED_SYSTEM;

/// Registers the `mem` benchmark group.
///
/// The group measures primitive byte-buffer operations on `Vec<u8>`:
/// rotating a buffer by one byte, copying one buffer into another, and
/// searching a 4096-byte buffer for the first occurrence of one, two or four
/// candidate byte values.
fn benchmark(c: &mut Criterion) {
    let mut c = c.benchmark_group("mem");

    // Input buffer of `size` bytes holding a repeating 0..=255 pattern
    // (the `as u8` truncation is intentional).
    let source = |size: usize| -> Vec<u8> { (0..size).map(|x| x as u8).collect() };

    // 4096-byte haystack in which every byte value 0..=255 occurs 16 times in a row,
    // so the first match for 128 is found in the middle of the buffer.
    let haystack = || -> Vec<u8> { (0..4096).map(|x| (x / 16) as u8).collect() };

    for n in [512, 4096] {
        c.bench_function(format!("mem-rotate-{}", n), |b| {
            let mut vi = source(n);
            b.iter(|| {
                black_box(&mut vi).rotate_right(1);
            });
        });
        c.bench_function(format!("mem-copy-{}", n), |b| {
            let vi = source(n);
            // Pre-sized so that the copy itself never has to reallocate.
            let mut ve: Vec<u8> = Vec::with_capacity(n);
            b.iter(|| {
                ve.clear();
                black_box(&mut ve).extend_from_slice(black_box(&vi).as_slice());
            });
        });
    }

    c.bench_function("mem-find-single-value-4096", |b| {
        let vi = haystack();
        b.iter(|| {
            // Hide the haystack from the optimizer as well as the result.
            black_box(black_box(&vi).iter().position(|&x| x == 128));
        });
    });

    c.bench_function("mem-find-one-of-two-values-4096", |b| {
        let vi = haystack();
        b.iter(|| {
            black_box(black_box(&vi).iter().position(|&x| matches!(x, 128 | 192)));
        });
    });

    c.bench_function("mem-find-one-of-four-values-4096", |b| {
        let vi = haystack();
        b.iter(|| {
            black_box(
                black_box(&vi)
                    .iter()
                    .position(|&x| matches!(x, 128 | 192 | 224 | 240)),
            );
        });
    });

    // Finish the group explicitly instead of relying on the drop-time fallback.
    c.finish();
}

criterion_group!(benches, benchmark);
criterion_main!(benches);
14 changes: 9 additions & 5 deletions build/ci/coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ IGNORE=(

function executables() {
echo ${MAIN_EXECUTABLE:?}
cargo test --tests --no-run --message-format=json \
cargo test --workspace --tests --no-run --message-format=json \
| jq -r 'select(.profile.test == true) | .filenames[]' \
| grep -v dSYM -
}
Expand All @@ -33,11 +33,13 @@ LLVM_COV_FLAGS=(
)

function clean() {
rm -f ${LLVM_PROFILE_PATTERN:?}
rm -f \
${LLVM_PROFILE_PATTERN:?} \
crate/encstr/${LLVM_PROFILE_PATTERN:?}
}

function test() {
cargo test --tests
cargo test --tests --workspace
cargo build
${MAIN_EXECUTABLE:?} > /dev/null
${MAIN_EXECUTABLE:?} --config= --help > /dev/null
Expand All @@ -50,8 +52,10 @@ function test() {

function merge() {
${LLVM_BIN:?}/llvm-profdata merge \
-sparse ${LLVM_PROFILE_PATTERN:?} \
-o ${PROFDATA_FILE:?}
-o ${PROFDATA_FILE:?} \
-sparse \
${LLVM_PROFILE_PATTERN:?} \
crate/encstr/${LLVM_PROFILE_PATTERN:?}
}

function report() {
Expand Down
1 change: 1 addition & 0 deletions crate/encstr/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target
35 changes: 2 additions & 33 deletions crate/encstr/src/encstr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ where
impl Handler for &mut Vec<u8> {
#[inline(always)]
fn handle(&mut self, token: Token<'_>) -> Option<()> {
Appender::new(self).handle(token)
RawAppender::new(self).handle(token)
}
}

Expand Down Expand Up @@ -314,38 +314,7 @@ impl Builder {
impl Handler for Builder {
#[inline(always)]
fn handle(&mut self, token: Token<'_>) -> Option<()> {
Appender::new(&mut self.buffer).handle(token)
}
}

// ---

pub struct Appender<'a> {
buffer: &'a mut Vec<u8>,
}

impl<'a> Appender<'a> {
#[inline(always)]
pub fn new(buffer: &'a mut Vec<u8>) -> Self {
Self { buffer }
}
}

impl<'a> Handler for Appender<'a> {
#[inline(always)]
fn handle(&mut self, token: Token<'_>) -> Option<()> {
match token {
Token::Char(ch) => match ch {
..='\x7F' => self.buffer.push(ch as u8),
_ => {
let mut buf = [0; 4];
let s = ch.encode_utf8(&mut buf);
self.buffer.extend(s.as_bytes());
}
},
Token::Sequence(s) => self.buffer.extend(s.as_bytes()),
}
Some(())
RawAppender::new(&mut self.buffer).handle(token)
}
}

Expand Down
120 changes: 118 additions & 2 deletions crate/encstr/src/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,68 @@ impl<'a> From<&'a str> for JsonEncodedString<'a> {

// ---

// ---

/// Token handler that writes its output into a caller-owned byte buffer.
pub struct Appender<'a> {
    /// Destination buffer, mutably borrowed for the lifetime of the appender.
    buffer: &'a mut Vec<u8>,
}

impl<'a> Appender<'a> {
    /// Creates an appender that appends to `buffer`.
    ///
    /// The buffer is not cleared; output is added after its existing contents.
    #[inline(always)]
    pub fn new(buffer: &'a mut Vec<u8>) -> Self {
        Appender { buffer }
    }
}

/// Appends tokens to the wrapped buffer using JSON-style backslash escaping.
impl<'a> Handler for Appender<'a> {
    /// Appends `token` to the buffer.
    ///
    /// * An ASCII character flagged in `ESCAPE` is written as a backslash followed by
    ///   its short form (`b`, `f`, `n`, `r`, `t`, `\` or `"`), or as `\u00XX` with two
    ///   lowercase hex digits when it has no short form.
    /// * Any other character is written as its UTF-8 bytes.
    /// * A sequence is copied verbatim, except that every `"` and `\` in it is
    ///   prefixed with a backslash.
    ///
    /// Always returns `Some(())`.
    #[inline(always)]
    fn handle(&mut self, token: Token<'_>) -> Option<()> {
        let out = &mut *self.buffer;
        match token {
            Token::Char(ch) if ch.is_ascii() => {
                let byte = ch as u8;
                if ESCAPE[usize::from(byte)] {
                    out.push(b'\\');
                    // Single-letter escape, if this byte has one.
                    let short = match byte {
                        b'\x08' => Some(b'b'),
                        b'\x0c' => Some(b'f'),
                        b'\n' => Some(b'n'),
                        b'\r' => Some(b'r'),
                        b'\t' => Some(b't'),
                        b'\\' | b'"' => Some(byte),
                        _ => None,
                    };
                    match short {
                        Some(letter) => out.push(letter),
                        None => {
                            // Generic form: `u00` plus the high and low nibble.
                            out.extend_from_slice(b"u00");
                            out.push(HEX[usize::from(byte >> 4)]);
                            out.push(HEX[usize::from(byte & 0x0f)]);
                        }
                    }
                } else {
                    out.push(byte);
                }
            }
            Token::Char(ch) => {
                let mut utf8 = [0; 4];
                out.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
            }
            Token::Sequence(s) => {
                let mut rest = s.as_bytes();
                loop {
                    match rest.iter().position(|&b| b == b'"' || b == b'\\') {
                        Some(at) => {
                            // Copy the clean prefix, then the escaped special byte.
                            let (plain, tail) = rest.split_at(at);
                            out.extend_from_slice(plain);
                            out.push(b'\\');
                            out.push(tail[0]);
                            rest = &tail[1..];
                        }
                        None => {
                            out.extend_from_slice(rest);
                            break;
                        }
                    }
                }
            }
        }
        Some(())
    }
}

// ---

struct Parser<'a> {
input: &'a str,
index: usize,
Expand Down Expand Up @@ -287,7 +349,7 @@ impl<'a> Iterator for Tokens<'a> {

#[inline(always)]
fn decode_hex_val(val: u8) -> Option<u16> {
let n = HEX[val as usize] as u16;
let n = UNHEX[val as usize] as u16;
if n == 255 {
None
} else {
Expand Down Expand Up @@ -325,7 +387,11 @@ static ESCAPE: [bool; 256] = {
]
};

static HEX: [u8; 256] = {
static HEX: [u8; 16] = [
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'b', b'c', b'd', b'e', b'f',
];

static UNHEX: [u8; 256] = {
const __: u8 = 255; // not a hex digit
[
// 1 2 3 4 5 6 7 8 9 A B C D E F
Expand Down Expand Up @@ -411,6 +477,15 @@ mod tests {
assert_eq!(tokens.next(), None);
}

// A `\b` escape following plain text must be tokenized as a plain sequence
// followed by a single backspace character, after which the iterator stays exhausted.
#[test]
fn test_tokens_escape_b() {
    // Pass the literal directly: it is already a `&str`, so no extra borrow is needed.
    let mut tokens = Tokens::new(r#""00 \b""#);
    assert_eq!(tokens.next(), Some(Ok(Token::Sequence("00 "))));
    assert_eq!(tokens.next(), Some(Ok(Token::Char('\x08'))));
    assert_eq!(tokens.next(), None);
    assert_eq!(tokens.next(), None);
}

#[test]
fn test_tokens_control() {
let mut tokens = Tokens::new(&r#""hello, \x00world""#);
Expand Down Expand Up @@ -440,4 +515,45 @@ mod tests {
assert_eq!(tokens.next(), Some(Ok(Token::Sequence("hello, "))));
assert_eq!(tokens.next(), Some(Err(Error::UnexpectedEndOfHexEscape)));
}

// `\u002c` (a comma) needs no escaping and must come out as a plain `,`,
// while escaped quotes must stay escaped in the appended output.
#[test]
fn test_append_esc_q() {
    // Pass the literal directly: it is already a `&str`, so no extra borrow is needed.
    let mut tokens = Tokens::new(r#""hello\u002c \"world\"""#);
    let mut buffer = Vec::new();
    let mut appender = Appender::new(&mut buffer);
    while let Some(Ok(token)) = tokens.next() {
        appender.handle(token);
    }
    assert_eq!(buffer, r#"hello, \"world\""#.as_bytes());
}

// The short escapes `\b`, `\f`, `\n`, `\r` and `\t` must survive a
// tokenize-then-append round trip unchanged.
#[test]
fn test_append_esc_bfnrt() {
    let mut out = Vec::new();
    let mut sink = Appender::new(&mut out);
    // Feed tokens until the input ends or the first tokenizer error.
    for token in Tokens::new(r#""00 \b\f\n\r\t""#).map_while(Result::ok) {
        sink.handle(token);
    }
    assert_eq!(out, r#"00 \b\f\n\r\t"#.as_bytes());
}

// Non-ASCII text, whether given literally or as a `\uXXXX` escape, must be
// appended as plain UTF-8 without any escaping.
#[test]
fn test_append_esc_unicode() {
    let mut out = Vec::new();
    let mut sink = Appender::new(&mut out);
    // Feed tokens until the input ends or the first tokenizer error.
    for token in Tokens::new(r#""00 ∞ \u2023""#).map_while(Result::ok) {
        sink.handle(token);
    }
    assert_eq!(out, r#"00 ∞ ‣"#.as_bytes(), "{:?}", String::from_utf8_lossy(&out));
}

// A sequence token that contains raw double quotes must have each quote
// prefixed with a backslash when appended.
#[test]
fn test_append_sequence_with_quotes() {
    let mut out = Vec::new();
    Appender::new(&mut out).handle(Token::Sequence(r#"hello, "world""#));
    assert_eq!(out, r#"hello, \"world\""#.as_bytes());
}
}
3 changes: 3 additions & 0 deletions crate/encstr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ mod encstr;

pub use encstr::*;
pub use error::*;

/// Public name for the `Appender` defined in the `json` module.
pub type JsonAppender<'a> = json::Appender<'a>;
/// Public name for the `Appender` defined in the `raw` module.
pub type RawAppender<'a> = raw::Appender<'a>;
Loading