From 6b9565139d0873430aeb6d7b41cd71d2c924b3e0 Mon Sep 17 00:00:00 2001
From: Anton Lazarev
Date: Fri, 27 Sep 2019 08:19:42 -0700
Subject: [PATCH] add redirect performance benchmark

---
Reviewer note: the benchmark source in this copy of the patch was amended
during review (generic type parameters restored, source-domain parsing made
robust, redundant clones removed, docs added). The blob id on the `index`
line of the new file therefore no longer matches its content.

 Cargo.lock                            |   1 +
 Cargo.toml                            |   5 +
 benches/bench_redirect_performance.rs | 195 ++++++++++++++++++++++++++
 3 files changed, 201 insertions(+)
 create mode 100644 benches/bench_redirect_performance.rs

diff --git a/Cargo.lock b/Cargo.lock
index 864e4a68..637499f9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -16,6 +16,7 @@ dependencies = [
  "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "lifeguard 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "psl 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "reqwest 0.9.19 (registry+https://github.com/rust-lang/crates.io-index)",
  "rmp-serde 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/Cargo.toml b/Cargo.toml
index 53039e99..4829c3d1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,7 @@ criterion = "0.2"
 csv = "1"
 serde_json = "1.0"
 reqwest = "0.9"
+psl = "0.4.1"
 
 [lib]
 bench = false
@@ -61,6 +62,10 @@ harness = false
 name = "bench_rules"
 harness = false
 
+[[bench]]
+name = "bench_redirect_performance"
+harness = false
+
 [features]
 default = ["full-regex-handling", "object-pooling"]
 full-domain-matching = [] # feature has no explicit dependencies
diff --git a/benches/bench_redirect_performance.rs b/benches/bench_redirect_performance.rs
new file mode 100644
index 00000000..8175174a
--- /dev/null
+++ b/benches/bench_redirect_performance.rs
@@ -0,0 +1,195 @@
+//! Benchmarks how quickly a `Blocker` resolves requests that match redirect rules.
+
+use criterion::*;
+use std::path::Path;
+use psl::Psl;
+use lazy_static::lazy_static;
+
+use adblock;
+use adblock::filters::network::{NetworkFilter, NetworkFilterMask};
+use adblock::request::Request;
+use adblock::blocker::{Blocker, BlockerOptions};
+use adblock::resource_assembler::{assemble_web_accessible_resources, assemble_scriptlet_resources};
+
+lazy_static! {
+    /// Public suffix list used to split a source hostname into its domain.
+    static ref PSL_LIST: psl::List = psl::List::new();
+}
+
+/// Gets all rules with redirects, and modifies them to apply to resources at `a{0-n}.com/bad.js`
+///
+/// Every default filter list is downloaded over the network; panics if a list cannot be
+/// requested or read as text.
+fn get_redirect_rules() -> Vec<NetworkFilter> {
+    adblock::filter_lists::default::default_lists()
+        .into_iter()
+        .flat_map(|list| {
+            let filters: Vec<String> = reqwest::get(&list.url).expect("Could not request rules")
+                .text().expect("Could not get rules as text")
+                .lines()
+                .map(|s| s.to_owned())
+                .collect();
+
+            let (network_filters, _) = adblock::lists::parse_filters(&filters, true, false, true);
+            network_filters
+        })
+        // Keep only rules whose redirect is set to something other than "none".
+        .filter(|rule| rule.redirect.as_ref().map_or(false, |redirect| redirect != "none"))
+        .enumerate()
+        .map(|(index, mut rule)| {
+            // Point the rule at its own `a{index}.com/bad.js` target, anchored on both sides.
+            rule.mask.insert(NetworkFilterMask::IS_LEFT_ANCHOR);
+            rule.mask.insert(NetworkFilterMask::IS_RIGHT_ANCHOR);
+            rule.hostname = Some(format!("a{}.com/bad.js", index));
+
+            // Discard the original pattern and the flags that described how it was matched.
+            rule.filter = adblock::filters::network::FilterPart::Empty;
+            rule.mask.remove(NetworkFilterMask::IS_HOSTNAME_ANCHOR);
+            rule.mask.remove(NetworkFilterMask::IS_HOSTNAME_REGEX);
+            rule.mask.remove(NetworkFilterMask::IS_REGEX);
+            rule.mask.remove(NetworkFilterMask::IS_COMPLETE_REGEX);
+            rule.mask.remove(NetworkFilterMask::FUZZY_MATCH);
+
+            rule
+        })
+        .collect()
+}
+
+/// Loads the supplied rules, and the test set of resources, into a Blocker
+fn get_preloaded_blocker(rules: Vec<NetworkFilter>) -> Blocker {
+    let blocker_options = BlockerOptions {
+        debug: false,
+        enable_optimizations: true,
+        load_cosmetic_filters: false,
+        load_network_filters: true
+    };
+
+    let mut blocker = Blocker::new(rules, &blocker_options);
+
+    let mut resources = assemble_web_accessible_resources(
+        Path::new("data/test/fake-uBO-files/web_accessible_resources"),
+        Path::new("data/test/fake-uBO-files/redirect-engine.js")
+    );
+    resources.append(&mut assemble_scriptlet_resources(
+        Path::new("data/test/fake-uBO-files/scriptlets.js"),
+    ));
+
+    blocker.with_resources(&resources);
+
+    blocker
+}
+
+/// Maps network filter rules into `Request`s that would trigger those rules
+///
+/// Panics if a rule has no `hostname` containing a `/`, no `raw_line`, or none of the
+/// `FROM_*` mask bits tested below.
+pub fn build_custom_requests(rules: Vec<NetworkFilter>) -> Vec<Request> {
+    rules.iter().map(|rule| {
+        let raw_type = if rule.mask.contains(NetworkFilterMask::FROM_IMAGE) {
+            "image"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_MEDIA) {
+            "media"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_OBJECT) {
+            "object"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_OTHER) {
+            "other"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_PING) {
+            "ping"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_SCRIPT) {
+            "script"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_STYLESHEET) {
+            "stylesheet"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_SUBDOCUMENT) {
+            "subdocument"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_DOCUMENT) {
+            "main_frame"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_XMLHTTPREQUEST) {
+            "xhr"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {
+            "websocket"
+        } else if rule.mask.contains(NetworkFilterMask::FROM_FONT) {
+            "font"
+        } else {
+            unreachable!()
+        };
+
+        let rule_hostname = rule.hostname.as_ref().expect("rule has no hostname");
+        let url = format!("https://{}", rule_hostname);
+        let path_start = rule_hostname.find('/').expect("rule hostname has no path");
+        let domain = &rule_hostname[..path_start];
+        let hostname = domain;
+
+        let raw_line = rule.raw_line.as_ref().expect("rule has no raw line");
+        let (source_hostname, source_domain) = if rule.opt_domains.is_some() {
+            // Take the first entry of the last `domain=` option as the request source.
+            let domain_start = raw_line.rfind("domain=")
+                .expect("rule has no `domain=` option") + "domain=".len();
+            let from_start = &raw_line[domain_start..];
+            // The entry ends at whichever of `|` or `,` comes first, else at the end of the line.
+            let domain_end = from_start
+                .find(|c: char| c == '|' || c == ',')
+                .unwrap_or(from_start.len()) + domain_start;
+            let source_hostname = &raw_line[domain_start..domain_end];
+
+            let suffix_len = PSL_LIST.suffix(source_hostname)
+                .expect("source hostname has no public suffix")
+                .to_str()
+                .len();
+            // Search for the last `.` in front of the suffix. `saturating_sub` keeps the slice
+            // bound valid when the hostname is nothing but a public suffix.
+            let label_end = source_hostname.len().saturating_sub(suffix_len + 1);
+            let source_domain = match source_hostname[..label_end].rfind('.') {
+                Some(separator) => &source_hostname[separator + 1..],
+                None => source_hostname,
+            };
+            (source_hostname, source_domain)
+        } else {
+            (hostname, domain)
+        };
+
+        Request::new(
+            raw_type,
+            &url,
+            "https",
+            hostname,
+            domain,
+            source_hostname,
+            source_domain,
+        )
+    }).collect::<Vec<_>>()
+}
+
+/// Checks every request against the blocker and asserts that each one yields a redirect
+fn bench_fn(blocker: &Blocker, requests: &[Request]) {
+    for request in requests {
+        let block_result = blocker.check(request);
+        assert!(block_result.redirect.is_some());
+    }
+}
+
+/// Builds the rules, blocker and matching requests once, then benchmarks `bench_fn` over them
+fn redirect_performance(c: &mut Criterion) {
+    let rules = get_redirect_rules();
+
+    let blocker = get_preloaded_blocker(rules.clone());
+    // Last use of `rules`: move it rather than cloning a second time.
+    let requests = build_custom_requests(rules);
+    let requests_len = requests.len() as u32;
+
+    c.bench(
+        "redirect_performance",
+        Benchmark::new(
+            "without_alias_lookup",
+            move |b| {
+                b.iter(|| bench_fn(&blocker, &requests))
+            },
+        ).throughput(Throughput::Elements(requests_len))
+        .sample_size(10)
+    );
+}
+
+criterion_group!(
+    benches,
+    redirect_performance,
+);
+criterion_main!(benches);