Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate to web_accessible_resources and scriptlets.js #50

Merged
merged 5 commits into from
Oct 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ criterion = "0.2"
csv = "1"
serde_json = "1.0"
reqwest = "0.9"
psl = "0.4.1"

[lib]
bench = false
Expand All @@ -61,6 +62,10 @@ harness = false
name = "bench_rules"
harness = false

[[bench]]
name = "bench_redirect_performance"
harness = false

[features]
default = ["full-regex-handling", "object-pooling"]
full-domain-matching = [] # feature has no explicit dependencies
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const AdBlockClient = require('adblock-rs');
let el_rules = fs.readFileSync('./data/easylist.to/easylist/easylist.txt', { encoding: 'utf-8' }).split('\n');
let ubo_unbreak_rules = fs.readFileSync('./data/uBlockOrigin/unbreak.txt', { encoding: 'utf-8' }).split('\n');
let rules = el_rules.concat(ubo_unbreak_rules);
let resources = fs.readFileSync('./data/uBlockOrigin/resources.txt', { encoding: 'utf-8' });
let resources = AdBlockClient.uBlockResources('uBlockOrigin/src/web_accessible_resources', 'uBlockOrigin/src/js/redirect-engine.js', 'uBlockOrigin/assets/resources/scriptlets.js');

// create client with debug = true
const client = new AdBlockClient.Engine(rules, true);
Expand Down
179 changes: 179 additions & 0 deletions benches/bench_redirect_performance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
use criterion::*;
use std::path::Path;
use psl::Psl;
use lazy_static::lazy_static;

use adblock;
use adblock::filters::network::{NetworkFilter, NetworkFilterMask};
use adblock::request::Request;
use adblock::blocker::{Blocker, BlockerOptions};
use adblock::resource_assembler::{assemble_web_accessible_resources, assemble_scriptlet_resources};

// Shared `psl::List` instance, constructed lazily on first access by `lazy_static!`.
// `build_custom_requests` queries it (via the `Psl` trait) to find the suffix of a rule's source
// hostname, from which the source domain is derived.
lazy_static! {
static ref PSL_LIST: psl::List = psl::List::new();
}

/// Collects every redirect rule from the default filter lists and rewrites each one so that it
/// applies to a unique synthetic resource, `a{index}.com/bad.js`.
///
/// Each default list is downloaded and parsed; only network filters carrying a redirect other
/// than `none` are kept. Every surviving rule is then turned into a plain, fully anchored match
/// on its synthetic hostname by clearing its filter part and its hostname-anchor, regex and
/// fuzzy-match flags.
///
/// # Panics
///
/// Panics if a list cannot be requested or its response cannot be read as text.
fn get_redirect_rules() -> Vec<NetworkFilter> {
    let redirect_rules = adblock::filter_lists::default::default_lists()
        .into_iter()
        .flat_map(|list| {
            let body = reqwest::get(&list.url)
                .expect("Could not request rules")
                .text()
                .expect("Could not get rules as text");
            let filters: Vec<String> = body.lines().map(String::from).collect();

            let (network_filters, _) = adblock::lists::parse_filters(&filters, true, false, true);
            network_filters
        })
        .filter(|rule| match rule.redirect {
            Some(ref target) => target != "none",
            None => false,
        });

    let mut rewritten = Vec::new();
    for (index, mut rule) in redirect_rules.enumerate() {
        // Drop whatever pattern the rule originally carried...
        rule.filter = adblock::filters::network::FilterPart::Empty;
        rule.mask.remove(NetworkFilterMask::IS_HOSTNAME_ANCHOR);
        rule.mask.remove(NetworkFilterMask::IS_HOSTNAME_REGEX);
        rule.mask.remove(NetworkFilterMask::IS_REGEX);
        rule.mask.remove(NetworkFilterMask::IS_COMPLETE_REGEX);
        rule.mask.remove(NetworkFilterMask::FUZZY_MATCH);

        // ...and replace it with an anchored match on a per-rule synthetic resource.
        rule.mask.insert(NetworkFilterMask::IS_LEFT_ANCHOR);
        rule.mask.insert(NetworkFilterMask::IS_RIGHT_ANCHOR);
        rule.hostname = Some(format!("a{}.com/bad.js", index));

        rewritten.push(rule);
    }
    rewritten
}

/// Builds a `Blocker` from `rules` and loads the test resource fixtures into it.
///
/// The blocker is created with optimizations enabled, network filters loaded, and both debug
/// mode and cosmetic filters turned off. Resources are assembled from the fake uBlock Origin
/// files under `data/test/fake-uBO-files` (web accessible resources plus scriptlets).
fn get_preloaded_blocker(rules: Vec<NetworkFilter>) -> Blocker {
    let options = BlockerOptions {
        load_network_filters: true,
        load_cosmetic_filters: false,
        enable_optimizations: true,
        debug: false,
    };
    let mut blocker = Blocker::new(rules, &options);

    let web_accessible_dir = Path::new("data/test/fake-uBO-files/web_accessible_resources");
    let redirect_engine = Path::new("data/test/fake-uBO-files/redirect-engine.js");
    let mut resources = assemble_web_accessible_resources(web_accessible_dir, redirect_engine);

    let scriptlets_file = Path::new("data/test/fake-uBO-files/scriptlets.js");
    let mut scriptlets = assemble_scriptlet_resources(scriptlets_file);
    resources.append(&mut scriptlets);

    blocker.with_resources(&resources);

    blocker
}

/// Returns the request type string matching the first `FROM_*` flag set on `rule`.
///
/// Flags are probed in a fixed order, so a rule carrying several type flags maps to the first
/// one listed here. Panics (via `unreachable!`) if none of the probed flags is set.
fn request_type_for(rule: &NetworkFilter) -> &'static str {
    if rule.mask.contains(NetworkFilterMask::FROM_IMAGE) {
        "image"
    } else if rule.mask.contains(NetworkFilterMask::FROM_MEDIA) {
        "media"
    } else if rule.mask.contains(NetworkFilterMask::FROM_OBJECT) {
        "object"
    } else if rule.mask.contains(NetworkFilterMask::FROM_OTHER) {
        "other"
    } else if rule.mask.contains(NetworkFilterMask::FROM_PING) {
        "ping"
    } else if rule.mask.contains(NetworkFilterMask::FROM_SCRIPT) {
        "script"
    } else if rule.mask.contains(NetworkFilterMask::FROM_STYLESHEET) {
        "stylesheet"
    } else if rule.mask.contains(NetworkFilterMask::FROM_SUBDOCUMENT) {
        "subdocument"
    } else if rule.mask.contains(NetworkFilterMask::FROM_DOCUMENT) {
        "main_frame"
    } else if rule.mask.contains(NetworkFilterMask::FROM_XMLHTTPREQUEST) {
        "xhr"
    } else if rule.mask.contains(NetworkFilterMask::FROM_WEBSOCKET) {
        "websocket"
    } else if rule.mask.contains(NetworkFilterMask::FROM_FONT) {
        "font"
    } else {
        unreachable!()
    }
}

/// Returns the first entry of the last `domain=` option found in `raw_line`.
///
/// The entry ends at whichever of `|` (next domain in the list) or `,` (next option) comes
/// first, or at the end of the line when neither is present.
///
/// Panics if `raw_line` contains no `domain=` option.
fn first_domain_option_entry(raw_line: &str) -> &str {
    let start = raw_line
        .rfind("domain=")
        .expect("rule with opt_domains should have a `domain=` option in its raw line")
        + "domain=".len();
    let tail = &raw_line[start..];
    // Stop at the *earliest* delimiter, so a `|` that only appears after the option has already
    // been closed by a `,` cannot extend the entry.
    let end = tail
        .find(|c: char| c == '|' || c == ',')
        .unwrap_or(tail.len());
    &tail[..end]
}

/// Returns `hostname` reduced to `suffix` plus the single label in front of it.
///
/// `suffix` is expected to be a trailing part of `hostname`. When there is no further label in
/// front of the one preceding the suffix, or when `hostname` is not longer than the suffix (for
/// instance because the hostname is the suffix itself), `hostname` is returned unchanged.
fn domain_for_suffix<'a>(hostname: &'a str, suffix: &str) -> &'a str {
    // Length of `hostname` once ".<suffix>" is removed. `checked_sub` guards against underflow
    // when the hostname is not longer than the suffix.
    let prefix_len = match hostname.len().checked_sub(suffix.len() + 1) {
        Some(len) => len,
        None => return hostname,
    };
    match hostname[..prefix_len].rfind('.') {
        Some(dot) => &hostname[dot + 1..],
        None => hostname,
    }
}

/// Maps network filter rules into `Request`s that would trigger those rules.
///
/// For each rule:
/// - the request type is derived from the rule's `FROM_*` mask flags;
/// - the URL is `https://` followed by the rule's hostname, and the request hostname/domain is
///   the part of that hostname before the first `/`;
/// - when the rule has `opt_domains`, the source hostname is the first entry of the `domain=`
///   option in the rule's raw line and the source domain is derived from it with `PSL_LIST`;
///   otherwise the request is treated as first-party (source equals the request hostname).
///
/// # Panics
///
/// Panics if a rule has no hostname, if its hostname contains no `/`, if it sets none of the
/// probed request-type flags, or, for rules with `opt_domains`, if the raw line is missing, has
/// no `domain=` option, or the suffix lookup yields nothing.
pub fn build_custom_requests(rules: Vec<NetworkFilter>) -> Vec<Request> {
    rules
        .iter()
        .map(|rule| {
            let raw_type = request_type_for(rule);

            // Borrow instead of cloning: the hostname is only read.
            let rule_hostname = rule
                .hostname
                .as_ref()
                .expect("rule should have a hostname");
            let url = format!("https://{}", rule_hostname);
            let path_start = rule_hostname
                .find('/')
                .expect("rule hostname should contain a path");
            let domain = &rule_hostname[..path_start];
            let hostname = domain;

            let (source_hostname, source_domain) = if rule.opt_domains.is_some() {
                // The raw line is only needed to recover the textual `domain=` option.
                let raw_line = rule
                    .raw_line
                    .as_ref()
                    .expect("rule with opt_domains should retain its raw line");
                let source_hostname = first_domain_option_entry(raw_line);

                let suffix = PSL_LIST
                    .suffix(source_hostname)
                    .expect("source hostname should have a known suffix");
                let source_domain = domain_for_suffix(source_hostname, suffix.to_str());
                (source_hostname, source_domain)
            } else {
                (hostname, domain)
            };

            Request::new(
                raw_type,
                &url,
                "https",
                hostname,
                domain,
                source_hostname,
                source_domain,
            )
        })
        .collect::<Vec<_>>()
}

/// Runs every request through `blocker` and asserts that each check produces a redirect.
///
/// This is the body measured by the benchmark.
fn bench_fn(blocker: &Blocker, requests: &[Request]) {
    for request in requests {
        let outcome = blocker.check(request);
        assert!(outcome.redirect.is_some());
    }
}

/// Criterion benchmark measuring how fast the blocker resolves redirects.
///
/// Setup (outside the measured closure): fetch the redirect rules, load them together with the
/// test resources into a `Blocker`, and build one matching `Request` per rule. The measured
/// closure then checks every request; throughput is reported in elements (requests) per
/// iteration, using a sample size of 10.
fn redirect_performance(c: &mut Criterion) {
    let rules = get_redirect_rules();

    let blocker = get_preloaded_blocker(rules.clone());
    // Last use of `rules`: hand over ownership rather than cloning the whole vector again.
    let requests = build_custom_requests(rules);
    let requests_len = requests.len() as u32;

    c.bench(
        "redirect_performance",
        Benchmark::new(
            "without_alias_lookup",
            move |b| {
                b.iter(|| bench_fn(&blocker, &requests))
            },
        ).throughput(Throughput::Elements(requests_len))
        .sample_size(10)
    );
}

// Register `redirect_performance` in the `benches` group and let criterion generate the
// benchmark entry point for that group.
criterion_group!(
benches,
redirect_performance,
);
criterion_main!(benches);
Loading