-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
02fb7f7
commit d059f29
Showing
7 changed files
with
640 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
/* eslint-disable no-console */ | ||
/* eslint-disable import/no-extraneous-dependencies */ | ||
/** | ||
* @file Configuration for the benchmarking script | ||
* | ||
* @see {@link https://github.com/stylelint/css-parser/issues/1} | ||
*/ | ||
|
||
import * as CssTree from 'css-tree'; | ||
import * as CssToolsCssTokenizer from '@csstools/css-tokenizer'; | ||
import * as CssToolsTokenizer from '@csstools/tokenizer'; | ||
// Note: this module has no types | ||
import * as parseCss from 'parse-css/parse-css'; | ||
// Note: this is an ES module, but we use esbuild to make a bundle on the fly before running the benchmark | ||
import * as cssLex from 'csslex'; | ||
|
||
// eslint-disable-next-line import/no-relative-packages | ||
import * as AdGuardCssTokenizer from '../dist/csstokenizer'; | ||
import { type Resource, type Tokenizer } from './interfaces'; | ||
|
||
/**
 * Module augmentation that declares a `tokenize` function on the `css-tree` module,
 * so that it can be called as `CssTree.tokenize(css, callback)` from this file.
 * The callback receives three numbers per call: `token`, `start` and `end`.
 */
declare module 'css-tree' {
    export function tokenize(
        css: string,
        callback: (token: number, start: number, end: number) => void,
    ): void;
}
|
||
/**
 * Inputs of the benchmark.
 *
 * Each entry is identified by a human-readable `name` and points to the `url` it should be downloaded from.
 * Entries flagged with `adblock: true` are adblock filter lists rather than plain CSS files.
 */
export const resources: Resource[] = [
    // Plain CSS files
    {
        name: 'Bootstrap CSS',
        url: 'https://cdn.jsdelivr.net/npm/bootstrap@latest/dist/css/bootstrap.css',
    },
    {
        name: 'Bulma CSS',
        url: 'https://cdn.jsdelivr.net/npm/bulma@latest/css/bulma.css',
    },

    // Adblock filter lists
    {
        name: 'AdGuard Base List',
        url: 'https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_2_Base/filter.txt',
        adblock: true,
    },
    {
        name: 'uBlock Base List',
        url: 'https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/filters/filters.txt',
        adblock: true,
    },
];
|
||
/**
 * Tokenizers to benchmark
 *
 * Every entry adapts one tokenizer library to the same shape: a `name` shown in the results
 * and a `tokenize` function that runs the library on the given CSS and returns a token count.
 */
export const tokenizers: Tokenizer[] = [
    {
        // https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/css-tokenizer
        name: '@adguard/css-tokenizer',
        tokenize: (css: string) => {
            let total = 0;
            // Callback-based API: count the callback invocations
            const onToken = () => { total += 1; };
            AdGuardCssTokenizer.tokenize(css, onToken);
            return total;
        },
    },
    {
        // https://github.com/csstree/csstree
        name: 'css-tree',
        tokenize: (css: string) => {
            let total = 0;
            // Callback-based API: count the callback invocations
            const onToken = () => { total += 1; };
            CssTree.tokenize(css, onToken);
            return total;
        },
    },
    {
        // https://github.com/csstools/tokenizer
        name: '@csstools/tokenizer',
        tokenize: (css: string) => {
            const next = CssToolsTokenizer.tokenize(css);
            // The counter starts at 1 to account for the first token
            let total = 1;
            // Keep pulling results until the tokenizer reports that it is done
            for (let step = next(); !step.done; step = next()) {
                total += 1;
            }
            return total;
        },
    },
    {
        // https://github.com/csstools/postcss-plugins/tree/main/packages/css-tokenizer
        name: '@csstools/css-tokenizer',
        tokenize: (css: string) => CssToolsCssTokenizer.tokenize({ css }).length,
    },
    {
        // https://github.com/tabatkins/parse-css
        name: 'parse-css',
        tokenize: (css: string) => {
            // This tokenizer uses console.log while consuming numbers,
            // so console.log is replaced with a no-op for the duration of the run
            const originalLog = console.log;
            console.log = () => {};

            try {
                return parseCss.tokenize(css).length;
            } finally {
                // Always restore console.log, even if the tokenizer throws
                console.log = originalLog;
            }
        },
    },
    {
        name: 'csslex',
        // The lexer output is collected into an array to obtain its length
        tokenize: (css: string) => Array.from(cssLex.lex(css)).length,
    },
];
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* eslint-disable @typescript-eslint/no-loop-func */ | ||
/* eslint-disable no-console */ | ||
/* eslint-disable import/no-extraneous-dependencies */ | ||
/** | ||
* @file Benchmarking script | ||
*/ | ||
|
||
import Benchmark from 'benchmark'; | ||
|
||
import { resources, tokenizers } from './config'; | ||
import { downloadResources } from './utils'; | ||
|
||
// Column labels of the results table
const OPT_PER_SECOND = 'ops/sec';
const RUNS_SAMPLED = 'Runs sampled';
const AVERAGE_RUNTIME = 'Average runtime';
const TOKENS = 'Tokens';

// Units, symbols and placeholders used while formatting the results
const MS = 'ms';
const PERCENT = '%';
const PLUS_MINUS = '\xb1';
const PLUS_MINUS_PERCENT = `${PLUS_MINUS}${PERCENT}`;
const N_A = 'N/A';

// Event name and filter name passed to the benchmark suite
const EVENT_COMPLETE = 'complete';
const SUCCESSFUL_FILTER = 'successful';

/**
 * Module augmentation: adds an optional `tokens` property to `Benchmark.Stats`,
 * which the benchmark uses to store the token count next to the timing statistics.
 */
declare module 'benchmark' {
    interface Stats {
        tokens?: number;
    }
}
|
||
/**
 * Entry point of the benchmark.
 *
 * Downloads every configured resource, then, for each resource, runs all configured tokenizers
 * in a separate Benchmark.js suite and prints the results as a table (fastest tokenizer first).
 */
const main = async () => {
    // Benchmark.js reports `stats.mean` in seconds, but the table shows the average runtime in milliseconds
    const MS_PER_SECOND = 1000;

    // Download the resources
    const downloadedResources = await downloadResources(resources);

    // Benchmark the resources
    for (const resource of downloadedResources) {
        console.group(`Benchmarking ${resource.name}...`);

        // Create a new benchmark suite
        // https://benchmarkjs.com/docs/#Suite
        const suite = new Benchmark.Suite();

        for (const tokenizer of tokenizers) {
            // https://benchmarkjs.com/docs/#Suite_prototype_add
            suite.add(
                tokenizer.name,
                // A regular function is needed here (not an arrow function), because `this` is used
                // eslint-disable-next-line func-names
                function () {
                    // Add tokens count to the benchmark stats, `this` is bound to the benchmark
                    // TODO: It's a bit hacky, if we find a better way to do this, we should change it
                    // eslint-disable-next-line @typescript-eslint/ban-ts-comment
                    // @ts-ignore 2683
                    this.stats.tokens = tokenizer.tokenize(resource.css);
                },
            );
        }

        suite.on(EVENT_COMPLETE, () => {
            // Get successful benchmarks
            const successful = suite.filter(SUCCESSFUL_FILTER);

            // Sort the benchmarks by performance (fastest first)
            successful.sort((a, b) => b.hz - a.hz);

            const results: Record<string, unknown> = {};

            // Iterate over the benchmarks and save the results
            successful.forEach((bench: Benchmark) => {
                // Some calculations here based on the Benchmark.js source code:
                // https://github.com/bestiejs/benchmark.js/blob/42f3b732bac3640eddb3ae5f50e445f3141016fd/benchmark.js#L1525
                const name = bench.name || (Number.isNaN(bench.id) ? `${bench.id}` : `#${bench.id}`);
                results[name] = {
                    [OPT_PER_SECOND]: bench.hz.toFixed(bench.hz < 100 ? 2 : 0),
                    [PLUS_MINUS_PERCENT]: `${PLUS_MINUS}${bench.stats.rme.toFixed(2)}${PERCENT}`,
                    [RUNS_SAMPLED]: bench.stats.sample.length,
                    // Convert seconds to milliseconds, so the value matches the printed unit
                    [AVERAGE_RUNTIME]: `${(bench.stats.mean * MS_PER_SECOND).toFixed(10)} ${MS}`,
                    // Use `??` instead of `||`, so a valid token count of 0 is not reported as N/A
                    [TOKENS]: bench.stats.tokens ?? N_A,
                };
            });

            // Show the results as a table
            console.table(results);
            console.groupEnd();
        });

        // Run the benchmark suite for the current resource
        // https://benchmarkjs.com/docs/#Suite_prototype_run
        suite.run();
    }
};

main();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/** | ||
* @file Interfaces for the benchmarking script | ||
*/ | ||
|
||
/**
 * Describes a single benchmark input: where to download it from and how to label it
 */
export interface Resource {
    /**
     * Human-readable name of the resource
     */
    name: string;

    /**
     * URL the resource can be downloaded from
     */
    url: string;

    /**
     * Set to `true` if the resource is an adblock list (and not a plain CSS file)
     */
    adblock?: boolean;

    /**
     * CSS content of the resource, if available
     */
    css?: string;
}
|
||
/**
 * Describes a single tokenizer that takes part in the benchmark
 */
export interface Tokenizer {
    /**
     * Display name of the tokenizer
     */
    name: string;

    /**
     * Runs the tokenizer on the given CSS source
     *
     * @param css CSS source to tokenize
     * @returns Number of tokens
     */
    tokenize: (css: string) => number;
}
|
||
/**
 * A resource after download: same shape as `Resource`, but the `css` property is mandatory
 */
export type DownloadedResource = Resource & Required<Pick<Resource, 'css'>>;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* eslint-disable import/no-extraneous-dependencies */ | ||
/** | ||
* @file Utils for the benchmarking script | ||
*/ | ||
|
||
import fetch from 'node-fetch'; | ||
|
||
import { type DownloadedResource, type Resource } from './interfaces'; | ||
|
||
// Text helpers
const EMPTY = '';
const LINE_FEED = '\n';
// Matches both LF and CRLF line endings
const NEW_LINE_RE = /\r?\n/;

// Adblock rule separators; the part after the separator is treated as CSS-related content
const CSS_RELATED_SEPARATORS = [
    '##',
    '#?#',
    '#@#',
    '#@?#',
    '$$',
    '$@$',
    '#$#',
    '#@$#',
];
// Rules where this marker follows the separator are ignored
const UBO_JS_MARKER = '+js(';
|
||
/**
 * Helper function to fetch a file from a URL
 *
 * @param url File URL to fetch
 * @returns File contents
 * @throws If the request fails or the server responds with an unsuccessful HTTP status
 */
export const fetchFile = async (url: string): Promise<string> => {
    const response = await fetch(url);

    // `fetch` only rejects on network-level failures. HTTP errors (404, 500, etc.) need to be
    // checked explicitly, otherwise the body of an error page would be benchmarked as if it was CSS
    if (!response.ok) {
        throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
    }

    return response.text();
};
|
||
/**
 * Collects the CSS-related parts of an adblock filter list into a single string, which can be used
 * as an input for benchmarking the CSS tokenizers.
 *
 * For each line, the separators are probed in their declaration order. The first separator that occurs
 * in the line and is not followed (anywhere later in the line) by the uBO scriptlet marker wins, and
 * everything after it is appended to the output, terminated by a line feed. Lines without such
 * a separator are skipped.
 *
 * @param content Filter list content
 * @returns CSS parts from the filter list
 * @note This is a simple heuristic, not a real adblock rule parser, but it is good enough to generate
 * data for the benchmark
 */
export const getCssFromAdblockList = (content: string) => {
    const chunks: string[] = [];

    content.split(NEW_LINE_RE).forEach((rule) => {
        // Pick the first usable separator: it should be present, and not followed by +js(
        const matched = CSS_RELATED_SEPARATORS.find((candidate) => {
            const position = rule.indexOf(candidate);
            return position !== -1 && !rule.includes(UBO_JS_MARKER, position + candidate.length);
        });

        if (matched === undefined) {
            return;
        }

        // Keep only the "right side" of the rule
        const bodyStart = rule.indexOf(matched) + matched.length;
        chunks.push(rule.slice(bodyStart) + LINE_FEED);
    });

    return chunks.join(EMPTY);
};
|
||
/**
 * Downloads the given resources one by one, in the given order.
 *
 * For adblock lists, only the CSS-related parts are kept (see `getCssFromAdblockList`),
 * for any other resource, the downloaded content is used as is.
 *
 * @param resources Resources to download
 * @returns Downloaded resources, i.e. the input resources extended with their CSS content
 */
export const downloadResources = async (resources: Resource[]): Promise<DownloadedResource[]> => {
    const result: DownloadedResource[] = [];

    for (const resource of resources) {
        const body = await fetchFile(resource.url);
        const css = resource.adblock ? getCssFromAdblockList(body) : body;

        result.push({ ...resource, css });

        // eslint-disable-next-line no-console
        console.log(`Downloaded ${resource.name}`);
    }

    return result;
};
Oops, something went wrong.