Skip to content

Commit

Permalink
CSS Tokenizer benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
scripthunter7 committed Oct 19, 2023
1 parent 02fb7f7 commit d059f29
Show file tree
Hide file tree
Showing 7 changed files with 640 additions and 0 deletions.
118 changes: 118 additions & 0 deletions packages/css-tokenizer/benchmark/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/* eslint-disable no-console */
/* eslint-disable import/no-extraneous-dependencies */
/**
* @file Configuration for the benchmarking script
*
* @see {@link https://github.com/stylelint/css-parser/issues/1}
*/

import * as CssTree from 'css-tree';
import * as CssToolsCssTokenizer from '@csstools/css-tokenizer';
import * as CssToolsTokenizer from '@csstools/tokenizer';
// Note: this module has no types
import * as parseCss from 'parse-css/parse-css';
// Note: this is an ES module, but we use esbuild to make a bundle on the fly before running the benchmark
import * as cssLex from 'csslex';

// eslint-disable-next-line import/no-relative-packages
import * as AdGuardCssTokenizer from '../dist/csstokenizer';
import { type Resource, type Tokenizer } from './interfaces';

// Add `tokenize` function to the `CssTree` module
// NOTE(review): this augmentation suggests the css-tree typings in use do not
// declare `tokenize`; confirm before removing it
declare module 'css-tree' {
/**
* Tokenizes the given CSS source, reporting tokens through `callback`.
*
* @param css CSS source to tokenize
* @param callback Function invoked with three numbers per call; presumably the
* token type and the start / end offsets of the token within `css` (TODO: confirm
* against the css-tree documentation)
*/
export function tokenize(css: string, callback: (token: number, start: number, end: number) => void): void;
}

/**
 * Creates a resource entry for a plain CSS file.
 *
 * @param name Display name of the resource
 * @param url URL the resource is downloaded from
 * @returns Resource entry without the `adblock` flag
 */
const plainCssResource = (name: string, url: string): Resource => ({ name, url });

/**
 * Creates a resource entry for an adblock filter list.
 *
 * @param name Display name of the resource
 * @param url URL the resource is downloaded from
 * @returns Resource entry flagged with `adblock: true`
 */
const adblockListResource = (name: string, url: string): Resource => ({ name, url, adblock: true });

/**
 * Resources to benchmark: two CSS frameworks and two adblock filter lists
 */
export const resources: Resource[] = [
    plainCssResource(
        'Bootstrap CSS',
        'https://cdn.jsdelivr.net/npm/bootstrap@latest/dist/css/bootstrap.css',
    ),
    plainCssResource(
        'Bulma CSS',
        'https://cdn.jsdelivr.net/npm/bulma@latest/css/bulma.css',
    ),
    adblockListResource(
        'AdGuard Base List',
        'https://raw.githubusercontent.com/AdguardTeam/FiltersRegistry/master/filters/filter_2_Base/filter.txt',
    ),
    adblockListResource(
        'uBlock Base List',
        'https://raw.githubusercontent.com/uBlockOrigin/uAssets/master/filters/filters.txt',
    ),
];

/**
 * Tokenizers to benchmark.
 *
 * Each entry adapts one library to the common `(css) => token count` shape,
 * so the benchmark can run all of them against the same input.
 */
export const tokenizers: Tokenizer[] = [
    {
        // https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/css-tokenizer
        name: '@adguard/css-tokenizer',
        tokenize: (css: string) => {
            let total = 0;
            const onToken = () => { total += 1; };
            AdGuardCssTokenizer.tokenize(css, onToken);
            return total;
        },
    },
    {
        // https://github.com/csstree/csstree
        name: 'css-tree',
        tokenize: (css: string) => {
            let total = 0;
            const onToken = () => { total += 1; };
            CssTree.tokenize(css, onToken);
            return total;
        },
    },
    {
        // https://github.com/csstools/tokenizer
        name: '@csstools/tokenizer',
        tokenize: (css: string) => {
            // The counter is seeded with one ("first token"), then incremented
            // for every step that is not reported as done
            let total = 1;
            const next = CssToolsTokenizer.tokenize(css);
            let step = next();
            while (!step.done) {
                total += 1;
                step = next();
            }
            return total;
        },
    },
    {
        // https://github.com/csstools/postcss-plugins/tree/main/packages/css-tokenizer
        name: '@csstools/css-tokenizer',
        tokenize: (css: string) => {
            const tokens = CssToolsCssTokenizer.tokenize({ css });
            return tokens.length;
        },
    },
    {
        // https://github.com/tabatkins/parse-css
        name: 'parse-css',
        tokenize: (css: string) => {
            // This tokenizer uses console.log while consuming numbers, so the
            // logger is silenced for the duration of the call
            const originalLog = console.log;
            console.log = () => {};

            try {
                return parseCss.tokenize(css).length;
            } finally {
                // Always restore console.log, even if the tokenizer throws
                console.log = originalLog;
            }
        },
    },
    {
        name: 'csslex',
        tokenize: (css: string) => {
            const tokens = Array.from(cssLex.lex(css));
            return tokens.length;
        },
    },
];
94 changes: 94 additions & 0 deletions packages/css-tokenizer/benchmark/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/* eslint-disable @typescript-eslint/no-loop-func */
/* eslint-disable no-console */
/* eslint-disable import/no-extraneous-dependencies */
/**
* @file Benchmarking script
*/

import Benchmark from 'benchmark';

import { resources, tokenizers } from './config';
import { downloadResources } from './utils';

// Column headers and unit labels used when building the results table in `main`
const AVERAGE_RUNTIME = 'Average runtime';
const MS = 'ms';
// Placeholder shown in the "Tokens" column when no token count is available
const N_A = 'N/A';
const OPT_PER_SECOND = 'ops/sec';
const PERCENT = '%';
// Plus-minus sign (U+00B1)
const PLUS_MINUS = '\xb1';
const PLUS_MINUS_PERCENT = PLUS_MINUS + PERCENT;
const RUNS_SAMPLED = 'Runs sampled';
const TOKENS = 'Tokens';

// Name of the suite event that `main` subscribes to via `suite.on`
const EVENT_COMPLETE = 'complete';
// Filter name that `main` passes to `suite.filter`
const SUCCESSFUL_FILTER = 'successful';

// Extend the Benchmark.Stats interface with a new property
// (`main` stores each tokenizer's token count on the stats and prints it in the results table)
declare module 'benchmark' {
interface Stats {
/**
* Token count returned by the benchmarked tokenizer, if it was recorded
*/
tokens?: number;
}
}

/**
 * Entry point of the benchmark.
 *
 * Downloads every configured resource, then, for each resource, runs all
 * configured tokenizers in a separate Benchmark.js suite and prints the
 * results as a table (fastest tokenizer first).
 *
 * @returns Promise that resolves once `suite.run()` has been called for every resource
 * @throws If downloading the resources fails
 */
const main = async () => {
    // Download the resources
    const downloadedResources = await downloadResources(resources);

    // Benchmark the resources
    for (const resource of downloadedResources) {
        console.group(`Benchmarking ${resource.name}...`);

        // Create a new benchmark suite
        // https://benchmarkjs.com/docs/#Suite
        const suite = new Benchmark.Suite();

        for (const tokenizer of tokenizers) {
            // https://benchmarkjs.com/docs/#Suite_prototype_add
            suite.add(
                tokenizer.name,
                // A regular function (not an arrow function) is needed here, because the
                // token count is stored on the benchmark that the function is bound to.
                // The `this` parameter is a type-only annotation and is erased at runtime.
                // TODO: It's a bit hacky, if we find a better way to do this, we should change it
                // eslint-disable-next-line func-names
                function (this: Benchmark) {
                    this.stats.tokens = tokenizer.tokenize(resource.css);
                },
            );
        }

        suite.on(EVENT_COMPLETE, () => {
            // Get successful benchmarks
            const successful = suite.filter(SUCCESSFUL_FILTER);

            // Sort the benchmarks by performance (fastest first)
            successful.sort((a, b) => b.hz - a.hz);

            const results: Record<string, unknown> = {};

            // Iterate over the benchmarks and save the results
            successful.forEach((bench: Benchmark) => {
                // Some calculations here based on the Benchmark.js source code:
                // https://github.com/bestiejs/benchmark.js/blob/42f3b732bac3640eddb3ae5f50e445f3141016fd/benchmark.js#L1525
                const name = bench.name || (Number.isNaN(bench.id) ? `${bench.id}` : `#${bench.id}`);

                // Benchmark.js reports `stats.mean` in seconds, so it has to be
                // converted before it is printed with the "ms" unit label
                const meanMs = bench.stats.mean * 1000;

                results[name] = {
                    [OPT_PER_SECOND]: bench.hz.toFixed(bench.hz < 100 ? 2 : 0),
                    [PLUS_MINUS_PERCENT]: `${PLUS_MINUS}${bench.stats.rme.toFixed(2)}${PERCENT}`,
                    [RUNS_SAMPLED]: bench.stats.sample.length,
                    [AVERAGE_RUNTIME]: `${meanMs.toFixed(4)} ${MS}`,
                    // `??` keeps a legitimate count of 0 instead of turning it into "N/A"
                    [TOKENS]: bench.stats.tokens ?? N_A,
                };
            });

            // Show the results as a table
            console.table(results);
            console.groupEnd();
        });

        // Run the benchmark suite for the current resource
        // https://benchmarkjs.com/docs/#Suite_prototype_run
        suite.run();
    }
};

// Handle a rejected run explicitly (e.g. a resource that cannot be downloaded):
// report the error and signal the failure through the process exit code
// instead of leaving the promise floating
main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
});
51 changes: 51 additions & 0 deletions packages/css-tokenizer/benchmark/interfaces.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
* @file Interfaces for the benchmarking script
*/

/**
 * Describes a single input the tokenizers are benchmarked against
 */
export interface Resource {
    /**
     * Human-readable name of the resource
     */
    name: string;

    /**
     * URL the resource is located at
     */
    url: string;

    /**
     * Set to `true` when the resource is an adblock list rather than a plain CSS file
     */
    adblock?: boolean;

    /**
     * CSS content of the resource, if available
     */
    css?: string;
}

/**
 * Describes a tokenizer that takes part in the benchmark
 */
export interface Tokenizer {
    /**
     * Tokenizes the given CSS and reports how many tokens were produced
     *
     * @param css CSS source to tokenize
     * @returns Number of tokens
     */
    tokenize: (css: string) => number;

    /**
     * Display name of the tokenizer
     */
    name: string;
}

/**
 * A resource after it has been downloaded: identical to `Resource`,
 * except that the `css` content is mandatory instead of optional
 */
export type DownloadedResource = Resource & Required<Pick<Resource, 'css'>>;
93 changes: 93 additions & 0 deletions packages/css-tokenizer/benchmark/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/* eslint-disable import/no-extraneous-dependencies */
/**
* @file Utils for the benchmarking script
*/

import fetch from 'node-fetch';

import { type DownloadedResource, type Resource } from './interfaces';

// Separators that `getCssFromAdblockList` looks for in each filter list line; the text after
// the separator is treated as CSS
// NOTE(review): these look like the cosmetic / HTML filtering / CSS injection rule markers
// (and their exception variants) of the adblock syntax - confirm against the syntax docs
const CSS_RELATED_SEPARATORS = ['##', '#?#', '#@#', '#@?#', '$$', '$@$', '#$#', '#@$#'];
const EMPTY = '';
const LINE_FEED = '\n';
// Splits text into lines on LF or CRLF
const NEW_LINE_RE = /\r?\n/;
// Text that marks a rule body to be ignored when found after a separator
// (the "UBO" prefix suggests uBlock Origin scriptlet syntax - confirm)
const UBO_JS_MARKER = '+js(';

/**
 * Helper function to fetch a file from a URL
 *
 * @param url File URL to fetch
 * @returns File contents
 * @throws If the request fails or the server responds with a non-successful (non-2xx) status
 */
export const fetchFile = async (url: string): Promise<string> => {
    const response = await fetch(url);

    // `fetch` only rejects on network-level failures. Without this check, the body of
    // an error response (e.g. a 404 page) would be returned and benchmarked as CSS.
    if (!response.ok) {
        throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
    }

    return response.text();
};

/**
 * A very simple function that takes the "right side" of the CSS-related filtering rules from an
 * Adblock list and concatenates them into a single string to make it easier to benchmark the
 * performance of the CSS tokenizer.
 *
 * For every line, the CSS-related separator that occurs first in the line is located, and
 * everything after it is appended to the result, followed by a line feed. Lines without such
 * a separator, and lines where the separator is directly followed by `+js(`, are skipped.
 *
 * @param content Filter list content
 * @returns CSS parts from the filter list, one per line (empty string if there are none)
 * @note Not too accurate, not too fast, but good enough to generate data for the benchmark
 */
export const getCssFromAdblockList = (content: string): string => {
    const cssParts: string[] = [];

    for (const line of content.split(NEW_LINE_RE)) {
        // Pick the separator by its position in the line, not by its position in the
        // separator list. Otherwise a separator-like sequence inside the rule body
        // (e.g. `#?#a[href*="##x"]`) could be mistaken for the real separator.
        let separatorStart = Infinity;
        let bodyStart = -1;

        for (const separator of CSS_RELATED_SEPARATORS) {
            const index = line.indexOf(separator);

            if (index !== -1 && index < separatorStart) {
                separatorStart = index;
                bodyStart = index + separator.length;
            }
        }

        // Skip lines without a CSS-related separator, and lines where the separator is
        // directly followed by `+js(`. A `+js(` that merely appears somewhere inside the
        // rule body does not cause the line to be skipped.
        if (bodyStart === -1 || line.startsWith(UBO_JS_MARKER, bodyStart)) {
            continue;
        }

        cssParts.push(line.slice(bodyStart), LINE_FEED);
    }

    return cssParts.join(EMPTY);
};

/**
 * Helper function to download resources from the web
 *
 * The downloads are independent of each other, so they are started together and awaited as
 * a group. The returned array keeps the order of `resources`; only the order of the
 * "Downloaded ..." log messages depends on which download finishes first.
 *
 * @param resources Resources to download
 * @returns Downloaded resources, in the same order as `resources`
 * @throws If any of the resources could not be fetched
 */
export const downloadResources = async (resources: Resource[]): Promise<DownloadedResource[]> => {
    return Promise.all(
        resources.map(async (resource): Promise<DownloadedResource> => {
            const content = await fetchFile(resource.url);

            // Adblock lists are not plain CSS, only the CSS parts of their rules are kept
            const css = resource.adblock ? getCssFromAdblockList(content) : content;

            // eslint-disable-next-line no-console
            console.log(`Downloaded ${resource.name}`);

            return { ...resource, css };
        }),
    );
};
Loading

0 comments on commit d059f29

Please sign in to comment.