Skip to content

Commit

Permalink
AG-36228 Optimized performance of parsing uBlock filter parameters
Browse files Browse the repository at this point in the history
Merge in ADGUARD-FILTERS/tsurlfilter from fix/AG-36228 to master

Squashed commit of the following:

commit 23c9ac9
Author: Slava Leleka <v.leleka@adguard.com>
Date:   Fri Oct 18 13:49:12 2024 +0300

    Revert "fix names"

    This reverts commit ec3e324.

commit ec3e324
Author: Slava Leleka <v.leleka@adguard.com>
Date:   Fri Oct 18 13:48:00 2024 +0300

    fix names

commit 11898ac
Author: Kurbanali Ruslan <r.kurbanali@adguard.com>
Date:   Thu Oct 17 14:08:46 2024 +0500

    added changelog

commit dfb2481
Author: Kurbanali Ruslan <r.kurbanali@adguard.com>
Date:   Thu Oct 17 13:42:54 2024 +0500

    added test cases

commit ef598e0
Author: Kurbanali Ruslan <r.kurbanali@adguard.com>
Date:   Thu Oct 17 13:42:48 2024 +0500

    fixed slow code

commit d041197
Author: Kurbanali Ruslan <r.kurbanali@adguard.com>
Date:   Thu Oct 17 13:42:29 2024 +0500

    added params to string utils
  • Loading branch information
kurrx committed Oct 18, 2024
1 parent d00c0cc commit e780b38
Show file tree
Hide file tree
Showing 4 changed files with 391 additions and 15 deletions.
8 changes: 8 additions & 0 deletions packages/agtree/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ The format is based on [Keep a Changelog][keepachangelog], and this project adhe
[keepachangelog]: https://keepachangelog.com/en/1.0.0/
[semver]: https://semver.org/spec/v2.0.0.html

## Unreleased

### Fixed

- Optimized performance of parsing uBlock filter parameters [AdguardBrowserExtension#2962].

[AdguardBrowserExtension#2962]: https://github.com/AdguardTeam/AdguardBrowserExtension/issues/2962

## [2.1.2] - 2024-09-19

### Fixed
Expand Down
55 changes: 43 additions & 12 deletions packages/agtree/src/parser/misc/ubo-parameter-list.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* eslint-disable no-param-reassign */
import { StringUtils } from '../../utils/string';
import { type ParameterList } from '../common';
import { COMMA } from '../../utils/constants';
import { COMMA, ESCAPE_CHARACTER } from '../../utils/constants';
import { defaultParserOptions } from '../options';
import { ValueParser } from './value';
import { AdblockSyntaxError } from '../../errors/adblock-syntax-error';
Expand Down Expand Up @@ -61,13 +61,6 @@ export class UboParameterListParser extends ParameterListParser {
const nextSeparatorIndex = StringUtils.skipWS(raw, possibleClosingQuoteIndex + 1);

if (nextSeparatorIndex === length) {
if (requireQuotes) {
throw new AdblockSyntaxError(
'Expected separator, got end of string',
baseOffset + nextSeparatorIndex,
baseOffset + length,
);
}
// If the separator is not found, the param end is the end of the string
paramEnd = StringUtils.skipWSBack(raw, length - 1) + 1;
offset = length;
Expand All @@ -83,13 +76,51 @@ export class UboParameterListParser extends ParameterListParser {
baseOffset + length,
);
}
// Param end should be the last separator before the quote
offset = StringUtils.findNextUnescapedCharacterBackwards(

/**
* At this point, the found `possibleClosingQuoteIndex` is wrong
* | is `offset`
* ~ is `possibleClosingQuoteIndex`
* ^ is `nextSeparatorIndex`
*
* Example 1: "abc, ').cba='1'"
* | ~^
* Example 2: "abc, ').cba, '1'"
* | ~^
* Example 3: "abc, ').cba='1', cba"
* | ~^
*
* Search for separator before `possibleClosingQuoteIndex`
*/

const separatorIndexBeforeQuote = StringUtils.findNextUnescapedCharacterBackwards(
raw,
separator,
possibleClosingQuoteIndex,
) + 1;
paramEnd = StringUtils.skipWSBack(raw, offset - 2) + 1;
ESCAPE_CHARACTER,
offset + 1,
);
if (separatorIndexBeforeQuote !== -1) {
// Found separator before (Example 2)
paramEnd = StringUtils.skipWSBack(raw, separatorIndexBeforeQuote - 1) + 1;
offset = separatorIndexBeforeQuote + 1;
} else {
// Didn't find a separator before the quote, search after it
const separatorIndexAfterQuote = StringUtils.findNextUnescapedCharacter(
raw,
separator,
possibleClosingQuoteIndex,
);
if (separatorIndexAfterQuote !== -1) {
// We found separator after (Example 3)
paramEnd = StringUtils.skipWSBack(raw, separatorIndexAfterQuote - 1) + 1;
offset = separatorIndexAfterQuote + 1;
} else {
// If the separator is not found, the param end is the end of the string (Example 1)
paramEnd = StringUtils.skipWSBack(raw, length - 1) + 1;
offset = length;
}
}
}
} else {
if (requireQuotes) {
Expand Down
8 changes: 6 additions & 2 deletions packages/agtree/src/utils/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,17 @@ export class StringUtils {
* @param searchedCharacter - Searched character
* @param start - Start index
* @param escapeCharacter - Escape character, \ by default
* @param end - End index (excluded)
* @returns Index or -1 if the character not found
*/
public static findNextUnescapedCharacter(
pattern: string,
searchedCharacter: string,
start = 0,
escapeCharacter: string = ESCAPE_CHARACTER,
end = pattern.length,
): number {
for (let i = start; i < pattern.length; i += 1) {
for (let i = start; i < end; i += 1) {
// The searched character cannot be preceded by an escape
if (pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) {
return i;
Expand All @@ -59,15 +61,17 @@ export class StringUtils {
* @param searchedCharacter - Searched character
* @param start - Start index
* @param escapeCharacter - Escape character, \ by default
* @param end - End index (included)
* @returns Index or -1 if the character not found
*/
public static findNextUnescapedCharacterBackwards(
pattern: string,
searchedCharacter: string,
start = pattern.length - 1,
escapeCharacter: string = ESCAPE_CHARACTER,
end = 0,
): number {
for (let i = start; i >= 0; i -= 1) {
for (let i = start; i >= end; i -= 1) {
// The searched character cannot be preceded by an escape
if (pattern[i] === searchedCharacter && pattern[i - 1] !== escapeCharacter) {
return i;
Expand Down
Loading

0 comments on commit e780b38

Please sign in to comment.