Skip to content

Commit

Permalink
fix: speed up word look up. (#5984)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S authored Jul 26, 2024
1 parent c35c2ef commit 3052dbc
Show file tree
Hide file tree
Showing 18 changed files with 437 additions and 273 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import type { SuggestionCollector, SuggestionResult } from 'cspell-trie-lib';
import { CASE_INSENSITIVE_PREFIX, CompoundWordsMethod } from 'cspell-trie-lib';
import { genSequence } from 'gensequence';

import { isDefined } from '../util/util.js';
import * as Defaults from './defaults.js';
Expand Down Expand Up @@ -138,7 +137,7 @@ function isWordInAnyDictionary(
word: string,
options: SearchOptions,
): SpellingDictionary | undefined {
return genSequence(dicts).first((dict) => dict.has(word, options));
return dicts.find((dict) => dict.has(word, options));
}

function findInAnyDictionary(
Expand All @@ -160,15 +159,15 @@ function isNoSuggestWordInAnyDictionary(
word: string,
options: HasOptions,
): SpellingDictionary | undefined {
return genSequence(dicts).first((dict) => dict.isNoSuggestWord(word, options));
return dicts.find((dict) => dict.isNoSuggestWord(word, options));
}

function isWordForbiddenInAnyDictionary(
dicts: SpellingDictionary[],
word: string,
ignoreCase: boolean | undefined,
): SpellingDictionary | undefined {
return genSequence(dicts).first((dict) => dict.isForbidden(word, ignoreCase));
return dicts.find((dict) => dict.isForbidden(word, ignoreCase));
}

export function isSpellingDictionaryCollection(dict: SpellingDictionary): dict is SpellingDictionaryCollection {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import type {
} from 'cspell-trie-lib';
import { CompoundWordsMethod, decodeTrie, suggestionCollector } from 'cspell-trie-lib';

import { autoCache, createCache01 } from '../util/AutoCache.js';
import { clean } from '../util/clean.js';
import { createMapper, createRepMapper } from '../util/repMap.js';
import * as Defaults from './defaults.js';
Expand All @@ -27,7 +26,6 @@ const findWordOptionsCaseSensitive: FindWordOptions = Object.freeze({ caseSensit
const findWordOptionsNotCaseSensitive: FindWordOptions = Object.freeze({ caseSensitive: false });

export class SpellingDictionaryFromTrie implements SpellingDictionary {
static readonly cachedWordsLimit = 50_000;
private _size = 0;
readonly knownWords = new Set<string>();
readonly unknownWords = new Set<string>();
Expand Down Expand Up @@ -95,9 +93,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
return { useCompounds, ignoreCase };
}

private _find = findCache((word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) =>
this.findAnyForm(word, useCompounds, ignoreCase),
);
private _find = (word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) =>
this.findAnyForm(word, useCompounds, ignoreCase);

private findAnyForm(
word: string,
Expand Down Expand Up @@ -147,12 +144,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
}

public isForbidden(word: string, _ignoreCaseAndAccents?: boolean): boolean {
return this._isForbidden(word);
}

private _isForbidden = autoCache((word: string): boolean => {
return this.trie.isForbiddenWord(word);
});
}

public suggest(word: string, suggestOptions: SuggestOptions = {}): SuggestionResult[] {
return this._suggest(word, suggestOptions);
Expand Down Expand Up @@ -214,38 +207,6 @@ export function createSpellingDictionaryFromTrieFile(
return new SpellingDictionaryFromTrie(trie, name, options, source);
}

type FindFunction = (
word: string,
useCompounds: number | boolean | undefined,
ignoreCase: boolean,
) => FindAnyFormResult | undefined;

interface CachedFind {
useCompounds: number | boolean | undefined;
ignoreCase: boolean;
findResult: FindAnyFormResult | undefined;
}

function findCache(fn: FindFunction, size = 2000): FindFunction {
const cache = createCache01<CachedFind>(size);

function find(
word: string,
useCompounds: number | boolean | undefined,
ignoreCase: boolean,
): FindAnyFormResult | undefined {
const r = cache.get(word);
if (r !== undefined && r.useCompounds === useCompounds && r.ignoreCase === ignoreCase) {
return r.findResult;
}
const findResult = fn(word, useCompounds, ignoreCase);
cache.set(word, { useCompounds, ignoreCase, findResult });
return findResult;
}

return find;
}

function* outerWordForms(word: string, mapWord: (word: string) => string[]): Iterable<string> {
// Only generate the needed forms.
const sent = new Set<string>();
Expand Down
12 changes: 6 additions & 6 deletions packages/cspell-dictionary/src/perf/has.perf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,28 +57,28 @@ suite('dictionary has Not', async (test) => {
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);
const dictCol = createCollection([dict, dict2, dict3], 'test-collection');

test('dictionary has 100k words', () => {
test('dictionary has not 100k words', () => {
checkWords(dict, missingWords, false);
});

test('dictionary has 100k words (2nd time)', () => {
test('dictionary has not 100k words (2nd time)', () => {
checkWords(dict, missingWords, false);
});

test('collection has 100k words', () => {
test('collection has not 100k words', () => {
checkWords(dictCol, missingWords, false);
});

test('iTrie has 100k words', () => {
test('iTrie has not 100k words', () => {
checkWords(iTrie, missingWords, false);
});

test('iTrie.hasWord has 100k words', () => {
test('iTrie.hasWord has not 100k words', () => {
const dict = { has: (word: string) => iTrie.hasWord(word, true) };
checkWords(dict, missingWords, false);
});

test('iTrie.data has 100k words', () => {
test('iTrie.data has not 100k words', () => {
checkWords(iTrie.data, missingWords, false);
});
});
Expand Down
54 changes: 54 additions & 0 deletions packages/cspell-dictionary/src/perf/misc.perf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { genSequence } from 'gensequence';
import { loremIpsum } from 'lorem-ipsum';
import { suite } from 'perf-insight';

suite('Array Primitives', async (test) => {
const words = genWords(20);
const toFind = [...words, 'not-a-word'];
const iterations = 1000;

test('Array.find', () => {
for (let i = 0; i < iterations; ++i) {
for (const word of toFind) {
words.find((w) => w === word);
}
}
});

test('genSequence.first', () => {
for (let i = 0; i < iterations; ++i) {
for (const word of toFind) {
genSequence(words).first((w) => w === word);
}
}
});
});

function genWords(count: number, includeForbidden = true): string[] {
const setOfWords = new Set(loremIpsum({ count }).split(' '));

if (includeForbidden) {
setOfWords.add('!forbidden');
setOfWords.add('!bad-word');
setOfWords.add('!rejection');
}

while (setOfWords.size < count) {
const words = [...setOfWords];
for (const a of words) {
for (const b of words) {
if (a !== b) {
setOfWords.add(a + b);
}
if (setOfWords.size >= count) {
break;
}
}
if (setOfWords.size >= count) {
break;
}
}
}

return [...setOfWords];
}
40 changes: 26 additions & 14 deletions packages/cspell-dictionary/src/util/AutoCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,62 @@ export interface CacheStats {
swaps: number;
}

class Cache01<R> implements CacheStats {
private count = 0;
private cache0: Record<string, R> = Object.create(null);
private cache1: Record<string, R> = Object.create(null);

abstract class Cache01<R> implements CacheStats {
hits = 0;
misses = 0;
swaps = 0;

constructor(readonly maxSize: number) {}

abstract get(key: string): R | undefined;
abstract set(key: string, value: R): this;
}

class Cache01Map<R> extends Cache01<R> implements CacheStats {
private count = 0;
private cache0: Map<string, R> = new Map();
private cache1: Map<string, R> = new Map();

constructor(maxSize: number) {
super(maxSize);
}

get(key: string): R | undefined {
const cache0 = this.cache0;
const cache1 = this.cache1;
if (key in cache0) {
let found = cache0.get(key);
if (found !== undefined) {
++this.hits;
return cache0[key];
return found;
}
if (key in cache1) {
found = cache1.get(key);
if (found !== undefined) {
++this.hits;
++this.count;
const r = cache1[key];
cache0[key] = r;
return r;
cache0.set(key, found);
return found;
}
++this.misses;
return undefined;
}

set(key: string, value: R): this {
if (this.count >= this.maxSize) {
const c = this.cache1;
this.cache1 = this.cache0;
this.cache0 = Object.create(null);
this.cache0 = c;
c.clear();
this.swaps++;
this.count = 0;
}
++this.count;
this.cache0[key] = value;
this.cache0.set(key, value);
return this;
}
}

export function createCache01<R>(size: number): Cache01<R> {
return new Cache01(size);
return new Cache01Map(size);
}

export function autoCache<R>(fn: (p: string) => R, size = CACHE_SIZE): AutoCache<R> {
Expand Down
5 changes: 5 additions & 0 deletions packages/cspell-trie-lib/api/api.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 3 additions & 12 deletions packages/cspell-trie-lib/src/lib/ITrie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ export interface ITrie {

export class ITrieImpl implements ITrie {
private _info: TrieInfo;
private _findOptionsDefaults: PartialFindOptions;
private hasForbidden: boolean;
private root: ITrieNodeRoot;
private count?: number;
Expand All @@ -127,11 +126,6 @@ export class ITrieImpl implements ITrie {
this.root = data.getRoot();
this._info = mergeOptionalWithDefaults(data.info);
this.hasForbidden = data.hasForbiddenWords();
this._findOptionsDefaults = {
caseInsensitivePrefix: this._info.stripCaseAndAccentsPrefix,
compoundFix: this._info.compoundCharacter,
forbidPrefix: this._info.forbiddenWordPrefix,
};
}

/**
Expand Down Expand Up @@ -195,7 +189,7 @@ export class ITrieImpl implements ITrie {
: defaultLegacyMinCompoundLength;
const findOptions = this.createFindOptions({
legacyMinCompoundLength: len,
matchCase: options.caseSensitive,
matchCase: options.caseSensitive || false,
});
return findLegacyCompound(this.root, word, findOptions);
}
Expand Down Expand Up @@ -290,11 +284,8 @@ export class ITrieImpl implements ITrie {
return new ITrieImpl(root, undefined);
}

private createFindOptions(options: PartialFindOptions = {}): FindOptions {
const findOptions = createFindOptions({
...this._findOptionsDefaults,
...options,
});
private createFindOptions(options: PartialFindOptions | undefined): FindOptions {
const findOptions = createFindOptions(options);
return findOptions;
}

Expand Down
5 changes: 1 addition & 4 deletions packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import type { CompoundModes } from './CompoundModes.js';
export interface FindOptions {
matchCase: boolean;
compoundMode: CompoundModes;
forbidPrefix: string;
compoundFix: string;
caseInsensitivePrefix: string;
legacyMinCompoundLength: number;
legacyMinCompoundLength?: number;
}

export type PartialFindOptions = PartialWithUndefined<FindOptions> | undefined;
6 changes: 6 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export interface ITrieNode {
has(char: string): boolean;
/** `true` iff this node has children */
hasChildren(): boolean;
/** check if a word exists within this node. */
findExact?: ((word: string) => boolean) | undefined;
}

export interface ITrieNodeRoot extends ITrieNode {
Expand All @@ -75,4 +77,8 @@ export interface ITrieNodeRoot extends ITrieNode {
find?: ((word: string, strict: boolean) => FindResult | undefined) | undefined;

isForbidden?: ((word: string) => boolean) | undefined;

forbidPrefix: string;
compoundFix: string;
caseInsensitivePrefix: string;
}
Loading

0 comments on commit 3052dbc

Please sign in to comment.