Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improve the speed of checking text. #6004

Merged
merged 6 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
".pnp.{js,cjs}",
".prettierignore",
".yarn",
"__snapshots__",
"*.{png,jpg,pdf,svg}",
"*.cpuprofile",
"*.heapprofile",
Expand Down
2 changes: 1 addition & 1 deletion integration-tests/src/sh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export function execAsync(command: string, options: ExecOptions = {}): Promise<S
return new Promise<Shell.ExecOutputReturnValue>((resolve) => {
Shell.exec(
command /* lgtm[js/shell-command-injection-from-environment] */,
{ silent: !echo, fatal: bail },
{ silent: !echo, fatal: bail, env: { ...process.env } },
(code, stdout, stderr) => resolve({ code, stdout, stderr }),
);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ let dictionaryCounter = 0;

const DefaultAutoCacheSize = 1000;

// Whether `has` requests are recorded. `CachedDict` reads this flag in a field initializer,
// NOTE(review): so it appears to apply only to instances constructed after it changes — confirm.
let logRequests = false;
// Accumulated request log; the logging variant of `CachedDict.has` pushes one entry per call.
const log: LogEntry[] = [];

/**
* Caching Dictionary remembers method calls to increase performance.
*/
Expand All @@ -30,6 +33,22 @@ export interface CachingDictionary {
getPreferredSuggestions(word: string): PreferredSuggestion[] | undefined;
}

/**
 * Fields shared by every entry in the request log.
 * Also carries the fields of `SearchOptions` via `extends`.
 */
interface LogEntryBase extends SearchOptions {
/** Milliseconds since `startTime`, taken from `performance.now()` just before the lookup runs. */
time: number;
/** Name of the dictionary method that was called; `'has'` is the only method recorded so far. */
method: 'has';
/** The word that was passed to the method. */
word: string;
/** The method's result; typed loosely here and narrowed by the per-method entry types. */
value?: unknown;
}

/** Log entry for a `has` request; `value` is the boolean the lookup returned. */
interface LogEntryHas extends LogEntryBase {
method: 'has';
value: boolean;
}

// Captured once when this module is evaluated; log entry times are reported relative to it.
const startTime = performance.now();

/** All log entry shapes; currently only `has` requests are logged. */
export type LogEntry = LogEntryHas;

class CachedDict implements CachingDictionary {
readonly name: string;
readonly id = ++dictionaryCounter;
Expand All @@ -41,7 +60,16 @@ class CachedDict implements CachingDictionary {
// console.log(`CachedDict for ${this.name}`);
}

readonly has = autoCache((word: string) => this.dict.has(word, this.options), DefaultAutoCacheSize);
#has = autoCache((word: string) => this.dict.has(word, this.options), DefaultAutoCacheSize);
has = logRequests
? (word: string): boolean => {
const time = performance.now() - startTime;
const value = this.#has(word);
log.push({ time, method: 'has', word, value });
return value;
}
: this.#has;

readonly isNoSuggestWord = autoCache(
(word: string) => this.dict.isNoSuggestWord(word, this.options),
DefaultAutoCacheSize,
Expand All @@ -56,7 +84,7 @@ class CachedDict implements CachingDictionary {
return {
name: this.name,
id: this.id,
has: extractStats(this.has),
has: extractStats(this.#has),
isNoSuggestWord: extractStats(this.isNoSuggestWord),
isForbidden: extractStats(this.isForbidden),
getPreferredSuggestions: extractStats(this.getPreferredSuggestions),
Expand Down Expand Up @@ -90,3 +118,11 @@ export function createCachingDictionary(
knownOptions.set(dict, cached);
return cached;
}

/**
 * Turn request logging on or off.
 *
 * NOTE(review): `CachedDict` chooses between the logging and non-logging `has` in a field
 * initializer, so this appears to affect only caching dictionaries constructed after the
 * call — confirm before relying on toggling it for existing instances.
 *
 * @param enabled - new state of the flag; when omitted, the current state is toggled.
 */
export function enableLogging(enabled = !logRequests): void {
logRequests = enabled;
}

/**
 * Get the recorded request log.
 *
 * @returns the module-level log array itself (not a copy), so entries recorded later show up
 *   in the returned array and any mutation by the caller changes the shared log.
 */
export function getLog(): LogEntryBase[] {
return log;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

exports[`index > verify api 1`] = `
[
"_debug",
"createCachingDictionary",
"createCollection",
"createFailedToLoadDictionary",
Expand Down
12 changes: 12 additions & 0 deletions packages/cspell-dictionary/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import {
enableLogging as cacheDictionaryEnableLogging,
getLog as cacheDictionaryGetLog,
} from './SpellingDictionary/CachingDictionary.js';
export type {
CachingDictionary,
FindOptions,
Expand All @@ -24,3 +28,11 @@ export {
createSuggestDictionary,
createSuggestOptions,
} from './SpellingDictionary/index.js';

/**
 * Debugging utilities.
 *
 * Groups the request-logging helpers imported from `CachingDictionary.js` under one export:
 * - `cacheDictionaryEnableLogging` is that module's `enableLogging`.
 * - `cacheDictionaryGetLog` is that module's `getLog`.
 */
export const _debug = {
cacheDictionaryEnableLogging,
cacheDictionaryGetLog,
};
157 changes: 154 additions & 3 deletions packages/cspell-dictionary/src/perf/has.perf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,34 @@ import { buildITrieFromWords } from 'cspell-trie-lib';
import { loremIpsum } from 'lorem-ipsum';
import { suite } from 'perf-insight';

import { createCachingDictionary } from '../SpellingDictionary/CachingDictionary.js';
import { createSpellingDictionary } from '../SpellingDictionary/createSpellingDictionary.js';
import { createCollection } from '../SpellingDictionary/SpellingDictionaryCollection.js';

suite('dictionary has', async (test) => {
const words = genWords(10_000);
const words1 = genWords(10_000);
const words2 = genWords(1000);
const words3 = genWords(1000);

const iTrie = buildITrieFromWords(words);
const dict = createSpellingDictionary(words, 'test', import.meta.url);
const words = words1;

const iTrie = buildITrieFromWords(words1);
const dict = createSpellingDictionary(words1, 'test', import.meta.url);
const dict2 = createSpellingDictionary(words2, 'test2', import.meta.url);
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);

const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse');

const cacheDictSingle = createCachingDictionary(dict, {});
const cacheDictCol = createCachingDictionary(dictCol, {});

const dictSet = new Set(words);

test('Set has 100k words', () => {
checkWords(dictSet, words);
});

test('dictionary has 100k words', () => {
checkWords(dict, words);
});
Expand All @@ -32,6 +44,14 @@ suite('dictionary has', async (test) => {
checkWords(dictColRev, words);
});

test('cache dictionary has 100k words', () => {
checkWords(cacheDictSingle, words);
});

test('cache collection has 100k words', () => {
checkWords(cacheDictCol, words);
});

test('iTrie has 100k words', () => {
checkWords(iTrie, words);
});
Expand All @@ -58,6 +78,12 @@ suite('dictionary has Not', async (test) => {
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);
const dictCol = createCollection([dict, dict2, dict3], 'test-collection');

const dictSet = new Set(words);

test('Set has not 100k words', () => {
checkWords(dictSet, missingWords, false);
});

test('dictionary has not 100k words', () => {
checkWords(dict, missingWords, false);
});
Expand All @@ -80,6 +106,104 @@ suite('dictionary has Not', async (test) => {
});
});

suite('dictionary has sampling', async (test) => {
    /** Anything that can answer a `has` query for a word. */
    type HasLookup = { has: (word: string) => boolean };

    const baseWords = genWords(10_000);
    const extraWordsA = genWords(1000);
    const extraWordsB = genWords(1000);

    // 100k words picked from `baseWords` by random index, so the same word is looked up repeatedly.
    const wordsSample = genSamples(100_000, baseWords.length).map((pos) => baseWords[pos]);

    const iTrie = buildITrieFromWords(baseWords);
    const dict = createSpellingDictionary(baseWords, 'test', import.meta.url);
    const dict2 = createSpellingDictionary(extraWordsA, 'test2', import.meta.url);
    const dict3 = createSpellingDictionary(extraWordsB, 'test3', import.meta.url);

    // The same three dictionaries in both orders, so the sampled words live in the first or the last one.
    const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
    const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse');

    const cacheDictSingle = createCachingDictionary(dict, {});
    const cacheDictCol = createCachingDictionary(dictCol, {});

    const dictSet = new Set(baseWords);

    // Registered in this order; every entry is queried with the identical sample.
    const lookups: [string, HasLookup][] = [
        ['Set has 100k words', dictSet],
        ['dictionary has 100k words', dict],
        ['collection has 100k words', dictCol],
        ['collection reverse has 100k words', dictColRev],
        ['cache dictionary has 100k words', cacheDictSingle],
        ['cache collection has 100k words', cacheDictCol],
        ['iTrie has 100k words', iTrie],
    ];

    for (const [title, target] of lookups) {
        test(title, () => {
            checkWords(target, wordsSample);
        });
    }

    test('iTrie.hasWord has 100k words', () => {
        // The adapter object is created inside the test callback, i.e. on every invocation of it.
        const viaHasWord = { has: (word: string) => iTrie.hasWord(word, true) };
        checkWords(viaHasWord, wordsSample);
    });

    test('iTrie.data has 100k words', () => {
        checkWords(iTrie.data, wordsSample);
    });
});

suite('dictionary isForbidden sampling', async (test) => {
    /** Anything that can answer an `isForbidden` query for a word. */
    type ForbiddenLookup = { isForbidden: (word: string) => boolean };

    const baseWords = genWords(10_000);
    const extraWordsA = genWords(1000);
    const extraWordsB = genWords(1000);

    // 100k words picked from `baseWords` by random index, so the same word is checked repeatedly.
    const wordsSample = genSamples(100_000, baseWords.length).map((pos) => baseWords[pos]);

    const dict = createSpellingDictionary(baseWords, 'test', import.meta.url);
    const dict2 = createSpellingDictionary(extraWordsA, 'test2', import.meta.url);
    const dict3 = createSpellingDictionary(extraWordsB, 'test3', import.meta.url);

    const dictCol = createCollection([dict, dict2, dict3], 'test-collection');
    const dictColRev = createCollection([dict3, dict2, dict], 'test-collection-reverse');

    const cacheDictSingle = createCachingDictionary(dict, {});
    const cacheDictCol = createCachingDictionary(dictCol, {});

    // Registered in this order; every entry is queried with the identical sample.
    const lookups: [string, ForbiddenLookup][] = [
        ['dictionary isForbidden 100k words', dict],
        ['collection isForbidden 100k words', dictCol],
        ['collection reverse isForbidden 100k words', dictColRev],
        ['cache dictionary isForbidden 100k words', cacheDictSingle],
        ['cache collection isForbidden 100k words', cacheDictCol],
    ];

    for (const [title, target] of lookups) {
        test(title, () => {
            checkForForbiddenWords(target, wordsSample);
        });
    }
});

function checkWords(dict: { has: (word: string) => boolean }, words: string[], expected = true, totalChecks = 100_000) {
let has = true;
const len = words.length;
Expand All @@ -94,6 +218,21 @@ function checkWords(dict: { has: (word: string) => boolean }, words: string[], e
assert(has, 'All words should be found in the dictionary');
}

/**
 * Query `dict.isForbidden` `totalChecks` times, cycling through `words`, and assert that no
 * queried word was reported as forbidden.
 *
 * Every lookup is performed even after a forbidden word has been seen; the outcome is only
 * asserted once the loop has finished.
 *
 * @param dict - object exposing the `isForbidden` lookup to exercise.
 * @param words - words to query; index `i % words.length` is used for the i-th check.
 * @param totalChecks - number of lookups to perform.
 */
function checkForForbiddenWords(
    dict: { isForbidden: (word: string) => boolean },
    words: string[],
    totalChecks = 100_000,
) {
    const wordCount = words.length;
    let sawForbidden = false;
    let checked = 0;
    while (checked < totalChecks) {
        if (dict.isForbidden(words[checked % wordCount])) {
            sawForbidden = true;
        }
        checked += 1;
    }
    assert(!sawForbidden, 'All words should not be forbidden');
}

function genWords(count: number, includeForbidden = true): string[] {
const setOfWords = new Set(loremIpsum({ count }).split(' '));

Expand Down Expand Up @@ -122,3 +261,15 @@ function genWords(count: number, includeForbidden = true): string[] {

return [...setOfWords];
}

/**
 * Generate `count` random integer indexes in the range `[0, max)`.
 *
 * Each index is `max` scaled by the product of `depth` independent `Math.random()` values
 * (a single value when `depth` is 1 or less), which skews the result towards the low end of
 * the range rather than spreading it uniformly.
 *
 * @param count - number of indexes to produce.
 * @param max - exclusive upper bound of the indexes.
 * @param depth - how many random factors are multiplied together for each index.
 * @returns an array of `count` integers.
 */
function genSamples(count: number, max: number, depth = 3) {
    const samples = new Array<number>(count);
    let slot = 0;
    while (slot < count) {
        let scaled = Math.random() * max;
        // One extra random factor for every level of depth beyond the first.
        for (let level = depth; level > 1; --level) {
            scaled *= Math.random();
        }
        samples[slot] = Math.floor(scaled);
        slot += 1;
    }
    return samples;
}
12 changes: 8 additions & 4 deletions packages/cspell-pipe/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,21 @@
"!**/*.tsbuildInfo",
"!**/__mocks__",
"!**/*.spec.*",
"!**/*.perf.*",
"!**/*.test.*",
"!**/perf/**",
"!**/test/**",
"!**/*.map"
],
"scripts": {
"build": "tsc -b . -f",
"watch": "tsc -b . -w -f",
"build": "tsc -p .",
"watch": "tsc -p . -w",
"clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"",
"clean-build": "pnpm run clean && pnpm run build",
"coverage": "vitest run --coverage",
"test-watch": "vitest",
"test": "vitest run"
"test": "vitest run",
"test:perf": "NODE_ENV=production insight --register ts-node/esm --file \"**/*.perf.{mts,ts}\""
},
"repository": {
"type": "git",
Expand All @@ -124,6 +127,7 @@
"node": ">=18"
},
"devDependencies": {
"globby": "^14.0.2"
"globby": "^14.0.2",
"perf-insight": "^1.2.0"
}
}
8 changes: 4 additions & 4 deletions packages/cspell-pipe/src/operators/append.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ import type { PipeFn } from '../internalTypes.js';
export function opAppendAsync<T>(
...iterablesToAppend: (AsyncIterable<T> | Iterable<T>)[]
): (iter: AsyncIterable<T> | Iterable<T>) => AsyncIterable<T> {
async function* fn(iter: AsyncIterable<T> | Iterable<T>) {
async function* fnAppend(iter: AsyncIterable<T> | Iterable<T>) {
yield* iter;
for (const i of iterablesToAppend) {
yield* i;
}
}

return fn;
return fnAppend;
}

/**
Expand All @@ -25,14 +25,14 @@ export function opAppendAsync<T>(
* @returns
*/
export function opAppendSync<T>(...iterablesToAppend: Iterable<T>[]): (iter: Iterable<T>) => Iterable<T> {
function* fn(iter: Iterable<T>) {
function* fnAppend(iter: Iterable<T>) {
yield* iter;
for (const i of iterablesToAppend) {
yield* i;
}
}

return fn;
return fnAppend;
}

export function opAppend<T>(...iterablesToAppend: Iterable<T>[]): PipeFn<T, T> {
Expand Down
Loading