diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.ts index ac790c0d705..51b0e73c6b4 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.ts @@ -1,6 +1,5 @@ import type { SuggestionCollector, SuggestionResult } from 'cspell-trie-lib'; import { CASE_INSENSITIVE_PREFIX, CompoundWordsMethod } from 'cspell-trie-lib'; -import { genSequence } from 'gensequence'; import { isDefined } from '../util/util.js'; import * as Defaults from './defaults.js'; @@ -138,7 +137,7 @@ function isWordInAnyDictionary( word: string, options: SearchOptions, ): SpellingDictionary | undefined { - return genSequence(dicts).first((dict) => dict.has(word, options)); + return dicts.find((dict) => dict.has(word, options)); } function findInAnyDictionary( @@ -160,7 +159,7 @@ function isNoSuggestWordInAnyDictionary( word: string, options: HasOptions, ): SpellingDictionary | undefined { - return genSequence(dicts).first((dict) => dict.isNoSuggestWord(word, options)); + return dicts.find((dict) => dict.isNoSuggestWord(word, options)); } function isWordForbiddenInAnyDictionary( @@ -168,7 +167,7 @@ function isWordForbiddenInAnyDictionary( word: string, ignoreCase: boolean | undefined, ): SpellingDictionary | undefined { - return genSequence(dicts).first((dict) => dict.isForbidden(word, ignoreCase)); + return dicts.find((dict) => dict.isForbidden(word, ignoreCase)); } export function isSpellingDictionaryCollection(dict: SpellingDictionary): dict is SpellingDictionaryCollection { diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts index 37e86273133..ab9822d3ae3 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts @@ -8,7 +8,6 @@ import type { } from 'cspell-trie-lib'; import { CompoundWordsMethod, decodeTrie, suggestionCollector } from 'cspell-trie-lib'; -import { autoCache, createCache01 } from '../util/AutoCache.js'; import { clean } from '../util/clean.js'; import { createMapper, createRepMapper } from '../util/repMap.js'; import * as Defaults from './defaults.js'; @@ -27,7 +26,6 @@ const findWordOptionsCaseSensitive: FindWordOptions = Object.freeze({ caseSensit const findWordOptionsNotCaseSensitive: FindWordOptions = Object.freeze({ caseSensitive: false }); export class SpellingDictionaryFromTrie implements SpellingDictionary { - static readonly cachedWordsLimit = 50_000; private _size = 0; readonly knownWords = new Set(); readonly unknownWords = new Set(); @@ -95,9 +93,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { return { useCompounds, ignoreCase }; } - private _find = findCache((word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) => - this.findAnyForm(word, useCompounds, ignoreCase), - ); + private _find = (word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) => + this.findAnyForm(word, useCompounds, ignoreCase); private findAnyForm( word: string, @@ -147,12 +144,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { } public isForbidden(word: string, _ignoreCaseAndAccents?: boolean): boolean { - return this._isForbidden(word); - } - - private _isForbidden = autoCache((word: string): boolean => { return this.trie.isForbiddenWord(word); - }); + } public suggest(word: string, suggestOptions: SuggestOptions = {}): SuggestionResult[] { return this._suggest(word, suggestOptions); @@ -214,38 +207,6 @@ export function createSpellingDictionaryFromTrieFile( return new SpellingDictionaryFromTrie(trie, name, options, source); } -type FindFunction = ( - word: string, - useCompounds: number | boolean | undefined, - ignoreCase: boolean, -) => FindAnyFormResult | undefined; - -interface CachedFind { - useCompounds: number | boolean | undefined; - ignoreCase: boolean; - findResult: FindAnyFormResult | undefined; -} - -function findCache(fn: FindFunction, size = 2000): FindFunction { - const cache = createCache01(size); - - function find( - word: string, - useCompounds: number | boolean | undefined, - ignoreCase: boolean, - ): FindAnyFormResult | undefined { - const r = cache.get(word); - if (r !== undefined && r.useCompounds === useCompounds && r.ignoreCase === ignoreCase) { - return r.findResult; - } - const findResult = fn(word, useCompounds, ignoreCase); - cache.set(word, { useCompounds, ignoreCase, findResult }); - return findResult; - } - - return find; -} - function* outerWordForms(word: string, mapWord: (word: string) => string[]): Iterable { // Only generate the needed forms. const sent = new Set(); diff --git a/packages/cspell-dictionary/src/perf/has.perf.ts b/packages/cspell-dictionary/src/perf/has.perf.ts index 4629ffb08b0..66687336d32 100644 --- a/packages/cspell-dictionary/src/perf/has.perf.ts +++ b/packages/cspell-dictionary/src/perf/has.perf.ts @@ -57,28 +57,28 @@ suite('dictionary has Not', async (test) => { const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url); const dictCol = createCollection([dict, dict2, dict3], 'test-collection'); - test('dictionary has 100k words', () => { + test('dictionary has not 100k words', () => { checkWords(dict, missingWords, false); }); - test('dictionary has 100k words (2nd time)', () => { + test('dictionary has not 100k words (2nd time)', () => { checkWords(dict, missingWords, false); }); - test('collection has 100k words', () => { + test('collection has not 100k words', () => { checkWords(dictCol, missingWords, false); }); - test('iTrie has 100k words', () => { + test('iTrie has not 100k words', () => { checkWords(iTrie, missingWords, false); }); - test('iTrie.hasWord has 100k words', () => { + test('iTrie.hasWord has not 100k words', () => { const dict = { has: (word: string) => iTrie.hasWord(word, true) }; checkWords(dict, missingWords, false); }); - test('iTrie.data has 100k words', () => { + test('iTrie.data has not 100k words', () => { checkWords(iTrie.data, missingWords, false); }); }); diff --git a/packages/cspell-dictionary/src/perf/misc.perf.ts b/packages/cspell-dictionary/src/perf/misc.perf.ts new file mode 100644 index 00000000000..77ac78c0137 --- /dev/null +++ b/packages/cspell-dictionary/src/perf/misc.perf.ts @@ -0,0 +1,54 @@ +import { genSequence } from 'gensequence'; +import { loremIpsum } from 'lorem-ipsum'; +import { suite } from 'perf-insight'; + +suite('Array Primitives', async (test) => { + const words = genWords(20); + const toFind = [...words, 'not-a-word']; + const iterations = 1000; + + test('Array.find', () => { + for (let i = 0; i < iterations; ++i) { + for (const word of toFind) { + words.find((w) => w === word); + } + } + }); + + test('genSequence.first', () => { + for (let i = 0; i < iterations; ++i) { + for (const word of toFind) { + genSequence(words).first((w) => w === word); + } + } + }); +}); + +function genWords(count: number, includeForbidden = true): string[] { + const setOfWords = new Set(loremIpsum({ count }).split(' ')); + + if (includeForbidden) { + setOfWords.add('!forbidden'); + setOfWords.add('!bad-word'); + setOfWords.add('!rejection'); + } + + while (setOfWords.size < count) { + const words = [...setOfWords]; + for (const a of words) { + for (const b of words) { + if (a !== b) { + setOfWords.add(a + b); + } + if (setOfWords.size >= count) { + break; + } + } + if (setOfWords.size >= count) { + break; + } + } + } + + return [...setOfWords]; +} diff --git a/packages/cspell-dictionary/src/util/AutoCache.ts b/packages/cspell-dictionary/src/util/AutoCache.ts index e7c251fbfc8..090c28954b7 100644 --- a/packages/cspell-dictionary/src/util/AutoCache.ts +++ b/packages/cspell-dictionary/src/util/AutoCache.ts @@ -10,30 +10,40 @@ export interface CacheStats { swaps: number; } -class Cache01 implements CacheStats { - private count = 0; - private cache0: Record = Object.create(null); - private cache1: Record = Object.create(null); - +abstract class Cache01 implements CacheStats { hits = 0; misses = 0; swaps = 0; constructor(readonly maxSize: number) {} + abstract get(key: string): R | undefined; + abstract set(key: string, value: R): this; +} + +class Cache01Map extends Cache01 implements CacheStats { + private count = 0; + private cache0: Map = new Map(); + private cache1: Map = new Map(); + + constructor(maxSize: number) { + super(maxSize); + } + get(key: string): R | undefined { const cache0 = this.cache0; const cache1 = this.cache1; - if (key in cache0) { + let found = cache0.get(key); + if (found !== undefined) { ++this.hits; - return cache0[key]; + return found; } - if (key in cache1) { + found = cache1.get(key); + if (found !== undefined) { ++this.hits; ++this.count; - const r = cache1[key]; - cache0[key] = r; - return r; + cache0.set(key, found); + return found; } ++this.misses; return undefined; @@ -41,19 +51,21 @@ class Cache01 implements CacheStats { set(key: string, value: R): this { if (this.count >= this.maxSize) { + const c = this.cache1; this.cache1 = this.cache0; - this.cache0 = Object.create(null); + this.cache0 = c; + c.clear(); this.swaps++; this.count = 0; } ++this.count; - this.cache0[key] = value; + this.cache0.set(key, value); return this; } } export function createCache01(size: number): Cache01 { - return new Cache01(size); + return new Cache01Map(size); } export function autoCache(fn: (p: string) => R, size = CACHE_SIZE): AutoCache { diff --git a/packages/cspell-trie-lib/api/api.d.ts b/packages/cspell-trie-lib/api/api.d.ts index 2c0cc310f95..2fad4e47291 100644 --- a/packages/cspell-trie-lib/api/api.d.ts +++ b/packages/cspell-trie-lib/api/api.d.ts @@ -133,6 +133,8 @@ interface ITrieNode { has(char: string): boolean; /** `true` iff this node has children */ hasChildren(): boolean; + /** check if a word exists within this node. */ + findExact?: ((word: string) => boolean) | undefined; } interface ITrieNodeRoot extends ITrieNode { info: Readonly; @@ -150,6 +152,9 @@ interface ITrieNodeRoot extends ITrieNode { */ find?: ((word: string, strict: boolean) => FindResult$1 | undefined) | undefined; isForbidden?: ((word: string) => boolean) | undefined; + forbidPrefix: string; + compoundFix: string; + caseInsensitivePrefix: string; } declare const FLAG_WORD = 1; diff --git a/packages/cspell-trie-lib/src/lib/ITrie.ts b/packages/cspell-trie-lib/src/lib/ITrie.ts index be8f5b42c2b..a96c0649a63 100644 --- a/packages/cspell-trie-lib/src/lib/ITrie.ts +++ b/packages/cspell-trie-lib/src/lib/ITrie.ts @@ -115,7 +115,6 @@ export interface ITrie { export class ITrieImpl implements ITrie { private _info: TrieInfo; - private _findOptionsDefaults: PartialFindOptions; private hasForbidden: boolean; private root: ITrieNodeRoot; private count?: number; @@ -127,11 +126,6 @@ export class ITrieImpl implements ITrie { this.root = data.getRoot(); this._info = mergeOptionalWithDefaults(data.info); this.hasForbidden = data.hasForbiddenWords(); - this._findOptionsDefaults = { - caseInsensitivePrefix: this._info.stripCaseAndAccentsPrefix, - compoundFix: this._info.compoundCharacter, - forbidPrefix: this._info.forbiddenWordPrefix, - }; } /** @@ -195,7 +189,7 @@ export class ITrieImpl implements ITrie { : defaultLegacyMinCompoundLength; const findOptions = this.createFindOptions({ legacyMinCompoundLength: len, - matchCase: options.caseSensitive, + matchCase: options.caseSensitive || false, }); return findLegacyCompound(this.root, word, findOptions); } @@ -290,11 +284,8 @@ export class ITrieImpl implements ITrie { return new ITrieImpl(root, undefined); } - private createFindOptions(options: PartialFindOptions = {}): FindOptions { - const findOptions = createFindOptions({ - ...this._findOptionsDefaults, - ...options, - }); + private createFindOptions(options: PartialFindOptions | undefined): FindOptions { + const findOptions = createFindOptions(options); return findOptions; } diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts index 4b2b981d99e..b5063322f11 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts @@ -4,10 +4,7 @@ import type { CompoundModes } from './CompoundModes.js'; export interface FindOptions { matchCase: boolean; compoundMode: CompoundModes; - forbidPrefix: string; - compoundFix: string; - caseInsensitivePrefix: string; - legacyMinCompoundLength: number; + legacyMinCompoundLength?: number; } export type PartialFindOptions = PartialWithUndefined | undefined; diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts index 20ae91da936..3d7f002b9e8 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts @@ -55,6 +55,8 @@ export interface ITrieNode { has(char: string): boolean; /** `true` iff this node has children */ hasChildren(): boolean; + /** check if a word exists within this node. */ + findExact?: ((word: string) => boolean) | undefined; } export interface ITrieNodeRoot extends ITrieNode { @@ -75,4 +77,8 @@ export interface ITrieNodeRoot extends ITrieNode { find?: ((word: string, strict: boolean) => FindResult | undefined) | undefined; isForbidden?: ((word: string) => boolean) | undefined; + + forbidPrefix: string; + compoundFix: string; + caseInsensitivePrefix: string; } diff --git a/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts b/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts index 33239dee65e..e31f1d828e9 100644 --- a/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts +++ b/packages/cspell-trie-lib/src/lib/ITrieNode/find.ts @@ -1,4 +1,3 @@ -import { CASE_INSENSITIVE_PREFIX, COMPOUND_FIX, FORBID_PREFIX } from '../constants.js'; import { memorizeLastCall } from '../utils/memorizeLastCall.js'; import { mergeDefaults } from '../utils/mergeDefaults.js'; import type { CompoundModes } from './CompoundModes.js'; @@ -13,14 +12,13 @@ const defaultLegacyMinCompoundLength = 3; const _defaultFindOptions: FindOptions = { matchCase: false, compoundMode: 'compound', - forbidPrefix: FORBID_PREFIX, - compoundFix: COMPOUND_FIX, - caseInsensitivePrefix: CASE_INSENSITIVE_PREFIX, legacyMinCompoundLength: defaultLegacyMinCompoundLength, }; +Object.freeze(_defaultFindOptions); + const arrayCompoundModes: CompoundModes[] = ['none', 'compound', 'legacy']; -const knownCompoundModes = new Map(arrayCompoundModes.map((a) => [a, a])); +const knownCompoundModes = new Map(arrayCompoundModes.map((a) => [a, a])); /** * @@ -29,7 +27,7 @@ const knownCompoundModes = new Map(arrayCompoundMo * @param options */ export function findWord(root: Root, word: string, options?: PartialFindOptions): FindFullResult { - return _findWord(root, word, createFindOptions(options)); + return _findWord(root, word, options); } /** @@ -39,7 +37,7 @@ export function findWord(root: Root, word: string, options?: PartialFindOptions) * @param options */ export function findWordNode(root: Root, word: string, options?: PartialFindOptions): FindFullNodeResult { - return _findWordNode(root, word, createFindOptions(options)); + return _findWordNode(root, word, options); } /** @@ -48,9 +46,9 @@ export function findWordNode(root: Root, word: string, options?: PartialFindOpti * @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase` * @param options */ -function _findWord(root: Root, word: string, options: FindOptions): FindFullResult { +function _findWord(root: Root, word: string, options: PartialFindOptions): FindFullResult { if (root.find) { - const found = root.find(word, options.matchCase); + const found = root.find(word, options?.matchCase || false); if (found) return found as FindFullResult; } const { node: _, ...result } = _findWordNode(root, word, options); @@ -63,14 +61,12 @@ function _findWord(root: Root, word: string, options: FindOptions): FindFullResu * @param word A pre normalized word use `normalizeWord` or `normalizeWordToLowercase` * @param options */ -function _findWordNode(root: Root, word: string, options: FindOptions): FindFullNodeResult { +function _findWordNode(root: Root, word: string, options: PartialFindOptions): FindFullNodeResult { const trieInfo = root.info; - const compoundMode = knownCompoundModes.get(options.compoundMode) || _defaultFindOptions.compoundMode; - const compoundPrefix = - options.compoundMode === 'compound' ? (trieInfo.compoundCharacter ?? options.compoundFix) : ''; - const ignoreCasePrefix = options.matchCase - ? '' - : (trieInfo.stripCaseAndAccentsPrefix ?? options.caseInsensitivePrefix); + const matchCase = options?.matchCase || false; + const compoundMode = knownCompoundModes.get(options?.compoundMode) || _defaultFindOptions.compoundMode; + const compoundPrefix = compoundMode === 'compound' ? (trieInfo.compoundCharacter ?? root.compoundFix) : ''; + const ignoreCasePrefix = matchCase ? '' : (trieInfo.stripCaseAndAccentsPrefix ?? root.caseInsensitivePrefix); function __findCompound(): FindFullNodeResult { const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix); @@ -78,8 +74,8 @@ function _findWordNode(root: Root, word: string, options: FindOptions): FindFull if (f.found !== false && f.compoundUsed) { // If case was ignored when searching for the word, then check the forbidden // in the ignore case forbidden list. - const r = !f.caseMatched ? walk(root, options.caseInsensitivePrefix) : root; - result.forbidden = isForbiddenWord(r, word, options.forbidPrefix); + const r = !f.caseMatched ? walk(root, root.caseInsensitivePrefix) : root; + result.forbidden = isForbiddenWord(r, word, root.forbidPrefix); } return result; } @@ -90,7 +86,7 @@ function _findWordNode(root: Root, word: string, options: FindOptions): FindFull const result: FindFullNodeResult = { found: isFound && word, compoundUsed: false, - forbidden: isForbiddenWord(root, word, options.forbidPrefix), + forbidden: isForbiddenWord(root, word, root.forbidPrefix), node: n, caseMatched: true, }; @@ -99,7 +95,7 @@ function _findWordNode(root: Root, word: string, options: FindOptions): FindFull switch (compoundMode) { case 'none': { - return options.matchCase ? __findExact() : __findCompound(); + return matchCase ? __findExact() : __findCompound(); } case 'compound': { return __findCompound(); @@ -110,12 +106,12 @@ function _findWordNode(root: Root, word: string, options: FindOptions): FindFull } } -export function findLegacyCompound(root: Root, word: string, options: FindOptions): FindFullNodeResult { +export function findLegacyCompound(root: Root, word: string, options: PartialFindOptions): FindFullNodeResult { const roots: (ITrieNode | undefined)[] = [root]; - if (!options.matchCase) { - roots.push(walk(root, options.caseInsensitivePrefix)); + if (!options?.matchCase) { + roots.push(walk(root, root.caseInsensitivePrefix)); } - return findLegacyCompoundNode(roots, word, options.legacyMinCompoundLength); + return findLegacyCompoundNode(roots, word, options?.legacyMinCompoundLength || defaultLegacyMinCompoundLength); } interface FindCompoundChain { @@ -358,6 +354,7 @@ export function isForbiddenWord(root: Root | ITrieNode | undefined, word: string export const createFindOptions = memorizeLastCall(_createFindOptions); function _createFindOptions(options: PartialFindOptions | undefined): FindOptions { + if (!options) return _defaultFindOptions; return mergeDefaults(options, _defaultFindOptions); } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts index 18cfe91013a..f04bf7b6921 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts @@ -2,11 +2,11 @@ import { encodeTextToUtf8, encodeUtf8N_BE, type Utf8BE32 } from './Utf8.js'; export type Utf8Seq = Readonly; -export type CharIndexMap = Record; +export type CharIndexMap = Map; export type RO_CharIndexMap = Readonly; -export type CharIndexSeqMap = Record; +export type CharIndexSeqMap = Map; export type RO_CharIndexSeqMap = Readonly; @@ -23,14 +23,14 @@ export class CharIndex { constructor(readonly charIndex: readonly string[]) { this.#charToUtf8SeqMap = buildCharIndexSequenceMap(charIndex); - this.#multiByteChars = Object.values(this.#charToUtf8SeqMap).some((c) => c.length > 1); + this.#multiByteChars = [...this.#charToUtf8SeqMap.values()].some((c) => c.length > 1); } getCharUtf8Seq(c: string): Utf8Seq { - const found = this.#charToUtf8SeqMap[c]; + const found = this.#charToUtf8SeqMap.get(c); if (found) return found; const s = encodeTextToUtf8(c); - this.#charToUtf8SeqMap[c] = s; + this.#charToUtf8SeqMap.set(c, s); return s; } @@ -59,17 +59,17 @@ export class CharIndex { } function buildCharIndexSequenceMap(charIndex: readonly string[]): CharIndexSeqMap { - const map: CharIndexSeqMap = Object.create(null); + const map: CharIndexSeqMap = new Map(); for (const key of charIndex) { - map[key] = encodeTextToUtf8(key); + map.set(key, encodeTextToUtf8(key)); } return map; } export class CharIndexBuilder { private readonly charIndex: string[] = []; - readonly charIndexMap: CharIndexMap = Object.create(null); - readonly charIndexSeqMap: CharIndexSeqMap = Object.create(null); + readonly charIndexMap: CharIndexMap = new Map(); + readonly charIndexSeqMap: CharIndexSeqMap = new Map(); readonly #mapIdxToSeq = new Map(); @@ -78,16 +78,16 @@ export class CharIndexBuilder { } getUtf8Value(c: string): number { - const found = this.charIndexMap[c]; + const found = this.charIndexMap.get(c); if (found !== undefined) { return found; } const nc = c.normalize('NFC'); this.charIndex.push(nc); const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0); - this.charIndexMap[c] = utf8; - this.charIndexMap[nc] = utf8; - this.charIndexMap[c.normalize('NFD')] = utf8; + this.charIndexMap.set(c, utf8); + this.charIndexMap.set(nc, utf8); + this.charIndexMap.set(c.normalize('NFD'), utf8); return utf8; } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts index df38f578d9c..c832cc5c890 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlob.ts @@ -17,6 +17,7 @@ const checkSorted = false; export class FastTrieBlob implements TrieData { private _readonly = false; private _forbidIdx: number; + private _caseInsensitiveIdx: number; private _iTrieRoot: ITrieNodeRoot | undefined; wordToCharacters: (word: string) => readonly string[]; // private nodes8: Uint8Array[]; @@ -32,6 +33,8 @@ export class FastTrieBlob implements TrieData { this.info = mergeOptionalWithDefaults(options); this.wordToCharacters = (word: string) => [...word]; this._forbidIdx = this._searchNodeForChar(0, this.info.forbiddenWordPrefix); + this._caseInsensitiveIdx = this._searchNodeForChar(0, this.info.stripCaseAndAccentsPrefix); + if (checkSorted) { assertSorted(this.nodes, bitMasksInfo.NodeMaskChildCharIndex); } @@ -49,6 +52,11 @@ export class FastTrieBlob implements TrieData { return this._has(0, word); } + hasCaseInsensitive(word: string): boolean { + if (!this._caseInsensitiveIdx) return false; + return this._has(this._caseInsensitiveIdx, word); + } + private _has(nodeIdx: number, word: string): boolean { return this.#hasSorted(nodeIdx, word); } @@ -192,6 +200,9 @@ export class FastTrieBlob implements TrieData { 0, trie.info, (word: string) => trie.has(word), + (word: string) => trie.isForbiddenWord(word), + (word: string) => trie.hasCaseInsensitive(word), + (idx: number, word: string) => trie._has(idx, word), ); } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts index 11112e97cd8..5eacce89124 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/FastTrieBlobIRoot.ts @@ -26,6 +26,7 @@ class FastTrieBlobINode implements ITrieNode { constructor( readonly trie: FastTrieBlobInternals, readonly nodeIdx: NodeIndex, + protected nodeHas: (idx: number, word: string) => boolean, ) { const node = trie.nodes[nodeIdx]; this.node = node; @@ -53,7 +54,7 @@ class FastTrieBlobINode implements ITrieNode { if (this._entries) return this._entries; if (!this._count) return EmptyEntries; const entries = this.getNodesEntries(); - this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value)]); + this._entries = entries.map(([key, value]) => [key, new FastTrieBlobINode(this.trie, value, this.nodeHas)]); return this._entries; } @@ -77,7 +78,7 @@ class FastTrieBlobINode implements ITrieNode { if (!this._values && !this.containsChainedIndexes()) { const n = this.node[keyIdx + 1]; const nodeIdx = n >>> this.trie.NodeChildRefShift; - return new FastTrieBlobINode(this.trie, nodeIdx); + return new FastTrieBlobINode(this.trie, nodeIdx, this.nodeHas); } return this.values()[keyIdx]; } @@ -94,6 +95,10 @@ class FastTrieBlobINode implements ITrieNode { return map; } + findExact(word: string): boolean { + return this.nodeHas(this.id, word); + } + private containsChainedIndexes(): boolean { if (this._chained !== undefined) return this._chained; if (!this._count || !this.trie.isIndexDecoderNeeded) { @@ -204,11 +209,14 @@ export class FastTrieBlobIRoot extends FastTrieBlobINode implements ITrieNodeRoo nodeIdx: number, readonly info: Readonly, readonly findExact: (word: string) => boolean, + readonly isForbidden: (word: string) => boolean, + readonly findCaseInsensitive: (word: string) => boolean, + nodeHas: (idx: number, word: string) => boolean, ) { - super(trie, nodeIdx); + super(trie, nodeIdx, nodeHas); } resolveId(id: ITrieNodeId): ITrieNode { - return new FastTrieBlobINode(this.trie, id as number); + return new FastTrieBlobINode(this.trie, id as number, this.nodeHas); } find(word: string, strict: boolean): FindResult | undefined { @@ -217,7 +225,19 @@ export class FastTrieBlobIRoot extends FastTrieBlobINode implements ITrieNodeRoo return { found: word, compoundUsed: false, caseMatched: true }; } if (strict) return undefined; - found = this.findExact(this.info.stripCaseAndAccentsPrefix + word); + found = this.findCaseInsensitive(word); return found ? { found: word, compoundUsed: false, caseMatched: false } : undefined; } + + get forbidPrefix(): string { + return this.info.forbiddenWordPrefix; + } + + get compoundFix(): string { + return this.info.compoundCharacter; + } + + get caseInsensitivePrefix(): string { + return this.info.stripCaseAndAccentsPrefix; + } } diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts index faf911788f5..9bf3c9a478c 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlob.ts @@ -40,11 +40,6 @@ const headerSig = 'TrieBlob'; const version = '00.01.00'; const endianSig = 0x0403_0201; -const lookupCount = 50; - -type MapSeqToNodeIdx = Map; -type CacheNodeIdxLookup = Map; - export class TrieBlob implements TrieData { readonly info: Readonly; #forbidIdx: number | undefined; @@ -57,15 +52,6 @@ export class TrieBlob implements TrieData { #nodes8: Uint8Array; #beAdj = endianness() === 'BE' ? 3 : 0; - /** - * Lookup table for node indexes. - * The first level is the node index. - * The second level is the character index. - * The value is the node index of the child node. - * It speeds the lookup process up by about 20%. - */ - #nodeIdxLookup: CacheNodeIdxLookup = new Map(); - readonly wordToCharacters = (word: string) => [...word]; constructor( @@ -75,7 +61,7 @@ export class TrieBlob implements TrieData { ) { trieBlobSort(nodes); this.info = mergeOptionalWithDefaults(info); - this.#prepLookup(); + // this.#prepLookup(); this.#nodes8 = new Uint8Array(nodes.buffer, nodes.byteOffset + this.#beAdj); this.#forbidIdx = this._lookupNode(0, this.info.forbiddenWordPrefix); this.#compoundIdx = this._lookupNode(0, this.info.compoundCharacter); @@ -91,11 +77,11 @@ export class TrieBlob implements TrieData { } has(word: string): boolean { - return this._has8(0, word); + return this.#hasWord(0, word); } isForbiddenWord(word: string): boolean { - return !!this.#forbidIdx && this._has8(this.#forbidIdx, word); + return !!this.#forbidIdx && this.#hasWord(this.#forbidIdx, word); } hasForbiddenWords(): boolean { @@ -120,10 +106,10 @@ export class TrieBlob implements TrieData { */ find(word: string, strict: boolean): FindResult | undefined { if (!this.hasCompoundWords()) { - const found = this._has8(0, word); + const found = this.#hasWord(0, word); if (found) return { found: word, compoundUsed: false, caseMatched: true }; if (strict || !this.#nonStrictIdx) return { found: false, compoundUsed: false, caseMatched: false }; - return { found: this._has8(this.#nonStrictIdx, word) && word, compoundUsed: false, caseMatched: false }; + return { found: this.#hasWord(this.#nonStrictIdx, word) && word, compoundUsed: false, caseMatched: false }; } // @todo: handle compound words. return undefined; @@ -134,15 +120,23 @@ export class TrieBlob implements TrieData { } private _getRoot(): ITrieNodeRoot { - const trieData = new TrieBlobInternals(this.nodes, this.charIndex, { - NodeMaskEOW: TrieBlob.NodeMaskEOW, - NodeMaskNumChildren: TrieBlob.NodeMaskNumChildren, - NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex, - NodeChildRefShift: TrieBlob.NodeChildRefShift, - }); + const trieData = new TrieBlobInternals( + this.nodes, + this.charIndex, + { + NodeMaskEOW: TrieBlob.NodeMaskEOW, + NodeMaskNumChildren: TrieBlob.NodeMaskNumChildren, + NodeMaskChildCharIndex: TrieBlob.NodeMaskChildCharIndex, + NodeChildRefShift: TrieBlob.NodeChildRefShift, + }, + { + nodeFindExact: (idx, word) => this.#hasWord(idx, word), + nodeGetChild: (idx, letter) => this._lookupNode(idx, letter), + isForbidden: (word) => this.isForbiddenWord(word), + findExact: (word) => this.has(word), + }, + ); return new TrieBlobIRoot(trieData, 0, this.info, { - findExact: (word) => this.has(word), - isForbidden: (word) => this.isForbiddenWord(word), find: (word, strict) => this.find(word, strict), }); } @@ -154,22 +148,29 @@ export class TrieBlob implements TrieData { /** * Check if the word is in the trie starting at the given node index. */ - private _has8(nodeIdx: number, word: string): boolean { + #hasWord(nodeIdx: number, word: string): boolean { + const wordIndexes = this.wordToUtf8Seq(word); + const nodeIdxFound = this.#lookupNode(nodeIdx, wordIndexes); + if (nodeIdxFound === undefined) return false; + const node = this.nodes[nodeIdxFound]; + return (node & TrieBlob.NodeMaskEOW) === TrieBlob.NodeMaskEOW; + } + + /** + * Find the node index for the given Utf8 character sequence. + * @param nodeIdx - node index to start the search + * @param seq - the byte sequence of the character to look for + * @returns + */ + #lookupNode(nodeIdx: number, seq: readonly number[] | Readonly): number | undefined { const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; const NodeChildRefShift = TrieBlob.NodeChildRefShift; const nodes = this.nodes; const nodes8 = this.#nodes8; - const wordIndexes = this.wordToUtf8Seq(word); - const lookup = this.#nodeIdxLookup; + const wordIndexes = seq; const len = wordIndexes.length; - let p = 0; - for (let m = lookup.get(nodeIdx); m && p < len; ++p, m = lookup.get(nodeIdx)) { - const i = m.get(wordIndexes[p]); - if (!i) break; - nodeIdx = i; - } let node = nodes[nodeIdx]; - for (; p < len; ++p, node = nodes[nodeIdx]) { + for (let p = 0; p < len; ++p, node = nodes[nodeIdx]) { const letterIdx = wordIndexes[p]; const count = node & NodeMaskNumChildren; const idx4 = nodeIdx << 2; @@ -185,7 +186,7 @@ export class TrieBlob implements TrieData { j = m; } } - if (i > pEnd || nodes8[i] !== letterIdx) return false; + if (i > pEnd || nodes8[i] !== letterIdx) return undefined; nodeIdx = nodes[i >> 2] >>> NodeChildRefShift; continue; } @@ -195,11 +196,11 @@ export class TrieBlob implements TrieData { break; } } - if (i <= idx4) return false; + if (i <= idx4) return undefined; nodeIdx = nodes[i >> 2] >>> NodeChildRefShift; } - return (node & TrieBlob.NodeMaskEOW) === TrieBlob.NodeMaskEOW; + return nodeIdx; } /** @@ -210,41 +211,10 @@ export class TrieBlob implements TrieData { */ private _lookupNode(nodeIdx: number, char: string): number | undefined { const indexSeq = this.letterToNodeCharIndexSequence(char); - const len = indexSeq.length; - if (!len) return undefined; - let currNodeIdx: number | undefined = nodeIdx; - for (let i = 0; i < len; ++i) { - currNodeIdx = this._lookupNodeByCharIndexSeq(currNodeIdx, indexSeq[i]); - if (currNodeIdx === undefined) { - return undefined; - } - } + const currNodeIdx = this.#lookupNode(nodeIdx, indexSeq); return currNodeIdx; } - /** - * Find the node index for the given character. - * @param nodeIdx - node index to start the search - * @param char - character to look for - * @returns - */ - private _lookupNodeByCharIndexSeq(nodeIdx: number, index: number): number | undefined { - const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; - const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex; - const NodeChildRefShift = TrieBlob.NodeChildRefShift; - const nodes = this.nodes; - const node = nodes[nodeIdx]; - const letterIdx = index; - const count = node & NodeMaskNumChildren; - let i = count; - for (; i > 0; --i) { - if ((nodes[i + nodeIdx] & NodeMaskChildCharIndex) === letterIdx) { - return nodes[i + nodeIdx] >>> NodeChildRefShift; - } - } - return undefined; - } - *words(): Iterable { interface StackItem { nodeIdx: number; @@ -354,66 +324,67 @@ export class TrieBlob implements TrieData { return trieBlob; } - #prepLookup() { - const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; - const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex; - const NodeChildRefShift = TrieBlob.NodeChildRefShift; - const stack: WalkStackItem[] = []; - const iter = this.#walk(stack)[Symbol.iterator](); - const nodes = this.nodes; - - let n: IteratorResult; - let deeper = true; - while (!(n = iter.next(deeper)).done) { - const depth = n.value; - const nodeIdx = stack[depth].nodeIdx; - const node = nodes[nodeIdx]; - const len = node & NodeMaskNumChildren; - deeper = len > lookupCount; - if (deeper) { - const map = new Map(); - this.#nodeIdxLookup.set(nodeIdx, map); - for (let i = len; i > 0; --i) { - const n = nodes[i + nodeIdx]; - map.set(n & NodeMaskChildCharIndex, n >> NodeChildRefShift); - } - // const parent = depth > 0 ? stack[depth - 1].nodeIdx : -1; - // console.error('Node %d has %d children, parent %d', nodeIdx, len, parent); - } - } - } - - *#walk(wStack: WalkStackItem[]): Generator { - const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; - const NodeChildRefShift = TrieBlob.NodeChildRefShift; - const nodes = this.nodes; - const stack = wStack; - stack[0] = { nodeIdx: 0, pos: 0 }; - let depth = 0; - - while (depth >= 0) { - const { nodeIdx, pos } = stack[depth]; - const node = nodes[nodeIdx]; - // pos is 0 when first entering a node - if (!pos) { - const deeper = yield depth; - if (deeper === false) { - --depth; - continue; - } - } - const len = node & NodeMaskNumChildren; - if (pos >= len) { - --depth; - continue; - } - const nextPos = ++stack[depth].pos; - const entry = nodes[nodeIdx + nextPos]; - ++depth; - stack[depth] = stack[depth] || { nodeIdx: 0, pos: 0 }; - (stack[depth].nodeIdx = entry >>> NodeChildRefShift), (stack[depth].pos = 0); - } - } + // #prepLookup() { + // const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; + // const NodeMaskChildCharIndex = TrieBlob.NodeMaskChildCharIndex; + // const NodeChildRefShift = TrieBlob.NodeChildRefShift; + // const stack: WalkStackItem[] = []; + // const iter = this.#walk(stack)[Symbol.iterator](); + // const nodes = this.nodes; + + // let n: IteratorResult; + // let deeper = true; + // while (!(n = iter.next(deeper)).done) { + // const depth = n.value; + // const nodeIdx = stack[depth].nodeIdx; + // const node = nodes[nodeIdx]; + // const len = node & NodeMaskNumChildren; + // deeper = len > lookupCount; + // if (deeper) { + // const map = new Map(); + // this.#nodeIdxLookup.set(nodeIdx, map); + // for (let i = len; i > 0; --i) { + // const n = nodes[i + nodeIdx]; + // map.set(n & NodeMaskChildCharIndex, n >> NodeChildRefShift); + // } + // // const parent = depth > 0 ? stack[depth - 1].nodeIdx : -1; + // // console.error('Node %d has %d children, parent %d', nodeIdx, len, parent); + // } + // } + // } + + // Keeping this for a bit, until we are sure we don't need it. + // *#walk(wStack: WalkStackItem[]): Generator { + // const NodeMaskNumChildren = TrieBlob.NodeMaskNumChildren; + // const NodeChildRefShift = TrieBlob.NodeChildRefShift; + // const nodes = this.nodes; + // const stack = wStack; + // stack[0] = { nodeIdx: 0, pos: 0 }; + // let depth = 0; + + // while (depth >= 0) { + // const { nodeIdx, pos } = stack[depth]; + // const node = nodes[nodeIdx]; + // // pos is 0 when first entering a node + // if (!pos) { + // const deeper = yield depth; + // if (deeper === false) { + // --depth; + // continue; + // } + // } + // const len = node & NodeMaskNumChildren; + // if (pos >= len) { + // --depth; + // continue; + // } + // const nextPos = ++stack[depth].pos; + // const entry = nodes[nodeIdx + nextPos]; + // ++depth; + // stack[depth] = stack[depth] || { nodeIdx: 0, pos: 0 }; + // (stack[depth].nodeIdx = entry >>> NodeChildRefShift), (stack[depth].pos = 0); + // } + // } static NodeMaskEOW = 0x0000_0100; static NodeMaskNumChildren = (1 << NodeHeaderNumChildrenBits) - 1; @@ -433,10 +404,10 @@ export class TrieBlob implements TrieData { } } -interface WalkStackItem { - nodeIdx: number; - pos: number; -} +// interface WalkStackItem { +// nodeIdx: number; +// pos: number; +// } function isLittleEndian(): boolean { const buf = new Uint8Array([1, 2, 3, 4]); diff --git a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts index 16e9effeafd..09fd0dfb16c 100644 --- a/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts +++ b/packages/cspell-trie-lib/src/lib/TrieBlob/TrieBlobIRoot.ts @@ -13,17 +13,29 @@ interface BitMaskInfo { type Node = number; type NodeIndex = number; +interface TrieMethods { + readonly nodeFindExact: (idx: number, word: string) => boolean; + readonly nodeGetChild: (idx: number, letter: string) => number | undefined; + readonly isForbidden: (word: string) => boolean; + readonly findExact: (word: string) => boolean; +} + export class TrieBlobInternals implements BitMaskInfo { readonly NodeMaskEOW: number; readonly NodeMaskNumChildren: number; readonly NodeMaskChildCharIndex: number; readonly NodeChildRefShift: number; readonly isIndexDecoderNeeded: boolean; + readonly nodeFindExact: (idx: number, word: string) => boolean; + readonly isForbidden: (word: string) => boolean; + readonly findExact: (word: string) => boolean; + readonly nodeGetChild: (idx: number, letter: string) => number | undefined; constructor( readonly nodes: Uint32Array, readonly charIndex: Readonly, maskInfo: BitMaskInfo, + methods: TrieMethods, ) { const { NodeMaskEOW, NodeMaskChildCharIndex, NodeMaskNumChildren, NodeChildRefShift } = maskInfo; this.NodeMaskEOW = NodeMaskEOW; @@ -31,6 +43,10 @@ export class TrieBlobInternals implements BitMaskInfo { this.NodeMaskChildCharIndex = NodeMaskChildCharIndex; this.NodeChildRefShift = NodeChildRefShift; this.isIndexDecoderNeeded = charIndex.indexContainsMultiByteChars(); + this.nodeFindExact = methods.nodeFindExact; + this.isForbidden = methods.isForbidden; + this.findExact = methods.findExact; + this.nodeGetChild = methods.nodeGetChild; } } @@ -38,6 +54,8 @@ const EmptyKeys: readonly string[] = Object.freeze([]); const EmptyNodes: readonly ITrieNode[] = Object.freeze([]); const EmptyEntries: readonly (readonly [string, ITrieNode])[] = Object.freeze([]); +export interface ITrieSupportMethods extends Readonly> {} + class TrieBlobINode implements ITrieNode { readonly id: number; readonly node: Node; @@ -87,14 +105,11 @@ class TrieBlobINode implements ITrieNode { /** get child ITrieNode */ get(char: string): ITrieNode | undefined { - const idx = this.getCharToIdxMap()[char]; - if (idx === undefined) return undefined; - return this.child(idx); + return this.#getChildNode(char); } has(char: string): boolean { - const idx = this.getCharToIdxMap()[char]; - return idx !== undefined; + return this.trie.nodeGetChild(this.nodeIdx, char) !== undefined; } hasChildren(): boolean { @@ -110,6 +125,21 @@ class TrieBlobINode implements ITrieNode { return this.values()[keyIdx]; } + #getChildNodeIdx(char: string) { + return this.trie.nodeGetChild(this.nodeIdx, char); + } + + #getChildNode(char: string): ITrieNode | undefined { + if (this.charToIdx) { + const keyIdx = this.charToIdx[char]; + if (keyIdx === undefined) return undefined; + return this.child(keyIdx); + } + const idx = this.#getChildNodeIdx(char); + if (idx === undefined) return undefined; + return new TrieBlobINode(this.trie, idx); + } + getCharToIdxMap(): Record { const m = this.charToIdx; if (m) return m; @@ -122,6 +152,10 @@ class TrieBlobINode implements ITrieNode { return map; } + findExact(word: string): boolean { + return this.trie.nodeFindExact(this.nodeIdx, word); + } + private containsChainedIndexes(): boolean { if (this._chained !== undefined) return this._chained; if (!this._count || !this.trie.isIndexDecoderNeeded) { @@ -226,25 +260,33 @@ class TrieBlobINode implements ITrieNode { } } -export interface ITrieSupportMethods extends Readonly> {} - export class TrieBlobIRoot extends TrieBlobINode implements ITrieNodeRoot { find: ITrieNodeRoot['find']; - findExact: ITrieNodeRoot['findExact']; isForbidden: ITrieNodeRoot['isForbidden']; constructor( trie: TrieBlobInternals, nodeIdx: number, readonly info: Readonly, - methods: ITrieSupportMethods | undefined, + methods: ITrieSupportMethods, ) { super(trie, nodeIdx); - this.find = methods?.find; - this.findExact = methods?.findExact; - this.isForbidden = methods?.isForbidden; + this.find = methods.find; + this.isForbidden = trie.isForbidden; } resolveId(id: ITrieNodeId): ITrieNode { return new TrieBlobINode(this.trie, id as number); } + + get forbidPrefix(): string { + return this.info.forbiddenWordPrefix; + } + + get compoundFix(): string { + return this.info.compoundCharacter; + } + + get caseInsensitivePrefix(): string { + return this.info.stripCaseAndAccentsPrefix; + } } diff --git a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts index 8b27c47d150..d1dd4ab2713 100644 --- a/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts +++ b/packages/cspell-trie-lib/src/lib/TrieNode/trie.ts @@ -97,6 +97,18 @@ class ImplITrieRoot extends ImplITrieNode implements ITrieNodeRoot { return new ImplITrieNode(n); } + get forbidPrefix(): string { + return this.root.forbiddenWordPrefix; + } + + get compoundFix(): string { + return this.root.compoundCharacter; + } + + get caseInsensitivePrefix(): string { + return this.root.stripCaseAndAccentsPrefix; + } + static toITrieNode(node: TrieRoot): ITrieNodeRoot { return new this(node); } diff --git a/packages/cspell-trie-lib/src/lib/buildITrie.ts b/packages/cspell-trie-lib/src/lib/buildITrie.ts index 7386046d81b..561a883aa62 100644 --- a/packages/cspell-trie-lib/src/lib/buildITrie.ts +++ b/packages/cspell-trie-lib/src/lib/buildITrie.ts @@ -7,5 +7,5 @@ export function buildITrieFromWords(words: Iterable, info: PartialTrieIn const builder = new FastTrieBlobBuilder(info); builder.insert(words); const ft = builder.build(); - return new ITrieImpl(ft.size > 5000 ? ft.toTrieBlob() : ft); + return new ITrieImpl(ft.size > 100 ? ft.toTrieBlob() : ft); } diff --git a/packages/cspell-trie-lib/src/perf/map.perf.ts b/packages/cspell-trie-lib/src/perf/map.perf.ts new file mode 100644 index 00000000000..84e1f8a15c4 --- /dev/null +++ b/packages/cspell-trie-lib/src/perf/map.perf.ts @@ -0,0 +1,86 @@ +import { suite } from 'perf-insight'; + +suite('trie has', async (test) => { + const chars = [...charSet()]; + const charRecord = Object.fromEntries(chars.map((c) => [c, c.codePointAt(0)])); + const charMap = new Map(Object.entries(charRecord)); + + const lookUp = randomCharString(chars, 1000); + const iterations = 100; + + test('Map.get', () => { + let sum = 0; + for (let i = 0; i < iterations; ++i) { + for (const c of lookUp) { + sum += charMap.get(c) || 0; + } + } + return sum; + }); + + test('Record.get', () => { + let sum = 0; + for (let i = 0; i < iterations; ++i) { + for (const c of lookUp) { + sum += charRecord[c] || 0; + } + } + return sum; + }); + + test('Map.has', () => { + let sum = 0; + for (let i = 0; i < iterations; ++i) { + for (const c of lookUp) { + sum += (charMap.has(c) && 1) || 0; + } + } + return sum; + }); + + test('Record.has', () => { + let sum = 0; + for (let i = 0; i < iterations; ++i) { + for (const c of lookUp) { + sum += (c in charRecord && 1) || 0; + } + } + return sum; + }); + + test('Map.has.get', () => { + let sum = 0; + for (let i = 0; i < iterations; ++i) { + for (const c of lookUp) { + if (charMap.has(c)) { + sum += charMap.get(c)!; + } + } + } + return sum; + }); + + test('Record.has.get', () => { + let sum = 0; + for (let i = 0; i < iterations; ++i) { + for (const c of lookUp) { + if (c in charRecord) { + sum += charRecord[c]!; + } + } + } + return sum; + }); +}); + +function charSet() { + const letters = 'abcdefghijklmnopqrstuvwxyz'; + return new Set(letters.toUpperCase() + letters); +} + +function randomCharString(chars: string[], count: number): string[] { + const len = chars.length; + return Array.from({ length: count }, () => chars[Math.floor(Math.random() * len)]); +} + +// cspell:ignore tion aeiou