Skip to content

Commit

Permalink
Use Map instead of Record for CharIndex
Browse files Browse the repository at this point in the history
After testing, Map seems to be about 2x faster.
  • Loading branch information
Jason3S committed Jul 26, 2024
1 parent e634d0f commit 399e75b
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 13 deletions.
26 changes: 13 additions & 13 deletions packages/cspell-trie-lib/src/lib/TrieBlob/CharIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ import { encodeTextToUtf8, encodeUtf8N_BE, type Utf8BE32 } from './Utf8.js';

export type Utf8Seq = Readonly<number[]>;

export type CharIndexMap = Record<string, Utf8BE32>;
export type CharIndexMap = Map<string, Utf8BE32>;

/**
 * Read-only view of a character → `Utf8BE32` lookup.
 *
 * `Readonly<Map<K, V>>` only marks the Map's own properties as readonly; the mutating
 * methods (`set`, `delete`, `clear`) remain callable through it. `ReadonlyMap` leaves
 * those methods out, and a regular `Map` is still assignable to it.
 */
export type RO_CharIndexMap = ReadonlyMap<string, Utf8BE32>;

export type CharIndexSeqMap = Record<string, Utf8Seq>;
export type CharIndexSeqMap = Map<string, Utf8Seq>;

/**
 * Read-only view of a character → `Utf8Seq` lookup.
 *
 * `Readonly<Map<K, V>>` only marks the Map's own properties as readonly; the mutating
 * methods (`set`, `delete`, `clear`) remain callable through it. `ReadonlyMap` leaves
 * those methods out, and a regular `Map` is still assignable to it.
 */
export type RO_CharIndexSeqMap = ReadonlyMap<string, Utf8Seq>;

Expand All @@ -23,14 +23,14 @@ export class CharIndex {

/**
 * Builds the character → UTF-8 sequence lookup from `charIndex` and records
 * whether any stored sequence has more than one element.
 *
 * NOTE(review): the two adjacent `#multiByteChars` assignments appear to be the
 * before/after pair of this diff (plain-object form, then `Map` form).
 */
constructor(readonly charIndex: readonly string[]) {
this.#charToUtf8SeqMap = buildCharIndexSequenceMap(charIndex);
this.#multiByteChars = Object.values(this.#charToUtf8SeqMap).some((c) => c.length > 1);
this.#multiByteChars = [...this.#charToUtf8SeqMap.values()].some((c) => c.length > 1);
}

/**
 * Returns the UTF-8 sequence for `c`.
 *
 * A previously stored sequence is returned as-is. On a miss the sequence is
 * computed with `encodeTextToUtf8` and stored, so a later lookup of the same
 * string finds it.
 *
 * NOTE(review): the paired lookup / store statements appear to be the
 * before/after lines of this diff (index access, then `Map.get` / `Map.set`).
 */
getCharUtf8Seq(c: string): Utf8Seq {
const found = this.#charToUtf8SeqMap[c];
const found = this.#charToUtf8SeqMap.get(c);
if (found) return found;
const s = encodeTextToUtf8(c);
this.#charToUtf8SeqMap[c] = s;
this.#charToUtf8SeqMap.set(c, s);
return s;
}

Expand Down Expand Up @@ -59,17 +59,17 @@ export class CharIndex {
}

/**
 * Creates a lookup that maps every entry of `charIndex` to the result of
 * `encodeTextToUtf8` for that entry.
 *
 * NOTE(review): the paired `map` declarations and stores appear to be the
 * before/after lines of this diff (prototype-less object, then `Map`).
 */
function buildCharIndexSequenceMap(charIndex: readonly string[]): CharIndexSeqMap {
const map: CharIndexSeqMap = Object.create(null);
const map: CharIndexSeqMap = new Map();
for (const key of charIndex) {
map[key] = encodeTextToUtf8(key);
map.set(key, encodeTextToUtf8(key));
}
return map;
}

export class CharIndexBuilder {
private readonly charIndex: string[] = [];
readonly charIndexMap: CharIndexMap = Object.create(null);
readonly charIndexSeqMap: CharIndexSeqMap = Object.create(null);
readonly charIndexMap: CharIndexMap = new Map();
readonly charIndexSeqMap: CharIndexSeqMap = new Map();

readonly #mapIdxToSeq = new Map<number, number[]>();

Expand All @@ -78,16 +78,16 @@ export class CharIndexBuilder {
}

/**
 * Returns the value stored for `c`, registering `c` first when it is unknown.
 *
 * For an unknown character, its NFC form is appended to `charIndex`, the first
 * code point of that form is encoded with `encodeUtf8N_BE`, and the result is
 * stored under `c`, its NFC form and its NFD form, so all three spellings
 * resolve to the same value.
 *
 * NOTE(review): the paired lookup / store statements appear to be the
 * before/after lines of this diff (index access, then `Map.get` / `Map.set`).
 */
getUtf8Value(c: string): number {
const found = this.charIndexMap[c];
const found = this.charIndexMap.get(c);
// Explicit `undefined` check rather than truthiness — presumably because 0 can be a stored value.
if (found !== undefined) {
return found;
}
const nc = c.normalize('NFC');
this.charIndex.push(nc);
// Only the first code point of the NFC form is encoded; a missing code point falls back to 0.
const utf8 = encodeUtf8N_BE(nc.codePointAt(0) || 0);
this.charIndexMap[c] = utf8;
this.charIndexMap[nc] = utf8;
this.charIndexMap[c.normalize('NFD')] = utf8;
this.charIndexMap.set(c, utf8);
this.charIndexMap.set(nc, utf8);
this.charIndexMap.set(c.normalize('NFD'), utf8);
return utf8;
}

Expand Down
86 changes: 86 additions & 0 deletions packages/cspell-trie-lib/src/perf/map.perf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { suite } from 'perf-insight';

/*
 * Benchmark: single-character key lookups on a plain object ("Record") versus the
 * same lookups on a `Map` holding identical entries.
 *
 * Each test walks the same `lookUp` array `iterations` times and returns the
 * accumulated sum (presumably so the lookups are observably used).
 *
 * NOTE(review): the suite name 'trie has' does not describe a Map-vs-Record
 * comparison — confirm whether it was carried over from another perf file.
 */
suite('trie has', async (test) => {
// The letters produced by charSet().
const chars = [...charSet()];
// letter -> code point, as a plain object ...
const charRecord = Object.fromEntries(chars.map((c) => [c, c.codePointAt(0)]));
// ... and the same entries as a Map.
const charMap = new Map(Object.entries(charRecord));

// 1000 randomly chosen letters; every test reads this same sequence.
const lookUp = randomCharString(chars, 1000);
const iterations = 100;

// Value lookup through Map.get, with 0 as the fallback for a falsy result.
test('Map.get', () => {
let sum = 0;
for (let i = 0; i < iterations; ++i) {
for (const c of lookUp) {
sum += charMap.get(c) || 0;
}
}
return sum;
});

// Value lookup through property access on the plain object.
test('Record.get', () => {
let sum = 0;
for (let i = 0; i < iterations; ++i) {
for (const c of lookUp) {
sum += charRecord[c] || 0;
}
}
return sum;
});

// Membership test only: adds 1 for every key the Map contains.
test('Map.has', () => {
let sum = 0;
for (let i = 0; i < iterations; ++i) {
for (const c of lookUp) {
sum += (charMap.has(c) && 1) || 0;
}
}
return sum;
});

// Membership test only, using the `in` operator on the plain object.
test('Record.has', () => {
let sum = 0;
for (let i = 0; i < iterations; ++i) {
for (const c of lookUp) {
sum += (c in charRecord && 1) || 0;
}
}
return sum;
});

// Membership test followed by a value lookup on the Map (two operations per key).
test('Map.has.get', () => {
let sum = 0;
for (let i = 0; i < iterations; ++i) {
for (const c of lookUp) {
if (charMap.has(c)) {
sum += charMap.get(c)!;
}
}
}
return sum;
});

// Membership test followed by a value lookup on the plain object.
test('Record.has.get', () => {
let sum = 0;
for (let i = 0; i < iterations; ++i) {
for (const c of lookUp) {
if (c in charRecord) {
sum += charRecord[c]!;
}
}
}
return sum;
});
});

/**
 * Returns the set of ASCII letters: the 26 upper-case letters are inserted first,
 * followed by the 26 lower-case letters.
 */
function charSet() {
    const lowerCase = 'abcdefghijklmnopqrstuvwxyz';
    const result = new Set<string>();
    for (const ch of lowerCase.toUpperCase()) {
        result.add(ch);
    }
    for (const ch of lowerCase) {
        result.add(ch);
    }
    return result;
}

/**
 * Produces an array of `count` entries, each one picked at random (with
 * replacement) from `chars`.
 *
 * @param chars - the pool of strings to pick from.
 * @param count - how many picks to make.
 * @returns the picked strings, in the order they were drawn.
 */
function randomCharString(chars: string[], count: number): string[] {
    const poolSize = chars.length;
    const pickOne = () => chars[Math.floor(poolSize * Math.random())];
    return Array.from({ length: count }, pickOne);
}

// cspell:ignore tion aeiou

0 comments on commit 399e75b

Please sign in to comment.