Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Reduce the use of Generators in critical sections. #6015

Merged
merged 6 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Args: '["--config=../../../cspell-power-shell-docs.config.yaml","**"]'
Summary:
files: 2683
filesWithIssues: 1239
issues: 5376
issues: 5379
errors: 0
Errors: []

Expand Down Expand Up @@ -1467,6 +1467,7 @@ issues:
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:806:87 redirections U when present, across redirections."
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:131:21 jdoe U User = 'jdoe'"
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:165:44 docspage U StreamWriter]::new('.\\docspage.html', $false, $Response"
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:236:26 jdoe U the image data for `jdoe.png` is submitted."
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:250:59 unkownhost U Uri \"www.microsoft.com/unkownhost\""
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:353:2 httpbin U [httpbin.org](https://httpbin"
- "reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:796:32 Passthru U pipeline, use the **Passthru** parameter."
Expand Down Expand Up @@ -2198,6 +2199,7 @@ issues:
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:1529:38 Brotli U added support for the Brotli compression algorithm"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 jdoe U User = 'jdoe'"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 docspage U StreamWriter]::new('.\\docspage.html', $false, $Response"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 jdoe U the image data for `jdoe.png` is submitted."
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 unkownhost U Uri \"www.microsoft.com/unkownhost\""
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 httpbin U [httpbin.org](https://httpbin"
- "reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 Passthru U pipeline, use the **Passthru** parameter."
Expand Down Expand Up @@ -2932,6 +2934,7 @@ issues:
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:1529:38 Brotli U added support for the Brotli compression algorithm"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 jdoe U User = 'jdoe'"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 docspage U StreamWriter]::new('.\\docspage.html', $false, $Response"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 jdoe U the image data for `jdoe.png` is submitted."
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 unkownhost U Uri \"www.microsoft.com/unkownhost\""
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 httpbin U [httpbin.org](https://httpbin"
- "reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 Passthru U pipeline, use the **Passthru** parameter."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Repository: MicrosoftDocs/PowerShell-Docs
Url: "https://github.com/MicrosoftDocs/PowerShell-Docs.git"
Args: ["--config=../../../cspell-power-shell-docs.config.yaml","**"]
Lines:
CSpell: Files checked: 2683, Issues found: 5376 in 1239 files.
CSpell: Files checked: 2683, Issues found: 5379 in 1239 files.
exit code: 1
CODE_OF_CONDUCT.md:10:38 - Unknown word (opensource) -- reach out at [aka.ms/opensource/moderation-support]
LICENSE:139:15 - Unknown word (sublicensable) -- non-sublicensable, non-exclusive, irrevocable
Expand Down Expand Up @@ -1462,6 +1462,7 @@ reference/7.2/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:270:2 - Unkn
reference/7.2/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:806:87 - Unknown word (redirections) -- when present, across redirections.
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:131:21 - Unknown word (jdoe) -- User = 'jdoe'
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:165:44 - Unknown word (docspage) -- StreamWriter]::new('.\docspage.html', $false, $Response
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:236:26 - Unknown word (jdoe) -- the image data for `jdoe.png` is submitted.
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:250:59 - Unknown word (unkownhost) -- Uri "www.microsoft.com/unkownhost"
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:353:2 - Unknown word (httpbin) -- [httpbin.org](https://httpbin
reference/7.2/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:796:32 - Unknown word (Passthru) -- pipeline, use the **Passthru** parameter.
Expand Down Expand Up @@ -2194,6 +2195,7 @@ reference/7.4/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:961:71 - Unkn
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 - Unknown word (jdoe) -- User = 'jdoe'
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:1496:38 - Unknown word (Brotli) -- added support for the Brotli compression algorithm
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 - Unknown word (docspage) -- StreamWriter]::new('.\docspage.html', $false, $Response
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 - Unknown word (jdoe) -- the image data for `jdoe.png` is submitted.
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 - Unknown word (unkownhost) -- Uri "www.microsoft.com/unkownhost"
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 - Unknown word (httpbin) -- [httpbin.org](https://httpbin
reference/7.4/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 - Unknown word (Passthru) -- pipeline, use the **Passthru** parameter.
Expand Down Expand Up @@ -2928,6 +2930,7 @@ reference/7.5/Microsoft.PowerShell.Utility/Invoke-RestMethod.md:961:71 - Unkn
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:141:21 - Unknown word (jdoe) -- User = 'jdoe'
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:1496:38 - Unknown word (Brotli) -- added support for the Brotli compression algorithm
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:175:44 - Unknown word (docspage) -- StreamWriter]::new('.\docspage.html', $false, $Response
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:246:26 - Unknown word (jdoe) -- the image data for `jdoe.png` is submitted.
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:260:59 - Unknown word (unkownhost) -- Uri "www.microsoft.com/unkownhost"
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:363:2 - Unknown word (httpbin) -- [httpbin.org](https://httpbin
reference/7.5/Microsoft.PowerShell.Utility/Invoke-WebRequest.md:923:32 - Unknown word (Passthru) -- pipeline, use the **Passthru** parameter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Args: '["**","--exclude=**/Backup/**"]'
Summary:
files: 489
filesWithIssues: 452
issues: 14311
issues: 14314
errors: 0
Errors: []

Expand Down Expand Up @@ -4806,9 +4806,12 @@ issues:
- "Scripts/Search_Script.sql:147:26 SSRS U WHEN 2 THEN 'SSRS Report'"
- "Scripts/Search_Script.sql:148:26 SSRS U WHEN 3 THEN 'SSRS Resource'"
- "Scripts/Search_Script.sql:156:10 reportserver U FROM reportserver.dbo.Catalog"
- "Scripts/Search_Script.sql:180:12 SSIS U IF @search_SSIS_MSDB = 1"
- "Scripts/Search_Script.sql:180:17 MSDB U IF @search_SSIS_MSDB = 1"
- "Scripts/Search_Script.sql:182:14 SSIS U WITH CTE_SSIS AS ("
- "Scripts/Search_Script.sql:185:44 packagedata U CONVERT(VARBINARY(MAX),packagedata)) AS package_details"
- "Scripts/Search_Script.sql:186:67 packagedata U CONVERT(VARBINARY(MAX),packagedata))) AS package_details"
- "Scripts/Search_Script.sql:187:10 SSIS U 'SSIS Package (MSDB)' AS object"
- "Scripts/Search_Script.sql:187:24 MSDB U 'SSIS Package (MSDB)' AS object_type"
- "Scripts/Search_Script.sql:188:10 msdb U FROM msdb.dbo.sysssispackages"
- "Scripts/Search_Script.sql:188:19 sysssispackages U FROM msdb.dbo.sysssispackages p"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Repository: ktaranov/sqlserver-kit
Url: "https://github.com/ktaranov/sqlserver-kit.git"
Args: ["**","--exclude=**/Backup/**"]
Lines:
CSpell: Files checked: 489, Issues found: 14311 in 452 files.
CSpell: Files checked: 489, Issues found: 14314 in 452 files.
exit code: 1
ADS/README.md:30:15 - Unknown word (Dacpac) -- | [SQL Server Dacpac]
ADS/README.md:30:181 - Unknown word (wizarding) -- Provides an easy-to-use wizarding experience to deploy
Expand Down Expand Up @@ -8112,9 +8112,12 @@ Scripts/Search_Script.sql:147:26 - Unknown word (SSRS) -- WHEN 2 THEN
Scripts/Search_Script.sql:148:26 - Unknown word (SSRS) -- WHEN 3 THEN 'SSRS Resource'
Scripts/Search_Script.sql:14:17 - Unknown word (SSIS) -- DECLARE @search_SSIS_disk BIT = 0;
Scripts/Search_Script.sql:156:10 - Unknown word (reportserver) -- FROM reportserver.dbo.Catalog
Scripts/Search_Script.sql:180:12 - Unknown word (SSIS) -- IF @search_SSIS_MSDB = 1
Scripts/Search_Script.sql:180:17 - Unknown word (MSDB) -- IF @search_SSIS_MSDB = 1
Scripts/Search_Script.sql:182:14 - Unknown word (SSIS) -- WITH CTE_SSIS AS (
Scripts/Search_Script.sql:185:44 - Unknown word (packagedata) -- CONVERT(VARBINARY(MAX),packagedata)) AS package_details
Scripts/Search_Script.sql:186:67 - Unknown word (packagedata) -- CONVERT(VARBINARY(MAX),packagedata))) AS package_details
Scripts/Search_Script.sql:187:10 - Unknown word (SSIS) -- 'SSIS Package (MSDB)' AS object
Scripts/Search_Script.sql:187:24 - Unknown word (MSDB) -- 'SSIS Package (MSDB)' AS object_type
Scripts/Search_Script.sql:188:10 - Unknown word (msdb) -- FROM msdb.dbo.sysssispackages
Scripts/Search_Script.sql:188:19 - Unknown word (sysssispackages) -- FROM msdb.dbo.sysssispackages p
Expand Down
133 changes: 85 additions & 48 deletions packages/cspell-lib/src/lib/textValidation/lineValidatorFactory.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import { opConcatMap, opFilter, opMap, pipe, toArray } from '@cspell/cspell-pipe/sync';
import { opConcatMap, opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
import type { ParsedText } from '@cspell/cspell-types';
import type { CachingDictionary, SearchOptions, SpellingDictionary } from 'cspell-dictionary';
import { createCachingDictionary } from 'cspell-dictionary';

import type { ValidationIssue } from '../Models/ValidationIssue.js';
import * as RxPat from '../Settings/RegExpPatterns.js';
import * as Text from '../util/text.js';
import { clean } from '../util/util.js';
import { split } from '../util/wordSplitter.js';
import { defaultMinWordLength } from './defaultConstants.js';
import { isWordValidWithEscapeRetry } from './isWordValid.js';
Expand All @@ -16,7 +15,6 @@ import type {
LineValidatorFn,
MappedTextValidationResult,
TextOffsetRO,
TextOffsetRW,
TextValidatorFn,
ValidationIssueRO,
ValidationOptions,
Expand All @@ -27,8 +25,12 @@ interface LineValidator {
dict: CachingDictionary;
}

interface TextOffsetWithLine extends TextOffsetRW {
line?: TextOffsetRO;
/**
 * Per-word status record cached by the line validator so that each distinct
 * word is evaluated against the dictionary at most once.
 * Every status field starts out `undefined` and is filled in lazily.
 */
interface WordStatusInfo {
/** The word text this record describes; also the key used to look the record up. */
word: string;
/** Result of the dictionary lookup done by `checkWord`; stays `undefined` when the word is flagged. */
isFound: boolean | undefined;
/** Whether the word is a flagged/forbidden word that is not ignored; `undefined` until computed. */
isFlagged: boolean | undefined;
/** Whether the dictionary reports the word as a "no suggest" word; `undefined` until computed. */
isIgnored: boolean | undefined;
/** Set to `true` once `checkWord` has fully evaluated the word, so later calls can reuse the cached values. */
fin: boolean;
}

export function lineValidatorFactory(sDict: SpellingDictionary, options: ValidationOptions): LineValidator {
Expand All @@ -45,6 +47,8 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat

const dictCol = createCachingDictionary(sDict, hasWordOptions);

const knownWords = new Map<string, WordStatusInfo>();

const setOfFlagWords = new Set(flagWords);
const setOfKnownSuccessfulWords = new Set<string>();
const rememberFilter =
Expand All @@ -60,26 +64,33 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return !setOfKnownSuccessfulWords.has(wo.text);
};

function testForFlaggedWord(wo: TextOffsetRO): boolean {
const text = wo.text;
return setOfFlagWords.has(text) || setOfFlagWords.has(text.toLowerCase()) || dictCol.isForbidden(text);
/**
 * Determine whether the word in `info` is ignored, i.e. whether the caching
 * dictionary reports it as a "no suggest" word.
 *
 * The answer is computed on first use and stored on `info.isIgnored`, so the
 * dictionary is queried at most once per record.
 *
 * @param info - the cached status record for the word; updated in place.
 * @returns `true` when the word is ignored.
 */
function calcIgnored(info: WordStatusInfo): boolean {
    const cached = info.isIgnored;
    if (cached != null) return cached;
    const ignored = dictCol.isNoSuggestWord(info.word);
    info.isIgnored = ignored;
    return ignored;
}

/**
 * Determine whether the word in `info` should be reported as flagged.
 *
 * A word is flagged when it (or its lower-case form) is in the configured
 * flag-word set, or the dictionary says it is forbidden, and it is not
 * ignored. The result is memoized on `info.isFlagged`.
 *
 * @param info - the cached status record for the word; updated in place.
 * @returns `true` when the word is flagged and not ignored.
 */
function calcFlagged(info: WordStatusInfo): boolean {
    const cached = info.isFlagged;
    if (cached !== undefined) return cached;

    const { word } = info;
    // The cheap set lookups run first; the dictionary is consulted only when both miss.
    const isListedOrForbidden =
        setOfFlagWords.has(word) || setOfFlagWords.has(word.toLowerCase()) || dictCol.isForbidden(word);
    // The ignore status is only evaluated for words that would otherwise be flagged.
    const flagged = isListedOrForbidden && !calcIgnored(info);

    info.isFlagged = flagged;
    return flagged;
}

function isWordIgnored(word: string): boolean {
return dictCol.isNoSuggestWord(word);
return calcIgnored(getWordInfo(word));
}

/**
 * Ask the caching dictionary for its preferred suggestions for `word`.
 *
 * @param word - the word to look up.
 * @returns whatever `dictCol.getPreferredSuggestions` yields for the word.
 */
function getSuggestions(word: string) {
    const preferred = dictCol.getPreferredSuggestions(word);
    return preferred;
}

function isWordFlagged(word: TextOffsetRO): boolean {
const isIgnored = isWordIgnored(word.text);
const isFlagged = !isIgnored && testForFlaggedWord(word);
return isFlagged;
function isWordFlagged(wo: TextOffsetRO): boolean {
return calcFlagged(getWordInfo(wo.text));
}

function annotateIsFlagged(word: ValidationIssue): ValidationIssueRO {
function annotateIsFlagged(word: ValidationIssue): ValidationIssue {
word.isFlagged = isWordFlagged(word);
return word;
}
Expand All @@ -92,18 +103,38 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return issue;
}

function checkWord(word: ValidationIssueRO): ValidationIssueRO {
const isIgnored = isWordIgnored(word.text);
const { isFlagged = !isIgnored && testForFlaggedWord(word) } = word;
const isFound = isFlagged ? undefined : isIgnored || isWordValidWithEscapeRetry(dictCol, word, word.line);
return clean({ ...word, isFlagged, isFound });
// Filter predicates shared by every line; they are created once per factory call
// instead of being rebuilt for each segment that is validated.
// NOTE(review): `rememberFilter` is defined outside this view; presumably it records
// words rejected by the wrapped predicate so they can be skipped later -- confirm.

// True when the word is long enough to be checked, or is already marked as flagged.
const isFlaggedOrMinLength = rememberFilter(
(wo: ValidationIssue) => wo.text.length >= minWordLength || !!wo.isFlagged,
);

// True when the word still needs to be reported: it is flagged or was not found.
const isFlaggedOrNotFound = rememberFilter((wo: ValidationIssue) => wo.isFlagged || !wo.isFound);
// True when the word does not match the repeated-character pattern.
const isNotRepeatingChar = rememberFilter((wo: ValidationIssue) => !RxPat.regExRepeatedChar.test(wo.text));

/**
 * Fill in `isFlagged` and `isFound` on `issue`, using the per-word cache.
 *
 * The first time a word is seen, its ignored / flagged / found status is
 * computed and stored on the shared word record. Later calls for the same
 * text reuse the stored values without querying the dictionary again.
 * A value of `isFlagged` already present on `issue` takes precedence over
 * the computed one.
 *
 * @param issue - the issue to annotate; it is mutated in place.
 * @returns the same `issue` object, annotated.
 */
function checkWord(issue: ValidationIssue): ValidationIssueRO {
    const info = getWordInfo(issue.text);

    if (!info.fin) {
        // First full evaluation of this word: compute everything and cache it.
        const ignored = calcIgnored(info);
        const flagged = issue.isFlagged ?? calcFlagged(info);
        // Flagged words are never looked up; ignored words count as found.
        const found = flagged ? undefined : ignored || isWordValidWithEscapeRetry(dictCol, issue, issue.line);

        info.isFlagged = !!flagged;
        info.isFound = found;
        info.fin = true;

        issue.isFlagged = flagged;
        issue.isFound = found;
        return issue;
    }

    // Cached path: reuse the stored status, keeping any flag already set on the issue.
    issue.isFlagged = issue.isFlagged ?? (!info.isIgnored && info.isFlagged);
    issue.isFound = info.isFound;
    return issue;
}

const fn: LineValidatorFn = (lineSegment: LineSegment) => {
function splitterIsValid(word: TextOffsetRO): boolean {
return (
setOfKnownSuccessfulWords.has(word.text) ||
(!testForFlaggedWord(word) && isWordValidWithEscapeRetry(dictCol, word, lineSegment.line))
(!isWordFlagged(word) && isWordValidWithEscapeRetry(dictCol, word, lineSegment.line))
);
}

Expand All @@ -112,24 +143,21 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return [vr];
}

const codeWordResults = toArray(
pipe(
Text.extractWordsFromCodeTextOffset(vr),
opFilter(filterAlreadyChecked),
opMap((t) => ({ ...t, line: vr.line })),
opMap(annotateIsFlagged),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opMap((wo) => (wo.isFlagged ? wo : checkWord(wo))),
opFilter(rememberFilter((wo) => wo.isFlagged || !wo.isFound)),
opFilter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))),

// get back the original text.
opMap((wo) => ({
...wo,
text: Text.extractText(lineSegment.segment, wo.offset, wo.offset + wo.text.length),
})),
),
);
const codeWordResults: ValidationIssueRO[] = [];

for (const wo of Text.extractWordsFromCodeTextOffset(vr)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = vr.line;
issue.isFlagged = undefined;
issue.isFound = undefined;
annotateIsFlagged(issue);
if (!isFlaggedOrMinLength(issue)) continue;
checkWord(issue);
if (!isFlaggedOrNotFound(issue) || !isNotRepeatingChar(issue)) continue;
issue.text = Text.extractText(lineSegment.segment, issue.offset, issue.offset + issue.text.length);
codeWordResults.push(issue);
}

if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr).isFound) {
rememberFilter((_) => false)(vr);
Expand All @@ -149,16 +177,17 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return [vr];
}

const mismatches: ValidationIssue[] = toArray(
pipe(
Text.extractWordsFromTextOffset(possibleWord),
opFilter((wo: TextOffsetWithLine) => filterAlreadyChecked(wo)),
opMap((wo: TextOffsetWithLine) => ((wo.line = lineSegment.line), wo as ValidationIssue)),
opMap(annotateIsFlagged),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opConcatMap(checkFullWord),
),
);
const mismatches: ValidationIssue[] = [];
for (const wo of Text.extractWordsFromTextOffset(possibleWord)) {
if (setOfKnownSuccessfulWords.has(wo.text)) continue;
const issue = wo as ValidationIssue;
issue.line = lineSegment.line;
annotateIsFlagged(issue);
if (!isFlaggedOrMinLength(issue)) continue;
for (const w of checkFullWord(issue)) {
mismatches.push(w);
}
}
if (mismatches.length) {
// Try the more expensive word splitter
const splitResult = split(lineSegment.segment, possibleWord.offset, splitterIsValid);
Expand All @@ -179,6 +208,14 @@ export function lineValidatorFactory(sDict: SpellingDictionary, options: Validat
return checkedPossibleWords;
};

/**
 * Get the cached status record for `word`, creating it on first request.
 *
 * A new record starts with every status `undefined` and `fin` set to
 * `false`, and is stored in `knownWords` so all later lookups for the same
 * text share one object.
 *
 * @param word - the exact word text used as the cache key.
 * @returns the shared, mutable status record for the word.
 */
function getWordInfo(word: string): WordStatusInfo {
    let entry = knownWords.get(word);
    if (!entry) {
        entry = { word, isFound: undefined, isFlagged: undefined, isIgnored: undefined, fin: false };
        knownWords.set(word, entry);
    }
    return entry;
}

return { fn, dict: dictCol };
}

Expand Down
Loading