Skip to content

Commit

Permalink
Skip indexing large files above 1mb, add option --max-file-byte-size (
Browse files Browse the repository at this point in the history
#271)

* Add configurable option to skip large files

Previously, scip-typescript indexed all files regardless of file size.
This could cause scip-typescript to stall while indexing very large
files, which were frequently auto-generated. This commit changes
the default behavior to skip indexing files that are larger than 1mb,
and makes this threshold configurable via the new `--max-file-byte-size`
flag.

* Print out when large files are skipped
  • Loading branch information
olafurpg authored Jul 11, 2023
1 parent 67cfeeb commit d56d117
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 4 deletions.
27 changes: 23 additions & 4 deletions src/CommandLineOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import ts from 'typescript'

import packageJson from '../package.json'

import { parseHumanByteSizeIntoNumber } from './parseHumanByteSizeIntoNumber'
import * as scip from './scip'

/** Configuration options to index a multi-project workspace. */
Expand All @@ -14,6 +15,8 @@ export interface MultiProjectOptions {
yarnBerryWorkspaces: boolean
pnpmWorkspaces: boolean
globalCaches: boolean
maxFileByteSize?: string
maxFileByteSizeNumber?: number
cwd: string
output: string
indexedProjects: Set<string>
Expand All @@ -36,7 +39,7 @@ export interface GlobalCache {
}

export function mainCommand(
indexAction: (projects: string[], otpions: MultiProjectOptions) => void
indexAction: (projects: string[], options: MultiProjectOptions) => void
): Command {
const command = new Command()
command
Expand Down Expand Up @@ -67,12 +70,28 @@ export function mainCommand(
'--no-global-caches',
'whether to disable global caches between TypeScript projects'
)
.option(
'--max-file-byte-size <value>',
'skip files that have a larger byte size than the provided value. Supported formats: 1kb, 1mb, 1gb.',
'1mb'
)
.argument('[projects...]')
.action((parsedProjects, parsedOptions) => {
indexAction(
parsedProjects as string[],
parsedOptions as MultiProjectOptions
const options = parsedOptions as MultiProjectOptions

// Parse and validate human-provided --max-file-byte-size value
options.maxFileByteSizeNumber = parseHumanByteSizeIntoNumber(
options.maxFileByteSize ?? '1mb'
)
if (isNaN(options.maxFileByteSizeNumber)) {
console.error(
`invalid byte size '${options.maxFileByteSize}'. To fix this problem, change the value of the flag --max-file-byte-size to use a valid byte size format: 1kb, 1mb, 1gb.`
)
process.exitCode = 1
return
}

indexAction(parsedProjects as string[], options)
})
return command
}
18 changes: 18 additions & 0 deletions src/FileIndexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
} from './Descriptor'
import { Input } from './Input'
import { Packages } from './Packages'
import { formatByteSizeAsHumanReadable } from './parseHumanByteSizeIntoNumber'
import { Range } from './Range'
import * as scip from './scip'
import { ScipSymbol } from './ScipSymbol'
Expand Down Expand Up @@ -42,6 +43,23 @@ export class FileIndexer {
// if (!this.sourceFile.fileName.includes('constructor')) {
// return
// }

const byteSize = Buffer.from(this.sourceFile.getText()).length
if (
this.options.maxFileByteSizeNumber &&
byteSize > this.options.maxFileByteSizeNumber
) {
const humanSize = formatByteSizeAsHumanReadable(byteSize)
const humanMaxSize = formatByteSizeAsHumanReadable(
this.options.maxFileByteSizeNumber
)
console.log(
`info: skipping file '${this.sourceFile.fileName}' because it has byte size ${humanSize} that exceeds the maximum threshold ${humanMaxSize}. ` +
'If you intended to index this file, use the flag --max-file-byte-size to configure the maximum file size threshold.'
)
return
}

this.emitSourceFileOccurrence()
this.visit(this.sourceFile)
}
Expand Down
37 changes: 37 additions & 0 deletions src/parseHumanByteSizeIntoNumber.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import { test } from 'uvu'
import * as assert from 'uvu/assert'

import { parseHumanByteSizeIntoNumber } from './parseHumanByteSizeIntoNumber'

/**
 * Registers one test case, named after `humanInput`, asserting that
 * `parseHumanByteSizeIntoNumber(humanInput)` equals `expectedByteNumber`.
 *
 * @param humanInput the human-written byte size to parse, also used as the test name
 * @param expectedByteNumber the number of bytes the parser is expected to return
 */
function checkHumanByteSize(
  humanInput: string,
  expectedByteNumber: number
): void {
  test(humanInput, () =>
    assert.equal(parseHumanByteSizeIntoNumber(humanInput), expectedByteNumber)
  )
}

// Invalid formats: unknown unit suffixes and non-numeric input must yield NaN.
checkHumanByteSize('invalid', NaN)
checkHumanByteSize('15tb', NaN)
checkHumanByteSize('15b', NaN)

// All numeral: a bare number is interpreted as a byte count.
checkHumanByteSize('1001', 1001)

// All lowercase
checkHumanByteSize('1.2kb', 1_200)
checkHumanByteSize('1.2mb', 1_200_000)
checkHumanByteSize('1.2gb', 1_200_000_000)

// All uppercase
checkHumanByteSize('1.2KB', 1_200)
checkHumanByteSize('1.2MB', 1_200_000)
checkHumanByteSize('1.2GB', 1_200_000_000)

// Mixed case
checkHumanByteSize('1.2Kb', 1_200)
checkHumanByteSize('1.2Mb', 1_200_000)
checkHumanByteSize('1.2Gb', 1_200_000_000)

// uvu only queues the cases registered above; the suite is not executed
// unless `run()` is called explicitly.
test.run()
32 changes: 32 additions & 0 deletions src/parseHumanByteSizeIntoNumber.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Decimal (SI) multipliers: 1kb = 1000 bytes, not 1024.
const kilo = 1_000
const mega = 1_000_000
const giga = 1_000_000_000

/** Supported unit suffixes (lowercase) paired with their byte multiplier. */
const unitMultipliers: ReadonlyArray<readonly [string, number]> = [
  ['kb', kilo],
  ['mb', mega],
  ['gb', giga],
]

/**
 * Matches a complete, non-negative decimal number such as `12`, `1.2`, `.5`
 * or `1e3`. The anchors ensure that the entire string is numeric.
 */
const plainNumberPattern = /^\+?(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?$/

/**
 * Parses a human-written byte size such as `1001`, `1.2kb`, `5MB` or `2Gb`
 * into a number of bytes.
 *
 * The unit suffix is case-insensitive and optional; a bare number is taken to
 * be a byte count. Surrounding whitespace, and whitespace between the number
 * and the unit, is ignored.
 *
 * @param humanByteSize the text to parse
 * @returns the size in bytes, or `NaN` when the input is not a non-negative
 * number optionally followed by `kb`, `mb` or `gb`
 */
export function parseHumanByteSizeIntoNumber(humanByteSize: string): number {
  let value = humanByteSize.trim().toLowerCase()
  let multiplier = 1
  for (const [suffix, unitMultiplier] of unitMultipliers) {
    if (value.endsWith(suffix)) {
      multiplier = unitMultiplier
      value = value.slice(0, -suffix.length)
      break
    }
  }
  value = value.trim()
  // `Number.parseFloat` stops at the first character it cannot parse, so
  // inputs like '15tb' or '15b' would be accepted as 15. Validate the whole
  // remaining string instead so unsupported units are reported as NaN.
  if (!plainNumberPattern.test(value)) {
    return Number.NaN
  }
  return Number(value) * multiplier
}

/**
 * Formats a byte count using the largest unit (`gb`, `mb`, `kb`) that the
 * count reaches, e.g. `1500000` becomes `1.5mb`. Counts below one kilobyte
 * are returned as a plain number without a unit.
 *
 * @param byteSize the number of bytes to format
 * @returns the formatted size
 */
export function formatByteSizeAsHumanReadable(byteSize: number): string {
  // Use `>=` so that exact multiples pick their own unit: 1_000_000 is
  // rendered as '1mb' rather than '1000kb'.
  if (byteSize >= giga) {
    return `${byteSize / giga}gb`
  }
  if (byteSize >= mega) {
    return `${byteSize / mega}mb`
  }
  if (byteSize >= kilo) {
    return `${byteSize / kilo}kb`
  }
  return byteSize.toString()
}

0 comments on commit d56d117

Please sign in to comment.