Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various performance improvements #251

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions lib/parser.dart
Original file line number Diff line number Diff line change
Expand Up @@ -3959,6 +3959,9 @@ class ParseError implements SourceSpanException {

/// Renders this error as text.
///
/// Returns [message] unchanged when there is no [span]. Otherwise the
/// message is formatted through the span (forwarding [color]) and prefixed
/// with 'ParserError on ' when the span has no source URL, or 'On ' when it
/// has one.
@override
String toString({dynamic color}) {
  // Copy to a local so the null check promotes and no `!` is needed.
  final location = span;
  if (location == null) return message;

  final described = location.message(message, color: color);
  final prefix = location.sourceUrl == null ? 'ParserError on ' : 'On ';
  return '$prefix$described';
}
Expand Down
127 changes: 116 additions & 11 deletions lib/src/constants.dart
Original file line number Diff line number Diff line change
Expand Up @@ -393,10 +393,54 @@ const mathmlTextIntegrationPointElements = [
Pair(Namespaces.mathml, 'mtext')
];

const spaceCharacters = ' \n\r\t\u000C';
class Charcode {
moffatman marked this conversation as resolved.
Show resolved Hide resolved
static const int kNull = 0x00;

const int newLine = 10;
const int returnCode = 13;
/// '\t'
static const int kTab = 0x09;

/// '\n'
static const int kLineFeed = 0x0A;
static const int kFormFeed = 0x0C;

/// '\r'
static const int kCarriageReturn = 0x0D;

/// ' '
static const int kSpace = 0x20;

/// '"'
static const int kDoubleQuote = 0x22;

/// '&'
static const int kAmpersand = 0x26;

/// "'"
static const int kSingleQuote = 0x27;

/// '-'
static const int kHyphen = 0x2D;

/// '<'
static const int kLessThan = 0x3C;

/// '='
static const int kEquals = 0x3D;

/// '>'
static const int kGreaterThan = 0x3E;

/// '`'
static const int kGraveAccent = 0x60;
}

/// Code units of the space characters: space, line feed, carriage return,
/// tab and form feed.
const Set<int> spaceCharacters = <int>{
  Charcode.kSpace,
  Charcode.kLineFeed,
  Charcode.kCarriageReturn,
  Charcode.kTab,
  Charcode.kFormFeed,
};

bool isWhitespace(String? char) {
if (char == null) return false;
Expand All @@ -405,11 +449,11 @@ bool isWhitespace(String? char) {

bool isWhitespaceCC(int charCode) {
switch (charCode) {
case 9: // '\t'
case newLine: // '\n'
case 12: // '\f'
case returnCode: // '\r'
case 32: // ' '
case Charcode.kTab:
case Charcode.kLineFeed:
case Charcode.kFormFeed:
case Charcode.kCarriageReturn:
case Charcode.kSpace:
return true;
}
return false;
Expand All @@ -424,7 +468,60 @@ const List<String> tableInsertModeElements = [
];

// TODO(jmesserly): remove these in favor of the test functions
const asciiLetters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
/// Code units of the ASCII letters: 'A'-'Z' (0x41-0x5A) followed by
/// 'a'-'z' (0x61-0x7A).
const Set<int> asciiLetters = <int>{
  // 'A' .. 'Z'
  0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D,
  0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,
  // 'a' .. 'z'
  0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D,
  0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A,
};

const _zeroCode = 48;
const _lowerACode = 97;
Expand Down Expand Up @@ -482,8 +579,16 @@ extension AsciiUpperToLower on String {
/// Converts ASCII characters to lowercase.
///
/// Unlike [String.toLowerCase] does not touch non-ASCII characters.
String toAsciiLowerCase() =>
String.fromCharCodes(codeUnits.map(_asciiToLower));
String toAsciiLowerCase() {
  // Scan first: a string with no ASCII uppercase code unit is returned as-is,
  // so no new string is built for it.
  final hasAsciiUpper =
      codeUnits.any((unit) => unit >= _upperACode && unit <= _upperZCode);
  if (!hasAsciiUpper) return this;

  // At least one code unit needs lowering, so build the converted string.
  return String.fromCharCodes(codeUnits.map(_asciiToLower));
}

static int _asciiToLower(int c) => (c >= _upperACode && c <= _upperZCode)
? c + _lowerACode - _upperACode
Expand Down
76 changes: 63 additions & 13 deletions lib/src/html_input_stream.dart
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,22 @@ class HtmlInputStream {
errors = Queue<String>();

_offset = 0;
_chars = <int>[];

final rawChars = _rawChars ??= _decodeBytes(charEncodingName!, _rawBytes!);

// Optimistically allocate array, trim it later if there are changes
_chars = List.filled(rawChars.length, 0, growable: true);
var skipNewline = false;
var wasSurrogatePair = false;
var deletedChars = 0;
for (var i = 0; i < rawChars.length; i++) {
var c = rawChars[i];
if (skipNewline) {
skipNewline = false;
if (c == newLine) continue;
if (c == Charcode.kLineFeed) {
deletedChars++;
continue;
}
}

final isSurrogatePair = _isSurrogatePair(rawChars, i);
Expand All @@ -111,20 +116,24 @@ class HtmlInputStream {
}
wasSurrogatePair = isSurrogatePair;

if (c == returnCode) {
if (c == Charcode.kCarriageReturn) {
skipNewline = true;
c = newLine;
c = Charcode.kLineFeed;
}

_chars.add(c);
_chars[i - deletedChars] = c;
}
if (deletedChars > 0) {
// Remove the null bytes from the end
_chars.removeRange(_chars.length - deletedChars, _chars.length);
}

// Free decoded characters if they aren't needed anymore.
if (_rawBytes != null) _rawChars = null;

// TODO(sigmund): Don't parse the file at all if spans aren't being
// generated.
fileInfo = SourceFile.decoded(_chars, url: sourceUrl);
if (generateSpans) {
fileInfo = SourceFile.decoded(_chars, url: sourceUrl);
}
}

void detectEncoding([bool parseMeta = true]) {
Expand Down Expand Up @@ -212,6 +221,11 @@ class HtmlInputStream {
: String.fromCharCode(_chars[_offset++]);
}

/// Returns the code unit at the current offset without consuming it, or
/// `null` when the offset is at or past the end of the input.
int? peekCodeUnit() => _offset < _chars.length ? _chars[_offset] : null;

String? peekChar() {
if (_offset >= _chars.length) return eof;
return _isSurrogatePair(_chars, _offset)
Expand All @@ -233,12 +247,46 @@ class HtmlInputStream {
bool _isTrailSurrogate(int code) => (code & 0xFC00) == 0xDC00;

/// Returns a string of characters from the stream up to but not
/// including any character in 'characters' or EOF.
String charsUntil(String characters, [bool opposite = false]) {
/// including any character in 'characters' or EOF. These functions rely
/// on the charCode(s) being single-codepoint.
String charsUntil(Set<int> charCodes, [bool opposite = false]) {
  final start = _offset;
  final length = _chars.length;

  // With `opposite == false` this advances past code units that are NOT in
  // [charCodes]; with `opposite == true` it advances past code units that ARE
  // in [charCodes]. Either way it stops at EOF.
  while (_offset < length && charCodes.contains(_chars[_offset]) == opposite) {
    _offset++;
  }

  // Pass the range to `fromCharCodes` directly rather than copying it into a
  // temporary list with `sublist` first.
  return String.fromCharCodes(_chars, start, _offset);
}

/// Single-code-unit variant of [charsUntil].
///
/// Consumes code units up to, but not including, the first one equal to
/// [charCode] (or EOF) and returns them as a string. When [opposite] is
/// `true` the test is inverted: the run of code units equal to [charCode] is
/// consumed instead.
String charsUntil1(int charCode, [bool opposite = false]) {
  final start = _offset;
  final length = _chars.length;

  while (_offset < length && (_chars[_offset] == charCode) == opposite) {
    _offset++;
  }

  // Pass the range to `fromCharCodes` directly rather than copying it into a
  // temporary list with `sublist` first.
  return String.fromCharCodes(_chars, start, _offset);
}

/// Two-code-unit variant of [charsUntil].
///
/// Consumes code units up to, but not including, the first one equal to
/// [charCode1] or [charCode2] (or EOF) and returns them as a string. When
/// [opposite] is `true` the test is inverted: the run of code units equal to
/// either value is consumed instead.
String charsUntil2(int charCode1, int charCode2, [bool opposite = false]) {
  final start = _offset;
  final length = _chars.length;

  while (_offset < length) {
    final unit = _chars[_offset];
    final matches = unit == charCode1 || unit == charCode2;
    if (matches != opposite) break;
    _offset++;
  }

  // Pass the range to `fromCharCodes` directly rather than copying it into a
  // temporary list with `sublist` first.
  return String.fromCharCodes(_chars, start, _offset);
}

String charsUntil3(int charCode1, int charCode2, int charCode3,
[bool opposite = false]) {
final start = _offset;
String? c;
while ((c = peekChar()) != null && characters.contains(c!) == opposite) {
_offset += c.codeUnits.length;
int? c;
while ((c = peekCodeUnit()) != null &&
(charCode1 == c! || charCode2 == c || charCode3 == c) == opposite) {
_offset += 1;
}

return String.fromCharCodes(_chars.sublist(start, _offset));
Expand All @@ -257,6 +305,8 @@ class HtmlInputStream {
// TODO(jmesserly): the Python code used a regex to check for this. But
// Dart doesn't let you create a regexp with invalid characters.
bool _invalidUnicode(int c) {
// Fast return for common ASCII characters
if (0x0020 <= c && c <= 0x007E) return false;
if (0x0001 <= c && c <= 0x0008) return true;
if (0x000E <= c && c <= 0x001F) return true;
if (0x007F <= c && c <= 0x009F) return true;
Expand Down
Loading
Loading