Skip to content

Commit

Permalink
Improve Extended CSS tokenization
Browse files Browse the repository at this point in the history
  • Loading branch information
scripthunter7 committed Oct 17, 2023
1 parent bc5d53f commit 146b808
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import { type TokenizerContext } from '../../common/context';
import { CodePoint } from '../../common/enums/code-points';
import { TokenType } from '../../common/enums/token-types';
import { type TokenizerContextFunction } from '../../common/types/function-prototypes';
import { consumeDelimToken } from '../consumers/delim-token';
import { isWhitespace } from '../definitions';

/**
* Generic handler for the Extended CSS's pseudo-classes
Expand All @@ -16,14 +18,44 @@ export const handleRegularExtendedCssPseudo: TokenizerContextFunction = (context
// Save the current offset, because we will need it later
const start = context.offset;

// Consume as much whitespace as possible
while (isWhitespace(context.code())) {
context.consumeCodePoint();
}

// If the first non-whitespace code point is an apostrophe or a quotation mark, it means that we are dealing
// with a string parameter.
// In this case, we simply abort the custom handler here, and let the standard tokenizer handle the string and
// everything that comes after it as specified in the spec.
// This behavior is similar to the standard CSS's url() function, it is also handled differently if its parameter
// is a string.
if (context.code() === CodePoint.Apostrophe || context.code() === CodePoint.QuotationMark) {
// Report whitespace tokens (if any)
// It is important to report them, because we have already consumed them - and reporting them here is faster
// than re-consuming them
if (context.offset > start) {
context.onToken(TokenType.Whitespace, start, context.offset);
}

// We simply abort the custom handler
return;
}

// Otherwise, we need to find the closing parenthesis based on the parenthesis balance
// Parenthesis balance: 1, because we start after the opening parenthesis:
// :contains(param)
// ^ we start from here
// ^ we start from here, so we already have 1 open parenthesis
let balance = 1;
let end = context.offset;

for (; context.offset < context.source.length; context.consumeCodePoint()) {
// TODO: handle newlines - they are not allowed within the pseudo-class
// Don't forget to report already consumed whitespace chars as delim-tokens (if any)
// Note: we handle the parameter characters as delim-tokens, this is why we don't need to report them here
// as whitespace-tokens
for (let i = start; i < context.offset; i += 1) {
context.onToken(TokenType.Delim, i, i + 1);
}

// Consume until we find the closing parenthesis or we reach the end of the source
while (!context.isEof()) {
if (
context.code() === CodePoint.LeftParenthesis
&& context.source.charCodeAt(context.offset - 1) !== CodePoint.ReverseSolidus
Expand All @@ -39,19 +71,11 @@ export const handleRegularExtendedCssPseudo: TokenizerContextFunction = (context

// If the balance is 0, it means that we found the closing parenthesis
if (balance === 0) {
end = context.offset;
break;
}
}
}

// If the balance is not 0, it means that we reached the end of the source code
// without finding the closing parenthesis
// If the balance is 0, it means that we found the closing parenthesis, so we need to report tokens between
// the start and the end offsets
if (balance === 0) {
for (let i = start; i < end; i += 1) {
context.onToken(TokenType.Delim, i, i + 1);
}
// Consume the current character as a delim-token
consumeDelimToken(context);
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import { type TokenizerContext } from '../../common/context';
import { CodePoint } from '../../common/enums/code-points';
import { TokenType } from '../../common/enums/token-types';
import { type TokenizerContextFunction } from '../../common/types/function-prototypes';
import { consumeDelimToken } from '../consumers/delim-token';
import { isWhitespace } from '../definitions';

/**
* Handler for the Extended CSS's `:xpath()` pseudo-class
Expand All @@ -18,15 +20,50 @@ export const handleXpathExtendedCssPseudo: TokenizerContextFunction = (context:
// Save the current offset, because we will need it later
const start = context.offset;

// Consume as much whitespace as possible
while (isWhitespace(context.code())) {
context.consumeCodePoint();
}

// If the first non-whitespace code point is an apostrophe or a quotation mark, it means that we are dealing
// with a string parameter.
// In this case, we simply abort the custom handler here, and let the standard tokenizer handle the string and
// everything that comes after it as specified in the spec.
// This behavior is similar to the standard CSS's url() function, it is also handled differently if its parameter
// is a string.
if (context.code() === CodePoint.Apostrophe || context.code() === CodePoint.QuotationMark) {
// Report whitespace tokens (if any)
// It is important to report them, because we have already consumed them - and reporting them here is faster
// than re-consuming them
if (context.offset > start) {
context.onToken(TokenType.Whitespace, start, context.offset);
}

// We simply abort the custom handler
return;
}

// Otherwise, we need to find the closing parenthesis based on the parenthesis balance
// Parenthesis balance: 1, because we start after the opening parenthesis:
// :xpath(param)
// ^ we start from here
// :xpath(param)
// ^ we start from here, so we already have 1 open parenthesis
let balance = 1;
let end = context.offset;

// Don't forget to report already consumed whitespace chars as delim-tokens (if any)
// Note: we handle the parameter characters as delim-tokens, this is why we don't need to report them here
// as whitespace-tokens
for (let i = start; i < context.offset; i += 1) {
context.onToken(TokenType.Delim, i, i + 1);
}

// :xpath() is a bit tricky, because it can contain unescaped parentheses inside strings in the XPath expression,
// like this:
// :xpath(//div[@class="foo(bar)"])
// but in this case, the whole XPath expression is not required to be a string
let inString = false;

for (; context.offset < context.source.length; context.consumeCodePoint()) {
// TODO: handle newlines - they are not allowed within the pseudo-class
// Consume until we find the closing parenthesis or we reach the end of the source
while (!context.isEof()) {
// If we find an unescaped quote mark, we toggle the "inString" flag
// It is important, because we should omit parentheses inside strings.
if (
Expand Down Expand Up @@ -54,17 +91,12 @@ export const handleXpathExtendedCssPseudo: TokenizerContextFunction = (context:
// If the balance is 0, it means that we found the closing parenthesis of the
// pseudo-class
if (balance === 0) {
end = context.offset;
break;
}
}
}
}

// If the balance is not 0, it means that we reached the end of the source code
if (balance === 0) {
for (let i = start; i < end; i += 1) {
context.onToken(TokenType.Delim, i, i + 1);
}
// Consume the current character as a delim-token
consumeDelimToken(context);
}
};

0 comments on commit 146b808

Please sign in to comment.