Skip to content

Commit

Permalink
Unicode computed location fix
Browse files Browse the repository at this point in the history
Reviewed By: captbaritone

Differential Revision: D63879176

fbshipit-source-id: 9093a8916524dad92cab7a4d91bf6917218ee95c
  • Loading branch information
gordyf authored and facebook-github-bot committed Oct 4, 2024
1 parent b02a3f9 commit 524f5c4
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
23 changes: 18 additions & 5 deletions compiler/crates/common/src/text_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ impl TextSource {
/**
* Converts span, which is a pair of offsets relative to the start of this text source,
* into the equivalent line and character number range.
* Span offsets are measured in UTF-8 bytes, not characters.
*/
pub fn to_span_range(&self, span: Span) -> lsp_types::Range {
let start = span.start as usize;
Expand All @@ -84,13 +85,15 @@ impl TextSource {
let mut character = self.column_index;
let mut start_position = lsp_types::Position::default();
let mut end_position = lsp_types::Position::default();
let mut chars = self.text.chars().enumerate().peekable();
let mut chars = self.text.chars().peekable();

let mut bytes_seen = 0;

while let Some((index, chr)) = chars.next() {
if index == start {
while let Some(chr) = chars.next() {
if bytes_seen == start {
start_position = lsp_types::Position::new(line as u32, character as u32);
}
if index == end {
if bytes_seen == end {
end_position = lsp_types::Position::new(line as u32, character as u32);
break;
}
Expand All @@ -99,7 +102,7 @@ impl TextSource {
// Line terminators: https://www.ecma-international.org/ecma-262/#sec-line-terminators
'\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}' => {
// <CRLF>
!matches!((chr, chars.peek()), ('\u{000D}', Some((_, '\u{000D}'))))
!matches!((chr, chars.peek()), ('\u{000D}', Some('\u{000D}')))
}
_ => false,
};
Expand All @@ -112,6 +115,7 @@ impl TextSource {
} else {
character += 1;
}
bytes_seen += chr.len_utf8();
}

if start_position != lsp_types::Position::default()
Expand All @@ -138,6 +142,15 @@ mod test {
assert_eq!(range.end, lsp_types::Position::new(0, 5));
}

#[test]
fn to_range_unicode_test() {
// The snowman occupies three bytes in UTF-8, so the byte span 0..5 covers
// the snowman plus "ou" and must end at character column 3, not 5.
let source = TextSource::new("☃ource", 0, 0);
let lsp_types::Range { start, end } = source.to_span_range(Span::new(0, 5));
assert_eq!(start, lsp_types::Position::new(0, 0));
assert_eq!(end, lsp_types::Position::new(0, 3));
}

#[test]
fn to_range_multi_line_test() {
// this range contains all characters of `fn foo ...`
Expand Down
6 changes: 3 additions & 3 deletions compiler/crates/docblock-syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ pub fn parse_docblock(
* strings with quotation marks.
*
* To account for this, we parse in a single pass, essentially treating each
* character as a token. This allows us to easily intemperate characters
* character as a token. This allows us to easily interpret characters
* differently in different contexts.
*/
struct DocblockParser<'a> {
Expand Down Expand Up @@ -283,7 +283,7 @@ impl<'a> DocblockParser<'a> {

fn next(&mut self) {
self.chars.next();
self.offset += 1;
self.offset += 1; // Is this correct for unicode characters?
}

/// Advance over a string of characters matching predicate.
Expand All @@ -307,7 +307,7 @@ impl<'a> DocblockParser<'a> {
break;
}
}
self.offset += result.len() as u32;
self.offset += result.len() as u32; // result.len() returns byte length
result
}

Expand Down

0 comments on commit 524f5c4

Please sign in to comment.