From 2f57364cd081d04c2837b06a761f36a8202703fe Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Sat, 16 Sep 2023 18:22:46 +0200 Subject: [PATCH 1/7] Implement relative indent queries, i.e. also take into account the indentation of a previous line when computing the indentation for a new line. --- helix-core/src/indent.rs | 294 ++++++++++++++++++++++++++----------- helix-core/tests/indent.rs | 4 +- 2 files changed, 213 insertions(+), 85 deletions(-) diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index abf45d6faf36..8dfc1f1efb84 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -4,6 +4,7 @@ use tree_sitter::{Query, QueryCursor, QueryPredicateArg}; use crate::{ chars::{char_is_line_ending, char_is_whitespace}, + find_first_non_whitespace_char, graphemes::{grapheme_width, tab_width_at}, syntax::{LanguageConfiguration, RopeProvider, Syntax}, tree_sitter::Node, @@ -196,6 +197,19 @@ pub fn indent_level_for_line(line: RopeSlice, tab_width: usize, indent_width: us len / indent_width } +/// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width). +fn whitespace_with_same_width(text: RopeSlice) -> String { + let mut s = String::new(); + for grapheme in RopeGraphemes::new(text) { + if grapheme == "\t" { + s.push('\t'); + } else { + s.extend(std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme)))); + } + } + s +} + /// Computes for node and all ancestors whether they are the first node on their line. /// The first entry in the return value represents the root node, the last one the node itself fn get_first_in_line(mut node: Node, new_line_byte_pos: Option) -> Vec { @@ -241,21 +255,21 @@ fn get_first_in_line(mut node: Node, new_line_byte_pos: Option) -> Vec { indent: usize, indent_always: usize, outdent: usize, outdent_always: usize, /// The alignment, as a string containing only tabs & spaces. Storing this as a string instead of e.g. /// the (visual) width ensures that the alignment is preserved even if the tab width changes. - align: Option, + align: Option>, } -impl Indentation { +impl<'a> Indentation<'a> { /// Add some other [Indentation] to this. /// The added indent should be the total added indent from one line. /// Indent should always be added starting from the bottom (or equivalently, the innermost tree-sitter node). - fn add_line(&mut self, added: Indentation) { + fn add_line(&mut self, added: Indentation<'a>) { // Align overrides the indent from outer scopes. if self.align.is_some() { return; @@ -274,7 +288,7 @@ impl Indentation { /// Only captures that apply to the same line should be added together in this way (otherwise use `add_line`) /// and the captures should be added starting from the innermost tree-sitter node (currently this only matters /// if multiple `@align` patterns occur on the same line). - fn add_capture(&mut self, added: IndentCaptureType) { + fn add_capture(&mut self, added: IndentCaptureType<'a>) { match added { IndentCaptureType::Indent => { if self.indent_always == 0 { @@ -303,7 +317,60 @@ impl Indentation { } } } - fn into_string(self, indent_style: &IndentStyle) -> String { + fn net_indent(&self) -> isize { + (self.indent + self.indent_always) as isize + - ((self.outdent + self.outdent_always) as isize) + } + /// Convert `self` into a string, taking into account the computed and actual indentation of some other line. + fn relative_indent( + &self, + other_computed_indent: &Self, + other_leading_whitespace: RopeSlice, + indent_style: &IndentStyle, + tab_width: usize, + ) -> Option { + if self.align == other_computed_indent.align { + // If self and baseline are either not aligned to anything or both aligned the same way, + // we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second + // case, the alignment should already be accounted for in `other_leading_whitespace`). + let indent_diff = self.net_indent() - other_computed_indent.net_indent(); + if indent_diff >= 0 { + let mut indent = other_leading_whitespace.to_string(); + indent.push_str(&indent_style.as_str().repeat(indent_diff as usize)); + Some(indent) + } else { + // It's not entirely clear how to subtract a given indent level from the other line if its indentation is + // complex (e.g. a weird alignment or a mixture of tabs and spaces). Therefore, we only consider the indent level + // of `baseline_leading_whitespace`, add `indent_diff` to it and convert this indent level back to a string. If we ever encounter + // cases where some other behavior is expected, the behavior for strings that don't exactly correspond to some indent + // level could be re-evaluated. + let actual_baseline_indent_level = indent_level_for_line( + other_leading_whitespace, + tab_width, + indent_style.indent_width(tab_width), + ); + let total_indent = actual_baseline_indent_level as isize + indent_diff; + if total_indent < 0 { + log::warn!( + "Computed negative indent during a relative indent computation (actual baseline indent: {}, computed baseline indent: {}, computed line indent: {})", + actual_baseline_indent_level, + other_computed_indent.net_indent(), + self.net_indent(), + ); + Some(String::new()) + } else { + Some(indent_style.as_str().repeat(total_indent as usize)) + } + } + } else if self.align.is_some() { + Some(self.to_string(indent_style)) + } else { + // If the other line has some alignment and `self` does not, there is no reasonable way to take + // into account `other_leading_whitespace`. + None + } + } + pub fn to_string(&self, indent_style: &IndentStyle) -> String { let indent = self.indent_always + self.indent; let outdent = self.outdent_always + self.outdent; @@ -314,7 +381,7 @@ impl Indentation { 0 }; let mut indent_string = if let Some(align) = self.align { - align + whitespace_with_same_width(align) } else { String::new() }; @@ -325,21 +392,21 @@ impl Indentation { /// An indent definition which corresponds to a capture from the indent query #[derive(Debug)] -struct IndentCapture { - capture_type: IndentCaptureType, +struct IndentCapture<'a> { + capture_type: IndentCaptureType<'a>, scope: IndentScope, } #[derive(Debug, Clone, PartialEq)] -enum IndentCaptureType { +enum IndentCaptureType<'a> { Indent, IndentAlways, Outdent, OutdentAlways, /// Alignment given as a string of whitespace - Align(String), + Align(RopeSlice<'a>), } -impl IndentCaptureType { +impl<'a> IndentCaptureType<'a> { fn default_scope(&self) -> IndentScope { match self { IndentCaptureType::Indent | IndentCaptureType::IndentAlways => IndentScope::Tail, @@ -371,8 +438,8 @@ enum ExtendCapture { /// each node (identified by its ID) the relevant captures (already filtered /// by predicates). #[derive(Debug)] -struct IndentQueryResult { - indent_captures: HashMap>, +struct IndentQueryResult<'a> { + indent_captures: HashMap>>, extend_captures: HashMap>, } @@ -393,14 +460,14 @@ fn get_node_end_line(node: Node, new_line_byte_pos: Option) -> usize { node_line } -fn query_indents( +fn query_indents<'a>( query: &Query, syntax: &Syntax, cursor: &mut QueryCursor, - text: RopeSlice, + text: RopeSlice<'a>, range: std::ops::Range, new_line_byte_pos: Option, -) -> IndentQueryResult { +) -> IndentQueryResult<'a> { let mut indent_captures: HashMap> = HashMap::new(); let mut extend_captures: HashMap> = HashMap::new(); cursor.set_byte_range(range); @@ -488,7 +555,7 @@ fn query_indents( "outdent" => IndentCaptureType::Outdent, "outdent.always" => IndentCaptureType::OutdentAlways, // The alignment will be updated to the correct value at the end, when the anchor is known. - "align" => IndentCaptureType::Align(String::from("")), + "align" => IndentCaptureType::Align(RopeSlice::from("")), "anchor" => { if anchor.is_some() { log::error!("Invalid indent query: Encountered more than one @anchor in the same match.") @@ -560,22 +627,10 @@ fn query_indents( } Some(anchor) => anchor, }; - // Create a string of tabs & spaces that should have the same width - // as the string that precedes the anchor (independent of the tab width). - let mut align = String::new(); - for grapheme in RopeGraphemes::new( + capture.capture_type = IndentCaptureType::Align( text.line(anchor.start_position().row) .byte_slice(0..anchor.start_position().column), - ) { - if grapheme == "\t" { - align.push('\t'); - } else { - align.extend( - std::iter::repeat(' ').take(grapheme_width(&Cow::from(grapheme))), - ); - } - } - capture.capture_type = IndentCaptureType::Align(align); + ); } indent_captures .entry(node_id) @@ -661,56 +716,20 @@ fn extend_nodes<'a>( } } -/// Use the syntax tree to determine the indentation for a given position. -/// This can be used in 2 ways: -/// -/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation. -/// - In this case, pos should be inside the first tree-sitter node on that line. -/// In most cases, this can just be the first non-whitespace on that line. -/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line -/// after pos were moved to a new line. -/// -/// The indentation is determined by traversing all the tree-sitter nodes containing the position. -/// Each of these nodes produces some [Indentation] for: -/// -/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line. -/// - The line after the node. This is defined by: -/// - The scope `tail`. -/// - The scope `all` if this node is not the first node on its line. -/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node. -/// The indents from different nodes for the same line are then combined. -/// The result [Indentation] is simply the sum of the [Indentation] for all lines. -/// -/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines: -/// ```ignore -/// some_function(|| { -/// // Both the function parameters as well as the contained block should be indented. -/// // Because they are on the same line, this only yields one indent level -/// }); -/// ``` -/// -/// ```ignore -/// some_function( -/// param1, -/// || { -/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines -/// }, -/// ); -/// ``` +/// Prepare an indent query by computing: +/// - The node from which to start the query (this is non-trivial due to `@extend` captures) +/// - The indent captures for all relevant nodes. #[allow(clippy::too_many_arguments)] -pub fn treesitter_indent_for_pos( +fn init_indent_query<'a, 'b>( query: &Query, - syntax: &Syntax, - indent_style: &IndentStyle, + syntax: &'a Syntax, + text: RopeSlice<'b>, tab_width: usize, indent_width: usize, - text: RopeSlice, line: usize, - pos: usize, - new_line: bool, -) -> Option { - let byte_pos = text.char_to_byte(pos); - let new_line_byte_pos = new_line.then_some(byte_pos); + byte_pos: usize, + new_line_byte_pos: Option, +) -> Option<(Node<'a>, HashMap>>)> { // The innermost tree-sitter node which is considered for the indent // computation. It may change if some predeceding node is extended let mut node = syntax @@ -754,7 +773,6 @@ pub fn treesitter_indent_for_pos( (query_result, deepest_preceding) }) }; - let mut indent_captures = query_result.indent_captures; let extend_captures = query_result.extend_captures; // Check for extend captures, potentially changing the node that the indent calculation starts with @@ -769,6 +787,68 @@ pub fn treesitter_indent_for_pos( indent_width, ); } + Some((node, query_result.indent_captures)) +} + +/// Use the syntax tree to determine the indentation for a given position. +/// This can be used in 2 ways: +/// +/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation. +/// - In this case, pos should be inside the first tree-sitter node on that line. +/// In most cases, this can just be the first non-whitespace on that line. +/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line +/// after pos were moved to a new line. +/// +/// The indentation is determined by traversing all the tree-sitter nodes containing the position. +/// Each of these nodes produces some [Indentation] for: +/// +/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line. +/// - The line after the node. This is defined by: +/// - The scope `tail`. +/// - The scope `all` if this node is not the first node on its line. +/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node. +/// The indents from different nodes for the same line are then combined. +/// The result [Indentation] is simply the sum of the [Indentation] for all lines. +/// +/// Specifying which line exactly an [Indentation] applies to is important because indents on the same line combine differently than indents on different lines: +/// ```ignore +/// some_function(|| { +/// // Both the function parameters as well as the contained block should be indented. +/// // Because they are on the same line, this only yields one indent level +/// }); +/// ``` +/// +/// ```ignore +/// some_function( +/// param1, +/// || { +/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines +/// }, +/// ); +/// ``` +#[allow(clippy::too_many_arguments)] +pub fn treesitter_indent_for_pos<'a>( + query: &Query, + syntax: &Syntax, + tab_width: usize, + indent_width: usize, + text: RopeSlice<'a>, + line: usize, + pos: usize, + new_line: bool, +) -> Option> { + let byte_pos = text.char_to_byte(pos); + let new_line_byte_pos = new_line.then_some(byte_pos); + let (mut node, mut indent_captures) = init_indent_query( + query, + syntax, + text, + tab_width, + indent_width, + line, + byte_pos, + new_line_byte_pos, + )?; let mut first_in_line = get_first_in_line(node, new_line.then_some(byte_pos)); let mut result = Indentation::default(); @@ -836,7 +916,7 @@ pub fn treesitter_indent_for_pos( break; } } - Some(result.into_string(indent_style)) + Some(result) } /// Returns the indentation for a new line. @@ -860,7 +940,6 @@ pub fn indent_for_newline( if let Some(indent) = treesitter_indent_for_pos( query, syntax, - indent_style, tab_width, indent_width, text, @@ -868,9 +947,55 @@ pub fn indent_for_newline( line_before_end_pos, true, ) { - return indent; + // We want to compute the indentation not only based on the + // syntax tree but also on the actual indentation of a previous + // line. This makes indentation computation more resilient to + // incomplete queries, incomplete source code & differing indentation + // styles for the same language. + // However, using the indent of a previous line as a baseline may not + // make sense, e.g. if it has a different alignment than the new line. + // In order to prevent edge cases with long running times, we only try + // a constant number of (non-empty) lines. + const MAX_ATTEMPTS: usize = 2; + let mut num_attempts = 0; + for line_idx in (0..=line_before).rev() { + let line = text.line(line_idx); + let first_non_whitespace_char = match find_first_non_whitespace_char(line) { + Some(i) => i, + None => { + continue; + } + }; + if let Some(indent) = (|| { + let computed_indent = treesitter_indent_for_pos( + query, + syntax, + tab_width, + indent_width, + text, + line_idx, + text.line_to_char(line_idx) + first_non_whitespace_char, + false, + )?; + let leading_whitespace = line.slice(0..first_non_whitespace_char); + indent.relative_indent( + &computed_indent, + leading_whitespace, + indent_style, + tab_width, + ) + })() { + return indent; + } + num_attempts += 1; + if num_attempts == MAX_ATTEMPTS { + break; + } + } + return indent.to_string(indent_style); }; } + // Fallback in case we either don't have indent queries or they failed for some reason let indent_level = indent_level_for_line(text.line(current_line), tab_width, indent_width); indent_style.as_str().repeat(indent_level) } @@ -962,10 +1087,13 @@ mod test { ..Default::default() }; - let add_capture = |mut indent: Indentation, capture| { + fn add_capture<'a>( + mut indent: Indentation<'a>, + capture: IndentCaptureType<'a>, + ) -> Indentation<'a> { indent.add_capture(capture); indent - }; + } // adding an indent to no indent makes an indent assert_eq!( diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index 26010c906ae1..cd96fcff80ae 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -210,7 +210,6 @@ fn test_treesitter_indent( let suggested_indent = treesitter_indent_for_pos( indent_query, &syntax, - &indent_style, tab_width, indent_style.indent_width(tab_width), text, @@ -218,7 +217,8 @@ fn test_treesitter_indent( text.line_to_char(i) + pos, false, ) - .unwrap(); + .unwrap() + .to_string(&indent_style); assert!( line.get_slice(..pos).map_or(false, |s| s == suggested_indent), "Wrong indentation for file {:?} on line {}:\n\"{}\" (original line)\n\"{}\" (suggested indentation)\n", From ac68b98582c9785dedc5c4de31ad80402dd7dfa7 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Sun, 17 Sep 2023 17:34:23 +0200 Subject: [PATCH 2/7] Improve relative indent computation. Add tests to ensure that relative & absolute indent computation are consistent. --- helix-core/src/indent.rs | 176 ++++++++++++++++++++++++++----------- helix-core/tests/indent.rs | 2 +- 2 files changed, 127 insertions(+), 51 deletions(-) diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 8dfc1f1efb84..dfb33939d592 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -210,6 +210,46 @@ fn whitespace_with_same_width(text: RopeSlice) -> String { s } +fn add_indent_level( + mut base_indent: String, + added_indent_level: isize, + indent_style: &IndentStyle, + tab_width: usize, +) -> String { + if added_indent_level >= 0 { + // Adding a non-negative indent is easy, we can simply append the indent string + base_indent.push_str(&indent_style.as_str().repeat(added_indent_level as usize)); + base_indent + } else { + // In this case, we want to return a prefix of `base_indent`. + // Since the width of a tab depends on its offset, we cannot simply iterate over + // the chars of `base_indent` in reverse until we have the desired indent reduction, + // instead we iterate over them twice in forward direction. + let mut base_indent_width: usize = 0; + for c in base_indent.chars() { + base_indent_width += match c { + '\t' => tab_width_at(base_indent_width, tab_width as u16), + // This is only true since `base_indent` consists only of tabs and spaces + _ => 1, + }; + } + let target_indent_width = base_indent_width + .saturating_sub((-added_indent_level) as usize * indent_style.indent_width(tab_width)); + let mut reduced_indent_width = 0; + for (i, c) in base_indent.char_indices() { + if reduced_indent_width >= target_indent_width { + base_indent.truncate(i); + return base_indent; + } + reduced_indent_width += match c { + '\t' => tab_width_at(base_indent_width, tab_width as u16), + _ => 1, + }; + } + base_indent + } +} + /// Computes for node and all ancestors whether they are the first node on their line. /// The first entry in the return value represents the root node, the last one the node itself fn get_first_in_line(mut node: Node, new_line_byte_pos: Option) -> Vec { @@ -334,59 +374,25 @@ impl<'a> Indentation<'a> { // we can simply take `other_leading_whitespace` and add some indent / outdent to it (in the second // case, the alignment should already be accounted for in `other_leading_whitespace`). let indent_diff = self.net_indent() - other_computed_indent.net_indent(); - if indent_diff >= 0 { - let mut indent = other_leading_whitespace.to_string(); - indent.push_str(&indent_style.as_str().repeat(indent_diff as usize)); - Some(indent) - } else { - // It's not entirely clear how to subtract a given indent level from the other line if its indentation is - // complex (e.g. a weird alignment or a mixture of tabs and spaces). Therefore, we only consider the indent level - // of `baseline_leading_whitespace`, add `indent_diff` to it and convert this indent level back to a string. If we ever encounter - // cases where some other behavior is expected, the behavior for strings that don't exactly correspond to some indent - // level could be re-evaluated. - let actual_baseline_indent_level = indent_level_for_line( - other_leading_whitespace, - tab_width, - indent_style.indent_width(tab_width), - ); - let total_indent = actual_baseline_indent_level as isize + indent_diff; - if total_indent < 0 { - log::warn!( - "Computed negative indent during a relative indent computation (actual baseline indent: {}, computed baseline indent: {}, computed line indent: {})", - actual_baseline_indent_level, - other_computed_indent.net_indent(), - self.net_indent(), - ); - Some(String::new()) - } else { - Some(indent_style.as_str().repeat(total_indent as usize)) - } - } - } else if self.align.is_some() { - Some(self.to_string(indent_style)) + Some(add_indent_level( + String::from(other_leading_whitespace), + indent_diff, + indent_style, + tab_width, + )) } else { - // If the other line has some alignment and `self` does not, there is no reasonable way to take - // into account `other_leading_whitespace`. + // If the alignment of both lines is different, we cannot compare their indentation in any meaningful way None } } - pub fn to_string(&self, indent_style: &IndentStyle) -> String { - let indent = self.indent_always + self.indent; - let outdent = self.outdent_always + self.outdent; - - let indent_level = if indent >= outdent { - indent - outdent - } else { - log::warn!("Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent", self.outdent, self.indent); - 0 - }; - let mut indent_string = if let Some(align) = self.align { - whitespace_with_same_width(align) - } else { - String::new() - }; - indent_string.push_str(&indent_style.as_str().repeat(indent_level)); - indent_string + pub fn to_string(&self, indent_style: &IndentStyle, tab_width: usize) -> String { + add_indent_level( + self.align + .map_or_else(String::new, whitespace_with_same_width), + self.net_indent(), + indent_style, + tab_width, + ) } } @@ -992,7 +998,7 @@ pub fn indent_for_newline( break; } } - return indent.to_string(indent_style); + return indent.to_string(indent_style, tab_width); }; } // Fallback in case we either don't have indent queries or they failed for some reason @@ -1188,4 +1194,74 @@ mod test { ) ); } + + #[test] + fn test_relative_indent() { + let indent_style = IndentStyle::Spaces(4); + let tab_width: usize = 4; + let no_align = [ + Indentation::default(), + Indentation { + indent: 1, + ..Default::default() + }, + Indentation { + indent: 5, + outdent: 1, + ..Default::default() + }, + ]; + let align = no_align.clone().map(|indent| Indentation { + align: Some(RopeSlice::from("12345")), + ..indent + }); + let different_align = Indentation { + align: Some(RopeSlice::from("123456")), + ..Default::default() + }; + + // Check that relative and absolute indentation computation are the same when the line we compare to is + // indented as we expect. + let check_consistency = |indent: &Indentation, other: &Indentation| { + assert_eq!( + indent.relative_indent( + other, + RopeSlice::from(other.to_string(&indent_style, tab_width).as_str()), + &indent_style, + tab_width + ), + Some(indent.to_string(&indent_style, tab_width)) + ); + }; + for a in &no_align { + for b in &no_align { + check_consistency(a, b); + } + } + for a in &align { + for b in &align { + check_consistency(a, b); + } + } + + // Relative indent computation makes no sense if the alignment differs + assert_eq!( + align[0].relative_indent( + &no_align[0], + RopeSlice::from(" "), + &indent_style, + tab_width + ), + None + ); + assert_eq!( + align[0].relative_indent( + &different_align, + RopeSlice::from(" "), + &indent_style, + tab_width + ), + None + ); + } } diff --git a/helix-core/tests/indent.rs b/helix-core/tests/indent.rs index cd96fcff80ae..faf845c074f8 100644 --- a/helix-core/tests/indent.rs +++ b/helix-core/tests/indent.rs @@ -218,7 +218,7 @@ fn test_treesitter_indent( false, ) .unwrap() - .to_string(&indent_style); + .to_string(&indent_style, tab_width); assert!( line.get_slice(..pos).map_or(false, |s| s == suggested_indent), "Wrong indentation for file {:?} on line {}:\n\"{}\" (original line)\n\"{}\" (suggested indentation)\n", From 7de6353980e28bfda8bedec30ca9eb75a0599b99 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Tue, 19 Sep 2023 15:31:38 +0200 Subject: [PATCH 3/7] Make the indent heuristic configurable --- helix-core/src/indent.rs | 96 +++++++++++++++++++++----------------- helix-core/src/syntax.rs | 16 +++++++ helix-term/src/commands.rs | 3 ++ helix-view/src/editor.rs | 6 ++- 4 files changed, 76 insertions(+), 45 deletions(-) diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index dfb33939d592..26186ee83b9f 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -6,7 +6,7 @@ use crate::{ chars::{char_is_line_ending, char_is_whitespace}, find_first_non_whitespace_char, graphemes::{grapheme_width, tab_width_at}, - syntax::{LanguageConfiguration, RopeProvider, Syntax}, + syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax}, tree_sitter::Node, Rope, RopeGraphemes, RopeSlice, }; @@ -931,6 +931,7 @@ pub fn treesitter_indent_for_pos<'a>( pub fn indent_for_newline( language_config: Option<&LanguageConfiguration>, syntax: Option<&Syntax>, + indent_heuristic: &IndentationHeuristic, indent_style: &IndentStyle, tab_width: usize, text: RopeSlice, @@ -939,7 +940,12 @@ pub fn indent_for_newline( current_line: usize, ) -> String { let indent_width = indent_style.indent_width(tab_width); - if let (Some(query), Some(syntax)) = ( + if let ( + IndentationHeuristic::TreeSitter | IndentationHeuristic::Hybrid, + Some(query), + Some(syntax), + ) = ( + indent_heuristic, language_config.and_then(|config| config.indent_query()), syntax, ) { @@ -953,49 +959,51 @@ pub fn indent_for_newline( line_before_end_pos, true, ) { - // We want to compute the indentation not only based on the - // syntax tree but also on the actual indentation of a previous - // line. This makes indentation computation more resilient to - // incomplete queries, incomplete source code & differing indentation - // styles for the same language. - // However, using the indent of a previous line as a baseline may not - // make sense, e.g. if it has a different alignment than the new line. - // In order to prevent edge cases with long running times, we only try - // a constant number of (non-empty) lines. - const MAX_ATTEMPTS: usize = 2; - let mut num_attempts = 0; - for line_idx in (0..=line_before).rev() { - let line = text.line(line_idx); - let first_non_whitespace_char = match find_first_non_whitespace_char(line) { - Some(i) => i, - None => { - continue; + if let IndentationHeuristic::Hybrid = indent_heuristic { + // We want to compute the indentation not only based on the + // syntax tree but also on the actual indentation of a previous + // line. This makes indentation computation more resilient to + // incomplete queries, incomplete source code & differing indentation + // styles for the same language. + // However, using the indent of a previous line as a baseline may not + // make sense, e.g. if it has a different alignment than the new line. + // In order to prevent edge cases with long running times, we only try + // a constant number of (non-empty) lines. + const MAX_ATTEMPTS: usize = 2; + let mut num_attempts = 0; + for line_idx in (0..=line_before).rev() { + let line = text.line(line_idx); + let first_non_whitespace_char = match find_first_non_whitespace_char(line) { + Some(i) => i, + None => { + continue; + } + }; + if let Some(indent) = (|| { + let computed_indent = treesitter_indent_for_pos( + query, + syntax, + tab_width, + indent_width, + text, + line_idx, + text.line_to_char(line_idx) + first_non_whitespace_char, + false, + )?; + let leading_whitespace = line.slice(0..first_non_whitespace_char); + indent.relative_indent( + &computed_indent, + leading_whitespace, + indent_style, + tab_width, + ) + })() { + return indent; + } + num_attempts += 1; + if num_attempts == MAX_ATTEMPTS { + break; } - }; - if let Some(indent) = (|| { - let computed_indent = treesitter_indent_for_pos( - query, - syntax, - tab_width, - indent_width, - text, - line_idx, - text.line_to_char(line_idx) + first_non_whitespace_char, - false, - )?; - let leading_whitespace = line.slice(0..first_non_whitespace_char); - indent.relative_indent( - &computed_indent, - leading_whitespace, - indent_style, - tab_width, - ) - })() { - return indent; - } - num_attempts += 1; - if num_attempts == MAX_ATTEMPTS { - break; } } return indent.to_string(indent_style, tab_width); diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 8c7fc4e15c61..dd9223160604 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -442,6 +442,22 @@ pub struct IndentationConfiguration { pub unit: String, } +/// How the indentation for a newly inserted line should be determined. +/// If the selected heuristic is not available (e.g. because the current +/// language has no tree-sitter indent queries), a simpler one will be used. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum IndentationHeuristic { + /// Just copy the indentation of the line that the cursor is currently on. + Simple, + /// Use tree-sitter indent queries to compute the expected absolute indentation level of the new line. + TreeSitter, + /// Use tree-sitter indent queries to compute the expected difference in indentation between the new line + /// and the line before. Add this to the actual indentation level of the line before. + #[default] + Hybrid, +} + /// Configuration for auto pairs #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "kebab-case", deny_unknown_fields, untagged)] diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index cd053266c2e0..62643ac41c77 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -3053,6 +3053,7 @@ fn insert_with_indent(cx: &mut Context, cursor_fallback: IndentFallbackPos) { let indent = indent::indent_for_newline( language_config, syntax, + &doc.config.load().indent_heuristic, &doc.indent_style, tab_width, text, @@ -3181,6 +3182,7 @@ fn open(cx: &mut Context, open: Open) { let indent = indent::indent_for_newline( doc.language_config(), doc.syntax(), + &doc.config.load().indent_heuristic, &doc.indent_style, doc.tab_width(), text, @@ -3720,6 +3722,7 @@ pub mod insert { let indent = indent::indent_for_newline( doc.language_config(), doc.syntax(), + &doc.config.load().indent_heuristic, &doc.indent_style, doc.tab_width(), text, diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index 5a81cb5d6da8..f2e853071f51 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -42,7 +42,7 @@ use anyhow::{anyhow, bail, Error}; pub use helix_core::diagnostic::Severity; use helix_core::{ auto_pairs::AutoPairs, - syntax::{self, AutoPairConfig, SoftWrap}, + syntax::{self, AutoPairConfig, IndentationHeuristic, SoftWrap}, Change, LineEnding, NATIVE_LINE_ENDING, }; use helix_core::{Position, Selection}; @@ -291,6 +291,9 @@ pub struct Config { pub insert_final_newline: bool, /// Enables smart tab pub smart_tab: Option, + /// Which indent heuristic to use when a new line is inserted + #[serde(default)] + pub indent_heuristic: IndentationHeuristic, } #[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Eq, PartialOrd, Ord)] @@ -841,6 +844,7 @@ impl Default for Config { default_line_ending: LineEndingConfig::default(), insert_final_newline: true, smart_tab: Some(SmartTabConfig::default()), + indent_heuristic: IndentationHeuristic::default(), } } } From 0a0fca4220a76029ee081a44a7773d59dc92cac1 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Tue, 19 Sep 2023 16:14:23 +0200 Subject: [PATCH 4/7] Add documentation for new indent computation --- book/src/configuration.md | 1 + book/src/guides/indent.md | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/book/src/configuration.md b/book/src/configuration.md index f70ce5470dc8..bbec40f2a7a4 100644 --- a/book/src/configuration.md +++ b/book/src/configuration.md @@ -65,6 +65,7 @@ Its settings will be merged with the configuration directory `config.toml` and t | `workspace-lsp-roots` | Directories relative to the workspace root that are treated as LSP roots. Should only be set in `.helix/config.toml` | `[]` | | `default-line-ending` | The line ending to use for new documents. Can be `native`, `lf`, `crlf`, `ff`, `cr` or `nel`. `native` uses the platform's native line ending (`crlf` on Windows, otherwise `lf`). | `native` | | `insert-final-newline` | Whether to automatically insert a trailing line-ending on write if missing | `true` | +| `indent-heuristic` | How the indentation for a newly inserted line is computed: `simple` just copies the indentation level from the previous line, `tree-sitter` computes the indentation based on the syntax tree and `hybrid` combines both approaches | `hybrid` ### `[editor.statusline]` Section diff --git a/book/src/guides/indent.md b/book/src/guides/indent.md index bd037bb05330..a65ac5ac1f48 100644 --- a/book/src/guides/indent.md +++ b/book/src/guides/indent.md @@ -12,6 +12,15 @@ Note that it matters where these added indents begin. For example, multiple indent level increases that start on the same line only increase the total indent level by 1. See [Capture types](#capture-types). +By default, Helix uses the `hybrid` indentation heuristic. This means that +indent queries are not used to compute the expected absolute indentation of a +line but rather the expected difference in indentation between the new and an +already existing line. This difference is then added to the actual indentation +of the already existing line. Since this makes errors in the indent queries +harder to find, it is recommended to disable it when testing via +`:set indent-heuristic tree-sitter`. The rest of this guide assumes that +the `tree-sitter` heuristic is used. + ## Indent queries When Helix is inserting a new line through `o`, `O`, or ``, to determine From bedc4720f5d8e378ee55da704f8dc19d8324c324 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Tue, 19 Sep 2023 16:29:52 +0200 Subject: [PATCH 5/7] Align arguments in a function call in C. Since the tree-sitter grammar is not very good at parsing function calls while they're being written, this is not yet super useful. However, it prevents the new `hybrid` indent heuristic from choosing these lines as a baseline, making it more robust. --- runtime/queries/c/indents.scm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runtime/queries/c/indents.scm b/runtime/queries/c/indents.scm index 058d8031650e..0e97ed2b1872 100644 --- a/runtime/queries/c/indents.scm +++ b/runtime/queries/c/indents.scm @@ -36,3 +36,6 @@ (parameter_list . (parameter_declaration) @anchor (#set! "scope" "tail")) @align +(argument_list + . (_) @anchor + (#set! "scope" "tail")) @align From 82bb4311ddf74a3ad5fb4086204d5c2755180746 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Sun, 15 Oct 2023 11:43:28 +0200 Subject: [PATCH 6/7] Simplify implementation of `add_indent_level`. Increase hybrid indent heuristic attempt limit to 4. Clarify the fallback logic in indent heuristic docs. --- book/src/configuration.md | 2 +- helix-core/src/indent.rs | 41 ++++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/book/src/configuration.md b/book/src/configuration.md index bbec40f2a7a4..34be7e8d724e 100644 --- a/book/src/configuration.md +++ b/book/src/configuration.md @@ -65,7 +65,7 @@ Its settings will be merged with the configuration directory `config.toml` and t | `workspace-lsp-roots` | Directories relative to the workspace root that are treated as LSP roots. Should only be set in `.helix/config.toml` | `[]` | | `default-line-ending` | The line ending to use for new documents. Can be `native`, `lf`, `crlf`, `ff`, `cr` or `nel`. `native` uses the platform's native line ending (`crlf` on Windows, otherwise `lf`). | `native` | | `insert-final-newline` | Whether to automatically insert a trailing line-ending on write if missing | `true` | -| `indent-heuristic` | How the indentation for a newly inserted line is computed: `simple` just copies the indentation level from the previous line, `tree-sitter` computes the indentation based on the syntax tree and `hybrid` combines both approaches | `hybrid` +| `indent-heuristic` | How the indentation for a newly inserted line is computed: `simple` just copies the indentation level from the previous line, `tree-sitter` computes the indentation based on the syntax tree and `hybrid` combines both approaches. If the chosen heuristic is not available, a different one will be used as a fallback (the fallback order being `hybrid` -> `tree-sitter` -> `simple`). | `hybrid` ### `[editor.statusline]` Section diff --git a/helix-core/src/indent.rs b/helix-core/src/indent.rs index 26186ee83b9f..9333460b4dd5 100644 --- a/helix-core/src/indent.rs +++ b/helix-core/src/indent.rs @@ -8,7 +8,7 @@ use crate::{ graphemes::{grapheme_width, tab_width_at}, syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax}, tree_sitter::Node, - Rope, RopeGraphemes, RopeSlice, + Position, Rope, RopeGraphemes, RopeSlice, }; /// Enum representing indentation style. @@ -225,27 +225,24 @@ fn add_indent_level( // Since the width of a tab depends on its offset, we cannot simply iterate over // the chars of `base_indent` in reverse until we have the desired indent reduction, // instead we iterate over them twice in forward direction. - let mut base_indent_width: usize = 0; - for c in base_indent.chars() { - base_indent_width += match c { - '\t' => tab_width_at(base_indent_width, tab_width as u16), - // This is only true since `base_indent` consists only of tabs and spaces - _ => 1, - }; - } + let base_indent_rope = RopeSlice::from(base_indent.as_str()); + #[allow(deprecated)] + let base_indent_width = + crate::visual_coords_at_pos(base_indent_rope, base_indent_rope.len_chars(), tab_width) + .col; let target_indent_width = base_indent_width .saturating_sub((-added_indent_level) as usize * indent_style.indent_width(tab_width)); - let mut reduced_indent_width = 0; - for (i, c) in base_indent.char_indices() { - if reduced_indent_width >= target_indent_width { - base_indent.truncate(i); - return base_indent; - } - reduced_indent_width += match c { - '\t' => tab_width_at(base_indent_width, tab_width as u16), - _ => 1, - }; - } + #[allow(deprecated)] + let char_end_idx = crate::pos_at_visual_coords( + base_indent_rope, + Position { + row: 0, + col: target_indent_width, + }, + tab_width, + ); + let byte_end_idx = base_indent_rope.char_to_byte(char_end_idx); + base_indent.truncate(byte_end_idx); base_indent } } @@ -959,7 +956,7 @@ pub fn indent_for_newline( line_before_end_pos, true, ) { - if let IndentationHeuristic::Hybrid = indent_heuristic { + if *indent_heuristic == IndentationHeuristic::Hybrid { // We want to compute the indentation not only based on the // syntax tree but also on the actual indentation of a previous // line. This makes indentation computation more resilient to @@ -969,7 +966,7 @@ pub fn indent_for_newline( // make sense, e.g. if it has a different alignment than the new line. // In order to prevent edge cases with long running times, we only try // a constant number of (non-empty) lines. - const MAX_ATTEMPTS: usize = 2; + const MAX_ATTEMPTS: usize = 4; let mut num_attempts = 0; for line_idx in (0..=line_before).rev() { let line = text.line(line_idx); From bf311eca5b5f939edb78c65ff9819206a30e6462 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Sun, 3 Dec 2023 19:47:01 +0100 Subject: [PATCH 7/7] Add alignment indent queries for binary & ternary expressions in C. --- runtime/queries/c/indents.scm | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/runtime/queries/c/indents.scm b/runtime/queries/c/indents.scm index 0e97ed2b1872..877b6670c5fa 100644 --- a/runtime/queries/c/indents.scm +++ b/runtime/queries/c/indents.scm @@ -39,3 +39,13 @@ (argument_list . (_) @anchor (#set! "scope" "tail")) @align +; These are a bit opinionated since some people just indent binary/ternary expressions spanning multiple lines. +; Since they are only triggered when a newline is inserted into an already complete binary/ternary expression, +; this should happen rarely, so it is not a big deal either way. +; Additionally, adding these queries has the advantage of preventing such continuation lines from being used +; as the baseline when the `hybrid` indent heuristic is used (which is desirable since their indentation is so inconsistent). +(binary_expression + (#set! "scope" "tail")) @anchor @align +(conditional_expression + "?" @anchor + (#set! "scope" "tail")) @align