Skip to content

Commit

Permalink
feat: now condenses words with hyphens
Browse files Browse the repository at this point in the history
  • Loading branch information
elijah-potter committed Aug 6, 2024
1 parent 38f5bcd commit 3778c14
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions harper-core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,8 @@ impl Document {
self.tokens.remove_indices(to_remove);
}

/// Searches for contractions and condenses them down into single
/// tokens.
/// Searches for contractions and hyphenated words and condenses them down
/// into single tokens.
fn condense_contractions(&mut self) {
if self.tokens.len() < 3 {
return;
Expand All @@ -488,6 +488,10 @@ impl Document {
TokenKind::Word(..),
TokenKind::Punctuation(Punctuation::Apostrophe),
TokenKind::Word(..)
) | (
TokenKind::Word(..),
TokenKind::Punctuation(Punctuation::Hyphen),
TokenKind::Word(..)
)
) {
// Ensure there is no overlapping between replacements
Expand Down Expand Up @@ -648,6 +652,15 @@ mod tests {
assert_condensed_contractions("There's no way", 5);
}

/// Checks that words joined by a hyphen are condensed into a single token,
/// both in isolation and in the middle of a sentence.
#[test]
fn condenses_pre_existing() {
    // A hyphenated word on its own is expected to yield exactly one token.
    let lone_word = "pre-existing";
    assert_condensed_contractions(lone_word, 1);

    // The expected count of 18 is consistent with 9 words (with
    // "pre-existing" counted once), 8 separating spaces and the final period.
    let sentence = "There was a pre-existing problem with words with hyphens.";
    assert_condensed_contractions(sentence, 18);
}

#[test]
fn selects_token_at_char_index() {
let text = "There were three little pigs. They built three little homes.";
Expand Down

0 comments on commit 3778c14

Please sign in to comment.