Skip to content

Commit

Permalink
noise reduction: don't match single-character l33ted words
Browse files Browse the repository at this point in the history
  • Loading branch information
lowe committed Sep 28, 2015
1 parent 0ef64d6 commit 76e94bc
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/matching.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,11 @@ matching =
match.sub = match_sub
match.sub_display = ("#{k} -> #{v}" for k,v of match_sub).join(', ')
matches.push match
@sorted matches
@sorted matches.filter (match) ->
# filter out single-character l33t matches to reduce noise.
# otherwise '1' matches 'i' and '4' matches 'a', both very common English words
# with a low (i.e. highly frequent) dictionary rank.
match.token.length > 1

# ------------------------------------------------------------------------------
# spatial match (qwerty/dvorak/keypad) -----------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions test/test-matching.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ test 'l33t matching', (t) ->
msg = "doesn't match when multiple l33t substitutions are needed for the same letter"
t.deepEqual lm('p4@ssword'), [], msg

msg = "doesn't match single-character l33ted words"
matches = matching.l33t_match '4 1 @'
t.deepEqual matches, [], msg

# known issue: subsets of substitutions aren't tried.
# for long inputs, trying every subset of every possible substitution could quickly get large,
# but there might be a performant way to fix this.
Expand Down

0 comments on commit 76e94bc

Please sign in to comment.