noise reduction: don't match single-character l33ted words

ebrahimbd · Sep 28, 2015 · 76e94bc
1 parent 0ef64d6
commit 76e94bc
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 1 deletion.
diff --git a/src/matching.coffee b/src/matching.coffee
@@ -242,7 +242,11 @@ matching =
         match.sub = match_sub
         match.sub_display = ("#{k} -> #{v}" for k,v of match_sub).join(', ')
         matches.push match
-    @sorted matches
+    @sorted matches.filter (match) ->
+      # filter single-character l33t matches to reduce noise.
+      # otherwise '1' matches 'i', '4' matches 'a', both very common English words
+      # with low dictionary rank.
+      match.token.length > 1
 
   # ------------------------------------------------------------------------------
   # spatial match (qwerty/dvorak/keypad) -----------------------------------------

diff --git a/test/test-matching.coffee b/test/test-matching.coffee
@@ -278,6 +278,10 @@ test 'l33t matching', (t) ->
   msg = "doesn't match when multiple l33t substitutions are needed for the same letter"
   t.deepEqual lm('p4@ssword'), [], msg
 
+  msg = "doesn't match single-character l33ted words"
+  matches = matching.l33t_match '4 1 @'
+  t.deepEqual matches, [], msg
+
   # known issue: subsets of substitutions aren't tried.
   # for long inputs, trying every subset of every possible substitution could quickly get large,
   # but there might be a performant way to fix.