Skip to content

Commit

Permalink
add regex precedence to cut down on redundant regex matches + regex matching tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lowe committed Sep 14, 2015
1 parent 45b17a6 commit 61f9e35
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 4 deletions.
36 changes: 32 additions & 4 deletions src/matching.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,23 @@ L33T_TABLE =
z: ['2']

# Named global regexes, keyed by regex name. Every pattern except recent_year
# needs a run of at least two characters; recent_year matches the four-digit
# years 1900-2019.
# Each name appears exactly once: the stale duplicate entries, whose
# alpha_lower / alpha_upper character classes were swapped, are gone.
REGEXEN =
  alphanumeric: /[a-zA-Z0-9]{2,}/g
  alpha:        /[a-zA-Z]{2,}/g
  alpha_lower:  /[a-z]{2,}/g
  alpha_upper:  /[A-Z]{2,}/g
  digits:       /\d{2,}/g
  symbols:      /[\W_]{2,}/g # includes non-latin unicode chars
  recent_year:  /19\d\d|200\d|201\d/g

# Ranking used when several regexes match the same [i, j] span of a password:
# only the matches whose regex has the largest number for that span are kept.
REGEX_PRECEDENCE = {
  alphanumeric: 0
  alpha:        1
  alpha_lower:  2
  alpha_upper:  2
  digits:       2
  symbols:      2
  recent_year:  3
}

DATE_MAX_YEAR = 2050
DATE_MIN_YEAR = 1000
DATE_SPLITS =
Expand Down Expand Up @@ -402,7 +411,26 @@ matching =
j: rx_match.index + rx_match[0].length - 1
regex_name: name
regex_match: rx_match
@sorted matches
# At this point the match list can contain redundant entries for one [i, j] span:
# e.g. an alpha_lower match can also be reported as an alpha and an alphanumeric match of the same [i, j],
# and a recent_year match can also be reported as an alphanumeric and a digits match.
# Use regex precedence to filter these redundancies out.
precedence_map = {} # maps from 'i-j' to current highest precedence
get_key = (match) -> "#{match.i}-#{match.j}"
for match in matches
key = get_key match
precedence = REGEX_PRECEDENCE[match.regex_name]
if key of precedence_map
highest_precedence = precedence_map[key]
continue if highest_precedence >= precedence
precedence_map[key] = precedence
filtered_matches = []
for match in matches
key = get_key match
precedence = REGEX_PRECEDENCE[match.regex_name]
if precedence_map[key] == precedence
filtered_matches.push match
@sorted filtered_matches

#-------------------------------------------------------------------------------
# date matching ----------------------------------------------------------------
Expand Down
23 changes: 23 additions & 0 deletions test/test-matching.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,29 @@ test 'repeat matching', (t) ->
t.end()


test 'regex matching', (t) ->
  # Each case is [password, expected regex_name for a match spanning the whole password].
  whole_password_cases = [
    ['aaa', 'alpha_lower']
    ['a7c8D9', 'alphanumeric']
    ['aAaA', 'alpha']
    ['1922', 'recent_year']
    ['&@*#', 'symbols']
    ['94113', 'digits']
  ]
  for [pattern, name] in whole_password_cases
    found = matching.regex_match pattern
    whole_span = [0, pattern.length - 1]
    description = "matches #{pattern} as a #{name} pattern"
    check_matches description, t, found, 'regex', [pattern], [whole_span], {regex_name: [name]}

  # One password with several regex matches over different, overlapping spans.
  password = 'a7c8D9vvv2015'
  found = matching.regex_match password
  expected_tokens = ['a7c8D9vvv2015', 'vvv', '2015']
  expected_ijs = [[0, 12], [6, 8], [9, 12]]
  expected_names = ['alphanumeric', 'alpha_lower', 'recent_year']
  description = "matches multiple overlapping regex patterns"
  check_matches description, t, found, 'regex', expected_tokens, expected_ijs, {regex_name: expected_names}
  t.end()


test 'date matching', (t) ->
for sep in ['', ' ', '-', '/', '\\', '_', '.']
password = "13#{sep}2#{sep}1921"
Expand Down

0 comments on commit 61f9e35

Please sign in to comment.