From 61f9e3534fdaaf9c7d76e04d161ddaed96c672c3 Mon Sep 17 00:00:00 2001 From: Dan Wheeler Date: Sun, 13 Sep 2015 23:55:39 -0700 Subject: [PATCH] add regex precedence to cut down on redundant regex matches + regex matching tests --- src/matching.coffee | 36 ++++++++++++++++++++++++++++++++---- test/test-matching.coffee | 23 +++++++++++++++++++++++ 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/matching.coffee b/src/matching.coffee index 171784db..2e0eab4b 100644 --- a/src/matching.coffee +++ b/src/matching.coffee @@ -43,14 +43,23 @@ L33T_TABLE = z: ['2'] REGEXEN = - alpha_lower: /[A-Z]{2,}/g - alpha_upper: /[a-z]{2,}/g - alpha: /[a-zA-Z]{2,}/g alphanumeric: /[a-zA-Z0-9]{2,}/g + alpha: /[a-zA-Z]{2,}/g + alpha_lower: /[a-z]{2,}/g + alpha_upper: /[A-Z]{2,}/g digits: /\d{2,}/g symbols: /[\W_]{2,}/g # includes non-latin unicode chars recent_year: /19\d\d|200\d|201\d/g +REGEX_PRECEDENCE = + alphanumeric: 0 # larger value wins when several regexes match the same [i,j] span + alpha: 1 + alpha_lower: 2 + alpha_upper: 2 + digits: 2 + symbols: 2 + recent_year: 3 + DATE_MAX_YEAR = 2050 DATE_MIN_YEAR = 1000 DATE_SPLITS = @@ -402,7 +411,26 @@ matching = j: rx_match.index + rx_match[0].length - 1 regex_name: name regex_match: rx_match - @sorted matches + # at this point the match list can contain several entries for the same [i,j] span: + # e.g. a span matched by alpha_lower may also be matched by alpha and alphanumeric with the same [i,j], + # and a span matched by recent_year may also be matched by alphanumeric and digits. + # for each [i,j] span, keep only the match(es) whose regex_name has the largest REGEX_PRECEDENCE value. 
+ precedence_map = {} # maps span key 'i-j' -> largest REGEX_PRECEDENCE value seen so far for that span + get_key = (match) -> "#{match.i}-#{match.j}" + for match in matches + key = get_key match + precedence = REGEX_PRECEDENCE[match.regex_name] + if key of precedence_map + highest_precedence = precedence_map[key] + continue if highest_precedence >= precedence # an equal or better entry is already recorded + precedence_map[key] = precedence + filtered_matches = [] + for match in matches + key = get_key match + precedence = REGEX_PRECEDENCE[match.regex_name] + if precedence_map[key] == precedence # keep every match tied at the winning precedence + filtered_matches.push match + @sorted filtered_matches #------------------------------------------------------------------------------- # date matching ---------------------------------------------------------------- diff --git a/test/test-matching.coffee b/test/test-matching.coffee index f05c6b93..3a75e4c6 100644 --- a/test/test-matching.coffee +++ b/test/test-matching.coffee @@ -427,6 +427,29 @@ test 'repeat matching', (t) -> t.end() +test 'regex matching', (t) -> + for [pattern, name] in [ + ['aaa', 'alpha_lower'] + ['a7c8D9', 'alphanumeric'] + ['aAaA', 'alpha'] + ['1922', 'recent_year'] + ['&@*#', 'symbols'] + ['94113', 'digits'] + ] + matches = matching.regex_match pattern + msg = "matches #{pattern} as a #{name} pattern" + check_matches msg, t, matches, 'regex', [pattern], [[0, pattern.length - 1]], + regex_name: [name] + + password = 'a7c8D9vvv2015' + matches = matching.regex_match password + ijs = [[0, 12], [6, 8], [9, 12]] # [i, j] of the whole password, 'vvv' and '2015' + msg = "matches multiple overlapping regex patterns" + check_matches msg, t, matches, 'regex', ['a7c8D9vvv2015', 'vvv', '2015'], ijs, + regex_name: ['alphanumeric', 'alpha_lower', 'recent_year'] + t.end() + + test 'date matching', (t) -> for sep in ['', ' ', '-', '/', '\\', '_', '.'] password = "13#{sep}2#{sep}1921"