From 545ce49b00a5f7bbac7a0a338a7cf557f6bc0270 Mon Sep 17 00:00:00 2001 From: Georgios Kalpakas Date: Tue, 15 Dec 2015 12:24:30 +0200 Subject: [PATCH] fix(input): fix URL validation being too strict Background: Prior to ffb6b2f, there was a bug in `URL_REGEXP`, trying to match the hostname as `\S+` (meaning any non-space character). This resulted in never actually validating the structure of the URL (e.g. segments such as port, path, query, fragment). Then ffb6b2f and subsequently e4bb838 fixed that bug, but revealed `URL_REGEXP`'s "strictness" wrt certain parts of the URL. Since browsers are too lenient when it comes to URL validation, this commit relaxes the "strictness" of `URL_REGEXP`, focusing more on the general structure, than on the specific characters allowed in each segment. Note 1: `URL_REGEXP` still seems to be stricter than browsers in some cases. Note 2: Browsers don't always agree on what is a valid URL and what isn't. Fixes #13528 --- src/ng/directive/input.js | 13 +++- test/ng/directive/inputSpec.js | 127 ++++++++++++++++++++++++++++----- 2 files changed, 122 insertions(+), 18 deletions(-) diff --git a/src/ng/directive/input.js b/src/ng/directive/input.js index fdb52f0752db..27098a168560 100644 --- a/src/ng/directive/input.js +++ b/src/ng/directive/input.js @@ -12,7 +12,18 @@ // Regex code is obtained from SO: https://stackoverflow.com/questions/3143070/javascript-regex-iso-datetime#answer-3143231 var ISO_DATE_REGEXP = /\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z)/; // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) -var URL_REGEXP = /^[A-Za-z][A-Za-z\d.+-]*:\/*(?:\w+(?::\w+)?@)?[^\s/]+(?::\d+)?(?:\/[\w#!:.?+=&%@\-/[\]$'()*,;~]*)?$/; +// Note: We are being more lenient, because browsers are too. +// 1. Scheme +// 2. Slashes +// 3. Username +// 4. Password +// 5. Hostname +// 6. Port +// 7. Path +// 8. Query +// 9. Fragment +// 1111111111111111 222 333333 44444 555555555555555555555555 666 77777777 8888888 999 +var URL_REGEXP = /^[a-z][a-z\d.+-]*:\/*(?:[^:@]+(?::[^@]+)?@)?(?:[^\s:/?#]+|\[[a-f\d:]+\])(?::\d+)?(?:\/[^?#]*)?(?:\?[^#]*)?(?:#.*)?$/i; var EMAIL_REGEXP = /^[a-z0-9!#$%&'*+\/=?^_`{|}~.-]+@[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$/i; var NUMBER_REGEXP = /^\s*(\-|\+)?(\d+|(\d*(\.\d*)))([eE][+-]?\d+)?\s*$/; var DATE_REGEXP = /^(\d{4})-(\d{2})-(\d{2})$/; diff --git a/test/ng/directive/inputSpec.js b/test/ng/directive/inputSpec.js index 77ee82ec1a00..358633d5969c 100644 --- a/test/ng/directive/inputSpec.js +++ b/test/ng/directive/inputSpec.js @@ -2534,23 +2534,116 @@ describe('input', function() { }); - describe('URL_REGEXP', function() { - /* global URL_REGEXP: false */ - it('should validate url', function() { - // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) - expect(URL_REGEXP.test('http://server:123/path')).toBe(true); - expect(URL_REGEXP.test('https://server:123/path')).toBe(true); - expect(URL_REGEXP.test('file:///home/user')).toBe(true); - expect(URL_REGEXP.test('mailto:user@example.com?subject=Foo')).toBe(true); - expect(URL_REGEXP.test('r2-d2.c3-p0://localhost/foo')).toBe(true); - expect(URL_REGEXP.test('abc:/foo')).toBe(true); - expect(URL_REGEXP.test('http://example.com/path;path')).toBe(true); - expect(URL_REGEXP.test('http://example.com/[]$\'()*,~)')).toBe(true); - expect(URL_REGEXP.test('http:')).toBe(false); - expect(URL_REGEXP.test('a@B.c')).toBe(false); - expect(URL_REGEXP.test('a_B.c')).toBe(false); - expect(URL_REGEXP.test('0scheme://example.com')).toBe(false); - expect(URL_REGEXP.test('http://example.com:9999/``')).toBe(false); + ddescribe('URL_REGEXP', function() { + // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) + // Note: We are being more lenient, because browsers are too. + var urls = [ + ['scheme://hostname', true], + ['scheme://username:password@host.name:7678/pa/t.h?q=u&e=r&y#fragment', true], + + // Validating `scheme` + ['://example.com', false], + ['0scheme://example.com', false], + ['.scheme://example.com', false], + ['+scheme://example.com', false], + ['-scheme://example.com', false], + ['_scheme://example.com', false], + ['scheme0://example.com', true], + ['scheme.://example.com', true], + ['scheme+://example.com', true], + ['scheme-://example.com', true], + ['scheme_://example.com', false], + + // Vaidating `:` and `/` after `scheme` + ['scheme//example.com', false], + ['scheme:example.com', true], + ['scheme:/example.com', true], + ['scheme:///example.com', true], + + // Validating `username` and `password` + ['scheme://@example.com', true], + ['scheme://username@example.com', true], + ['scheme://u0s.e+r-n_a~m!e@example.com', true], + ['scheme://u#s$e%r^n&a*m;e@example.com', true], + ['scheme://:password@example.com', true], + ['scheme://username:password@example.com', true], + ['scheme://username:pass:word@example.com', true], + ['scheme://username:p0a.s+s-w_o~r!d@example.com', true], + ['scheme://username:p#a$s%s^w&o*r;d@example.com', true], + + // Validating `hostname` + ['scheme:', false], // Chrome, FF: true + ['scheme://', false], // Chrome, FF: true + ['scheme:// example.com:', false], // Chrome, FF: true + ['scheme://example com:', false], // Chrome, FF: true + ['scheme://:', false], // Chrome, FF: true + ['scheme://?', false], // Chrome, FF: true + ['scheme://#', false], // Chrome, FF: true + ['scheme://username:password@:', false], // Chrome, FF: true + ['scheme://username:password@/', false], // Chrome, FF: true + ['scheme://username:password@?', false], // Chrome, FF: true + ['scheme://username:password@#', false], // Chrome, FF: true + ['scheme://host.name', true], + ['scheme://123.456.789.10', true], + ['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]', true], + ['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]:7678', true], + ['scheme://[1234:0:0:5678:9abc:0:0:def]', true], + ['scheme://[1234::5678:9abc::def]', true], + ['scheme://~`!@$%^&*-_=+|\\;\'",.()[]{}<>', true], + + // Validating `port` + ['scheme://example.com/no-port', true], + ['scheme://example.com:7678', true], + ['scheme://example.com:76T8', false], // Chrome, FF: true + ['scheme://example.com:port', false], // Chrome, FF: true + + // Validating `path` + ['scheme://example.com/', true], + ['scheme://example.com/path', true], + ['scheme://example.com/path/~`!@$%^&*-_=+|\\;:\'",./()[]{}<>', true], + + // Validating `query` + ['scheme://example.com?query', true], + ['scheme://example.com/?query', true], + ['scheme://example.com/path?query', true], + ['scheme://example.com/path?~`!@$%^&*-_=+|\\;:\'",.?/()[]{}<>', true], + + // Validating `fragment` + ['scheme://example.com#fragment', true], + ['scheme://example.com/#fragment', true], + ['scheme://example.com/path#fragment', true], + ['scheme://example.com/path/#fragment', true], + ['scheme://example.com/path?query#fragment', true], + ['scheme://example.com/path?query#~`!@#$%^&*-_=+|\\;:\'",.?/()[]{}<>', true], + + // Validating miscellaneous + ['scheme://☺.✪.⌘.➡/䨹', true], + ['scheme://مثال.إختبار', true], + ['scheme://例子.测试', true], + ['scheme://उदाहरण.परीक्षा', true], + + // Legacy tests + ['http://server:123/path', true], + ['https://server:123/path', true], + ['file:///home/user', true], + ['mailto:user@example.com?subject=Foo', true], + ['r2-d2.c3-p0://localhost/foo', true], + ['abc:/foo', true], + ['http://example.com/path;path', true], + ['http://example.com/[]$\'()*,~)', true], + ['http:', false], // FF: true + ['a@B.c', false], + ['a_B.c', false], + ['0scheme://example.com', false], + ['http://example.com:9999/``', true] + ]; + + they('should validate url: $prop', urls, function(item) { + var url = item[0]; + var valid = item[1]; + + /* global URL_REGEXP: false */ + expect(URL_REGEXP.test(url)).toBe(valid); }); }); });