From a01127416c8f633fa4594532fa807de398d9d755 Mon Sep 17 00:00:00 2001 From: Georgios Kalpakas Date: Tue, 15 Dec 2015 12:24:30 +0200 Subject: [PATCH] fix(input): fix URL validation being too strict Background: Prior to ffb6b2f, there was a bug in `URL_REGEXP`, trying to match the hostname as `\S+` (meaning any non-space character). This resulted in never actually validating the structure of the URL (e.g. segments such as port, path, query, fragment). Then ffb6b2f and subsequently e4bb838 fixed that bug, but revealed `URL_REGEXP`'s "strictness" wrt certain parts of the URL. Since browsers are too lenient when it comes to URL validation anyway, it doesn't make sense for Angular to be much stricter, so this commit relaxes the "strictness" of `URL_REGEXP`, focusing more on the general structure, than on the specific characters allowed in each segment. Note 1: `URL_REGEXP` still seems to be stricter than browsers in some cases. Note 2: Browsers don't always agree on what is a valid URL and what isn't. Fixes #13528 --- src/ng/directive/input.js | 13 +++- test/ng/directive/inputSpec.js | 125 ++++++++++++++++++++++++++++----- 2 files changed, 121 insertions(+), 17 deletions(-) diff --git a/src/ng/directive/input.js b/src/ng/directive/input.js index fdb52f0752db..27098a168560 100644 --- a/src/ng/directive/input.js +++ b/src/ng/directive/input.js @@ -12,7 +12,18 @@ // Regex code is obtained from SO: https://stackoverflow.com/questions/3143070/javascript-regex-iso-datetime#answer-3143231 var ISO_DATE_REGEXP = /\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z)/; // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) -var URL_REGEXP = /^[A-Za-z][A-Za-z\d.+-]*:\/*(?:\w+(?::\w+)?@)?[^\s/]+(?::\d+)?(?:\/[\w#!:.?+=&%@\-/[\]$'()*,;~]*)?$/; +// Note: We are being more lenient, because browsers are too. +// 1. Scheme +// 2. Slashes +// 3. Username +// 4. Password +// 5. Hostname +// 6. Port +// 7. Path +// 8. Query +// 9. 
Fragment +// 1111111111111111 222 333333 44444 555555555555555555555555 666 77777777 8888888 999 +var URL_REGEXP = /^[a-z][a-z\d.+-]*:\/*(?:[^:@]+(?::[^@]+)?@)?(?:[^\s:/?#]+|\[[a-f\d:]+\])(?::\d+)?(?:\/[^?#]*)?(?:\?[^#]*)?(?:#.*)?$/i; var EMAIL_REGEXP = /^[a-z0-9!#$%&'*+\/=?^_`{|}~.-]+@[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$/i; var NUMBER_REGEXP = /^\s*(\-|\+)?(\d+|(\d*(\.\d*)))([eE][+-]?\d+)?\s*$/; var DATE_REGEXP = /^(\d{4})-(\d{2})-(\d{2})$/; diff --git a/test/ng/directive/inputSpec.js b/test/ng/directive/inputSpec.js index 77ee82ec1a00..20c23073324b 100644 --- a/test/ng/directive/inputSpec.js +++ b/test/ng/directive/inputSpec.js @@ -2535,22 +2535,115 @@ describe('input', function() { describe('URL_REGEXP', function() { - /* global URL_REGEXP: false */ - it('should validate url', function() { - // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) - expect(URL_REGEXP.test('http://server:123/path')).toBe(true); - expect(URL_REGEXP.test('https://server:123/path')).toBe(true); - expect(URL_REGEXP.test('file:///home/user')).toBe(true); - expect(URL_REGEXP.test('mailto:user@example.com?subject=Foo')).toBe(true); - expect(URL_REGEXP.test('r2-d2.c3-p0://localhost/foo')).toBe(true); - expect(URL_REGEXP.test('abc:/foo')).toBe(true); - expect(URL_REGEXP.test('http://example.com/path;path')).toBe(true); - expect(URL_REGEXP.test('http://example.com/[]$\'()*,~)')).toBe(true); - expect(URL_REGEXP.test('http:')).toBe(false); - expect(URL_REGEXP.test('a@B.c')).toBe(false); - expect(URL_REGEXP.test('a_B.c')).toBe(false); - expect(URL_REGEXP.test('0scheme://example.com')).toBe(false); - expect(URL_REGEXP.test('http://example.com:9999/``')).toBe(false); + // See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987) + // Note: We are being more lenient, because browsers are too. 
+ var urls = [ + ['scheme://hostname', true], + ['scheme://username:password@host.name:7678/pa/t.h?q=u&e=r&y#fragment', true], + + // Validating `scheme` + ['://example.com', false], + ['0scheme://example.com', false], + ['.scheme://example.com', false], + ['+scheme://example.com', false], + ['-scheme://example.com', false], + ['_scheme://example.com', false], + ['scheme0://example.com', true], + ['scheme.://example.com', true], + ['scheme+://example.com', true], + ['scheme-://example.com', true], + ['scheme_://example.com', false], + + // Validating `:` and `/` after `scheme` + ['scheme//example.com', false], + ['scheme:example.com', true], + ['scheme:/example.com', true], + ['scheme:///example.com', true], + + // Validating `username` and `password` + ['scheme://@example.com', true], + ['scheme://username@example.com', true], + ['scheme://u0s.e+r-n_a~m!e@example.com', true], + ['scheme://u#s$e%r^n&a*m;e@example.com', true], + ['scheme://:password@example.com', true], + ['scheme://username:password@example.com', true], + ['scheme://username:pass:word@example.com', true], + ['scheme://username:p0a.s+s-w_o~r!d@example.com', true], + ['scheme://username:p#a$s%s^w&o*r;d@example.com', true], + + // Validating `hostname` + ['scheme:', false], // Chrome, FF: true + ['scheme://', false], // Chrome, FF: true + ['scheme:// example.com:', false], // Chrome, FF: true + ['scheme://example com:', false], // Chrome, FF: true + ['scheme://:', false], // Chrome, FF: true + ['scheme://?', false], // Chrome, FF: true + ['scheme://#', false], // Chrome, FF: true + ['scheme://username:password@:', false], // Chrome, FF: true + ['scheme://username:password@/', false], // Chrome, FF: true + ['scheme://username:password@?', false], // Chrome, FF: true + ['scheme://username:password@#', false], // Chrome, FF: true + ['scheme://host.name', true], + ['scheme://123.456.789.10', true], + ['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]', true], + 
['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]:7678', true], + ['scheme://[1234:0:0:5678:9abc:0:0:def]', true], + ['scheme://[1234::5678:9abc::def]', true], + ['scheme://~`!@$%^&*-_=+|\\;\'",.()[]{}<>', true], + + // Validating `port` + ['scheme://example.com/no-port', true], + ['scheme://example.com:7678', true], + ['scheme://example.com:76T8', false], // Chrome, FF: true + ['scheme://example.com:port', false], // Chrome, FF: true + + // Validating `path` + ['scheme://example.com/', true], + ['scheme://example.com/path', true], + ['scheme://example.com/path/~`!@$%^&*-_=+|\\;:\'",./()[]{}<>', true], + + // Validating `query` + ['scheme://example.com?query', true], + ['scheme://example.com/?query', true], + ['scheme://example.com/path?query', true], + ['scheme://example.com/path?~`!@$%^&*-_=+|\\;:\'",.?/()[]{}<>', true], + + // Validating `fragment` + ['scheme://example.com#fragment', true], + ['scheme://example.com/#fragment', true], + ['scheme://example.com/path#fragment', true], + ['scheme://example.com/path/#fragment', true], + ['scheme://example.com/path?query#fragment', true], + ['scheme://example.com/path?query#~`!@#$%^&*-_=+|\\;:\'",.?/()[]{}<>', true], + + // Validating miscellaneous + ['scheme://☺.✪.⌘.➡/䨹', true], + ['scheme://مثال.إختبار', true], + ['scheme://例子.测试', true], + ['scheme://उदाहरण.परीक्षा', true], + + // Legacy tests + ['http://server:123/path', true], + ['https://server:123/path', true], + ['file:///home/user', true], + ['mailto:user@example.com?subject=Foo', true], + ['r2-d2.c3-p0://localhost/foo', true], + ['abc:/foo', true], + ['http://example.com/path;path', true], + ['http://example.com/[]$\'()*,~)', true], + ['http:', false], // FF: true + ['a@B.c', false], + ['a_B.c', false], + ['0scheme://example.com', false], + ['http://example.com:9999/``', true] + ]; + + they('should validate url: $prop', urls, function(item) { + var url = item[0]; + var valid = item[1]; + + /* global URL_REGEXP: false */ + 
expect(URL_REGEXP.test(url)).toBe(valid); }); }); });