diff --git a/CHANGELOG.md b/CHANGELOG.md index 475ed70ced..7a5282da01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Updates should follow the [Keep a CHANGELOG](http://keepachangelog.com/) princip ## [Unreleased][unreleased] +### Changed + + - Modified how URL normalization decodes certain characters in order to align with the JS library's output + ## [0.18.3] - 2019-03-21 This is a **security update** release. diff --git a/src/Util/UrlEncoder.php b/src/Util/UrlEncoder.php index d2015b59f1..774d1fa3c6 100644 --- a/src/Util/UrlEncoder.php +++ b/src/Util/UrlEncoder.php @@ -39,6 +39,20 @@ final class UrlEncoder '%7E' => '~', ]; + protected static $dontDecode = [ + ';', + '/', + '?', + ':', + '@', + '&', + '=', + '+', + '$', + ',', + '#', + ]; + /** * @param string $uri * @@ -61,15 +75,13 @@ public static function unescapeAndEncode($uri) private static function decode($uri) { return preg_replace_callback('/%([0-9a-f]{2})/iu', function ($matches) { - // Convert percent-encoded codes to uppercase - $upper = strtoupper($matches[0]); - // Keep excluded characters as-is - if (array_key_exists($upper, self::$dontEncode)) { - return $upper; + $char = chr(hexdec($matches[1])); + + if (in_array($char, self::$dontDecode, true)) { + return strtoupper($matches[0]); } - // Otherwise, return the character for this codepoint - return chr(hexdec($matches[1])); + return $char; }, $uri); } diff --git a/tests/functional/data/safe_links/input.md b/tests/functional/data/safe_links/input.md index c538e9efee..9b98076b3e 100644 --- a/tests/functional/data/safe_links/input.md +++ b/tests/functional/data/safe_links/input.md @@ -164,4 +164,4 @@ javas\x09cript:javascript:alert(1)> [XSS](javascript:alert%28'XSS'%29) -[XSS](javascript:alert%28'XSS'%29) \ No newline at end of file +[XSS](javascript:alert%28'XSS'%29) diff --git a/tests/functional/data/safe_links/safe_output.html b/tests/functional/data/safe_links/safe_output.html index 0853ba1bb8..845eb96bbe 100644 --- a/tests/functional/data/safe_links/safe_output.html +++ b/tests/functional/data/safe_links/safe_output.html @@ -81,4 +81,4 @@

jAvAsCrIpT:alert(1)

XSS

XSS

-

XSS

\ No newline at end of file +

XSS

diff --git a/tests/functional/data/safe_links/unsafe_output.html b/tests/functional/data/safe_links/unsafe_output.html index 859c25793d..8ddda54756 100644 --- a/tests/functional/data/safe_links/unsafe_output.html +++ b/tests/functional/data/safe_links/unsafe_output.html @@ -79,6 +79,6 @@

<jav&#65ascript:javascript:alert(1)>

<jav&#97ascript:javascript:alert(1)>

jAvAsCrIpT:alert(1)

-

XSS

-

XSS

-

XSS

\ No newline at end of file +

XSS

+

XSS

+

XSS

diff --git a/tests/unit/Util/UrlEncoderTest.php b/tests/unit/Util/UrlEncoderTest.php index 80379d2349..4cc5b1f521 100644 --- a/tests/unit/Util/UrlEncoderTest.php +++ b/tests/unit/Util/UrlEncoderTest.php @@ -52,9 +52,29 @@ public function unescapeAndEncodeTestProvider() ['<', '%3C'], ['>', '%3E'], ['?', '?'], + ['%21', '!'], + ['%23', '%23'], + ['%24', '%24'], + ['%26', '%26'], + ['%27', "'"], + ['%2A', '*'], + ['%2B', '%2B'], + ['%2C', '%2C'], + ['%2D', '-'], + ['%2E', '.'], + ['%2F', '%2F'], + ['%3A', '%3A'], + ['%3B', '%3B'], + ['%3D', '%3D'], + ['%3F', '%3F'], + ['%40', '%40'], + ['%5F', '_'], + ['%7E', '~'], ['java%0ascript:alert("XSS")', 'java%0Ascript:alert(%22XSS%22)'], ['java%0Ascript:alert("XSS")', 'java%0Ascript:alert(%22XSS%22)'], ["java\nscript:alert('XSS')", "java%0Ascript:alert('XSS')"], + // Note that the following test does decode '&colon;' to '&colon', but this does not impact rendering, which re-encodes that '&' elsewhere + ['javascript&colon;alert%28'XSS'%29', 'javascript:alert('XSS')'], ['https://en.wikipedia.org/wiki/Markdown#CommonMark', 'https://en.wikipedia.org/wiki/Markdown#CommonMark'], ['https://img.shields.io/badge/help-%23hoaproject-ff0066.svg', 'https://img.shields.io/badge/help-%23hoaproject-ff0066.svg'], ['http://example.com/a%62%63%2fd%3Fe', 'http://example.com/abc%2Fd%3Fe'],