diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a246b01b5..7c1779099d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi ## [Unreleased][unreleased] +### Fixed + +- Fixed autolink extension not detecting some URIs with underscores (#956) + ## [2.3.8] - 2022-12-10 ### Fixed diff --git a/src/Extension/Autolink/UrlAutolinkParser.php b/src/Extension/Autolink/UrlAutolinkParser.php index 7fad7f5542..6b4290a994 100644 --- a/src/Extension/Autolink/UrlAutolinkParser.php +++ b/src/Extension/Autolink/UrlAutolinkParser.php @@ -22,16 +22,22 @@ final class UrlAutolinkParser implements InlineParserInterface { private const ALLOWED_AFTER = [null, ' ', "\t", "\n", "\x0b", "\x0c", "\x0d", '*', '_', '~', '(']; - // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php + // RegEx adapted from https://github.com/symfony/symfony/blob/6.3/src/Symfony/Component/Validator/Constraints/UrlValidator.php private const REGEX = '~ ( # Must start with a supported scheme + auth, or "www" (?: - (?:%s):// # protocol - (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)? # basic auth + (?:%s):// # protocol + (?:(?:(?:[\_\.\pL\pN-]|%%[0-9A-Fa-f]{2})+:)?((?:[\_\.\pL\pN-]|%%[0-9A-Fa-f]{2})+)@)? # basic auth |www\.) (?: - (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name + (?: + (?:xn--[a-z0-9-]++\.)*+xn--[a-z0-9-]++ # a domain name using punycode + | + (?:[\pL\pN\pS\pM\-\_]++\.)+[\pL\pN\pM]++ # a multi-level domain name + | + [a-z0-9\-\_]++ # a single-level domain name + )\.? | # or \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} # an IP address | # or @@ -40,9 +46,9 @@ final class UrlAutolinkParser implements InlineParserInterface \] # an IPv6 address ) (?::[0-9]+)? # a port (optional) - (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )* # a path - (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )? # a query (optional) - (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )? # a fragment (optional) + (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )* # a path + (?:\? (?:[\pL\pN\-._\~!$&\'\[\]()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )? # a query (optional) + (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )? # a fragment (optional) )~ixu'; /** diff --git a/tests/functional/Extension/Autolink/UrlAutolinkParserTest.php b/tests/functional/Extension/Autolink/UrlAutolinkParserTest.php index 0f07f18c63..0c280f6154 100644 --- a/tests/functional/Extension/Autolink/UrlAutolinkParserTest.php +++ b/tests/functional/Extension/Autolink/UrlAutolinkParserTest.php @@ -44,6 +44,7 @@ public function dataProviderForAutolinkTests(): iterable // Basic examples yield ['You can search on http://google.com for stuff.', '

You can search on http://google.com for stuff.

']; yield ['https://google.com', '

https://google.com

']; + yield ['https://sub_domain.example.com', '

https://sub_domain.example.com

']; yield ['ftp://example.com', '

ftp://example.com

']; yield ['www.google.com', '

www.google.com

']; yield [' http://leadingwhitespace.example.com', '

http://leadingwhitespace.example.com

'];