diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cbaa815e0..c9c3548e4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi ## [Unreleased][unreleased] +### Added + + - Added check to ensure Markdown input is valid UTF-8 (#401, #405) + ## [1.2.2] - 2019-01-15 This release contains the same changes as 1.1.3: diff --git a/src/Converter.php b/src/Converter.php index 500f0b3e81..38d2abc4d0 100644 --- a/src/Converter.php +++ b/src/Converter.php @@ -50,6 +50,8 @@ public function __construct(DocParserInterface $docParser, ElementRendererInterf * * @param string $commonMark * + * @throws \RuntimeException + * * @return string * * @api @@ -68,6 +70,8 @@ public function convertToHtml(string $commonMark): string * * @param string $commonMark * + * @throws \RuntimeException + * * @return string */ public function __invoke(string $commonMark): string diff --git a/src/DocParser.php b/src/DocParser.php index 496eed80bb..dfc87d5327 100644 --- a/src/DocParser.php +++ b/src/DocParser.php @@ -20,6 +20,7 @@ use League\CommonMark\Block\Element\Paragraph; use League\CommonMark\Block\Element\StringContainerInterface; use League\CommonMark\Event\DocumentParsedEvent; +use League\CommonMark\Exception\UnexpectedEncodingException; final class DocParser implements DocParserInterface { @@ -71,6 +72,8 @@ private function preProcessInput(string $input): array /** * @param string $input * + * @throws \RuntimeException + * * @return Document */ public function parse(string $input): Document @@ -78,6 +81,7 @@ public function parse(string $input): Document $document = new Document(); $context = new Context($document, $this->environment); + $this->assertValidUTF8($input); $lines = $this->preProcessInput($input); foreach ($lines as $line) { $context->setNextLine($line); @@ -248,4 +252,11 @@ private function setAndPropagateLastLineBlank(ContextInterface $context, Cursor $container = $container->parent(); } } + + private function assertValidUTF8(string $input) + { + if (!\mb_check_encoding($input, 'UTF-8')) { + throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected'); + } + } } diff --git a/src/DocParserInterface.php b/src/DocParserInterface.php index f43c93093e..23725ad476 100644 --- a/src/DocParserInterface.php +++ b/src/DocParserInterface.php @@ -18,6 +18,8 @@ interface DocParserInterface /** * @param string $input * + * @throws \RuntimeException + * * @return Document */ public function parse(string $input): Document; diff --git a/src/Exception/UnexpectedEncodingException.php b/src/Exception/UnexpectedEncodingException.php new file mode 100644 index 0000000000..ada054bc90 --- /dev/null +++ b/src/Exception/UnexpectedEncodingException.php @@ -0,0 +1,16 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Exception; + +final class UnexpectedEncodingException extends \RuntimeException +{ +} diff --git a/tests/unit/CommonMarkConverterTest.php b/tests/unit/CommonMarkConverterTest.php index 16af95374a..7f24f44df1 100644 --- a/tests/unit/CommonMarkConverterTest.php +++ b/tests/unit/CommonMarkConverterTest.php @@ -60,4 +60,13 @@ public function testEnvironmentAndConfigConstructor() $this->assertSame($mockEnvironment, $environment); } + + /** + * @expectedException \League\CommonMark\Exception\UnexpectedEncodingException + */ + public function testConvertingInvalidUTF8() + { + $converter = new CommonMarkConverter(); + $converter->convertToHtml("\x09\xca\xca"); + } } diff --git a/tests/unit/DocParserTest.php b/tests/unit/DocParserTest.php new file mode 100644 index 0000000000..5e5319fe3a --- /dev/null +++ b/tests/unit/DocParserTest.php @@ -0,0 +1,30 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Tests\Unit; + +use League\CommonMark\DocParser; +use League\CommonMark\Environment; +use PHPUnit\Framework\TestCase; + +class DocParserTest extends TestCase +{ + /** + * @expectedException \League\CommonMark\Exception\UnexpectedEncodingException + */ + public function testParsingWithInvalidUTF8() + { + $environment = Environment::createCommonMarkEnvironment(); + $docParser = new DocParser($environment); + + $docParser->parse("\x09\xca\xca"); + } +}