Skip to content

Commit

Permalink
Check encoding before parsing; throw exception if not UTF-8
Browse files Browse the repository at this point in the history
  • Loading branch information
colinodell committed Jan 16, 2020
1 parent 4d0a30c commit 5c7b281
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi

## [Unreleased][unreleased]

### Added

- Added check to ensure Markdown input is valid UTF-8 (#401, #405)

## [1.2.2] - 2020-01-15

This release contains the same changes as 1.1.3:
Expand Down
8 changes: 4 additions & 4 deletions src/Converter.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ public function __construct(DocParserInterface $docParser, ElementRendererInterf
*
* @param string $commonMark
*
* @return string
*
* @throws \RuntimeException
*
* @return string
*
* @api
*/
public function convertToHtml(string $commonMark): string
Expand All @@ -70,9 +70,9 @@ public function convertToHtml(string $commonMark): string
*
* @param string $commonMark
*
* @return string
*
* @throws \RuntimeException
*
* @return string
*/
public function __invoke(string $commonMark): string
{
Expand Down
13 changes: 11 additions & 2 deletions src/DocParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use League\CommonMark\Block\Element\Paragraph;
use League\CommonMark\Block\Element\StringContainerInterface;
use League\CommonMark\Event\DocumentParsedEvent;
use League\CommonMark\Exception\UnexpectedEncodingException;

final class DocParser implements DocParserInterface
{
Expand Down Expand Up @@ -71,15 +72,16 @@ private function preProcessInput(string $input): array
/**
* @param string $input
*
* @return Document
*
* @throws \RuntimeException
*
* @return Document
*/
public function parse(string $input): Document
{
$document = new Document();
$context = new Context($document, $this->environment);

$this->assertValidUTF8($input);
$lines = $this->preProcessInput($input);
foreach ($lines as $line) {
$context->setNextLine($line);
Expand Down Expand Up @@ -250,4 +252,11 @@ private function setAndPropagateLastLineBlank(ContextInterface $context, Cursor
$container = $container->parent();
}
}

/**
 * Guards against input that is not valid UTF-8.
 *
 * @param string $input
 *
 * @throws UnexpectedEncodingException if the input fails the UTF-8 encoding check
 */
private function assertValidUTF8(string $input)
{
    // Well-formed input: nothing to do.
    if (\mb_check_encoding($input, 'UTF-8')) {
        return;
    }

    throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
}
}
4 changes: 2 additions & 2 deletions src/DocParserInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ interface DocParserInterface
/**
* @param string $input
*
* @return Document
*
* @throws \RuntimeException
*
* @return Document
*/
public function parse(string $input): Document;
}
16 changes: 16 additions & 0 deletions src/Exception/UnexpectedEncodingException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?php

/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace League\CommonMark\Exception;

/**
 * Runtime exception signalling that input was not in the expected encoding.
 */
final class UnexpectedEncodingException extends \RuntimeException
{
}
9 changes: 9 additions & 0 deletions tests/unit/CommonMarkConverterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,13 @@ public function testEnvironmentAndConfigConstructor()

$this->assertSame($mockEnvironment, $environment);
}

/**
 * Converting input that is not valid UTF-8 must raise an UnexpectedEncodingException.
 */
public function testConvertingInvalidUTF8()
{
    $converter = new CommonMarkConverter();

    // Declare the expectation explicitly instead of via the deprecated
    // @expectedException annotation, and only after the converter is built
    // so that a failure during construction cannot satisfy it.
    $this->expectException(\League\CommonMark\Exception\UnexpectedEncodingException::class);

    // "\xca" is a UTF-8 lead byte that must be followed by a continuation
    // byte; a second "\xca" is not one, so this sequence is malformed.
    $converter->convertToHtml("\x09\xca\xca");
}
}
30 changes: 30 additions & 0 deletions tests/unit/DocParserTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace League\CommonMark\Tests\Unit;

use League\CommonMark\DocParser;
use League\CommonMark\Environment;
use PHPUnit\Framework\TestCase;

/**
 * Unit tests for DocParser.
 */
class DocParserTest extends TestCase
{
    /**
     * Parsing input that is not valid UTF-8 must raise an UnexpectedEncodingException.
     */
    public function testParsingWithInvalidUTF8()
    {
        $environment = Environment::createCommonMarkEnvironment();
        $docParser = new DocParser($environment);

        // Declare the expectation explicitly instead of via the deprecated
        // @expectedException annotation, and only after the fixtures are built
        // so that a failure during setup cannot satisfy it.
        $this->expectException(\League\CommonMark\Exception\UnexpectedEncodingException::class);

        // "\xca" is a UTF-8 lead byte that must be followed by a continuation
        // byte; a second "\xca" is not one, so this sequence is malformed.
        $docParser->parse("\x09\xca\xca");
    }
}

0 comments on commit 5c7b281

Please sign in to comment.