Skip to content

Commit

Permalink
Merge pull request from GHSA-59jf-3q9v-rh6g
Browse files Browse the repository at this point in the history
* [SECURITY] Properly encode noscript child nodes

The `<noscript>` element has a special behavior when
being evaluated in browsers, which depends on whether
script-parsing is enabled or disabled.

As a consequence this change will

* encode comment inner data, e.g. `<!-- <"comment"> -->`
  as `<!-- &lt;&quot;comment&quot;&gt -->`
* always encode tag attributes, e.g. `<p id="<value>">`
  as `<p id="&lt;value&gt;">`
* extend `Comment` and `CdataSection` to have a constructor
  (which triggers encoding per default) and to implement the
  `Behavior\HandlerInterface`
* add a new serializer option `encode_attributes`, which might
  basically be extracted to `Masterminds\HTML5`

* [TASK] Ensure attribute serialization preserves values as is

We don't want so called double-encoding – which is a valid
usecase when HTML describes how HTML is to be written – to
be automagically transformed to single encoded values,
as otherwise a valid input like

<a title="Insert &amp;amp; to write an &amp;"></a>
(Browser would show "Insert &amp; to write an &")

…would be changed to:

<a title="Insert &amp; to write an &amp;"></a>
(Browser would show "Insert & to write an &")

Also add tests for the attribute encoding we want:
 * Encode quotes, tags and stuff that might cause security issues
 * do not encode unnecessarily encode slashes or colons
   (like htmlentitites would do)

---------

Co-authored-by: Benjamin Franzke <ben@bnf.dev>
  • Loading branch information
ohader and bnf authored Jul 25, 2023
1 parent 476383a commit e3026f5
Show file tree
Hide file tree
Showing 10 changed files with 302 additions and 70 deletions.
38 changes: 13 additions & 25 deletions src/Behavior.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
namespace TYPO3\HtmlSanitizer;

use LogicException;
use TYPO3\HtmlSanitizer\Behavior\CdataSection;
use TYPO3\HtmlSanitizer\Behavior\Comment;
use TYPO3\HtmlSanitizer\Behavior\NodeInterface;
use TYPO3\HtmlSanitizer\Behavior\Tag;

Expand Down Expand Up @@ -74,11 +76,16 @@ class Behavior
* Node names as array index, e.g. `['strong' => new Tag('strong', '#comment' => new Comment()]`
* @var array<string, ?NodeInterface>
*/
protected $nodes = [
protected $nodes = [];

public function __construct()
{
// v2.1.0: adding `#comment` and `#cdata-section` hints for backward compatibility, will be removed with v3.0.0
'#comment' => null,
'#cdata-section' => null,
];
$this->nodes = array_merge($this->nodes, [
'#comment' => new Comment(),
'#cdata-section' => new CdataSection(),
]);
}

public function withFlags(int $flags): self
{
Expand Down Expand Up @@ -125,7 +132,6 @@ public function withNodes(NodeInterface ...$nodes): self
if (!is_array($indexedNodes)) {
return $this;
}
$this->assertNodeUniqueness($indexedNodes);
$target = clone $this;
$target->nodes = array_merge($target->nodes, $indexedNodes);
return $target;
Expand All @@ -136,9 +142,8 @@ public function withoutNodes(NodeInterface ...$nodes): self
$names = array_map([$this, 'getNodeName'], $nodes);
$filteredNodes = array_filter(
$this->nodes,
static function (?NodeInterface $node, string $name) use ($nodes, $names) {
return $node === null && !in_array($name, $names, true)
|| $node !== null && !in_array($node, $nodes, true);
static function (NodeInterface $node, string $name) use ($nodes, $names) {
return !in_array($name, $names, true) && !in_array($node, $nodes, true);
},
ARRAY_FILTER_USE_BOTH
);
Expand Down Expand Up @@ -247,23 +252,6 @@ protected function assertScalarUniqueness(array $names): void
}
}

/**
* @param array<string, NodeInterface> $nodes
*/
protected function assertNodeUniqueness(array $nodes): void
{
$existingNodeNames = array_intersect_key(array_filter($this->nodes), $nodes);
if ($existingNodeNames !== []) {
throw new LogicException(
sprintf(
'Cannot redeclare node names %s. Remove duplicates first',
implode(', ', array_keys($existingNodeNames))
),
1625391217
);
}
}

protected function getNodeName(NodeInterface $node): string
{
return strtolower($node->getName());
Expand Down
25 changes: 24 additions & 1 deletion src/Behavior/CdataSection.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,36 @@

namespace TYPO3\HtmlSanitizer\Behavior;

use DOMNode;
use DOMText;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Context;

/**
* Model of CDATA node.
*/
class CdataSection implements NodeInterface
class CdataSection implements NodeInterface, HandlerInterface
{
/**
* @var bool
*/
protected $secure = true;

public function __construct(bool $secure = true)
{
$this->secure = $secure;
}

public function getName(): string
{
return '#cdata-section';
}

public function handle(NodeInterface $node, ?DOMNode $domNode, Context $context, Behavior $behavior = null): ?DOMNode
{
if (!$this->secure || $domNode === null) {
return $domNode;
}
return new DOMText(trim($domNode->nodeValue));
}
}
25 changes: 24 additions & 1 deletion src/Behavior/Comment.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,36 @@

namespace TYPO3\HtmlSanitizer\Behavior;

use DOMComment;
use DOMNode;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Context;

/**
* Model of comment node.
*/
class Comment implements NodeInterface
class Comment implements NodeInterface, HandlerInterface
{
/**
* @var bool
*/
protected $secure = true;

public function __construct(bool $secure = true)
{
$this->secure = $secure;
}

public function getName(): string
{
return '#comment';
}

public function handle(NodeInterface $node, ?DOMNode $domNode, Context $context, Behavior $behavior = null): ?DOMNode
{
if (!$this->secure || $domNode === null) {
return $domNode;
}
return new DOMComment(htmlspecialchars($domNode->textContent, ENT_QUOTES, 'UTF-8', false));
}
}
23 changes: 1 addition & 22 deletions src/Builder/CommonBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@

namespace TYPO3\HtmlSanitizer\Builder;

use DOMNode;
use DOMText;
use TYPO3\HtmlSanitizer\Behavior;
use TYPO3\HtmlSanitizer\Behavior\Attr\UriAttrValueBuilder;
use TYPO3\HtmlSanitizer\Behavior\NodeInterface;
use TYPO3\HtmlSanitizer\Sanitizer;
use TYPO3\HtmlSanitizer\Visitor\CommonVisitor;

Expand Down Expand Up @@ -83,8 +80,7 @@ protected function createBehavior(): Behavior
->withName('common')
->withTags(...array_values($this->createBasicTags()))
->withTags(...array_values($this->createMediaTags()))
->withTags(...array_values($this->createTableTags()))
->withNodes(...array_values($this->createSpecialNodes()));
->withTags(...array_values($this->createTableTags()));
}

protected function createBasicTags(): array
Expand Down Expand Up @@ -211,23 +207,6 @@ protected function createTableTags(): array
return $tags;
}

/**
* @return array<string, Behavior\NodeInterface>
*/
protected function createSpecialNodes(): array
{
$nodes = [];
$nodes['#cdata-section'] = (new Behavior\NodeHandler(
new Behavior\CdataSection(),
new Behavior\Handler\ClosureHandler(
static function (NodeInterface $node, ?DOMNode $domNode) {
return $domNode !== null ? new DOMText(trim($domNode->nodeValue)) : null;
}
)
));
return $nodes;
}

/**
* @return Behavior\Attr[]
*/
Expand Down
1 change: 1 addition & 0 deletions src/Sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class Sanitizer
protected const mastermindsDefaultOptions = [
// Whether the serializer should aggressively encode all characters as entities.
'encode_entities' => false,
'encode_attributes' => true,
// Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
// (adjusted due to https://github.com/Masterminds/html5-php/issues/181#issuecomment-643767471)
'disable_html_ns' => true,
Expand Down
32 changes: 32 additions & 0 deletions src/Serializer/Rules.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ class Rules extends OutputRules implements RulesInterface
*/
protected $initiator;

/**
* @var bool
*/
protected $encodeAttributes;

/**
* @param Behavior $behavior
* @param resource$output
Expand All @@ -66,6 +71,7 @@ public static function create(Behavior $behavior, $output, array $options = []):
public function __construct($output, $options = [])
{
$this->options = (array)$options;
$this->encodeAttributes = !empty($options['encode_attributes']);
parent::__construct($output, $this->options);
}

Expand Down Expand Up @@ -158,6 +164,32 @@ public function text($domNode): void
$this->wr($domNode->data);
}

protected function enc($text, $attribute = false): string
{
if ($attribute && $this->encodeAttributes && !$this->encode) {
// In contrast to parent::enc() (when $this->encode is true),
// we are using htmlspecialchars() instead of htmlentities() as
// colons and slashes do not need to be aggressively escaped.
$value = htmlspecialchars(
$text,
ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES,
'UTF-8',
// $double_encode: true
// (name is misleading, it actually means: disable-automagic/always-encode)
// Our input is always entity decoded by the parser and we do not
// want to consider our input to possibly contain valid entities
// we rather want to treat it as pure text, that is *always* to be encoded.
true
);
// htmlspecialchars does escaping, but it doesn't match the requirements of
// HTML5 section 8.3 to ecape non breaking spaces
// https://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
$value = implode('&nbsp;', mb_split("\xc2\xa0", $value));
return $value;
}
return parent::enc($text, $attribute);
}

/**
* If the element has a declared namespace in the HTML, MathML or
* SVG namespaces, we use the localName instead of the tagName.
Expand Down
3 changes: 3 additions & 0 deletions src/Visitor/CommonVisitor.php
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ public function enterNode(DOMNode $domNode): ?DOMNode
if (!$node->shallHandleFirst()) {
$domNode = $node->getHandler()->handle($node->getNode(), $domNode, $this->context, $this->behavior);
}
} elseif ($node instanceof Behavior\HandlerInterface) {
$domNode = $node->handle($node, $domNode, $this->context, $this->behavior);
$domNode = $domNode instanceof DOMElement ? $this->enterDomElement($domNode, $node) : $domNode;
} elseif ($domNode instanceof DOMElement) {
$domNode = $this->enterDomElement($domNode, $node);
}
Expand Down
1 change: 0 additions & 1 deletion tests/BehaviorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ class BehaviorTest extends TestCase
public function ambiguityIsDetectedDataProvider(): array
{
return [
[ ['same'], ['same'], 1625391217 ],
[ ['same', 'same'], [], 1625591503 ],
[ ['same', 'same'], ['same'], 1625591503 ],
[ [], ['same', 'same'], 1625591503 ],
Expand Down
20 changes: 18 additions & 2 deletions tests/CommonBuilderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,21 @@ public function isSanitizedDataProvider(): array
'<!-- #comment -->',
],
'#910' => [
'<![CDATA[ #cdata ]]>',
'#cdata',
'<!-- <"comment"> -->',
'<!-- &lt;&quot;comment&quot;&gt; -->',
],
'#911' => [
'<!-- &lt;&quot;comment&quot;&gt; -->',
'<!-- &lt;&quot;comment&quot;&gt; -->',
],
'#915' => [
'#text',
'#text',
],
'#920' => [
'<![CDATA[ #cdata ]]>',
'#cdata',
],
'#921' => [
'<![CDATA[<any><span data-value="value"></any>*/]]>',
'&lt;any&gt;&lt;span data-value="value"&gt;&lt;/any&gt;*/',
Expand All @@ -287,6 +295,14 @@ public function isSanitizedDataProvider(): array
'<img src="/typo3.org/logo.svg"><any>value</any></img>',
'<img src="/typo3.org/logo.svg">&lt;any&gt;value&lt;/any&gt;',
],
'#935' => [
'<p class="</p><script>alert(1)">value</p>',
'<p class="&lt;/p&gt;&lt;script&gt;alert(1)">value</p>',
],
'#936' => [
'<p class="{&quot;json&quot;:true}">value</p>',
'<p class="{&quot;json&quot;:true}">value</p>',
],
];
}

Expand Down
Loading

0 comments on commit e3026f5

Please sign in to comment.