From 62a16f3e239069cf38de5da646618723dd1bbf8a Mon Sep 17 00:00:00 2001 From: Sukka Date: Mon, 19 Aug 2024 00:21:26 +0800 Subject: [PATCH] feat: allow composer to adjust sanitization (#564) (#579) * feat: allow disable sanitization (#564) * test: #564 * chore: add changeset * chore: restore prior whitespace * refactor: adjust sanitizer to provide more data to the composer * refactor: DX tweaks * chore: adjust size limit will golf this down later * chore: tweak changeset --------- Co-authored-by: Evan Jacobs --- .changeset/tricky-poems-collect.md | 24 ++++++++++ README.md | 28 ++++++++++- index.compiler.spec.tsx | 42 ++++++++++++++++- index.tsx | 74 +++++++++++++++++++++--------- package.json | 4 +- 5 files changed, 145 insertions(+), 27 deletions(-) create mode 100644 .changeset/tricky-poems-collect.md diff --git a/.changeset/tricky-poems-collect.md b/.changeset/tricky-poems-collect.md new file mode 100644 index 00000000..0ee84dcf --- /dev/null +++ b/.changeset/tricky-poems-collect.md @@ -0,0 +1,24 @@ +--- +'markdown-to-jsx': minor +--- + +Allow modifying HTML attribute sanitization when `options.sanitizer` is passed by the composer. + +By default a lightweight URL sanitizer function is provided to avoid common attack vectors that might be placed into the `href` of an anchor tag, for example. The sanitizer receives the input, the HTML tag being targeted, and the attribute name. The original function is available as a library export called `sanitizer`. + +This can be overridden and replaced with a custom sanitizer if desired via `options.sanitizer`: + +```jsx +// sanitizer in this situation would receive: +// ('javascript:alert("foo")', 'a', 'href') + +; value }}> + {`[foo](javascript:alert("foo"))`} + + +// or + +compiler('[foo](javascript:alert("foo"))', { + sanitizer: (value, tag, attribute) => value, +}) +``` diff --git a/README.md b/README.md index 2146e272..8f32a817 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ The most lightweight, customizable React markdown component. - [options.createElement - Custom React.createElement behavior](#optionscreateelement---custom-reactcreateelement-behavior) - [options.enforceAtxHeadings](#optionsenforceatxheadings) - [options.renderRule](#optionsrenderrule) + - [options.sanitizer](#optionssanitizer) - [options.slugify](#optionsslugify) - [options.namedCodesToUnicode](#optionsnamedcodestounicode) - [options.disableParsingRawHTML](#optionsdisableparsingrawhtml) @@ -435,21 +436,44 @@ function App() { } ```` +#### options.sanitizer + +By default a lightweight URL sanitizer function is provided to avoid common attack vectors that might be placed into the `href` of an anchor tag, for example. The sanitizer receives the input, the HTML tag being targeted, and the attribute name. The original function is available as a library export called `sanitizer`. + +This can be overridden and replaced with a custom sanitizer if desired via `options.sanitizer`: + +```jsx +// sanitizer in this situation would receive: +// ('javascript:alert("foo")', 'a', 'href') + +; value }}> + {`[foo](javascript:alert("foo"))`} + + +// or + +compiler('[foo](javascript:alert("foo"))', { + sanitizer: (value, tag, attribute) => value, +}) +``` + #### options.slugify By default, a [lightweight deburring function](https://github.com/probablyup/markdown-to-jsx/blob/bc2f57412332dc670f066320c0f38d0252e0f057/index.js#L261-L275) is used to generate an HTML id from headings. You can override this by passing a function to `options.slugify`. This is helpful when you are using non-alphanumeric characters (e.g. Chinese or Japanese characters) in headings. For example: ```jsx -; str }}># 中文 + str }}># 中文 // or compiler('# 中文', { slugify: str => str }) // renders: -;

中文

+

中文

``` +The original function is available as a library export called `slugify`. + #### options.namedCodesToUnicode By default only a couple of named html codes are converted to unicode characters: diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx index 01379bee..08d3d39f 100644 --- a/index.compiler.spec.tsx +++ b/index.compiler.spec.tsx @@ -1,4 +1,4 @@ -import { compiler, RuleType } from './index' +import { compiler, sanitizer, RuleType } from './index' import * as React from 'react' import * as ReactDOM from 'react-dom' import * as fs from 'fs' @@ -1180,6 +1180,46 @@ describe('links', () => { `) }) + it('should not sanitize markdown when explicitly disabled', () => { + jest.spyOn(console, 'warn').mockImplementation(() => {}) + jest.spyOn(console, 'error').mockImplementation(() => {}) + + render(compiler('[foo](javascript:doSomethingBad)', { sanitizer: x => x })) + + expect(root.innerHTML).toMatchInlineSnapshot(` + + foo + + `) + + expect(console.warn).not.toHaveBeenCalled() + }) + + it('tag and attribute are provided to allow for conditional override', () => { + jest.spyOn(console, 'warn').mockImplementation(() => {}) + jest.spyOn(console, 'error').mockImplementation(() => {}) + + render( + compiler( + '[foo](javascript:doSomethingBad)\n![foo](javascript:doSomethingBad)', + { + sanitizer: (value, tag) => (tag === 'a' ? value : sanitizer(value)), + } + ) + ) + + expect(root.innerHTML).toMatchInlineSnapshot(` +

+ + foo + + foo +

+ `) + + expect(console.warn).toHaveBeenCalledTimes(1) + }) + it('should sanitize markdown links containing JS expressions', () => { jest.spyOn(console, 'warn').mockImplementation(() => {}) jest.spyOn(console, 'error').mockImplementation(() => {}) diff --git a/index.tsx b/index.tsx index 651e8a2d..ad34347e 100644 --- a/index.tsx +++ b/index.tsx @@ -731,8 +731,10 @@ function normalizeAttributeKey(key) { } function attributeValueToJSXPropValue( + tag: MarkdownToJSX.HTMLTags, key: keyof React.AllHTMLAttributes, - value: string + value: string, + sanitizeUrlFn: MarkdownToJSX.Options['sanitizer'] ): any { if (key === 'style') { return value.split(/;\s?/).reduce(function (styles, kvPair) { @@ -750,7 +752,7 @@ function attributeValueToJSXPropValue( return styles }, {}) } else if (key === 'href' || key === 'src') { - return sanitizeUrl(value) + return sanitizeUrlFn(value, tag, key) } else if (value.match(INTERPOLATION_R)) { // return as a string and let the consumer decide what to do with it value = value.slice(1, value.length - 1) @@ -951,7 +953,7 @@ function matchParagraph( return [match, captured] } -function sanitizeUrl(url: string): string | undefined { +export function sanitizer(url: string): string { try { const decoded = decodeURIComponent(url).replace(/[^A-Za-z0-9/:]/g, '') @@ -963,7 +965,7 @@ function sanitizeUrl(url: string): string | undefined { ) } - return undefined + return null } } catch (e) { if (process.env.NODE_ENV !== 'production') { @@ -1138,12 +1140,13 @@ export function compiler( options: MarkdownToJSX.Options = {} ) { options.overrides = options.overrides || {} + options.sanitizer = options.sanitizer || sanitizer options.slugify = options.slugify || slugify options.namedCodesToUnicode = options.namedCodesToUnicode ? { ...namedCodesToUnicode, ...options.namedCodesToUnicode } : namedCodesToUnicode - const createElementFn = options.createElement || React.createElement + options.createElement = options.createElement || React.createElement // JSX custom pragma // eslint-disable-next-line no-unused-vars @@ -1158,7 +1161,7 @@ export function compiler( ) { const overrideProps = get(options.overrides, `${tag}.props`, {}) - return createElementFn( + return options.createElement( getTag(tag, options.overrides), { ...props, @@ -1228,7 +1231,10 @@ export function compiler( return React.createElement(wrapper, { key: 'outer' }, jsx) } - function attrStringToMap(str: string): JSX.IntrinsicAttributes { + function attrStringToMap( + tag: MarkdownToJSX.HTMLTags, + str: string + ): JSX.IntrinsicAttributes { const attributes = str.match(ATTR_EXTRACTOR_R) if (!attributes) { return null @@ -1243,8 +1249,10 @@ export function compiler( const mappedKey = ATTRIBUTE_TO_JSX_PROP_MAP[key] || key const normalizedValue = (map[mappedKey] = attributeValueToJSXPropValue( + tag, key, - value + value, + options.sanitizer )) if ( @@ -1366,7 +1374,7 @@ export function compiler( parse(capture /*, parse, state*/) { return { // if capture[3] it's additional metadata - attrs: attrStringToMap(capture[3] || ''), + attrs: attrStringToMap('code', capture[3] || ''), lang: capture[2] || undefined, text: capture[4], type: RuleType.codeBlock, @@ -1409,13 +1417,13 @@ export function compiler( order: Priority.HIGH, parse(capture /*, parse*/) { return { - target: `#${options.slugify(capture[1])}`, + target: `#${options.slugify(capture[1], slugify)}`, text: capture[1], } }, render(node, output, state) { return ( - + {node.text} ) @@ -1450,7 +1458,7 @@ export function compiler( parse(capture, parse, state) { return { children: parseInline(parse, capture[2], state), - id: options.slugify(capture[2]), + id: options.slugify(capture[2], slugify), level: capture[1].length as MarkdownToJSX.HeadingNode['level'], } }, @@ -1495,10 +1503,14 @@ export function compiler( const noInnerParse = DO_NOT_PROCESS_HTML_ELEMENTS.indexOf(tagName) !== -1 + const tag = ( + noInnerParse ? tagName : capture[1] + ).trim() as MarkdownToJSX.HTMLTags + const ast = { - attrs: attrStringToMap(capture[2]), + attrs: attrStringToMap(tag, capture[2]), noInnerParse: noInnerParse, - tag: (noInnerParse ? tagName : capture[1]).trim(), + tag, } as { attrs: ReturnType children?: ReturnType | undefined @@ -1539,9 +1551,11 @@ export function compiler( match: anyScopeRegex(HTML_SELF_CLOSING_ELEMENT_R), order: Priority.HIGH, parse(capture /*, parse, state*/) { + const tag = capture[1].trim() as MarkdownToJSX.HTMLTags + return { - attrs: attrStringToMap(capture[2] || ''), - tag: capture[1].trim(), + attrs: attrStringToMap(tag, capture[2] || ''), + tag, } }, render(node, output, state) { @@ -1574,7 +1588,7 @@ export function compiler( key={state.key} alt={node.alt || undefined} title={node.title || undefined} - src={sanitizeUrl(node.target)} + src={options.sanitizer(node.target, 'img', 'src')} /> ) }, @@ -1596,7 +1610,11 @@ export function compiler( }, render(node, output, state) { return ( - + {output(node.children, state)} ) @@ -1725,7 +1743,7 @@ export function compiler( {node.alt} ) : null @@ -1749,7 +1767,7 @@ export function compiler( return refs[node.ref] ? ( {output(node.children, state)} @@ -1934,7 +1952,10 @@ export function compiler(