Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow disable sanitization (#564) #579

Merged
merged 9 commits into from
Aug 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .changeset/tricky-poems-collect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
'markdown-to-jsx': minor
---

Allow the composer to customize HTML attribute sanitization by passing `options.sanitizer`.

By default, a lightweight URL sanitizer function is provided to guard against common attack vectors that might be placed into, for example, the `href` of an anchor tag. The sanitizer receives the input value, the HTML tag being targeted, and the attribute name. The original function is available as a library export called `sanitizer`.

This can be overridden and replaced with a custom sanitizer if desired via `options.sanitizer`:

```jsx
// sanitizer in this situation would receive:
// ('javascript:alert("foo")', 'a', 'href')

;<Markdown options={{ sanitizer: (value, tag, attribute) => value }}>
{`[foo](javascript:alert("foo"))`}
</Markdown>

// or

compiler('[foo](javascript:alert("foo"))', {
sanitizer: (value, tag, attribute) => value,
})
```
28 changes: 26 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The most lightweight, customizable React markdown component.
- [options.createElement - Custom React.createElement behavior](#optionscreateelement---custom-reactcreateelement-behavior)
- [options.enforceAtxHeadings](#optionsenforceatxheadings)
- [options.renderRule](#optionsrenderrule)
- [options.sanitizer](#optionssanitizer)
- [options.slugify](#optionsslugify)
- [options.namedCodesToUnicode](#optionsnamedcodestounicode)
- [options.disableParsingRawHTML](#optionsdisableparsingrawhtml)
Expand Down Expand Up @@ -435,21 +436,44 @@ function App() {
}
````

#### options.sanitizer

By default, a lightweight URL sanitizer function is provided to guard against common attack vectors that might be placed into, for example, the `href` of an anchor tag. The sanitizer receives the input value, the HTML tag being targeted, and the attribute name. The original function is available as a library export called `sanitizer`.

This can be overridden and replaced with a custom sanitizer if desired via `options.sanitizer`:

```jsx
// sanitizer in this situation would receive:
// ('javascript:alert("foo")', 'a', 'href')

;<Markdown options={{ sanitizer: (value, tag, attribute) => value }}>
{`[foo](javascript:alert("foo"))`}
</Markdown>

// or

compiler('[foo](javascript:alert("foo"))', {
sanitizer: (value, tag, attribute) => value,
})
```

#### options.slugify

By default, a [lightweight deburring function](https://github.com/probablyup/markdown-to-jsx/blob/bc2f57412332dc670f066320c0f38d0252e0f057/index.js#L261-L275) is used to generate an HTML id from headings. You can override this by passing a function to `options.slugify`. This is helpful when you are using non-alphanumeric characters (e.g. Chinese or Japanese characters) in headings. For example:

```jsx
;<Markdown options={{ slugify: str => str }}># 中文</Markdown>
<Markdown options={{ slugify: str => str }}># 中文</Markdown>

// or

compiler('# 中文', { slugify: str => str })

// renders:
;<h1 id="中文">中文</h1>
<h1 id="中文">中文</h1>
```

The original function is available as a library export called `slugify`.

#### options.namedCodesToUnicode

By default only a couple of named html codes are converted to unicode characters:
Expand Down
42 changes: 41 additions & 1 deletion index.compiler.spec.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { compiler, RuleType } from './index'
import { compiler, sanitizer, RuleType } from './index'
import * as React from 'react'
import * as ReactDOM from 'react-dom'
import * as fs from 'fs'
Expand Down Expand Up @@ -1180,6 +1180,46 @@ describe('links', () => {
`)
})

it('should not sanitize markdown when explicitly disabled', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {})
jest.spyOn(console, 'error').mockImplementation(() => {})

render(compiler('[foo](javascript:doSomethingBad)', { sanitizer: x => x }))

expect(root.innerHTML).toMatchInlineSnapshot(`
<a href="javascript:doSomethingBad">
foo
</a>
`)

expect(console.warn).not.toHaveBeenCalled()
})

it('tag and attribute are provided to allow for conditional override', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {})
jest.spyOn(console, 'error').mockImplementation(() => {})

render(
compiler(
'[foo](javascript:doSomethingBad)\n![foo](javascript:doSomethingBad)',
{
sanitizer: (value, tag) => (tag === 'a' ? value : sanitizer(value)),
}
)
)

expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<a href="javascript:doSomethingBad">
foo
</a>
<img alt="foo">
</p>
`)

expect(console.warn).toHaveBeenCalledTimes(1)
})

it('should sanitize markdown links containing JS expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {})
jest.spyOn(console, 'error').mockImplementation(() => {})
Expand Down
74 changes: 52 additions & 22 deletions index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -731,8 +731,10 @@ function normalizeAttributeKey(key) {
}

function attributeValueToJSXPropValue(
tag: MarkdownToJSX.HTMLTags,
key: keyof React.AllHTMLAttributes<Element>,
value: string
value: string,
sanitizeUrlFn: MarkdownToJSX.Options['sanitizer']
): any {
if (key === 'style') {
return value.split(/;\s?/).reduce(function (styles, kvPair) {
Expand All @@ -750,7 +752,7 @@ function attributeValueToJSXPropValue(
return styles
}, {})
} else if (key === 'href' || key === 'src') {
return sanitizeUrl(value)
return sanitizeUrlFn(value, tag, key)
} else if (value.match(INTERPOLATION_R)) {
// return as a string and let the consumer decide what to do with it
value = value.slice(1, value.length - 1)
Expand Down Expand Up @@ -951,7 +953,7 @@ function matchParagraph(
return [match, captured]
}

function sanitizeUrl(url: string): string | undefined {
export function sanitizer(url: string): string {
try {
const decoded = decodeURIComponent(url).replace(/[^A-Za-z0-9/:]/g, '')

Expand All @@ -963,7 +965,7 @@ function sanitizeUrl(url: string): string | undefined {
)
}

return undefined
return null
}
} catch (e) {
if (process.env.NODE_ENV !== 'production') {
Expand Down Expand Up @@ -1138,12 +1140,13 @@ export function compiler(
options: MarkdownToJSX.Options = {}
) {
options.overrides = options.overrides || {}
options.sanitizer = options.sanitizer || sanitizer
options.slugify = options.slugify || slugify
options.namedCodesToUnicode = options.namedCodesToUnicode
? { ...namedCodesToUnicode, ...options.namedCodesToUnicode }
: namedCodesToUnicode

const createElementFn = options.createElement || React.createElement
options.createElement = options.createElement || React.createElement

// JSX custom pragma
// eslint-disable-next-line no-unused-vars
Expand All @@ -1158,7 +1161,7 @@ export function compiler(
) {
const overrideProps = get(options.overrides, `${tag}.props`, {})

return createElementFn(
return options.createElement(
getTag(tag, options.overrides),
{
...props,
Expand Down Expand Up @@ -1228,7 +1231,10 @@ export function compiler(
return React.createElement(wrapper, { key: 'outer' }, jsx)
}

function attrStringToMap(str: string): JSX.IntrinsicAttributes {
function attrStringToMap(
tag: MarkdownToJSX.HTMLTags,
str: string
): JSX.IntrinsicAttributes {
const attributes = str.match(ATTR_EXTRACTOR_R)
if (!attributes) {
return null
Expand All @@ -1243,8 +1249,10 @@ export function compiler(

const mappedKey = ATTRIBUTE_TO_JSX_PROP_MAP[key] || key
const normalizedValue = (map[mappedKey] = attributeValueToJSXPropValue(
tag,
key,
value
value,
options.sanitizer
))

if (
Expand Down Expand Up @@ -1366,7 +1374,7 @@ export function compiler(
parse(capture /*, parse, state*/) {
return {
// if capture[3] it's additional metadata
attrs: attrStringToMap(capture[3] || ''),
attrs: attrStringToMap('code', capture[3] || ''),
lang: capture[2] || undefined,
text: capture[4],
type: RuleType.codeBlock,
Expand Down Expand Up @@ -1409,13 +1417,13 @@ export function compiler(
order: Priority.HIGH,
parse(capture /*, parse*/) {
return {
target: `#${options.slugify(capture[1])}`,
target: `#${options.slugify(capture[1], slugify)}`,
text: capture[1],
}
},
render(node, output, state) {
return (
<a key={state.key} href={sanitizeUrl(node.target)}>
<a key={state.key} href={options.sanitizer(node.target, 'a', 'href')}>
<sup key={state.key}>{node.text}</sup>
</a>
)
Expand Down Expand Up @@ -1450,7 +1458,7 @@ export function compiler(
parse(capture, parse, state) {
return {
children: parseInline(parse, capture[2], state),
id: options.slugify(capture[2]),
id: options.slugify(capture[2], slugify),
level: capture[1].length as MarkdownToJSX.HeadingNode['level'],
}
},
Expand Down Expand Up @@ -1495,10 +1503,14 @@ export function compiler(
const noInnerParse =
DO_NOT_PROCESS_HTML_ELEMENTS.indexOf(tagName) !== -1

const tag = (
noInnerParse ? tagName : capture[1]
).trim() as MarkdownToJSX.HTMLTags

const ast = {
attrs: attrStringToMap(capture[2]),
attrs: attrStringToMap(tag, capture[2]),
noInnerParse: noInnerParse,
tag: (noInnerParse ? tagName : capture[1]).trim(),
tag,
} as {
attrs: ReturnType<typeof attrStringToMap>
children?: ReturnType<MarkdownToJSX.NestedParser> | undefined
Expand Down Expand Up @@ -1539,9 +1551,11 @@ export function compiler(
match: anyScopeRegex(HTML_SELF_CLOSING_ELEMENT_R),
order: Priority.HIGH,
parse(capture /*, parse, state*/) {
const tag = capture[1].trim() as MarkdownToJSX.HTMLTags

return {
attrs: attrStringToMap(capture[2] || ''),
tag: capture[1].trim(),
attrs: attrStringToMap(tag, capture[2] || ''),
tag,
}
},
render(node, output, state) {
Expand Down Expand Up @@ -1574,7 +1588,7 @@ export function compiler(
key={state.key}
alt={node.alt || undefined}
title={node.title || undefined}
src={sanitizeUrl(node.target)}
src={options.sanitizer(node.target, 'img', 'src')}
/>
)
},
Expand All @@ -1596,7 +1610,11 @@ export function compiler(
},
render(node, output, state) {
return (
<a key={state.key} href={sanitizeUrl(node.target)} title={node.title}>
<a
key={state.key}
href={options.sanitizer(node.target, 'a', 'href')}
title={node.title}
>
{output(node.children, state)}
</a>
)
Expand Down Expand Up @@ -1725,7 +1743,7 @@ export function compiler(
<img
key={state.key}
alt={node.alt}
src={sanitizeUrl(refs[node.ref].target)}
src={options.sanitizer(refs[node.ref].target, 'img', 'src')}
title={refs[node.ref].title}
/>
) : null
Expand All @@ -1749,7 +1767,7 @@ export function compiler(
return refs[node.ref] ? (
<a
key={state.key}
href={sanitizeUrl(refs[node.ref].target)}
href={options.sanitizer(refs[node.ref].target, 'a', 'href')}
title={refs[node.ref].title}
>
{output(node.children, state)}
Expand Down Expand Up @@ -1934,7 +1952,10 @@ export function compiler(
<footer key="footer">
{footnotes.map(function createFootnote(def) {
return (
<div id={options.slugify(def.identifier)} key={def.identifier}>
<div
id={options.slugify(def.identifier, slugify)}
key={def.identifier}
>
{def.identifier}
{emitter(parser(def.footnote, { inline: true }))}
</div>
Expand Down Expand Up @@ -2375,11 +2396,20 @@ export namespace MarkdownToJSX {
state: State
) => React.ReactChild

/**
* Override the built-in sanitizer function for URLs, etc., if desired. The built-in version is available as a library export called `sanitizer`.
*/
sanitizer: (
value: string,
tag: HTMLTags,
attribute: string
) => string | null

/**
* Override normalization of non-URI-safe characters for use in generating
* HTML IDs for anchor linking purposes.
*/
slugify: (source: string) => string
slugify: (input: string, defaultFn: (input: string) => string) => string

/**
* Declare the type of the wrapper to be used when there are multiple
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@
"size-limit": [
{
"path": "./dist/index.module.js",
"limit": "6.1 kB"
"limit": "6.2 kB"
},
{
"path": "./dist/index.modern.js",
"limit": "6.1 kB"
"limit": "6.2 kB"
}
],
"jest": {
Expand Down
Loading