Optionally disable HTML parsing (#278)

* Add disableParsingRawHTML option to disable parsing raw HTML
* Update README with usage of disableParsingRawHTML
* Tests for inline JSX and overrides
* Bump the allowed size by a few bytes to accommodate this change
quantizor · Feb 5, 2020 · 93cccf1 · 93cccf1
1 parent 3481d93
commit 93cccf1
Show file tree

Hide file tree

Showing 4 changed files with 158 additions and 59 deletions.
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ The most lightweight, customizable React markdown component.
         - [options.createElement - Custom React.createElement behavior](#optionscreateelement---custom-reactcreateelement-behavior)
         - [options.slugify](#optionsslugify)
         - [options.namedCodesToUnicode](#optionsnamedcodestounicode)
+        - [options.disableParsingRawHTML](#optionsdisableparsingrawhtml)
     - [Getting the smallest possible bundle size](#getting-the-smallest-possible-bundle-size)
     - [Usage with Preact](#usage-with-preact)
 - [Gotchas](#gotchas)
@@ -390,6 +391,24 @@ compiler('This text is &le; than this text.', namedCodesToUnicode: {
 <p>This text is ≤ than this text.</p>
 ```
 
+#### options.disableParsingRawHTML
+
+By default, raw HTML is parsed to JSX. This behavior can be disabled with this option.
+
+```jsx
+<Markdown options={{ disableParsingRawHTML: true }}>
+    This text has <span>html</span> in it but it won't be rendered
+</Markdown>;
+
+// or
+
+compiler("This text has <span>html</span> in it but it won't be rendered", { disableParsingRawHTML: true });
+
+// renders:
+
+<span>This text has &lt;span&gt;html&lt;/span&gt; in it but it won't be rendered</span>
+```
+
 ### Getting the smallest possible bundle size
 
 Many development conveniences are placed behind `process.env.NODE_ENV !== "production"` conditionals. When bundling your app, it's a good idea to replace these code snippets such that a minifier (like uglify) can sweep them away and leave a smaller overall bundle.

diff --git a/index.compiler.spec.js b/index.compiler.spec.js
@@ -2820,6 +2820,46 @@ fun main() {
   </p>
 </div>
 
+`);
+  });
+
+  it('should not render html if disableParsingRawHTML is true', () => {
+    render(
+      compiler(
+          'Text with <span>html</span> inside',
+          {
+            disableParsingRawHTML: true
+          }
+      )
+    );
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+
+<span data-reactroot>
+  Text with &lt;span&gt;html&lt;/span&gt; inside
+</span>
+
+`);
+  });
+
+  it('should render html if disableParsingRawHTML is false', () => {
+    render(
+      compiler(
+          'Text with <span>html</span> inside',
+          {
+            disableParsingRawHTML: false
+          }
+      )
+    );
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+
+<span data-reactroot>
+  Text with
+  <span>
+    html
+  </span>
+  inside
+</span>
+
 `);
   });
 });
@@ -3300,6 +3340,44 @@ describe('overrides', () => {
        value="on"
 >
 
+`);
+  });
+
+  it('should substitute the appropriate JSX tag if given a component and disableParsingRawHTML is true', () => {
+    const FakeParagraph = ({ children }) => <p className="foo">{children}</p>;
+
+    render(
+      compiler('Hello.\n\n', {
+        overrides: { p: { component: FakeParagraph } },
+        options: { disableParsingRawHTML: true }
+      })
+    );
+
+    expect(root.children[0].className).toBe('foo');
+    expect(root.children[0].textContent).toBe('Hello.');
+  });
+
+  it('should substitute the appropriate JSX tag inline if given a component and disableParsingRawHTML is true', () => {
+    const FakeSpan = ({ children }) => <span className="foo">{children}</span>;
+
+    render(
+      compiler('Hello.\n\n<FakeSpan>I am a fake span</FakeSpan>', {
+        overrides: { FakeSpan },
+        options: { disableParsingRawHTML: true }
+      })
+    );
+
+    expect(root.innerHTML).toMatchInlineSnapshot(`
+
+<div data-reactroot>
+  <p>
+    Hello.
+  </p>
+  <span class="foo">
+    I am a fake span
+  </span>
+</div>
+
 `);
   });
 });

diff --git a/index.js b/index.js
@@ -1030,47 +1030,6 @@ export function compiler(markdown, options) {
       },
     },
 
-    htmlBlock: {
-      /**
-       * find the first matching end tag and process the interior
-       */
-      match: anyScopeRegex(HTML_BLOCK_ELEMENT_R),
-      order: PARSE_PRIORITY_HIGH,
-      parse(capture, parse, state) {
-        const [, whitespace] = capture[3].match(HTML_LEFT_TRIM_AMOUNT_R);
-        const trimmer = new RegExp(`^${whitespace}`, 'gm');
-        const trimmed = capture[3].replace(trimmer, '');
-
-        const parseFunc = containsBlockSyntax(trimmed)
-          ? parseBlock
-          : parseInline;
-
-        const tagName = capture[1].toLowerCase();
-        const noInnerParse =
-          DO_NOT_PROCESS_HTML_ELEMENTS.indexOf(tagName) !== -1;
-
-        return {
-          attrs: attrStringToMap(capture[2]),
-          /**
-           * if another html block is detected within, parse as block,
-           * otherwise parse as inline to pick up any further markdown
-           */
-          content: noInnerParse ? capture[3] : parseFunc(parse, trimmed, state),
-
-          noInnerParse,
-
-          tag: noInnerParse ? tagName : capture[1]
-        };
-      },
-      react(node, output, state) {
-        return (
-          <node.tag key={state.key} {...node.attrs}>
-            {node.noInnerParse ? node.content : output(node.content, state)}
-          </node.tag>
-        );
-      },
-    },
-
     htmlComment: {
       match: anyScopeRegex(HTML_COMMENT_R),
       order: PARSE_PRIORITY_HIGH,
@@ -1080,23 +1039,6 @@ export function compiler(markdown, options) {
       react: renderNothing,
     },
 
-    htmlSelfClosing: {
-      /**
-       * find the first matching end tag and process the interior
-       */
-      match: anyScopeRegex(HTML_SELF_CLOSING_ELEMENT_R),
-      order: PARSE_PRIORITY_HIGH,
-      parse(capture /*, parse, state*/) {
-        return {
-          attrs: attrStringToMap(capture[2] || ''),
-          tag: capture[1],
-        };
-      },
-      react(node, output, state) {
-        return <node.tag {...node.attrs} key={state.key} />;
-      },
-    },
-
     image: {
       match: simpleInlineRegex(IMAGE_R),
       order: PARSE_PRIORITY_HIGH,
@@ -1553,6 +1495,66 @@ export function compiler(markdown, options) {
   //     };
   // });
 
+  if (options.disableParsingRawHTML !== true) {
+    rules.htmlBlock = {
+      /**
+       * find the first matching end tag and process the interior
+       */
+      match: anyScopeRegex(HTML_BLOCK_ELEMENT_R),
+      order: PARSE_PRIORITY_HIGH,
+      parse(capture, parse, state) {
+        const [, whitespace] = capture[3].match(HTML_LEFT_TRIM_AMOUNT_R);
+        const trimmer = new RegExp(`^${whitespace}`, 'gm');
+        const trimmed = capture[3].replace(trimmer, '');
+
+        const parseFunc = containsBlockSyntax(trimmed)
+          ? parseBlock
+          : parseInline;
+
+        const tagName = capture[1].toLowerCase();
+        const noInnerParse =
+          DO_NOT_PROCESS_HTML_ELEMENTS.indexOf(tagName) !== -1;
+
+        return {
+          attrs: attrStringToMap(capture[2]),
+          /**
+           * if another html block is detected within, parse as block,
+           * otherwise parse as inline to pick up any further markdown
+           */
+          content: noInnerParse ? capture[3] : parseFunc(parse, trimmed, state),
+
+          noInnerParse,
+
+          tag: noInnerParse ? tagName : capture[1]
+        };
+      },
+      react(node, output, state) {
+        return (
+          <node.tag key={state.key} {...node.attrs}>
+            {node.noInnerParse ? node.content : output(node.content, state)}
+          </node.tag>
+        );
+      },
+    }
+
+    rules.htmlSelfClosing = {
+      /**
+       * find the first matching end tag and process the interior
+       */
+      match: anyScopeRegex(HTML_SELF_CLOSING_ELEMENT_R),
+      order: PARSE_PRIORITY_HIGH,
+      parse(capture /*, parse, state*/) {
+        return {
+          attrs: attrStringToMap(capture[2] || ''),
+          tag: capture[1],
+        };
+      },
+      react(node, output, state) {
+        return <node.tag {...node.attrs} key={state.key} />;
+      },
+    };
+  }
+
   const parser = parserFor(rules);
   const emitter = reactFor(ruleOutput(rules));
 

diff --git a/package.json b/package.json
@@ -90,7 +90,7 @@
   "size-limit": [
     {
       "path": "dist/cjs.js",
-      "limit": "5.25 kB"
+      "limit": "5.28 kB"
     }
   ],
   "jest": {