From 4bb64a0b6f575e0f0c98d2ce72941fb17b9a509b Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Fri, 17 Jan 2025 18:34:01 -0500 Subject: [PATCH 01/13] chore: use jest watcher plugin --- package.json | 5 +++++ yarn.lock | 55 +++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index e6c87870..0aa6e6ba 100644 --- a/package.json +++ b/package.json @@ -66,6 +66,7 @@ "jest": "^29.7.0", "jest-environment-jsdom": "^29.7.0", "jest-serializer-html": "^7.1.0", + "jest-watch-typeahead": "^2.2.2", "markdown-it": "^14.0.0", "microbundle": "^0.15.1", "microtime": "^3.1.1", @@ -122,6 +123,10 @@ }, "snapshotSerializers": [ "jest-serializer-html" + ], + "watchPlugins": [ + "jest-watch-typeahead/filename", + "jest-watch-typeahead/testname" ] }, "packageManager": "yarn@4.6.0" diff --git a/yarn.lock b/yarn.lock index e336571b..324f9397 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2849,6 +2849,13 @@ __metadata: languageName: node linkType: hard +"ansi-escapes@npm:^6.0.0": + version: 6.2.1 + resolution: "ansi-escapes@npm:6.2.1" + checksum: 10/3b064937dc8a0645ed8094bc8b09483ee718f3aa3139746280e6c2ea80e28c0a3ce66973d0f33e88e60021abbf67e5f877deabfc810e75edf8a19dfa128850be + languageName: node + linkType: hard + "ansi-regex@npm:^2.0.0": version: 2.1.1 resolution: "ansi-regex@npm:2.1.1" @@ -3512,6 +3519,13 @@ __metadata: languageName: node linkType: hard +"chalk@npm:^5.2.0": + version: 5.4.1 + resolution: "chalk@npm:5.4.1" + checksum: 10/29df3ffcdf25656fed6e95962e2ef86d14dfe03cd50e7074b06bad9ffbbf6089adbb40f75c00744d843685c8d008adaf3aed31476780312553caf07fa86e5bc7 + languageName: node + linkType: hard + "char-regex@npm:^1.0.2": version: 1.0.2 resolution: "char-regex@npm:1.0.2" @@ -3519,6 +3533,13 @@ __metadata: languageName: node linkType: hard +"char-regex@npm:^2.0.0": + version: 2.0.2 + resolution: "char-regex@npm:2.0.2" + checksum: 
10/7d6dc918d215761ab389e799b9b119778722f384c8265ccb3c3025c9b219aea942f497fc7922d3470fc270987927719c5fa78d6337a5ebe9a9dc4c5a49099eb2 + languageName: node + linkType: hard + "chardet@npm:^0.7.0": version: 0.7.0 resolution: "chardet@npm:0.7.0" @@ -6427,7 +6448,7 @@ __metadata: languageName: node linkType: hard -"jest-regex-util@npm:^29.6.3": +"jest-regex-util@npm:^29.0.0, jest-regex-util@npm:^29.6.3": version: 29.6.3 resolution: "jest-regex-util@npm:29.6.3" checksum: 10/0518beeb9bf1228261695e54f0feaad3606df26a19764bc19541e0fc6e2a3737191904607fb72f3f2ce85d9c16b28df79b7b1ec9443aa08c3ef0e9efda6f8f2a @@ -6585,7 +6606,24 @@ __metadata: languageName: node linkType: hard -"jest-watcher@npm:^29.7.0": +"jest-watch-typeahead@npm:^2.2.2": + version: 2.2.2 + resolution: "jest-watch-typeahead@npm:2.2.2" + dependencies: + ansi-escapes: "npm:^6.0.0" + chalk: "npm:^5.2.0" + jest-regex-util: "npm:^29.0.0" + jest-watcher: "npm:^29.0.0" + slash: "npm:^5.0.0" + string-length: "npm:^5.0.1" + strip-ansi: "npm:^7.0.1" + peerDependencies: + jest: ^27.0.0 || ^28.0.0 || ^29.0.0 + checksum: 10/8685277ce1b96ec775882111ec55ce90a862cc57acb21ce94f8ac44a25f6fb34c7a7ce119e07b2d8ff5353a8d9e4f981cf96fa35532f71ddba6ca8fedc05bd8e + languageName: node + linkType: hard + +"jest-watcher@npm:^29.0.0, jest-watcher@npm:^29.7.0": version: 29.7.0 resolution: "jest-watcher@npm:29.7.0" dependencies: @@ -7096,6 +7134,7 @@ __metadata: jest: "npm:^29.7.0" jest-environment-jsdom: "npm:^29.7.0" jest-serializer-html: "npm:^7.1.0" + jest-watch-typeahead: "npm:^2.2.2" markdown-it: "npm:^14.0.0" microbundle: "npm:^0.15.1" microtime: "npm:^3.1.1" @@ -9288,7 +9327,7 @@ __metadata: languageName: node linkType: hard -"slash@npm:^5.1.0": +"slash@npm:^5.0.0, slash@npm:^5.1.0": version: 5.1.0 resolution: "slash@npm:5.1.0" checksum: 10/2c41ec6fb1414cd9bba0fa6b1dd00e8be739e3fe85d079c69d4b09ca5f2f86eafd18d9ce611c0c0f686428638a36c272a6ac14799146a8295f259c10cc45cde4 @@ -9489,6 +9528,16 @@ __metadata: languageName: node linkType: hard 
+"string-length@npm:^5.0.1": + version: 5.0.1 + resolution: "string-length@npm:5.0.1" + dependencies: + char-regex: "npm:^2.0.0" + strip-ansi: "npm:^7.0.1" + checksum: 10/71f73b8c8a743e01dcd001bcf1b197db78d5e5e53b12bd898cddaf0961be09f947dfd8c429783db3694b55b05cb5a51de6406c5085ff1aaa10c4771440c8396d + languageName: node + linkType: hard + "string-width-cjs@npm:string-width@^4.2.0": version: 4.2.2 resolution: "string-width@npm:4.2.2" From 68cee95e4c23f045af7757826391088ea20a546e Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 13:58:19 -0500 Subject: [PATCH 02/13] chore: add latest markdown-to-jsx to comparison for benchmarking --- benchmark.js | 4 +++- package.json | 1 + yarn.lock | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/benchmark.js b/benchmark.js index bd8cdecc..38d389e2 100644 --- a/benchmark.js +++ b/benchmark.js @@ -3,6 +3,7 @@ import cliProgress from 'cli-progress' import * as fs from 'fs' import SimpleMarkdown from 'simple-markdown' import MarkdownIt from 'markdown-it' +import { compiler as latestCompiler } from 'markdown-to-jsx-latest' import { compiler } from './dist/index.module.js' const mdIt = new MarkdownIt() @@ -20,7 +21,8 @@ let totalCycles // add tests suite - .addFunction('markdown-to-jsx', input => compiler(input)) + .addFunction('markdown-to-jsx (next)', input => compiler(input)) + .addFunction('markdown-to-jsx (latest)', input => latestCompiler(input)) .addFunction('simple-markdown', input => SimpleMarkdown.defaultReactOutput(SimpleMarkdown.defaultBlockParse(input)) ) diff --git a/package.json b/package.json index 0aa6e6ba..845b92b7 100644 --- a/package.json +++ b/package.json @@ -68,6 +68,7 @@ "jest-serializer-html": "^7.1.0", "jest-watch-typeahead": "^2.2.2", "markdown-it": "^14.0.0", + "markdown-to-jsx-latest": "npm:markdown-to-jsx@latest", "microbundle": "^0.15.1", "microtime": "^3.1.1", "mkdirp": "^3.0.1", diff --git a/yarn.lock b/yarn.lock index 324f9397..90ffbcd4 100644 --- a/yarn.lock 
+++ b/yarn.lock @@ -7114,6 +7114,15 @@ __metadata: languageName: node linkType: hard +"markdown-to-jsx-latest@npm:markdown-to-jsx@latest": + version: 7.7.3 + resolution: "markdown-to-jsx@npm:7.7.3" + peerDependencies: + react: ">= 0.14.0" + checksum: 10/b71383b98e6254bda2c94ffb0744619c1d89714cdff449defb330e18942c565fc2203d9ba0235aff7bb65a52656b850e4e42d62c65582e500a6b11bd78c6f04b + languageName: node + linkType: hard + "markdown-to-jsx@workspace:.": version: 0.0.0-use.local resolution: "markdown-to-jsx@workspace:." @@ -7136,6 +7145,7 @@ __metadata: jest-serializer-html: "npm:^7.1.0" jest-watch-typeahead: "npm:^2.2.2" markdown-it: "npm:^14.0.0" + markdown-to-jsx-latest: "npm:markdown-to-jsx@latest" microbundle: "npm:^0.15.1" microtime: "npm:^3.1.1" mkdirp: "npm:^3.0.1" From bc952eb20a836f8d9ec575e6d2f0b76edc4f4a68 Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 14:16:04 -0500 Subject: [PATCH 03/13] chore: adjust perf logging --- index.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/index.tsx b/index.tsx index 96f4b515..5f7de399 100644 --- a/index.tsx +++ b/index.tsx @@ -1955,10 +1955,11 @@ export function compiler( // const result = parse(...args) // const delta = performance.now() - start - // if (delta > 5) - // console.warn(`Slow parse for ${key}: ${delta.toFixed(3)}ms`) - - // console.log(`${key}:parse`, `${delta.toFixed(3)}ms`, args[0]) + // console[delta > 5 ? 
'warn' : 'log']( + // `${key}:parse`, + // `${delta.toFixed(3)}ms`, + // args[0] + // ) // return result // } From 47ef46ae551e657c87a23de223c7c0cd2774f32b Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 15:46:07 -0500 Subject: [PATCH 04/13] refactor: optimize nested parsing effort --- .changeset/nervous-suns-roll.md | 15 +++++++ index.tsx | 76 +++++++++++++++++++++------------ 2 files changed, 64 insertions(+), 27 deletions(-) create mode 100644 .changeset/nervous-suns-roll.md diff --git a/.changeset/nervous-suns-roll.md b/.changeset/nervous-suns-roll.md new file mode 100644 index 00000000..8cc7f7a7 --- /dev/null +++ b/.changeset/nervous-suns-roll.md @@ -0,0 +1,15 @@ +--- +'markdown-to-jsx': patch +--- + +Optimize regexes and parsing to do less work. + +``` ++--------------------------+------------------------+-----------------------+ +| │ simple markdown string │ large markdown string | ++--------------------------+------------------------+-----------------------+ +| markdown-to-jsx (next) │ 86,340 ops/sec │ 307 ops/sec | ++--------------------------+------------------------+-----------------------+ +| markdown-to-jsx (latest) │ 85,247 ops/sec │ 296 ops/sec | ++--------------------------+------------------------+-----------------------+ +``` diff --git a/index.tsx b/index.tsx index 5f7de399..7aa1c254 100644 --- a/index.tsx +++ b/index.tsx @@ -426,7 +426,7 @@ function generateListRule( : UNORDERED_LIST_ITEM_PREFIX_R return { - match(source, state) { + match: allowInline(function (source, state) { // We only want to break into a list if we are at the start of a // line. This is to avoid parsing "hi * there" with "* there" // becoming a part of a list. @@ -436,16 +436,16 @@ function generateListRule( // in which case we can parse with inline scope, but need to allow // nested lists inside this inline scope. 
const isStartOfLine = LIST_LOOKBEHIND_R.exec(state.prevCapture) - const isListBlock = state.list || (!state.inline && !state.simple) + const isListAllowed = state.list || (!state.inline && !state.simple) - if (isStartOfLine && isListBlock) { + if (isStartOfLine && isListAllowed) { source = isStartOfLine[1] + source return LIST_R.exec(source) } else { return null } - }, + }), order: Priority.HIGH, parse(capture, parse, state) { const bullet = capture[2] @@ -842,6 +842,10 @@ function parserFor( state: MarkdownToJSX.State ): MarkdownToJSX.ParserResult[] { let result = [] + let rule + let ruleType = '' + let parsed + let currCaptureString = '' state.prevCapture = state.prevCapture || '' @@ -852,20 +856,25 @@ function parserFor( while (source) { let i = 0 while (i < ruleList.length) { - const ruleType = ruleList[i] - const rule = rules[ruleType] + ruleType = ruleList[i] + rule = rules[ruleType] + + if (state.inline && !rule.match.inline) { + i++ + continue + } const capture = rule.match(source, state) if (capture) { - const currCaptureString = capture[0] + currCaptureString = capture[0] // retain what's been processed so far for lookbacks state.prevCapture += currCaptureString source = source.substring(currCaptureString.length) - const parsed = rule.parse(capture, nestedParse, state) + parsed = rule.parse(capture, nestedParse, state) // We also let rules override the default type of // their parsed node if they would like to, so that @@ -894,26 +903,39 @@ function parserFor( } } +/** + * Marks a matcher function as eligible for being run inside an inline context; + * allows us to do a little less work in the nested parser. 
+ */ +function allowInline(fn: T) { + fn.inline = 1 + + return fn +} + // Creates a match function for an inline scoped or simple element from a regex function inlineRegex(regex: RegExp) { - return function match(source, state: MarkdownToJSX.State) { + return allowInline(function match(source, state: MarkdownToJSX.State) { if (state.inline) { return regex.exec(source) } else { return null } - } + }) } // basically any inline element except links function simpleInlineRegex(regex: RegExp) { - return function match(source: string, state: MarkdownToJSX.State) { + return allowInline(function match( + source: string, + state: MarkdownToJSX.State + ) { if (state.inline || state.simple) { return regex.exec(source) } else { return null } - } + }) } // Creates a match function for a block scoped element from a regex @@ -929,9 +951,9 @@ function blockRegex(regex: RegExp) { // Creates a match function from a regex, ignoring block/inline scope function anyScopeRegex(regex: RegExp) { - return function match(source: string /*, state*/) { + return allowInline(function match(source: string /*, state*/) { return regex.exec(source) - } + }) } function matchParagraph(source: string, state: MarkdownToJSX.State) { @@ -1671,13 +1693,13 @@ export function compiler( }, [RuleType.linkBareUrlDetector]: { - match: (source, state) => { + match: allowInline((source, state) => { if (state.inAnchor || options.disableAutoLink) { return null } return inlineRegex(LINK_AUTOLINK_BARE_URL_R)(source, state) - }, + }), order: Priority.MAX, parse(capture /*, parse, state*/) { return { @@ -1739,7 +1761,7 @@ export function compiler( }, [RuleType.paragraph]: { - match: matchParagraph, + match: allowInline(matchParagraph), order: Priority.LOW, parse: parseCaptureInline, render(node, output, state) { @@ -1937,18 +1959,18 @@ export function compiler( // Object.keys(rules).forEach(key => { // let { match: match, parse: parse } = rules[key] - // rules[key].match = (...args) => { - // const start = 
performance.now() - // const result = match(...args) - // const delta = performance.now() - start + // // rules[key].match = (...args) => { + // // const start = performance.now() + // // const result = match(...args) + // // const delta = performance.now() - start - // if (delta > 5) - // console.warn( - // `Slow match for ${key}: ${delta.toFixed(3)}ms, input: ${args[0]}` - // ) + // // if (delta > 5) + // // console.warn( + // // `Slow match for ${key}: ${delta.toFixed(3)}ms, input: ${args[0]}` + // // ) - // return result - // } + // // return result + // // } // rules[key].parse = (...args) => { // const start = performance.now() From 69dbd925234e3dc62e3f427535983e0d9579d19b Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 15:46:43 -0500 Subject: [PATCH 05/13] test: fix test to better match the title --- index.compiler.spec.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx index 1bc9b952..6e740eff 100644 --- a/index.compiler.spec.tsx +++ b/index.compiler.spec.tsx @@ -535,7 +535,7 @@ describe('inline textual elements', () => { it('replaces custom named character codes with unicode equivalents so React will render correctly', () => { render( - compiler('Apostrophe's and less than ≤ equal', { + compiler('Apostrophe's and ≤ equal', { namedCodesToUnicode: { le: '\u2264', '#39': '\u0027', @@ -545,7 +545,7 @@ describe('inline textual elements', () => { expect(root.innerHTML).toMatchInlineSnapshot(` - Apostrophe's and less than ≤ equal + Apostrophe's and ≤ equal `) }) From 7abb3b6b73ffdf021dd0d692e16c0582b69e64ee Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 15:51:27 -0500 Subject: [PATCH 06/13] refactor: optimize splitter --- .changeset/great-rice-return.md | 5 +++++ index.tsx | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 .changeset/great-rice-return.md diff --git a/.changeset/great-rice-return.md 
b/.changeset/great-rice-return.md new file mode 100644 index 00000000..1837f081 --- /dev/null +++ b/.changeset/great-rice-return.md @@ -0,0 +1,5 @@ +--- +'markdown-to-jsx': patch +--- + +Further optimize the plain text splitting regex. diff --git a/index.tsx b/index.tsx index 7aa1c254..944c8779 100644 --- a/index.tsx +++ b/index.tsx @@ -321,8 +321,11 @@ const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`) const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/ -const TEXT_PLAIN_R = - /^[\s\S]+?(?=[^0-9A-Z\s\u00c0-\uffff&#;.()'"]|\d+\.|\n\n| {2,}\n|\w+:\S|$)/i +/** + * Always take the first character, then eagerly take text until a double space + * (potential line break) or some markdown-like punctuation is reached. + */ +const TEXT_PLAIN_R = /^([\s\S](?:(?! |[0-9]\.)[^*_~\-\n<`\\\[!])*)/ const TRIM_STARTING_NEWLINES = /^\n+/ From 6de554305527e64a07fd561dce1c7c990897b507 Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 16:23:20 -0500 Subject: [PATCH 07/13] refactor: remove redundant matchers during paragraph matching --- .changeset/grumpy-kids-attack.md | 5 +++++ index.tsx | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 .changeset/grumpy-kids-attack.md diff --git a/.changeset/grumpy-kids-attack.md b/.changeset/grumpy-kids-attack.md new file mode 100644 index 00000000..ec8386e0 --- /dev/null +++ b/.changeset/grumpy-kids-attack.md @@ -0,0 +1,5 @@ +--- +'markdown-to-jsx': patch +--- + +Remove redundant detectors when processing paragraphs. 
diff --git a/index.tsx b/index.tsx index 944c8779..bc1c579b 100644 --- a/index.tsx +++ b/index.tsx @@ -560,11 +560,8 @@ const NON_PARAGRAPH_BLOCK_SYNTAXES = [ HEADING_R, HEADING_SETEXT_R, HEADING_ATX_COMPLIANT_R, - HTML_COMMENT_R, NP_TABLE_R, - ORDERED_LIST_ITEM_R, ORDERED_LIST_R, - UNORDERED_LIST_ITEM_R, UNORDERED_LIST_R, ] @@ -572,6 +569,7 @@ const BLOCK_SYNTAXES = [ ...NON_PARAGRAPH_BLOCK_SYNTAXES, PARAGRAPH_R, HTML_BLOCK_ELEMENT_R, + HTML_COMMENT_R, HTML_SELF_CLOSING_ELEMENT_R, ] From 9332d501ad4862517e66c5b4a13eb0db09b3697a Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 20 Jan 2025 17:01:17 -0500 Subject: [PATCH 08/13] refactor: improve inline code performance --- .changeset/plenty-dodos-collect.md | 5 +++++ .prettierignore | 1 + __snapshots__/index.compiler.spec.tsx.snap | 6 +++--- fixture.md | 2 +- index.tsx | 15 ++++++++++----- 5 files changed, 20 insertions(+), 9 deletions(-) create mode 100644 .changeset/plenty-dodos-collect.md create mode 100644 .prettierignore diff --git a/.changeset/plenty-dodos-collect.md b/.changeset/plenty-dodos-collect.md new file mode 100644 index 00000000..2ab20106 --- /dev/null +++ b/.changeset/plenty-dodos-collect.md @@ -0,0 +1,5 @@ +--- +'markdown-to-jsx': patch +--- + +Rework inline code syntax handling, handle escaped characters in code blocks correctly so they render without the backslash. diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..0aed9848 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +fixture.md diff --git a/__snapshots__/index.compiler.spec.tsx.snap b/__snapshots__/index.compiler.spec.tsx.snap index ceabbfa8..1ca360b3 100644 --- a/__snapshots__/index.compiler.spec.tsx.snap +++ b/__snapshots__/index.compiler.spec.tsx.snap @@ -924,7 +924,7 @@ line. To avoid this, you can backslash-escape the period:

     
-      1986\\. What a great season.
+      1986. What a great season.
     
   

@@ -1433,7 +1433,7 @@ escape it:

     
-      \\*this text is surrounded by literal asterisks\\*
+      *this text is surrounded by literal asterisks*
     
   

@@ -1688,7 +1688,7 @@ backslashes before the asterisks, like this:

     
-      \\*literal asterisks\\*
+      *literal asterisks*
     
   

diff --git a/fixture.md b/fixture.md index 9434ec56..ee056899 100644 --- a/fixture.md +++ b/fixture.md @@ -745,7 +745,7 @@ escape it:

Code

-To indicate a span of code, wrap it with backtick quotes (`` ` ``). +To indicate a span of code, wrap it with backtick quotes (`\``). Unlike a pre-formatted code block, a code span indicates code within a normal paragraph. For example: diff --git a/index.tsx b/index.tsx index bc1c579b..5f4ea85b 100644 --- a/index.tsx +++ b/index.tsx @@ -190,7 +190,7 @@ const BREAK_THEMATIC_R = /^(?:( *[-*_])){3,} *(?:\n *)+\n/ const CODE_BLOCK_FENCED_R = /^(?: {1,3})?(`{3,}|~{3,}) *(\S+)? *([^\n]*?)?\n([\s\S]*?)(?:\1\n?|$)/ const CODE_BLOCK_R = /^(?: {4}[^\n]+\n*)+(?:\n *)+\n?/ -const CODE_INLINE_R = /^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)/ +const CODE_INLINE_R = /^(`+)((?:\\`|[^`])+)\1/ const CONSECUTIVE_NEWLINE_R = /^(?:\n *)*\n/ const CR_NEWLINE_R = /\r\n?/g @@ -320,6 +320,7 @@ const TEXT_MARKED_R = new RegExp(`^==${INLINE_SKIP_R}==`) const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`) const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/ +const TEXT_UNESCAPE_R = /\\([^0-9A-Za-z\s])/g /** * Always take the first character, then eagerly take text until a double space @@ -460,6 +461,7 @@ function generateListRule( .match(LIST_ITEM_R) let lastItemWasAParagraph = false + const itemContent = items.map(function (item, i) { // We need to see how far indented the item is: const space = LIST_ITEM_PREFIX_R.exec(item)[0].length @@ -495,7 +497,7 @@ function generateListRule( containsBlocks || (isLastItem && lastItemWasAParagraph) lastItemWasAParagraph = thisItemIsAParagraph - // backup our state for restoration afterwards. We're going to + // backup our state for restoration afterwards. We're going to // want to set state.list to true, and state.inline depending // on our list's looseness. 
const oldStateInline = state.inline @@ -1400,7 +1402,10 @@ export function compiler( parse(capture /*, parse, state*/) { return { lang: undefined, - text: capture[0].replace(/^ {4}/gm, '').replace(/\n+$/, ''), + text: capture[0] + .replace(/^ {4}/gm, '') + .replace(/\n+$/, '') + .replaceAll(TEXT_UNESCAPE_R, '$1'), } }, @@ -1430,7 +1435,7 @@ export function compiler( // if capture[3] it's additional metadata attrs: attrStringToMap('code', capture[3] || ''), lang: capture[2] || undefined, - text: capture[4], + text: capture[4].replaceAll(TEXT_UNESCAPE_R, '$1'), type: RuleType.codeBlock, } }, @@ -1441,7 +1446,7 @@ export function compiler( order: Priority.LOW, parse(capture /*, parse, state*/) { return { - text: capture[2], + text: capture[2].replaceAll(TEXT_UNESCAPE_R, '$1'), } }, render(node, output, state) { From f3489ffab12fa785161a2c1946d0acfd68509d63 Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 17 Feb 2025 13:14:35 -0500 Subject: [PATCH 09/13] replace trimend with more compatible variant --- index.compiler.spec.tsx | 4 ++-- index.tsx | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx index 6e740eff..83464a5a 100644 --- a/index.compiler.spec.tsx +++ b/index.compiler.spec.tsx @@ -480,7 +480,7 @@ describe('inline textual elements', () => { render( compiler( - '*This should not misinterpret the asterisk ~~*~~ in the strikethrough.*' + String.raw`*This should not misinterpret the asterisk ~~\*~~ in the strikethrough.*` ) ) @@ -512,7 +512,7 @@ describe('inline textual elements', () => { render( compiler( - '_This should not misinterpret the under_score that forms part of a word._' + `_This should not misinterpret the under\\_score that forms part of a word._` ) ) diff --git a/index.tsx b/index.tsx index 5f4ea85b..374ca141 100644 --- a/index.tsx +++ b/index.tsx @@ -296,7 +296,7 @@ const TABLE_RIGHT_ALIGN = /^ *-+: *$/ * and therefore miss content that should have been 
included. */ const INLINE_SKIP_R = - '((?:\\[.*?\\][([].*?[)\\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~~.*?~~|==.*?==|.|\\n)*?)' + '((?:\\[.*?\\][([].*?[)\\]]|<.*?>(?:.*?<.*?>)?|`.*?`|\\\\\\1|[\\s\\S])+?)' /** * Detect a sequence like **foo** or __foo__. Note that bold has a higher priority @@ -307,17 +307,17 @@ const TEXT_BOLD_R = new RegExp(`^([*_])\\1${INLINE_SKIP_R}\\1\\1(?!\\1)`) /** * Detect a sequence like *foo* or _foo_. */ -const TEXT_EMPHASIZED_R = new RegExp(`^([*_])${INLINE_SKIP_R}\\1(?!\\1|\\w)`) +const TEXT_EMPHASIZED_R = new RegExp(`^([*_])${INLINE_SKIP_R}\\1(?!\\1)`) /** * Detect a sequence like ==foo==. */ -const TEXT_MARKED_R = new RegExp(`^==${INLINE_SKIP_R}==`) +const TEXT_MARKED_R = new RegExp(`^(==)${INLINE_SKIP_R}\\1`) /** * Detect a sequence like ~~foo~~. */ -const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`) +const TEXT_STRIKETHROUGHED_R = new RegExp(`^(~~)${INLINE_SKIP_R}\\1`) const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/ const TEXT_UNESCAPE_R = /\\([^0-9A-Za-z\s])/g @@ -575,6 +575,10 @@ const BLOCK_SYNTAXES = [ HTML_SELF_CLOSING_ELEMENT_R, ] +function trimEnd(str: string) { + return str.replace(/\s*$/, '') +} + function containsBlockSyntax(input: string) { return BLOCK_SYNTAXES.some(r => r.test(input)) } @@ -979,12 +983,14 @@ function matchParagraph(source: string, state: MarkdownToJSX.State) { return !!line.trim() }) - const captured = match.trimEnd() + const captured = trimEnd(match) if (captured == '') { return null } - return [match, captured] + // parseCaptureInline expects the inner content to be at index 2 + // because index 1 is the delimiter for text formatting syntaxes + return [match, , captured] } export function sanitizer(url: string): string { @@ -1074,7 +1080,7 @@ const parseCaptureInline: MarkdownToJSX.Parser<{ children: MarkdownToJSX.ParserResult[] }> = (capture, parse, state: MarkdownToJSX.State) => { return { - children: parseInline(parse, capture[1], state), + children: parseInline(parse, capture[2], state), } } @@ 
-1225,7 +1231,7 @@ export function compiler( parser( inline ? input - : `${input.trimEnd().replace(TRIM_STARTING_NEWLINES, '')}\n\n`, + : `${trimEnd(input).replace(TRIM_STARTING_NEWLINES, '')}\n\n`, { inline, } From b0a8f847ea6efdf583f830f1ff99eba534e3a056 Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 17 Feb 2025 13:19:28 -0500 Subject: [PATCH 10/13] simplify benchmarking for quick iteration against self --- benchmark.js | 16 +++++++++++----- package.json | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/benchmark.js b/benchmark.js index 38d389e2..5e84ab30 100644 --- a/benchmark.js +++ b/benchmark.js @@ -20,13 +20,19 @@ const bar = new cliProgress.SingleBar( let totalCycles // add tests -suite +const evals = suite .addFunction('markdown-to-jsx (next)', input => compiler(input)) .addFunction('markdown-to-jsx (latest)', input => latestCompiler(input)) - .addFunction('simple-markdown', input => - SimpleMarkdown.defaultReactOutput(SimpleMarkdown.defaultBlockParse(input)) - ) - .addFunction('markdown-it', input => mdIt.render(input)) + +if (process.argv.includes('--all')) { + evals + .addFunction('simple-markdown', input => + SimpleMarkdown.defaultReactOutput(SimpleMarkdown.defaultBlockParse(input)) + ) + .addFunction('markdown-it', input => mdIt.render(input)) +} + +evals .addInput('simple markdown string', ['_Hello_ **world**!']) .addInput('large markdown string', [fixture]) .on('start', () => { diff --git a/package.json b/package.json index 845b92b7..2142dd51 100644 --- a/package.json +++ b/package.json @@ -96,6 +96,7 @@ "test": "jest --verbose", "size": "size-limit", "benchmark": "node benchmark.js", + "benchmark:all": "node benchmark.js --all", "changeset-publish": "yarn build && changeset publish" }, "size-limit": [ From 8ea298943203b6e886c9a2fb7688cb1d7ff4f085 Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 17 Feb 2025 13:32:30 -0500 Subject: [PATCH 11/13] improve compatibility --- index.tsx | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/index.tsx b/index.tsx index 374ca141..43789e66 100644 --- a/index.tsx +++ b/index.tsx @@ -1411,7 +1411,7 @@ export function compiler( text: capture[0] .replace(/^ {4}/gm, '') .replace(/\n+$/, '') - .replaceAll(TEXT_UNESCAPE_R, '$1'), + .replace(TEXT_UNESCAPE_R, '$1'), } }, @@ -1441,7 +1441,7 @@ export function compiler( // if capture[3] it's additional metadata attrs: attrStringToMap('code', capture[3] || ''), lang: capture[2] || undefined, - text: capture[4].replaceAll(TEXT_UNESCAPE_R, '$1'), + text: capture[4].replace(TEXT_UNESCAPE_R, '$1'), type: RuleType.codeBlock, } }, @@ -1452,7 +1452,7 @@ export function compiler( order: Priority.LOW, parse(capture /*, parse, state*/) { return { - text: capture[2].replaceAll(TEXT_UNESCAPE_R, '$1'), + text: capture[2].replace(TEXT_UNESCAPE_R, '$1'), } }, render(node, output, state) { From 02b26e0b25c6ad809b4b66c038276706bd9700ca Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 17 Feb 2025 13:32:36 -0500 Subject: [PATCH 12/13] adjust package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 2142dd51..3f09da3a 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "jsx", "html" ], - "author": "Evan Jacobs ", + "author": "Evan Jacobs ", "repository": { "type": "git", "url": "git+https://github.com/quantizor/markdown-to-jsx.git" From 0bee64d38fecaf2186068605e673d95e045894a8 Mon Sep 17 00:00:00 2001 From: Evan Jacobs Date: Mon, 17 Feb 2025 13:44:11 -0500 Subject: [PATCH 13/13] eliminate some polynomial time issues --- .changeset/hungry-bugs-tan.md | 5 +++++ index.tsx | 16 +++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) create mode 100644 .changeset/hungry-bugs-tan.md diff --git a/.changeset/hungry-bugs-tan.md b/.changeset/hungry-bugs-tan.md new file mode 100644 index 00000000..62f3c106 --- /dev/null +++ b/.changeset/hungry-bugs-tan.md @@ -0,0 +1,5 @@ +--- +'markdown-to-jsx': patch +--- 
+ +Replace some regexes with optimized functions to avoid polynomial time scenarios. Also fixes compatibility issues in some older browsers with the `trimEnd` API. diff --git a/index.tsx b/index.tsx index 43789e66..35ee1acc 100644 --- a/index.tsx +++ b/index.tsx @@ -509,10 +509,10 @@ function generateListRule( let adjustedContent if (thisItemIsAParagraph) { state.inline = false - adjustedContent = content.replace(LIST_ITEM_END_R, '\n\n') + adjustedContent = trimEnd(content) + '\n\n' } else { state.inline = true - adjustedContent = content.replace(LIST_ITEM_END_R, '') + adjustedContent = trimEnd(content) } const result = parse(adjustedContent, state) @@ -576,7 +576,9 @@ const BLOCK_SYNTAXES = [ ] function trimEnd(str: string) { - return str.replace(/\s*$/, '') + let end = str.length + while (end > 0 && str[end - 1] <= ' ') end-- + return str.slice(0, end) } function containsBlockSyntax(input: string) { @@ -1408,10 +1410,10 @@ export function compiler( parse(capture /*, parse, state*/) { return { lang: undefined, - text: capture[0] - .replace(/^ {4}/gm, '') - .replace(/\n+$/, '') - .replace(TEXT_UNESCAPE_R, '$1'), + text: trimEnd(capture[0].replace(/^ {4}/gm, '')).replace( + TEXT_UNESCAPE_R, + '$1' + ), } },