From 4bb64a0b6f575e0f0c98d2ce72941fb17b9a509b Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Fri, 17 Jan 2025 18:34:01 -0500
Subject: [PATCH 01/13] chore: use jest watcher plugin
---
package.json | 5 +++++
yarn.lock | 55 +++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/package.json b/package.json
index e6c87870..0aa6e6ba 100644
--- a/package.json
+++ b/package.json
@@ -66,6 +66,7 @@
"jest": "^29.7.0",
"jest-environment-jsdom": "^29.7.0",
"jest-serializer-html": "^7.1.0",
+ "jest-watch-typeahead": "^2.2.2",
"markdown-it": "^14.0.0",
"microbundle": "^0.15.1",
"microtime": "^3.1.1",
@@ -122,6 +123,10 @@
},
"snapshotSerializers": [
"jest-serializer-html"
+ ],
+ "watchPlugins": [
+ "jest-watch-typeahead/filename",
+ "jest-watch-typeahead/testname"
]
},
"packageManager": "yarn@4.6.0"
diff --git a/yarn.lock b/yarn.lock
index e336571b..324f9397 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2849,6 +2849,13 @@ __metadata:
languageName: node
linkType: hard
+"ansi-escapes@npm:^6.0.0":
+ version: 6.2.1
+ resolution: "ansi-escapes@npm:6.2.1"
+ checksum: 10/3b064937dc8a0645ed8094bc8b09483ee718f3aa3139746280e6c2ea80e28c0a3ce66973d0f33e88e60021abbf67e5f877deabfc810e75edf8a19dfa128850be
+ languageName: node
+ linkType: hard
+
"ansi-regex@npm:^2.0.0":
version: 2.1.1
resolution: "ansi-regex@npm:2.1.1"
@@ -3512,6 +3519,13 @@ __metadata:
languageName: node
linkType: hard
+"chalk@npm:^5.2.0":
+ version: 5.4.1
+ resolution: "chalk@npm:5.4.1"
+ checksum: 10/29df3ffcdf25656fed6e95962e2ef86d14dfe03cd50e7074b06bad9ffbbf6089adbb40f75c00744d843685c8d008adaf3aed31476780312553caf07fa86e5bc7
+ languageName: node
+ linkType: hard
+
"char-regex@npm:^1.0.2":
version: 1.0.2
resolution: "char-regex@npm:1.0.2"
@@ -3519,6 +3533,13 @@ __metadata:
languageName: node
linkType: hard
+"char-regex@npm:^2.0.0":
+ version: 2.0.2
+ resolution: "char-regex@npm:2.0.2"
+ checksum: 10/7d6dc918d215761ab389e799b9b119778722f384c8265ccb3c3025c9b219aea942f497fc7922d3470fc270987927719c5fa78d6337a5ebe9a9dc4c5a49099eb2
+ languageName: node
+ linkType: hard
+
"chardet@npm:^0.7.0":
version: 0.7.0
resolution: "chardet@npm:0.7.0"
@@ -6427,7 +6448,7 @@ __metadata:
languageName: node
linkType: hard
-"jest-regex-util@npm:^29.6.3":
+"jest-regex-util@npm:^29.0.0, jest-regex-util@npm:^29.6.3":
version: 29.6.3
resolution: "jest-regex-util@npm:29.6.3"
checksum: 10/0518beeb9bf1228261695e54f0feaad3606df26a19764bc19541e0fc6e2a3737191904607fb72f3f2ce85d9c16b28df79b7b1ec9443aa08c3ef0e9efda6f8f2a
@@ -6585,7 +6606,24 @@ __metadata:
languageName: node
linkType: hard
-"jest-watcher@npm:^29.7.0":
+"jest-watch-typeahead@npm:^2.2.2":
+ version: 2.2.2
+ resolution: "jest-watch-typeahead@npm:2.2.2"
+ dependencies:
+ ansi-escapes: "npm:^6.0.0"
+ chalk: "npm:^5.2.0"
+ jest-regex-util: "npm:^29.0.0"
+ jest-watcher: "npm:^29.0.0"
+ slash: "npm:^5.0.0"
+ string-length: "npm:^5.0.1"
+ strip-ansi: "npm:^7.0.1"
+ peerDependencies:
+ jest: ^27.0.0 || ^28.0.0 || ^29.0.0
+ checksum: 10/8685277ce1b96ec775882111ec55ce90a862cc57acb21ce94f8ac44a25f6fb34c7a7ce119e07b2d8ff5353a8d9e4f981cf96fa35532f71ddba6ca8fedc05bd8e
+ languageName: node
+ linkType: hard
+
+"jest-watcher@npm:^29.0.0, jest-watcher@npm:^29.7.0":
version: 29.7.0
resolution: "jest-watcher@npm:29.7.0"
dependencies:
@@ -7096,6 +7134,7 @@ __metadata:
jest: "npm:^29.7.0"
jest-environment-jsdom: "npm:^29.7.0"
jest-serializer-html: "npm:^7.1.0"
+ jest-watch-typeahead: "npm:^2.2.2"
markdown-it: "npm:^14.0.0"
microbundle: "npm:^0.15.1"
microtime: "npm:^3.1.1"
@@ -9288,7 +9327,7 @@ __metadata:
languageName: node
linkType: hard
-"slash@npm:^5.1.0":
+"slash@npm:^5.0.0, slash@npm:^5.1.0":
version: 5.1.0
resolution: "slash@npm:5.1.0"
checksum: 10/2c41ec6fb1414cd9bba0fa6b1dd00e8be739e3fe85d079c69d4b09ca5f2f86eafd18d9ce611c0c0f686428638a36c272a6ac14799146a8295f259c10cc45cde4
@@ -9489,6 +9528,16 @@ __metadata:
languageName: node
linkType: hard
+"string-length@npm:^5.0.1":
+ version: 5.0.1
+ resolution: "string-length@npm:5.0.1"
+ dependencies:
+ char-regex: "npm:^2.0.0"
+ strip-ansi: "npm:^7.0.1"
+ checksum: 10/71f73b8c8a743e01dcd001bcf1b197db78d5e5e53b12bd898cddaf0961be09f947dfd8c429783db3694b55b05cb5a51de6406c5085ff1aaa10c4771440c8396d
+ languageName: node
+ linkType: hard
+
"string-width-cjs@npm:string-width@^4.2.0":
version: 4.2.2
resolution: "string-width@npm:4.2.2"
From 68cee95e4c23f045af7757826391088ea20a546e Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 13:58:19 -0500
Subject: [PATCH 02/13] chore: add latest markdown-to-jsx to comparison for
benchmarking
---
benchmark.js | 4 +++-
package.json | 1 +
yarn.lock | 10 ++++++++++
3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/benchmark.js b/benchmark.js
index bd8cdecc..38d389e2 100644
--- a/benchmark.js
+++ b/benchmark.js
@@ -3,6 +3,7 @@ import cliProgress from 'cli-progress'
import * as fs from 'fs'
import SimpleMarkdown from 'simple-markdown'
import MarkdownIt from 'markdown-it'
+import { compiler as latestCompiler } from 'markdown-to-jsx-latest'
import { compiler } from './dist/index.module.js'
const mdIt = new MarkdownIt()
@@ -20,7 +21,8 @@ let totalCycles
// add tests
suite
- .addFunction('markdown-to-jsx', input => compiler(input))
+ .addFunction('markdown-to-jsx (next)', input => compiler(input))
+ .addFunction('markdown-to-jsx (latest)', input => latestCompiler(input))
.addFunction('simple-markdown', input =>
SimpleMarkdown.defaultReactOutput(SimpleMarkdown.defaultBlockParse(input))
)
diff --git a/package.json b/package.json
index 0aa6e6ba..845b92b7 100644
--- a/package.json
+++ b/package.json
@@ -68,6 +68,7 @@
"jest-serializer-html": "^7.1.0",
"jest-watch-typeahead": "^2.2.2",
"markdown-it": "^14.0.0",
+ "markdown-to-jsx-latest": "npm:markdown-to-jsx@latest",
"microbundle": "^0.15.1",
"microtime": "^3.1.1",
"mkdirp": "^3.0.1",
diff --git a/yarn.lock b/yarn.lock
index 324f9397..90ffbcd4 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -7114,6 +7114,15 @@ __metadata:
languageName: node
linkType: hard
+"markdown-to-jsx-latest@npm:markdown-to-jsx@latest":
+ version: 7.7.3
+ resolution: "markdown-to-jsx@npm:7.7.3"
+ peerDependencies:
+ react: ">= 0.14.0"
+ checksum: 10/b71383b98e6254bda2c94ffb0744619c1d89714cdff449defb330e18942c565fc2203d9ba0235aff7bb65a52656b850e4e42d62c65582e500a6b11bd78c6f04b
+ languageName: node
+ linkType: hard
+
"markdown-to-jsx@workspace:.":
version: 0.0.0-use.local
resolution: "markdown-to-jsx@workspace:."
@@ -7136,6 +7145,7 @@ __metadata:
jest-serializer-html: "npm:^7.1.0"
jest-watch-typeahead: "npm:^2.2.2"
markdown-it: "npm:^14.0.0"
+ markdown-to-jsx-latest: "npm:markdown-to-jsx@latest"
microbundle: "npm:^0.15.1"
microtime: "npm:^3.1.1"
mkdirp: "npm:^3.0.1"
From bc952eb20a836f8d9ec575e6d2f0b76edc4f4a68 Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 14:16:04 -0500
Subject: [PATCH 03/13] chore: adjust perf logging
---
index.tsx | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/index.tsx b/index.tsx
index 96f4b515..5f7de399 100644
--- a/index.tsx
+++ b/index.tsx
@@ -1955,10 +1955,11 @@ export function compiler(
// const result = parse(...args)
// const delta = performance.now() - start
- // if (delta > 5)
- // console.warn(`Slow parse for ${key}: ${delta.toFixed(3)}ms`)
-
- // console.log(`${key}:parse`, `${delta.toFixed(3)}ms`, args[0])
+ // console[delta > 5 ? 'warn' : 'log'](
+ // `${key}:parse`,
+ // `${delta.toFixed(3)}ms`,
+ // args[0]
+ // )
// return result
// }
From 47ef46ae551e657c87a23de223c7c0cd2774f32b Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 15:46:07 -0500
Subject: [PATCH 04/13] refactor: optimize nested parsing effort
---
.changeset/nervous-suns-roll.md | 15 +++++++
index.tsx | 76 +++++++++++++++++++++------------
2 files changed, 64 insertions(+), 27 deletions(-)
create mode 100644 .changeset/nervous-suns-roll.md
diff --git a/.changeset/nervous-suns-roll.md b/.changeset/nervous-suns-roll.md
new file mode 100644
index 00000000..8cc7f7a7
--- /dev/null
+++ b/.changeset/nervous-suns-roll.md
@@ -0,0 +1,15 @@
+---
+'markdown-to-jsx': patch
+---
+
+Optimize regexes and parsing to do less work.
+
+```
++--------------------------+------------------------+-----------------------+
+| │ simple markdown string │ large markdown string |
++--------------------------+------------------------+-----------------------+
+| markdown-to-jsx (next) │ 86,340 ops/sec │ 307 ops/sec |
++--------------------------+------------------------+-----------------------+
+| markdown-to-jsx (latest) │ 85,247 ops/sec │ 296 ops/sec |
++--------------------------+------------------------+-----------------------+
+```
diff --git a/index.tsx b/index.tsx
index 5f7de399..7aa1c254 100644
--- a/index.tsx
+++ b/index.tsx
@@ -426,7 +426,7 @@ function generateListRule(
: UNORDERED_LIST_ITEM_PREFIX_R
return {
- match(source, state) {
+ match: allowInline(function (source, state) {
// We only want to break into a list if we are at the start of a
// line. This is to avoid parsing "hi * there" with "* there"
// becoming a part of a list.
@@ -436,16 +436,16 @@ function generateListRule(
// in which case we can parse with inline scope, but need to allow
// nested lists inside this inline scope.
const isStartOfLine = LIST_LOOKBEHIND_R.exec(state.prevCapture)
- const isListBlock = state.list || (!state.inline && !state.simple)
+ const isListAllowed = state.list || (!state.inline && !state.simple)
- if (isStartOfLine && isListBlock) {
+ if (isStartOfLine && isListAllowed) {
source = isStartOfLine[1] + source
return LIST_R.exec(source)
} else {
return null
}
- },
+ }),
order: Priority.HIGH,
parse(capture, parse, state) {
const bullet = capture[2]
@@ -842,6 +842,10 @@ function parserFor(
state: MarkdownToJSX.State
): MarkdownToJSX.ParserResult[] {
let result = []
+ let rule
+ let ruleType = ''
+ let parsed
+ let currCaptureString = ''
state.prevCapture = state.prevCapture || ''
@@ -852,20 +856,25 @@ function parserFor(
while (source) {
let i = 0
while (i < ruleList.length) {
- const ruleType = ruleList[i]
- const rule = rules[ruleType]
+ ruleType = ruleList[i]
+ rule = rules[ruleType]
+
+ if (state.inline && !rule.match.inline) {
+ i++
+ continue
+ }
const capture = rule.match(source, state)
if (capture) {
- const currCaptureString = capture[0]
+ currCaptureString = capture[0]
// retain what's been processed so far for lookbacks
state.prevCapture += currCaptureString
source = source.substring(currCaptureString.length)
- const parsed = rule.parse(capture, nestedParse, state)
+ parsed = rule.parse(capture, nestedParse, state)
// We also let rules override the default type of
// their parsed node if they would like to, so that
@@ -894,26 +903,39 @@ function parserFor(
}
}
+/**
+ * Marks a matcher function as eligible for being run inside an inline context;
+ * allows us to do a little less work in the nested parser.
+ */
+function allowInline(fn: T) {
+ fn.inline = 1
+
+ return fn
+}
+
// Creates a match function for an inline scoped or simple element from a regex
function inlineRegex(regex: RegExp) {
- return function match(source, state: MarkdownToJSX.State) {
+ return allowInline(function match(source, state: MarkdownToJSX.State) {
if (state.inline) {
return regex.exec(source)
} else {
return null
}
- }
+ })
}
// basically any inline element except links
function simpleInlineRegex(regex: RegExp) {
- return function match(source: string, state: MarkdownToJSX.State) {
+ return allowInline(function match(
+ source: string,
+ state: MarkdownToJSX.State
+ ) {
if (state.inline || state.simple) {
return regex.exec(source)
} else {
return null
}
- }
+ })
}
// Creates a match function for a block scoped element from a regex
@@ -929,9 +951,9 @@ function blockRegex(regex: RegExp) {
// Creates a match function from a regex, ignoring block/inline scope
function anyScopeRegex(regex: RegExp) {
- return function match(source: string /*, state*/) {
+ return allowInline(function match(source: string /*, state*/) {
return regex.exec(source)
- }
+ })
}
function matchParagraph(source: string, state: MarkdownToJSX.State) {
@@ -1671,13 +1693,13 @@ export function compiler(
},
[RuleType.linkBareUrlDetector]: {
- match: (source, state) => {
+ match: allowInline((source, state) => {
if (state.inAnchor || options.disableAutoLink) {
return null
}
return inlineRegex(LINK_AUTOLINK_BARE_URL_R)(source, state)
- },
+ }),
order: Priority.MAX,
parse(capture /*, parse, state*/) {
return {
@@ -1739,7 +1761,7 @@ export function compiler(
},
[RuleType.paragraph]: {
- match: matchParagraph,
+ match: allowInline(matchParagraph),
order: Priority.LOW,
parse: parseCaptureInline,
render(node, output, state) {
@@ -1937,18 +1959,18 @@ export function compiler(
// Object.keys(rules).forEach(key => {
// let { match: match, parse: parse } = rules[key]
- // rules[key].match = (...args) => {
- // const start = performance.now()
- // const result = match(...args)
- // const delta = performance.now() - start
+ // // rules[key].match = (...args) => {
+ // // const start = performance.now()
+ // // const result = match(...args)
+ // // const delta = performance.now() - start
- // if (delta > 5)
- // console.warn(
- // `Slow match for ${key}: ${delta.toFixed(3)}ms, input: ${args[0]}`
- // )
+ // // if (delta > 5)
+ // // console.warn(
+ // // `Slow match for ${key}: ${delta.toFixed(3)}ms, input: ${args[0]}`
+ // // )
- // return result
- // }
+ // // return result
+ // // }
// rules[key].parse = (...args) => {
// const start = performance.now()
From 69dbd925234e3dc62e3f427535983e0d9579d19b Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 15:46:43 -0500
Subject: [PATCH 05/13] test: fix test to better match the title
---
index.compiler.spec.tsx | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx
index 1bc9b952..6e740eff 100644
--- a/index.compiler.spec.tsx
+++ b/index.compiler.spec.tsx
@@ -535,7 +535,7 @@ describe('inline textual elements', () => {
it('replaces custom named character codes with unicode equivalents so React will render correctly', () => {
render(
- compiler('Apostrophe's and less than ≤ equal', {
+ compiler('Apostrophe's and ≤ equal', {
namedCodesToUnicode: {
le: '\u2264',
'#39': '\u0027',
@@ -545,7 +545,7 @@ describe('inline textual elements', () => {
expect(root.innerHTML).toMatchInlineSnapshot(`
- Apostrophe's and less than ≤ equal
+ Apostrophe's and ≤ equal
`)
})
From 7abb3b6b73ffdf021dd0d692e16c0582b69e64ee Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 15:51:27 -0500
Subject: [PATCH 06/13] refactor: optimize splitter
---
.changeset/great-rice-return.md | 5 +++++
index.tsx | 7 +++++--
2 files changed, 10 insertions(+), 2 deletions(-)
create mode 100644 .changeset/great-rice-return.md
diff --git a/.changeset/great-rice-return.md b/.changeset/great-rice-return.md
new file mode 100644
index 00000000..1837f081
--- /dev/null
+++ b/.changeset/great-rice-return.md
@@ -0,0 +1,5 @@
+---
+'markdown-to-jsx': patch
+---
+
+Further optimize the plain text splitting regex.
diff --git a/index.tsx b/index.tsx
index 7aa1c254..944c8779 100644
--- a/index.tsx
+++ b/index.tsx
@@ -321,8 +321,11 @@ const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`)
const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/
-const TEXT_PLAIN_R =
- /^[\s\S]+?(?=[^0-9A-Z\s\u00c0-\uffff.()'"]|\d+\.|\n\n| {2,}\n|\w+:\S|$)/i
+/**
+ * Always take the first character, then eagerly take text until a double space
+ * (potential line break) or some markdown-like punctuation is reached.
+ */
+const TEXT_PLAIN_R = /^([\s\S](?:(?! |[0-9]\.)[^*_~\-\n<`\\\[!])*)/
const TRIM_STARTING_NEWLINES = /^\n+/
From 6de554305527e64a07fd561dce1c7c990897b507 Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 16:23:20 -0500
Subject: [PATCH 07/13] refactor: remove redundant matchers during paragraph
matching
---
.changeset/grumpy-kids-attack.md | 5 +++++
index.tsx | 4 +---
2 files changed, 6 insertions(+), 3 deletions(-)
create mode 100644 .changeset/grumpy-kids-attack.md
diff --git a/.changeset/grumpy-kids-attack.md b/.changeset/grumpy-kids-attack.md
new file mode 100644
index 00000000..ec8386e0
--- /dev/null
+++ b/.changeset/grumpy-kids-attack.md
@@ -0,0 +1,5 @@
+---
+'markdown-to-jsx': patch
+---
+
+Remove redundant detectors when processing paragraphs.
diff --git a/index.tsx b/index.tsx
index 944c8779..bc1c579b 100644
--- a/index.tsx
+++ b/index.tsx
@@ -560,11 +560,8 @@ const NON_PARAGRAPH_BLOCK_SYNTAXES = [
HEADING_R,
HEADING_SETEXT_R,
HEADING_ATX_COMPLIANT_R,
- HTML_COMMENT_R,
NP_TABLE_R,
- ORDERED_LIST_ITEM_R,
ORDERED_LIST_R,
- UNORDERED_LIST_ITEM_R,
UNORDERED_LIST_R,
]
@@ -572,6 +569,7 @@ const BLOCK_SYNTAXES = [
...NON_PARAGRAPH_BLOCK_SYNTAXES,
PARAGRAPH_R,
HTML_BLOCK_ELEMENT_R,
+ HTML_COMMENT_R,
HTML_SELF_CLOSING_ELEMENT_R,
]
From 9332d501ad4862517e66c5b4a13eb0db09b3697a Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 20 Jan 2025 17:01:17 -0500
Subject: [PATCH 08/13] refactor: improve inline code performance
---
.changeset/plenty-dodos-collect.md | 5 +++++
.prettierignore | 1 +
__snapshots__/index.compiler.spec.tsx.snap | 6 +++---
fixture.md | 2 +-
index.tsx | 15 ++++++++++-----
5 files changed, 20 insertions(+), 9 deletions(-)
create mode 100644 .changeset/plenty-dodos-collect.md
create mode 100644 .prettierignore
diff --git a/.changeset/plenty-dodos-collect.md b/.changeset/plenty-dodos-collect.md
new file mode 100644
index 00000000..2ab20106
--- /dev/null
+++ b/.changeset/plenty-dodos-collect.md
@@ -0,0 +1,5 @@
+---
+'markdown-to-jsx': patch
+---
+
+Rework inline code syntax handling and handle escaped characters in code blocks correctly so they render without the backslash.
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 00000000..0aed9848
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1 @@
+fixture.md
diff --git a/__snapshots__/index.compiler.spec.tsx.snap b/__snapshots__/index.compiler.spec.tsx.snap
index ceabbfa8..1ca360b3 100644
--- a/__snapshots__/index.compiler.spec.tsx.snap
+++ b/__snapshots__/index.compiler.spec.tsx.snap
@@ -924,7 +924,7 @@ line. To avoid this, you can backslash-escape the period:
- 1986\\. What a great season.
+ 1986. What a great season.
@@ -1433,7 +1433,7 @@ escape it:
- \\*this text is surrounded by literal asterisks\\*
+ *this text is surrounded by literal asterisks*
@@ -1688,7 +1688,7 @@ backslashes before the asterisks, like this:
- \\*literal asterisks\\*
+ *literal asterisks*
diff --git a/fixture.md b/fixture.md
index 9434ec56..ee056899 100644
--- a/fixture.md
+++ b/fixture.md
@@ -745,7 +745,7 @@ escape it:
Code
-To indicate a span of code, wrap it with backtick quotes (`` ` ``).
+To indicate a span of code, wrap it with backtick quotes (`\``).
Unlike a pre-formatted code block, a code span indicates code within a
normal paragraph. For example:
diff --git a/index.tsx b/index.tsx
index bc1c579b..5f4ea85b 100644
--- a/index.tsx
+++ b/index.tsx
@@ -190,7 +190,7 @@ const BREAK_THEMATIC_R = /^(?:( *[-*_])){3,} *(?:\n *)+\n/
const CODE_BLOCK_FENCED_R =
/^(?: {1,3})?(`{3,}|~{3,}) *(\S+)? *([^\n]*?)?\n([\s\S]*?)(?:\1\n?|$)/
const CODE_BLOCK_R = /^(?: {4}[^\n]+\n*)+(?:\n *)+\n?/
-const CODE_INLINE_R = /^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)/
+const CODE_INLINE_R = /^(`+)((?:\\`|[^`])+)\1/
const CONSECUTIVE_NEWLINE_R = /^(?:\n *)*\n/
const CR_NEWLINE_R = /\r\n?/g
@@ -320,6 +320,7 @@ const TEXT_MARKED_R = new RegExp(`^==${INLINE_SKIP_R}==`)
const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`)
const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/
+const TEXT_UNESCAPE_R = /\\([^0-9A-Za-z\s])/g
/**
* Always take the first character, then eagerly take text until a double space
@@ -460,6 +461,7 @@ function generateListRule(
.match(LIST_ITEM_R)
let lastItemWasAParagraph = false
+
const itemContent = items.map(function (item, i) {
// We need to see how far indented the item is:
const space = LIST_ITEM_PREFIX_R.exec(item)[0].length
@@ -495,7 +497,7 @@ function generateListRule(
containsBlocks || (isLastItem && lastItemWasAParagraph)
lastItemWasAParagraph = thisItemIsAParagraph
- // backup our state for restoration afterwards. We're going to
+ // back up our state so it can be restored afterwards. We're going to
// want to set state.list to true, and state.inline depending
// on our list's looseness.
const oldStateInline = state.inline
@@ -1400,7 +1402,10 @@ export function compiler(
parse(capture /*, parse, state*/) {
return {
lang: undefined,
- text: capture[0].replace(/^ {4}/gm, '').replace(/\n+$/, ''),
+ text: capture[0]
+ .replace(/^ {4}/gm, '')
+ .replace(/\n+$/, '')
+ .replaceAll(TEXT_UNESCAPE_R, '$1'),
}
},
@@ -1430,7 +1435,7 @@ export function compiler(
// if capture[3] it's additional metadata
attrs: attrStringToMap('code', capture[3] || ''),
lang: capture[2] || undefined,
- text: capture[4],
+ text: capture[4].replaceAll(TEXT_UNESCAPE_R, '$1'),
type: RuleType.codeBlock,
}
},
@@ -1441,7 +1446,7 @@ export function compiler(
order: Priority.LOW,
parse(capture /*, parse, state*/) {
return {
- text: capture[2],
+ text: capture[2].replaceAll(TEXT_UNESCAPE_R, '$1'),
}
},
render(node, output, state) {
From f3489ffab12fa785161a2c1946d0acfd68509d63 Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 17 Feb 2025 13:14:35 -0500
Subject: [PATCH 09/13] replace trimEnd with more compatible variant
---
index.compiler.spec.tsx | 4 ++--
index.tsx | 22 ++++++++++++++--------
2 files changed, 16 insertions(+), 10 deletions(-)
diff --git a/index.compiler.spec.tsx b/index.compiler.spec.tsx
index 6e740eff..83464a5a 100644
--- a/index.compiler.spec.tsx
+++ b/index.compiler.spec.tsx
@@ -480,7 +480,7 @@ describe('inline textual elements', () => {
render(
compiler(
- '*This should not misinterpret the asterisk ~~*~~ in the strikethrough.*'
+ String.raw`*This should not misinterpret the asterisk ~~\*~~ in the strikethrough.*`
)
)
@@ -512,7 +512,7 @@ describe('inline textual elements', () => {
render(
compiler(
- '_This should not misinterpret the under_score that forms part of a word._'
+ `_This should not misinterpret the under\\_score that forms part of a word._`
)
)
diff --git a/index.tsx b/index.tsx
index 5f4ea85b..374ca141 100644
--- a/index.tsx
+++ b/index.tsx
@@ -296,7 +296,7 @@ const TABLE_RIGHT_ALIGN = /^ *-+: *$/
* and therefore miss content that should have been included.
*/
const INLINE_SKIP_R =
- '((?:\\[.*?\\][([].*?[)\\]]|<.*?>(?:.*?<.*?>)?|`.*?`|~~.*?~~|==.*?==|.|\\n)*?)'
+ '((?:\\[.*?\\][([].*?[)\\]]|<.*?>(?:.*?<.*?>)?|`.*?`|\\\\\\1|[\\s\\S])+?)'
/**
* Detect a sequence like **foo** or __foo__. Note that bold has a higher priority
@@ -307,17 +307,17 @@ const TEXT_BOLD_R = new RegExp(`^([*_])\\1${INLINE_SKIP_R}\\1\\1(?!\\1)`)
/**
* Detect a sequence like *foo* or _foo_.
*/
-const TEXT_EMPHASIZED_R = new RegExp(`^([*_])${INLINE_SKIP_R}\\1(?!\\1|\\w)`)
+const TEXT_EMPHASIZED_R = new RegExp(`^([*_])${INLINE_SKIP_R}\\1(?!\\1)`)
/**
* Detect a sequence like ==foo==.
*/
-const TEXT_MARKED_R = new RegExp(`^==${INLINE_SKIP_R}==`)
+const TEXT_MARKED_R = new RegExp(`^(==)${INLINE_SKIP_R}\\1`)
/**
* Detect a sequence like ~~foo~~.
*/
-const TEXT_STRIKETHROUGHED_R = new RegExp(`^~~${INLINE_SKIP_R}~~`)
+const TEXT_STRIKETHROUGHED_R = new RegExp(`^(~~)${INLINE_SKIP_R}\\1`)
const TEXT_ESCAPED_R = /^\\([^0-9A-Za-z\s])/
const TEXT_UNESCAPE_R = /\\([^0-9A-Za-z\s])/g
@@ -575,6 +575,10 @@ const BLOCK_SYNTAXES = [
HTML_SELF_CLOSING_ELEMENT_R,
]
+function trimEnd(str: string) {
+ return str.replace(/\s*$/, '')
+}
+
function containsBlockSyntax(input: string) {
return BLOCK_SYNTAXES.some(r => r.test(input))
}
@@ -979,12 +983,14 @@ function matchParagraph(source: string, state: MarkdownToJSX.State) {
return !!line.trim()
})
- const captured = match.trimEnd()
+ const captured = trimEnd(match)
if (captured == '') {
return null
}
- return [match, captured]
+ // parseCaptureInline expects the inner content to be at index 2
+ // because index 1 is the delimiter for text formatting syntaxes
+ return [match, , captured]
}
export function sanitizer(url: string): string {
@@ -1074,7 +1080,7 @@ const parseCaptureInline: MarkdownToJSX.Parser<{
children: MarkdownToJSX.ParserResult[]
}> = (capture, parse, state: MarkdownToJSX.State) => {
return {
- children: parseInline(parse, capture[1], state),
+ children: parseInline(parse, capture[2], state),
}
}
@@ -1225,7 +1231,7 @@ export function compiler(
parser(
inline
? input
- : `${input.trimEnd().replace(TRIM_STARTING_NEWLINES, '')}\n\n`,
+ : `${trimEnd(input).replace(TRIM_STARTING_NEWLINES, '')}\n\n`,
{
inline,
}
From b0a8f847ea6efdf583f830f1ff99eba534e3a056 Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 17 Feb 2025 13:19:28 -0500
Subject: [PATCH 10/13] simplify benchmarking for quick iteration against self
---
benchmark.js | 16 +++++++++++-----
package.json | 1 +
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/benchmark.js b/benchmark.js
index 38d389e2..5e84ab30 100644
--- a/benchmark.js
+++ b/benchmark.js
@@ -20,13 +20,19 @@ const bar = new cliProgress.SingleBar(
let totalCycles
// add tests
-suite
+const evals = suite
.addFunction('markdown-to-jsx (next)', input => compiler(input))
.addFunction('markdown-to-jsx (latest)', input => latestCompiler(input))
- .addFunction('simple-markdown', input =>
- SimpleMarkdown.defaultReactOutput(SimpleMarkdown.defaultBlockParse(input))
- )
- .addFunction('markdown-it', input => mdIt.render(input))
+
+if (process.argv.includes('--all')) {
+ evals
+ .addFunction('simple-markdown', input =>
+ SimpleMarkdown.defaultReactOutput(SimpleMarkdown.defaultBlockParse(input))
+ )
+ .addFunction('markdown-it', input => mdIt.render(input))
+}
+
+evals
.addInput('simple markdown string', ['_Hello_ **world**!'])
.addInput('large markdown string', [fixture])
.on('start', () => {
diff --git a/package.json b/package.json
index 845b92b7..2142dd51 100644
--- a/package.json
+++ b/package.json
@@ -96,6 +96,7 @@
"test": "jest --verbose",
"size": "size-limit",
"benchmark": "node benchmark.js",
+ "benchmark:all": "node benchmark.js --all",
"changeset-publish": "yarn build && changeset publish"
},
"size-limit": [
From 8ea298943203b6e886c9a2fb7688cb1d7ff4f085 Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 17 Feb 2025 13:32:30 -0500
Subject: [PATCH 11/13] improve compatibility
---
index.tsx | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/index.tsx b/index.tsx
index 374ca141..43789e66 100644
--- a/index.tsx
+++ b/index.tsx
@@ -1411,7 +1411,7 @@ export function compiler(
text: capture[0]
.replace(/^ {4}/gm, '')
.replace(/\n+$/, '')
- .replaceAll(TEXT_UNESCAPE_R, '$1'),
+ .replace(TEXT_UNESCAPE_R, '$1'),
}
},
@@ -1441,7 +1441,7 @@ export function compiler(
// if capture[3] it's additional metadata
attrs: attrStringToMap('code', capture[3] || ''),
lang: capture[2] || undefined,
- text: capture[4].replaceAll(TEXT_UNESCAPE_R, '$1'),
+ text: capture[4].replace(TEXT_UNESCAPE_R, '$1'),
type: RuleType.codeBlock,
}
},
@@ -1452,7 +1452,7 @@ export function compiler(
order: Priority.LOW,
parse(capture /*, parse, state*/) {
return {
- text: capture[2].replaceAll(TEXT_UNESCAPE_R, '$1'),
+ text: capture[2].replace(TEXT_UNESCAPE_R, '$1'),
}
},
render(node, output, state) {
From 02b26e0b25c6ad809b4b66c038276706bd9700ca Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 17 Feb 2025 13:32:36 -0500
Subject: [PATCH 12/13] adjust package.json
---
package.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/package.json b/package.json
index 2142dd51..3f09da3a 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,7 @@
"jsx",
"html"
],
- "author": "Evan Jacobs ",
+ "author": "Evan Jacobs ",
"repository": {
"type": "git",
"url": "git+https://github.com/quantizor/markdown-to-jsx.git"
From 0bee64d38fecaf2186068605e673d95e045894a8 Mon Sep 17 00:00:00 2001
From: Evan Jacobs
Date: Mon, 17 Feb 2025 13:44:11 -0500
Subject: [PATCH 13/13] eliminate some polynomial time issues
---
.changeset/hungry-bugs-tan.md | 5 +++++
index.tsx | 16 +++++++++-------
2 files changed, 14 insertions(+), 7 deletions(-)
create mode 100644 .changeset/hungry-bugs-tan.md
diff --git a/.changeset/hungry-bugs-tan.md b/.changeset/hungry-bugs-tan.md
new file mode 100644
index 00000000..62f3c106
--- /dev/null
+++ b/.changeset/hungry-bugs-tan.md
@@ -0,0 +1,5 @@
+---
+'markdown-to-jsx': patch
+---
+
+Replace some regexes with optimized functions to avoid polynomial-time scenarios. Also fix compatibility issues with the `trimEnd` API in some older browsers.
diff --git a/index.tsx b/index.tsx
index 43789e66..35ee1acc 100644
--- a/index.tsx
+++ b/index.tsx
@@ -509,10 +509,10 @@ function generateListRule(
let adjustedContent
if (thisItemIsAParagraph) {
state.inline = false
- adjustedContent = content.replace(LIST_ITEM_END_R, '\n\n')
+ adjustedContent = trimEnd(content) + '\n\n'
} else {
state.inline = true
- adjustedContent = content.replace(LIST_ITEM_END_R, '')
+ adjustedContent = trimEnd(content)
}
const result = parse(adjustedContent, state)
@@ -576,7 +576,9 @@ const BLOCK_SYNTAXES = [
]
function trimEnd(str: string) {
- return str.replace(/\s*$/, '')
+ let end = str.length
+ while (end > 0 && str[end - 1] <= ' ') end--
+ return str.slice(0, end)
}
function containsBlockSyntax(input: string) {
@@ -1408,10 +1410,10 @@ export function compiler(
parse(capture /*, parse, state*/) {
return {
lang: undefined,
- text: capture[0]
- .replace(/^ {4}/gm, '')
- .replace(/\n+$/, '')
- .replace(TEXT_UNESCAPE_R, '$1'),
+ text: trimEnd(capture[0].replace(/^ {4}/gm, '')).replace(
+ TEXT_UNESCAPE_R,
+ '$1'
+ ),
}
},