Skip to content

Commit

Permalink
Improve codeblock tokenization inside other block tokens (#53)
Browse files Browse the repository at this point in the history
* Optimize codeblock lexer inside other block tokens

* Update test snapshots

* Lint fixes
  • Loading branch information
kiranparajuli589 authored Oct 23, 2023
1 parent 9617e19 commit 154dacc
Show file tree
Hide file tree
Showing 16 changed files with 205 additions and 2,968 deletions.
3 changes: 3 additions & 0 deletions demo/src/styles/main.scss
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ main {
*:last-child {
margin-bottom: 0;
}
pre {
margin-top: .5rem;
}
}
input {
margin-right: .5rem;
Expand Down
2 changes: 1 addition & 1 deletion lib/regex/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export const REGEX = {
UNDERLINE_1: /^\s*=+$/g,
UNDERLINE_2: /^\s*-+$/g
},
CODE_BLOCK: /^\s*`{3}(?<lang>[a-z]*)$/g,
CODE_BLOCK: /^\s*`{3}\s*(?<lang>[a-z]*)$/g,
LIST: {
CHECKBOX: /^\s*(?:[-~*]|\d+\.)\s\[(?<check>\s|x)]\s(?<value>.+)/g,
UNORDERED: /^\s*(?<mark>[-*+])\s(?<value>.+)/g,
Expand Down
6 changes: 4 additions & 2 deletions lib/tokenizer/codeblock.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export class CodeBlock {
if (
!fromToken &&
indent >= 4 &&
(!lastLexer || lastLexer?.type === TOKENS.NEW_LINE)
(!lastLexer || lastLexer.type === TOKENS.NEW_LINE)
) {
return true
}
Expand Down Expand Up @@ -105,6 +105,9 @@ export class CodeBlock {

do {
nextLine = this.#lines[++this.#cursor]
if (nextLine === "") {
continue
}
nextLineIndent = (nextLine) ? Indent.get(nextLine) : null

if (nextLine !== undefined) {
Expand Down Expand Up @@ -217,7 +220,6 @@ export class CodeBlock {
} else {
skeleton = skeleton.replace("%s", Esc.everything(lexer.value))
}

return skeleton
}
}
6 changes: 2 additions & 4 deletions lib/tokenizer/list.js
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,7 @@ export class List {
const listTag = (lexer.meta.ordered) ? "ol" : "ul"
let listBodyHtml = []
lexer.items.forEach(listItem => {
let listItemHtml = `
<li>%s</li>`
let listItemHtml = "<li>%s</li>"
const lParser = new Parser(listItem.tokens, { from: TOKENS.LIST })
if (lexer.meta.checklist) {
const isChecked = (listItem.checked) ? " checked" : ""
Expand All @@ -215,8 +214,7 @@ export class List {
}
listBodyHtml.push(listItemHtml)
})
return `<${listTag}>${listBodyHtml.join("")}
</${listTag}>`
return `<${listTag}>${listBodyHtml.join("")}</${listTag}>`
}

/**
Expand Down
1 change: 1 addition & 0 deletions lib/tokenizer/paragraph.js
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ export class Paragraph {
} else if (token.type === TOKENS.ITALIC) {
parsed += `<em>${Paragraph.parse(token)}</em>`
} else if (token.type === TOKENS.CODE) {
token.value = Esc.unEscape(token.value)
parsed += `<code>${Esc.everything(token.value)}</code>`
}else if (token.type === TOKENS.STRIKE_THROUGH) {
parsed += `<s>${Paragraph.parse(token)}</s>`
Expand Down
2,871 changes: 30 additions & 2,841 deletions tests/integration/__snapshots__/large.spec.js.snap

Large diffs are not rendered by default.

26 changes: 1 addition & 25 deletions tests/integration/__snapshots__/small.spec.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,7 @@

exports[`Small MD To HTML should parse the small markdown file content to html 1`] = `
"<!-- https://regex101.com/r/C6SxZH/1--><h1>Heading One <code>CODE INSIDE</code></h1><h2>Heading Two</h2><h3>Heading Three</h3><h4>Heading Four</h4><h5>Heading Five</h5><h6>Heading Six</h6><p>a paragraph of words <code>first code</code> normal text here <code>code body</code> <em>first italics</em> here me crying <em>italic body</em> here me crying <strong>first bolds</strong> some normal again <strong>bold body</strong> <a href=\\"https://kiranparajuli.com.np\\">Kiran Parajuli</a> <s>strikes body</s> here some normal again at the last</p><blockquote>
<p> some quote with <u>Underline</u></p></blockquote><ul>
<li>list item 1</li>
<li>list item 2</li>
</ul><ul>
<li>list item 3</li>
<li>list item 4</li>
</ul><ul>
<li>list item 5</li>
<li>list item 6</li>
</ul><ul>
<li><input type='checkbox'>checkbox empty</li>
<li><input type='checkbox' checked>checkbox checked</li>
</ul><ul>
<li><input type='checkbox'>checkbox empty</li>
<li><input type='checkbox' checked>checkbox checked</li>
</ul><ul>
<li><input type='checkbox'>checkbox empty</li>
<li><input type='checkbox' checked>checkbox checked</li>
</ul><ol>
<li>numbered list item 1</li>
<li>numbered list item 2</li>
</ol><ol>
<li><input type='checkbox'>numbered checkbox empty</li>
<li><input type='checkbox' checked>numbered checkbox checked paragraph just below hmm, but with some fun :)</li>
</ol><blockquote>
<p> some quote with <u>Underline</u></p></blockquote><ul><li>list item 1</li><li>list item 2</li></ul><ul><li>list item 3</li><li>list item 4</li></ul><ul><li>list item 5</li><li>list item 6</li></ul><ul><li><input type='checkbox'>checkbox empty</li><li><input type='checkbox' checked>checkbox checked</li></ul><ul><li><input type='checkbox'>checkbox empty</li><li><input type='checkbox' checked>checkbox checked</li></ul><ul><li><input type='checkbox'>checkbox empty</li><li><input type='checkbox' checked>checkbox checked</li></ul><ol><li>numbered list item 1</li><li>numbered list item 2</li></ol><ol><li><input type='checkbox'>numbered checkbox empty</li><li><input type='checkbox' checked>numbered checkbox checked paragraph just below hmm, but with some fun :)</li></ol><blockquote>
<p> quote 1</p><blockquote>
<p> nested quote 2 <a href=\\"quote-link-url\\">quote-link</a></p><blockquote>
<p> nested quote 3 with <em>italics</em> and <strong>bolds</strong></p></blockquote></blockquote></blockquote><pre><code class='language-js'>const a = 1;
Expand Down
3 changes: 1 addition & 2 deletions tests/integration/large.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ const mdp = new HtmlMark({
describe("Large MD To HTML", () => {
it.each([
// eslint-disable-next-line no-undef
File.pathJoin(__dirname, "..", "fixtures/large_markdown.md"),
File.pathJoin(__dirname, "..", "fixtures/markdownit.md")
File.pathJoin(__dirname, "..", "fixtures/large_markdown.md")
])("should parse the large markdown file content to html", (path) => {
const html = mdp.parse(File.read(path))
expect(html).toMatchSnapshot()
Expand Down
89 changes: 89 additions & 0 deletions tests/unit/lexer/__snapshots__/codeblock.spec.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,95 @@ four",
]
`;

exports[`codeblock should be work just fine within a list item 1`] = `
Array [
Object {
"indent": 0,
"items": Array [
Object {
"checked": null,
"count": "4",
"raw": "4. What is the exact rule for determining when list items get
wrapped in \`<p>\` tags? Can a list be partially \\"loose\\" and partially
\\"tight\\"? What should we do with a list like this?
\`\`\` markdown
1. one
2. two
3. three
\`\`\`",
"tokens": Array [
Object {
"indent": 0,
"raw": "What is the exact rule for determining when list items get
wrapped in \`<p>\` tags? Can a list be partially \\"loose\\" and partially
\\"tight\\"? What should we do with a list like this?",
"tokens": Array [
Object {
"raw": "What is the exact rule for determining when list items get wrapped in ",
"type": "text",
"value": "What is the exact rule for determining when list items get wrapped in ",
},
Object {
"raw": "\`<p>\`",
"type": "code",
"value": "<p>",
},
Object {
"raw": " tags? Can a list be partially \\"loose\\" and partially \\"tight\\"? What should we do with a list like this?",
"type": "text",
"value": " tags? Can a list be partially \\"loose\\" and partially \\"tight\\"? What should we do with a list like this?",
},
],
"type": "paragraph",
"value": "What is the exact rule for determining when list items get wrapped in \`<p>\` tags? Can a list be partially \\"loose\\" and partially \\"tight\\"? What should we do with a list like this?",
},
Object {
"type": "new-line",
},
Object {
"indent": 2,
"language": "markdown",
"raw": " \`\`\` markdown
1. one
2. two
3. three
\`\`\`",
"type": "code-block",
"value": "1. one
2. two
3. three",
},
],
"type": "list-item",
},
],
"meta": Object {
"checklist": false,
"identifier": null,
"ordered": true,
},
"raw": "4. What is the exact rule for determining when list items get
wrapped in \`<p>\` tags? Can a list be partially \\"loose\\" and partially
\\"tight\\"? What should we do with a list like this?
\`\`\` markdown
1. one
2. two
3. three
\`\`\`",
"type": "list",
},
Object {
"type": "new-line",
},
]
`;
exports[`codeblock should cope with empty body 1`] = `
Array [
Object {
Expand Down
28 changes: 28 additions & 0 deletions tests/unit/lexer/__snapshots__/paragraph.spec.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,34 @@ Array [
]
`;

exports[`paragraph should allow escaped escape character 1`] = `
Array [
Object {
"indent": 0,
"raw": "this is \`go\\\\ogle\` link",
"tokens": Array [
Object {
"raw": "this is ",
"type": "text",
"value": "this is ",
},
Object {
"raw": "\`go\\\\ogle\`",
"type": "code",
"value": "go\\\\ogle",
},
Object {
"raw": " link",
"type": "text",
"value": " link",
},
],
"type": "paragraph",
"value": "this is \`go\\\\ogle\` link",
},
]
`;

exports[`paragraph should be deep tokenized: a ****bold**** text 1`] = `
Array [
Object {
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/lexer/codeblock.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,22 @@ describe("codeblock", () => {
const tokens = lexer.run()
expect(tokens).toMatchSnapshot()
})
// Lexes an ordered list item that contains a fenced code block and snapshots the tokens.
it("should be work just fine within a list item", () => {
// Fixture: continuation lines are tab-indented and the fence has a space before its language tag.
const markdown = `
4. What is the exact rule for determining when list items get
\twrapped in \`<p>\` tags? Can a list be partially "loose" and partially
\t"tight"? What should we do with a list like this?
\t\t\`\`\` markdown
\t\t1. one
\t\t2. two
\t\t3. three
\t\t\`\`\`
`
const tokens = new Lexer(markdown.split("\n")).run()
expect(tokens).toMatchSnapshot()
})
})
8 changes: 8 additions & 0 deletions tests/unit/lexer/paragraph.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,12 @@ describe("paragraph", () => {
const lexerData = lexer.run()
expect(lexerData).toMatchSnapshot()
})
// Lexes a paragraph whose inline code span contains a backslash and snapshots the tokens.
it("should allow escaped escape character", () => {
const source = ["this is `go\\ogle` link"]
const tokens = new Lexer(source).run()
expect(tokens).toMatchSnapshot()
})
})
6 changes: 4 additions & 2 deletions tests/unit/parser/__snapshots__/codeblock.spec.js.snap
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
exports[`CodeBlock Parsing should cope with multiple items 1`] = `"<pre><code class='language-js'>const a = 1</code></pre><p>some people are funny</p>"`;

exports[`CodeBlock Parsing should parse indent codeblock 1`] = `
"<p>This is a normal paragraph:</p><pre><code>This is a code block.</code></pre><p>Here is an example of AppleScript:</p><pre><code>tell application &quot;Foo&quot;
"<p>This is a normal paragraph:</p><pre><code>This is a code block.
</code></pre><p>Here is an example of AppleScript:</p><pre><code>tell application &quot;Foo&quot;
beep
end tell</code></pre><p>A code block continues until it reaches a line that is not indented (or the end of the article).</p>"
end tell
</code></pre><p>A code block continues until it reaches a line that is not indented (or the end of the article).</p>"
`;

exports[`CodeBlock Parsing should parse multiple consequetive codeblocks 1`] = `"<pre><code class='language-js'>const a = 1</code></pre><pre><code class='language-js'>const b = 2</code></pre>"`;
Expand Down
Loading

0 comments on commit 154dacc

Please sign in to comment.