diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ccbe46 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/node_modules/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..766a017 --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# Mark Twain + +It is not easy to process Markdown directly. However, we can use `mark-twain` to parse a Markdown file into a JavaScript object which is easier to process. + +## Installation + +```bash +npm install mark-twain +``` + +## Usage + +```js +const MT = require('mark-twain'); +const fs = require('fs'); +const elements = MT(fs.readFileSync('something.md').toString()); +``` + +The returned value of `MT` would be something like this: + +```js +[{ + type: 'h1', + children: 'heading' + }, { + type: 'ul', + children: [{ + type: 'li', + children: [{ + type: 'span', + children: 'Bla bla ...' + }] + }] + }, { + type: 'hr' + }, { + type: 'p', + children: 'Bla bla ...' +}] +``` + +## License + +MIT diff --git a/index.js b/index.js new file mode 100644 index 0000000..82b116e --- /dev/null +++ b/index.js @@ -0,0 +1,3 @@ +'use strict'; + +module.exports = require('./src/MT'); diff --git a/package.json b/package.json new file mode 100644 index 0000000..c759da5 --- /dev/null +++ b/package.json @@ -0,0 +1,18 @@ +{ + "name": "mark-twain", + "version": "0.1.0-beta", + "description": "Parse Markdown into JavaScript object.", + "main": "index.js", + "scripts": { + "test": "mocha" + }, + "keywords": [ + "markdown", + "parser" + ], + "author": "Benjy Cui", + "license": "MIT", + "dependencies": { + "marked": "^0.3.5" + } +} diff --git a/src/MT.js b/src/MT.js new file mode 100644 index 0000000..96bf2bc --- /dev/null +++ b/src/MT.js @@ -0,0 +1,9 @@ +'use strict'; + +const marked = require('marked'); +const Parser = require('./parser'); + +module.exports = function MT(markdown) { + const tokens = marked.lexer(markdown); + return Parser.parse(tokens); +}; diff --git a/src/parser.js b/src/parser.js new file mode 100644 
/**
 * Turns a flat block-token array (as passed in from `marked.lexer`) into a
 * nested array of plain element objects of the form
 * `{type, props?, children?}`.
 *
 * Paragraph and text tokens are run through `marked.inlineLexer`, so a
 * `marked` binding must be in scope wherever this class is defined.
 */
class Parser {
  // @Private
  constructor() {
    // Remaining tokens, stored in reverse so pop() yields document order.
    this.tokens = [];
    // Token most recently returned by next().
    this.token = null;
    // Link definitions read from `tokens.links`, forwarded to inlineLexer.
    this.links = null;
  }

  /**
   * Convenience entry point: parses `tokens` with a fresh Parser instance.
   *
   * @param {Array<Object>} tokens - Block tokens; may carry a `links` property.
   * @returns {Array<Object>} Top-level element objects.
   */
  static parse(tokens) {
    const parser = new Parser();
    return parser.parse(tokens);
  }

  /**
   * Parses a token array into element objects.
   *
   * @param {Array<Object>} tokens - Block tokens; may carry a `links` property.
   * @returns {Array<Object>} Top-level element objects; tokens that map to no
   *   element (unknown types, tables) are omitted.
   */
  parse(tokens) {
    // Array.prototype.reverse() works in place, so reverse a copy: the
    // caller's array must stay intact and parsing it twice must be stable.
    this.tokens = tokens.slice().reverse();
    this.links = tokens.links;

    const content = [];
    while (this.next()) {
      const element = this.parseElement();
      if (element === null) continue;
      content.push(element);
    }

    return content;
  }

  /**
   * Consumes the next token and makes it the current one.
   *
   * @returns {Object|undefined} The consumed token, or `undefined` when the
   *   stream is exhausted.
   */
  next() {
    return this.token = this.tokens.pop();
  }

  /**
   * Looks at the upcoming token without consuming it.
   *
   * @returns {Object|number} The upcoming token, or `0` when none is left, so
   *   that reading `.type` on the result never throws.
   */
  peek() {
    return this.tokens[this.tokens.length - 1] || 0;
  }

  /**
   * Converts the current token (and, for containers, the tokens nested inside
   * it) into one element object.
   *
   * @returns {Object|null} The element, or `null` for token types that produce
   *   no output.
   */
  parseElement() {
    const token = this.token;
    switch (token.type) {
      case 'hr': return {type: 'hr'};
      case 'heading': return this.parseHeading(token);
      case 'code': return this.parseCode(token);
      case 'table': return null; // TODO
      case 'blockquote_start': return this.parseBlockquote(token);
      case 'list_start': return this.parseList(token);
      case 'list_item_start':
      case 'loose_item_start': return this.parseListItem(token);
      case 'html': return {type: 'html', children: token.text};
      case 'paragraph': return this.parseParagraph(token);
      case 'text': return this.parseText(token);
      default: return null;
    }
  }

  /**
   * Collects child elements until a token of type `endType` is consumed.
   *
   * Tokens that yield no element are skipped, matching what the top-level
   * loop in `parse` does. Running out of tokens is treated as an implicit
   * close instead of dereferencing `undefined`.
   *
   * @param {string} endType - Token type that closes the current container.
   * @returns {Array<Object>} The parsed children.
   */
  parseChildrenUntil(endType) {
    const children = [];
    while (this.next() && this.token.type !== endType) {
      const element = this.parseElement();
      if (element !== null) children.push(element);
    }
    return children;
  }

  /** Builds an `h<depth>` element whose children is the raw heading text. */
  parseHeading(token) {
    return {type: 'h' + token.depth, children: token.text};
  }

  /** Builds a `code` element; `props.lang` is copied from the token as-is. */
  parseCode(token) {
    return {
      type: 'code',
      props: {lang: token.lang},
      children: token.text
    };
  }

  /** Builds a `blockquote` element from the tokens up to `blockquote_end`. */
  parseBlockquote() {
    return {
      type: 'blockquote',
      children: this.parseChildrenUntil('blockquote_end')
    };
  }

  /** Builds an `ol` (when `token.ordered` is truthy) or `ul` element. */
  parseList(token) {
    return {
      type: token.ordered ? 'ol' : 'ul',
      children: this.parseChildrenUntil('list_end')
    };
  }

  /** Builds an `li` element from the tokens up to `list_item_end`. */
  parseListItem() {
    return {
      type: 'li',
      children: this.parseChildrenUntil('list_item_end')
    };
  }

  /** Builds a `p` element whose children is the inline-lexed paragraph text. */
  parseParagraph(token) {
    return {
      type: 'p',
      children: marked.inlineLexer(token.text, this.links) // TODO
    };
  }

  /**
   * Builds a `span` element. Directly following `text` tokens are consumed
   * and joined with newlines before the combined text is inline-lexed.
   */
  parseText(token) {
    let text = token.text;

    while (this.peek().type === 'text') {
      text += '\n' + this.next().text;
    }

    return {
      type: 'span',
      children: marked.inlineLexer(text, this.links) // TODO
    };
  }
}
b/test/test.md new file mode 100644 index 0000000..bbda1f5 --- /dev/null +++ b/test/test.md @@ -0,0 +1,16 @@ +# H1 +## H2 +### H3 +#### H4 +##### H5 +###### H6 + + +1. First +1. Second +1. Third + + +* Something +* Something +* Somethong