Let there be Mark Twain!

benjycui · Dec 1, 2015 · c65b44d · c65b44d
commit c65b44d
Show file tree

Hide file tree

Showing 8 changed files with 255 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+/node_modules/
diff --git a/README.md b/README.md
@@ -0,0 +1,44 @@
+# Mark Twain
+
+It is not easy to process Markdown directly. However, we can use `mark-twain` to parse a Markdown file into a JavaScript object which is easier to process.
+
+## Installation
+
+```bash
+npm install mark-twain
+```
+
+## Usage
+
+```js
+const MT = require('mark-twain');
+const fs = require('fs');
+const elements = MT(fs.readFileSync('something.md').toString());
+```
+
+The returned value of `MT` would be something like this:
+
+```js
+[{
+  type: 'h1',
+  children: 'heading'
+ }, {
+  type: 'ul',
+  children: [{
+    type: 'li',
+    children: [{
+      type: 'span',
+      children: 'Bla bla ...'
+    }]
+  }]
+ }, {
+  type: 'hr'
+ }, {
+  type: 'p',
+  children: 'Bla bla ...'
+}]
+```
+
+## License
+
+MIT
diff --git a/index.js b/index.js
@@ -0,0 +1,3 @@
+'use strict';
+
+// Package entry point: re-exports whatever ./src/MT exports (the MT function).
+module.exports = require('./src/MT');
diff --git a/package.json b/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "mark-twain",
+  "version": "0.1.0-beta",
+  "description": "Parse Markdown into JavaScript object.",
+  "main": "index.js",
+  "scripts": {
+    "test": "mocha"
+  },
+  "keywords": [
+    "markdown",
+    "parser"
+  ],
+  "author": "Benjy Cui",
+  "license": "MIT",
+  "dependencies": {
+    "marked": "^0.3.5"
+  }
+}
diff --git a/src/MT.js b/src/MT.js
@@ -0,0 +1,9 @@
+'use strict';
+
+const marked = require('marked');
+const Parser = require('./parser');
+
+module.exports = function MT(markdown) {
+  const tokens = marked.lexer(markdown);
+  return Parser.parse(tokens);
+};
diff --git a/src/parser.js b/src/parser.js
@@ -0,0 +1,130 @@
+'use strict';
+
+const marked = require('marked');
+
+class Parser {
+  // @Private
+  constructor() {
+    this.tokens = [];
+    this.token = null;
+    this.links = null;
+  }
+
+  static parse(tokens) {
+    const parser = new Parser();
+    return parser.parse(tokens);
+  }
+
+  parse(tokens) {
+    this.tokens = tokens.reverse();
+    this.links = tokens.links;
+
+    const content = [];
+    while (this.next()) {
+      const element = this.parseElement();
+      if (element === null) continue;
+      content.push(element);
+    }
+
+    return content;
+  }
+
+  next() {
+    return this.token = this.tokens.pop();
+  }
+
+  peek() {
+    return this.tokens[this.tokens.length - 1] || 0;
+  }
+
+  parseElement() {
+    const token = this.token;
+    switch (token.type) {
+    case 'hr': return {type: 'hr'};
+    case 'heading': return this.parseHeading(token);
+    case 'code': return this.parseCode(token);
+    case 'table': return null; // TODO
+    case 'blockquote_start': return this.parseBlockquote(token);
+    case 'list_start': return this.parseList(token);
+    case 'list_item_start':
+    case 'loose_item_start': return this.parseListItem(token);
+    case 'html': return {type: 'html', children: token.text};
+    case 'paragraph': return this.parseParagraph(token);
+    case 'text': return this.parseText(token);
+    default: return null;
+    }
+  }
+
+  parseHeading(token) {
+    return {type: 'h' + token.depth, children: token.text};
+  }
+
+  parseCode(token) {
+    return {
+      type: 'code',
+      props: {lang: token.lang},
+      children: token.text
+    };
+  }
+
+  parseBlockquote() {
+    const blockquote = {
+      type: 'blockquote',
+      children: []
+    };
+
+    while (this.next().type !== 'blockquote_end') {
+      blockquote.children.push(this.parseElement());
+    }
+
+    return blockquote;
+  }
+
+  parseList(token) {
+    const list = {
+      type: token.ordered ? 'ol' : 'ul',
+      children: []
+    };
+
+    while (this.next().type !== 'list_end') {
+      list.children.push(this.parseElement());
+    }
+
+    return list;
+  }
+
+  parseListItem() {
+    const listItem = {
+      type: 'li',
+      children: []
+    }
+
+    while (this.next().type !== 'list_item_end') {
+      listItem.children.push(this.parseElement());
+    }
+
+    return listItem;
+  }
+
+  parseParagraph(token) {
+    return {
+      type: 'p',
+      children: marked.inlineLexer(token.text, this.links) // TODO
+    };
+  }
+
+  parseText(token) {
+    let text = token.text;
+
+    while (this.peek().type === 'text') {
+      text += '\n' + this.next().text;
+    }
+
+    return {
+      type: 'span',
+      children: marked.inlineLexer(text, this.links) // TODO
+    };
+  }
+}
+
+module.exports = Parser;
diff --git a/test/index.test.js b/test/index.test.js
@@ -0,0 +1,34 @@
+'use strict';
+
+const assert = require('assert');
+const fs = require('fs');
+const MT = require('..');
+
+describe('MT', function() {
+  const md = fs.readFileSync('./test/test.md');
+  const elements = MT(md.toString());
+  console.log(JSON.stringify(elements, null, 2));
+
+  it('should process headers correctly', function() {
+    assert.strictEqual(elements[0].type, 'h1');
+    assert.strictEqual(elements[1].type, 'h2');
+    assert.strictEqual(elements[2].type, 'h3');
+    assert.strictEqual(elements[3].type, 'h4');
+    assert.strictEqual(elements[4].type, 'h5');
+    assert.strictEqual(elements[5].type, 'h6');
+
+    assert.strictEqual(elements[0].children, 'H1');
+  });
+
+  it('should process lists correctly', function() {
+    const ol = elements[6];
+    assert.strictEqual(ol.type, 'ol');
+    assert.strictEqual(ol.children[0].type, 'li');
+    assert.strictEqual(ol.children[1].children[0].type, 'span');
+    assert.strictEqual(ol.children[2].children[0].children, 'Third');
+
+    const ul = elements[7];
+    assert.strictEqual(ul.type, 'ul');
+    assert.strictEqual(ul.children.length, 3);
+  });
+});
diff --git a/test/test.md b/test/test.md
@@ -0,0 +1,16 @@
+# H1
+## H2
+### H3
+#### H4
+##### H5
+###### H6
+
+
+1. First
+1. Second
+1. Third
+
+
+* Something
+* Something
+* Somethong
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		'use strict';

		module.exports = require('./src/MT');