Skip to content

Commit

Permalink
Fixes for parsing top-level and trailing text nodes. (#12)
Browse files Browse the repository at this point in the history
* When a text node appears at the top level (level 0), add it to the results array instead of to a parent node one level up.

* Make sure we calculate the correct end of the text node when it's the last node in the html string.

* Add tests to verify parser behavior on top-level and trailing text nodes.

* Drop text nodes that contain nothing but whitespace, since they are not useful in HTML.

* Update tests for dropping whitespace-only nodes.
  • Loading branch information
rayd authored and frontmesh committed Oct 1, 2018
1 parent a334b00 commit f683874
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 4 deletions.
19 changes: 15 additions & 4 deletions lib/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,21 @@ module.exports = function parse(html, options) {
level--;
if (!inComponent && nextChar !== '<' && nextChar) {
// trailing text node
arr[level].children.push({
type: 'text',
content: html.slice(start, html.indexOf('<', start))
});
// if we're at the root, push a base text node. otherwise add as
// a child to the current node.
parent = level === -1 ? result : arr[level].children;

// calculate correct end of the content slice in case there's
// no tag after the text node.
var end = html.indexOf('<', start);
var content = html.slice(start, end === -1 ? undefined : end);
// if a node is nothing but whitespace, no need to add it.
if (!/^\s*$/.test(content)) {
parent.push({
type: 'text',
content: content
});
}
}
}
});
Expand Down
74 changes: 74 additions & 0 deletions test/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,80 @@ test('parse', function (t) {
{ type: 'text', content: ' 10 ' },
]
}], 'should not give voidElements children');

html = '<div></div>\n';
parsed = HTML.parse(html);
t.deepEqual(parsed, [{
type: 'tag',
name: 'div',
attrs: {},
voidElement: false,
children: []
}], 'should not explode on trailing whitespace');

html = '<div>Hi</div> There ';
parsed = HTML.parse(html);
t.deepEqual(parsed, [{
type: 'tag',
name: 'div',
attrs: {},
voidElement: false,
children: [
{ type: 'text', content: 'Hi' }
]
},{
type: 'text', content: ' There '
}], 'should handle trailing text nodes at the top-level');

html = '<div>Hi</div> There <span>something</span> <a></a>else ';
parsed = HTML.parse(html);
t.deepEqual(parsed, [{
type: 'tag',
name: 'div',
attrs: {},
voidElement: false,
children: [
{ type: 'text', content: 'Hi' }
]
},{
type: 'text', content: ' There '
},{
type: 'tag',
name: 'span',
attrs: {},
voidElement: false,
children: [
{ type: 'text', content: 'something' }
]
},{
type: 'tag',
name: 'a',
attrs: {},
voidElement: false,
children: []
},{
type: 'text', content: 'else '
}], 'should handle text nodes in the middle of tags at the top-level');

html = '<div>Hi</div>\n\n <span>There</span> \t ';
parsed = HTML.parse(html);
t.deepEqual(parsed, [{
type: 'tag',
name: 'div',
attrs: {},
voidElement: false,
children: [
{ type: 'text', content: 'Hi' }
]
},{
type: 'tag',
name: 'span',
attrs: {},
voidElement: false,
children: [
{ type: 'text', content: 'There' }
]
}], 'should remove text nodes that are nothing but whitespace');
t.end();
});

Expand Down

0 comments on commit f683874

Please sign in to comment.