diff --git a/d/pqmarkup-lite/.gitignore b/d/pqmarkup-lite/.gitignore new file mode 100644 index 0000000..9ec1068 --- /dev/null +++ b/d/pqmarkup-lite/.gitignore @@ -0,0 +1,15 @@ +.dub +docs.json +__dummy.html +docs/ +/pqmarkup-lite +pqmarkup-lite.so +pqmarkup-lite.dylib +pqmarkup-lite.dll +pqmarkup-lite.a +pqmarkup-lite.lib +pqmarkup-lite-test-* +*.exe +*.o +*.obj +*.lst diff --git a/d/pqmarkup-lite/dub.sdl b/d/pqmarkup-lite/dub.sdl new file mode 100644 index 0000000..74623aa --- /dev/null +++ b/d/pqmarkup-lite/dub.sdl @@ -0,0 +1,3 @@ +name "pqmarkup-lite" +authors "Bradley Chatha" +dependency "jcli" version="~>0.12.1" diff --git a/d/pqmarkup-lite/dub.selections.json b/d/pqmarkup-lite/dub.selections.json new file mode 100644 index 0000000..78c227b --- /dev/null +++ b/d/pqmarkup-lite/dub.selections.json @@ -0,0 +1,8 @@ +{ + "fileVersion": 1, + "versions": { + "jcli": "0.12.1", + "jioc": "0.2.0", + "silly": "1.0.2" + } +} diff --git a/d/pqmarkup-lite/source/app.d b/d/pqmarkup-lite/source/app.d new file mode 100644 index 0000000..ee80be4 --- /dev/null +++ b/d/pqmarkup-lite/source/app.d @@ -0,0 +1,101 @@ +module app; + +import std : Nullable; +import jcli : CommandDefault, CommandPositionalArg, CommandNamedArg, Result, CommandHelpText, CommandParser; +import lexer, syntax; + +@CommandDefault +struct Options /* Command-line options; filled in by CommandParser!Options in getOptions. */ +{ + @CommandPositionalArg(0, "file", "The markup file to use.") + string file; + + @CommandNamedArg("t|is-test", "Specified if the provided file is a test file.") + Nullable!bool isTestFile; /* main treats an unset flag as false via get(false) */ +} + +struct TestCase /* One entry of a test file: the text before and after the " (()) " delimiter (see getTestCases). */ +{ + string input; + string output; +} + +int main(string[] args) /* Entry point. Returns -1 when option parsing or test-case extraction fails, and 0 at the end of the normal path. */ +{ + import std : readText, writeln; + + auto optionsResult = getOptions(args); + if(optionsResult.isFailure) + return -1; + + const options = optionsResult.asSuccess.value; + const text = readText(options.file); /* the file is read unconditionally, but only the test-file branch below uses it */ + + if(options.isTestFile.get(false)) + { + auto result = getTestCases(text); + if(result.isFailure) + { + import std : writeln; + writeln("error: ", result.asFailure.error); +
return -1; + } + + foreach(test; result.asSuccess.value) /* only test.input is used below; test.output is not compared yet */ + { + import std; + auto lexer = Lexer(test.input); + + // DEBUG + auto ast = syntax.parse(lexer); + writeln(*ast); + writeln(); + } + } + return 0; +} + +Result!Options getOptions(string[] args) /* Parses args[1..$] into Options; on failure prints the help text and returns a failure with an empty message. */ +{ + import std : writeln; + + CommandParser!Options parser; + Options options; + + auto result = parser.parse(args[1..$], /*ref*/options); + + if(!result.isSuccess) + { + CommandHelpText!Options help; + writeln(help.toString("pqmarkup-lite")); + return typeof(return).failure(""); + } + + return typeof(return).success(options); +} + +Result!(TestCase[]) getTestCases(string text) /* Splits the file on '|' and turns every chunk that contains DELIM into a TestCase; always returns success. */ +{ + import std : splitter, filter, all, array, map, countUntil, byCodeUnit, until; + import std.ascii : isWhite; // dunno why, but I have to do this one separately for it to work. + + const DELIM = " (()) "; + auto cases = + text.splitter('|') + .map!((split) + { + if(split.all!isWhite) + return null; + + const delimStart = split.byCodeUnit.countUntil(DELIM); + if(delimStart < 0) + return null; + return [split[0..delimStart+1], split[delimStart+DELIM.length..$]]; /* the slice end of delimStart plus one keeps the leading space of DELIM in the input, which is why the sentinel below is "@ " with a trailing space */ + }) + .filter!(splits => splits !is null) /* drops whitespace-only chunks and chunks without DELIM */ + .map!(splits => TestCase(splits[0], splits[1])) + .until!(test => test.input == "@ ") // Allow myself to limit test cases until I've worked on the code to parse them + .array; + + return typeof(return).success(cases); +} \ No newline at end of file diff --git a/d/pqmarkup-lite/source/ast.d b/d/pqmarkup-lite/source/ast.d new file mode 100644 index 0000000..950976d --- /dev/null +++ b/d/pqmarkup-lite/source/ast.d @@ -0,0 +1,132 @@ +module ast; + +import std.sumtype; +import tokens; + +alias AstNodeT = SumType!( /* every payload kind an AstNode can carry */ + AstRoot, + AstString, + AstText, + AstHeader, + AstLink, + AstAbbr, + AstJunk, + AstWhiteSpace, + AstStyle, + AstNewLine, + AstInconclusive, + AstDot, + AstNumber, + AstBlock, + AstLinkRef, + AstListItem, + AstCode, +); + +struct AstNode /* Tree node: a payload plus parent and children links. */ +{ + import std.array : Appender; + import std.range : repeat, take; + + AstNode* parent; +
AstNodeT value; + AstNode*[] children; + + this(ValueT)(AstNode* p, ValueT v, AstNode*[] c = null) /* Wraps v as the payload; when p is non-null the new node registers itself as a child of p. */ + { + if(p) + p.addChild(&this); /* NOTE(review): stores the address of this node; every call site visible in syntax.d allocates with new and passes a null parent — confirm nothing constructs a node by value with a parent */ + this.value = AstNodeT(v); + this.children = c; + } + + void addChild(AstNode* child) /* Re-parents child under this node, detaching it from its previous parent first. */ + { + import std.algorithm : remove; + + if(child.parent) + child.parent.children = child.parent.children.remove!(i => i is child); + + this.children ~= child; + child.parent = &this; + } + + string toString() /* Renders this node and its descendants, one node per line. */ + { + import std.exception : assumeUnique; + + Appender!(char[]) output; + this.toString(0, output); + return output.data.assumeUnique; + } + + void toString(size_t indent, ref Appender!(char[]) output) /* Appends this subtree to output, indenting each level by four spaces. */ + { + import std.conv : to; + + output.put(repeat(' ').take(indent * 4)); + output.put(this.value.to!string()); + output.put('\n'); + foreach(child; this.children) + child.toString(indent + 1, output); + } +} + +struct AstRoot{} +struct AstString {} +struct AstText +{ + string text; +} +struct AstHeader +{ + int size; + bool isNegative; +} +struct AstLink +{ + AstNode* textNode; + string href; + bool isLocalLink; +} +struct AstAbbr +{ + AstNode* textNode; + AstNode* titleNode; +} +struct AstDot{} +struct AstWhiteSpace{} +struct AstNewLine{} +struct AstInconclusive /* placeholder for a bracketed group until semanticInconclusive in syntax.d decides what it is */ +{ + AstNode* textNode; +} +struct AstJunk +{ + string text; + string message; +} +struct AstStyle +{ + StringStyle style; +} +struct AstNumber +{ + int value; +} +struct AstLinkRef +{ + AstNode* textNode; + int value; +} +struct AstBlock +{ + BlockType type; +} +struct AstListItem +{ +} +struct AstCode +{ + string text; +} \ No newline at end of file diff --git a/d/pqmarkup-lite/source/lexer.d b/d/pqmarkup-lite/source/lexer.d new file mode 100644 index 0000000..4cc9bbd --- /dev/null +++ b/d/pqmarkup-lite/source/lexer.d @@ -0,0 +1,322 @@ +module lexer; + +import std, tokens; +import std.uni : isAlphaNum; // resolve symbol conflict +import tokens : EOF; + +struct Lexer /* Hand-written tokenizer over a string; next() yields one Token per call. */ +{ + private string _text; + private size_t _cursor; /* index of the next unread code unit */ + private size_t _atFirst; /* cursor position where the current token started */ + private size_t _afterFirst; /* position just past the first decoded character of the current token */ + private
bool _readNextTextAsIs; // Special case: URLS are very annoying to concat together if we still delimit by things like "-" and stuff. + // So if we encounter a '[' that's not for a comment, then Text takes precedence over operators. + // As a double special case: The open Quote operator still has higher precedence. + + this(string text) + { + this._text = text; + } + + Token next() /* Returns the next token, or an EOF token once the input (or an unterminated [[[ comment) runs out. */ + { + if(this._cursor >= this._text.length) + return Token(TokenValue(EOF())); + + this._atFirst = this._cursor; + this._afterFirst = this._cursor; + const first = peekUtf(this._afterFirst); + const nextTextAsTrue = this._readNextTextAsIs; + this._readNextTextAsIs = false; /* the raw-text flag only ever applies to the single token after an opening square bracket */ + + if(isStyleChar(first)) /* style characters are tried before comments, raw text and operators */ + { + auto token = this.nextStyle(first); + if(token != Token.init) + return token; + else /* nextStyle returned Token.init: a lone slash or backslash, emitted as plain text */ + { + this.commit(this._afterFirst); + return Token(Text("" ~ first.to!char), this._atFirst, this._afterFirst); + } + } + + size_t afterComment = this._cursor; + size_t commentNest; + if(first == '[' && this.peekManyAscii!3(afterComment) == "[[[") + { + this.commit(afterComment); + commentNest = 1; /* triple-bracket comments nest; skip until the matching close */ + + while(commentNest > 0) + { + if(!this.readUntilAscii!(c => c == '[' || c == ']')(this._cursor)) + return Token(EOF()); + + size_t afterPeek = this._cursor; + const peeked = this.peekManyAscii!3(afterPeek); + if(peeked == "[[[") + { + commentNest++; + this.commit(afterPeek); + } + else if(peeked == "]]]") + { + commentNest--; + this.commit(afterPeek); + } + else + this.peekUtf(this._cursor); /* a lone bracket: step over it and keep scanning */ + } + + return this.next(); /* the comment produces no token; lex whatever follows it */ + } + + if(nextTextAsTrue) + { + if(first == Q_LEFT) + { + this.commit(this._afterFirst); + return Token(OpenQuote(), this._atFirst, this._afterFirst); + } + else + return this.nextText(); + } + + switch(first) + { + case ' ': + case '\t': + return this.nextWhite(); + + static foreach(op; OPERATORS) /* generates one case label per entry of OPERATORS */ + { + case op.ch: + this.commit(this._afterFirst); + static if(op.ch == '[') + this._readNextTextAsIs = true; + return Token(op.value, this._atFirst, this._afterFirst); + } + + case '>': + case
'<': + return this.nextBlock(first); + + case '`': + return this.nextCode(); + + default: + this.commit(this._afterFirst); /* consume the first character; nextText below continues from here */ + break; + } + + if(isAlphaNum(first) || isAuxTextChar(first)) + return this.nextText(); + + assert(false, first.to!string); + } + + private: + + void commit(size_t newCursor) /* Moves the read position; peeking never does this on its own. */ + { + this._cursor = newCursor; + } + + char peekAscii(ref size_t nextCursor) /* Returns the code unit at nextCursor and advances it; the caller must make sure nextCursor is in bounds. */ + { + return this._text[nextCursor++]; + } + + dchar peekUtf(ref size_t nextCursor) /* Decodes one code point at nextCursor and advances it past that code point. */ + { + return decode(this._text, nextCursor); + } + + char[amount] peekManyAscii(size_t amount)(ref size_t nextCursor) /* Copies up to amount code units; slots past the end of the text keep char.init, so a comparison against a literal simply fails. */ + { + auto end = nextCursor + amount; + if(end > this._text.length) + end = this._text.length; + + char[amount] chars; + foreach(i, ch; this._text[nextCursor..end]) + chars[i] = ch; + nextCursor = end; + + return chars; + } + + Token nextText() /* Reads a run of text starting at _atFirst; a run made only of digits that fits an int becomes a Number token. */ + { + Token tok; + Text text; + + tok.start = this._atFirst; + this.readUntilUtf!(c => (!c.isAlphaNum && c != ' ' && c != '\t' && !isAuxTextChar(c)) || isTextStopChar(c))(this._cursor); + tok.end = this._cursor; + text.text = this._text[tok.start..tok.end]; + tok.value = text; + + if(text.text.length && text.text.length <= 10 && text.text.all!(c => c >= '0' && c <= '9') && text.text.to!long <= int.max) /* digits only and within int range: isNumeric also accepts spellings such as 1.5, 1e5, 1L or inf, which made to!int throw */ + tok.value = Number(text.text.to!int); + + return tok; + } + + Token nextWhite() /* Collapses a run of spaces and tabs into one WhiteSpace token that records the run length. */ + { + Token t; + WhiteSpace ws; + t.start = this._atFirst; + + this.readUntilAscii!(c => c != ' ' && c != '\t')(this._cursor); + + t.end = this._cursor; + ws.count = t.end - t.start; + t.value = ws; + + return t; + } + + Token nextStyle(dchar first) /* Lexes a style marker; returns Token.init when a slash or backslash is not part of a two-character marker. */ + { + switch(first) + { + case '/': + this.commit(this._afterFirst); + size_t afterSlash = this._cursor; + if(afterSlash < this._text.length && this.peekAscii(afterSlash) == '\\') /* bounds check: the slash may be the last character of the input */ + { + this.commit(afterSlash); + return Token(Style(StringStyle.superset), this._atFirst, afterSlash); + } + else + return Token.init; + + case '\\': + this.commit(this._afterFirst); + size_t afterSlash = this._cursor; + if(afterSlash < this._text.length && this.peekAscii(afterSlash) == '/') /* bounds check: the backslash may be the last character of the input */ + { + this.commit(afterSlash); + return Token(Style(StringStyle.subset), this._atFirst, afterSlash); + } + else + return Token.init;
+ + default: + this.commit(this._afterFirst); + return Token(Style( + getStyleInfo(first)[1] + ), this._atFirst, this._afterFirst); + } + } + + Token nextBlock(dchar first) /* Lexes a one- or two-character angle-bracket block marker; first must be an angle bracket. */ + { + this.commit(this._afterFirst); + size_t afterSecond = this._afterFirst; + const second = afterSecond < this._text.length ? this.peekAscii(afterSecond) : '\0'; /* the angle bracket may be the last character of the input, so only peek when something follows */ + + switch(first) + { + case '>': + switch(second) + { + case '>': + this.commit(afterSecond); + return Token(Block(BlockType.rightAlign), this._atFirst, afterSecond); + case '<': + this.commit(afterSecond); + return Token(Block(BlockType.centerAlign), this._atFirst, afterSecond); + default: /* single-character marker: the second character is left unconsumed */ + return Token(Block(BlockType.quote), this._atFirst, afterSecond); + } + + case '<': + switch(second) + { + case '<': + this.commit(afterSecond); + return Token(Block(BlockType.leftAlign), this._atFirst, afterSecond); + case '>': + this.commit(afterSecond); + return Token(Block(BlockType.justify), this._atFirst, afterSecond); + default: + return Token(Block(BlockType.leftAlignReciprocal), this._atFirst, afterSecond); + } + default: assert(false); + } + } + + Token nextCode() /* Lexes a triple-backtick fenced block into a Code token; a lone backtick becomes a Text token; an unterminated fence yields EOF. */ + { + size_t afterFence = this._atFirst; + if(this.peekManyAscii!3(afterFence) != "```") + { this.commit(this._afterFirst); return Token(Text("`"), this._atFirst, this._afterFirst); } /* consume the lone backtick; without the commit the cursor never moved and the same token was returned forever */ + + this.commit(afterFence); + const atCode = afterFence; + while(true) + { + if(!this.readUntilAscii!(ch => ch == '`')(this._cursor)) + return Token(EOF()); + + afterFence = this._cursor; + if(this.peekManyAscii!3(afterFence) == "```") + { + this.commit(afterFence); + return Token(Code(this._text[atCode..afterFence-3]), this._atFirst, afterFence); + } + else + this.commit(this._cursor + 1); /* a backtick that does not start a closing fence is part of the code */ + } + } +} + +private: + +bool isTextStopChar(dchar ch) /* NOTE(review): text runs stop at the digit 0, which OPERATORS in tokens.d lexes as the Zero token — confirm this is intended */ +{ + return ch == '0'; +} + +bool isAuxTextChar(dchar ch) /* True for the punctuation characters that may appear inside a text run. */ +{ + static immutable chs = [ + '!', '"', '£', '$', '%', '^', + '&', '*', '=', '¬', '`', '\'', + '@', '#', '~', ':', ';', ',', + '.', '?', '|', '<', '>', '/', + '-' + ]; + return chs.canFind(ch); +} + +// Needs to be here because otherwise the compiler thinks it needs a
double context. +bool readUntilAscii(alias Pred)(ref Lexer lexer, ref size_t cursor) /* Advances cursor one code unit at a time until Pred matches; returns false if the text ends first. */ +{ + while(cursor < lexer._text.length) + { + const ch = lexer._text[cursor]; + if(Pred(ch)) + return true; + cursor++; + } + return false; +} + +bool readUntilUtf(alias Pred)(ref Lexer lexer, ref size_t cursor) /* Same as readUntilAscii but decodes whole code points; cursor is left on the matching character. */ +{ + while(cursor < lexer._text.length) + { + auto nextCursor = cursor; + const ch = decode(lexer._text, nextCursor); + if(Pred(ch)) + return true; + cursor = nextCursor; + } + return false; +} \ No newline at end of file diff --git a/d/pqmarkup-lite/source/syntax.d b/d/pqmarkup-lite/source/syntax.d new file mode 100644 index 0000000..83a51bd --- /dev/null +++ b/d/pqmarkup-lite/source/syntax.d @@ -0,0 +1,492 @@ +module syntax; + +import std.sumtype, std.exception, std.typecons; +import lexer, tokens, ast; + +alias WasConsumed = Flag!"wasConsumed"; + +AstNode* parse(Lexer lexer) /* Lexes the whole input, feeds the tokens to a Visitor from last to first, and returns the root of the resulting tree. */ +{ + auto tokens = lexer.toTokens(); // We'll be starting from the bottom. + auto visitor = Visitor(true); + foreach_reverse(tok; tokens) + { + debug import std.stdio; + debug writeln(tok); /* the import above only exists in debug builds, so the trace must be debug-only too or other builds fail to compile */ + visitor.visit(tok); + } + visitor.finish(); + return visitor.root; +} + +private: + +Token[] toTokens(Lexer lexer) /* Collects tokens until the first EOF token, which is not included. */ +{ + Token[] tokens; + const eof = TokenValue(EOF()); + while(true) + { + auto next = lexer.next(); + if(next.value == eof) + break; + else + tokens ~= next; + } + + return tokens; +} + +struct Visitor /* Builds the AST from tokens that are visited in reverse order. */ +{ + enum State + { + default_, + parsingHeader, + parsingLinkOrAbbrOrText // yep, link syntax is that ambiguous + } + + AstNode* root; + AstNode*[] nodeStack; + AstNode* currentNode; + uint stringNest; + Token lastToken; + State state; + + this(bool __dummy) /* the parameter is unused; it exists because a D struct cannot declare a constructor without parameters */ + { + this.currentNode = new AstNode(); + this.root = this.currentNode; + + auto lastLine = new AstNode(null, AstNewLine()); + this.root.addChild(lastLine); + this.pushNode(lastLine); + } + + void pushNode(AstNode* node) /* Makes node the current node and remembers the previous one on nodeStack. */ + { + this.nodeStack ~= this.currentNode; + this.currentNode = node; + } + + void popCurrentNode() + { + this.currentNode =
this.nodeStack[$-1]; + this.nodeStack.length--; + } + + void visit(Token token) /* Dispatches on the current state, then remembers the token in lastToken. */ + { + final switch(this.state) with(State) + { + case default_: this.visitDefault(token); break; + case parsingHeader: this.visitHeader(token); break; + case parsingLinkOrAbbrOrText: this.visitTheUniverse(token); break; + } + + lastToken = token; + } + + void visitDefault(Token token) /* Handles one token in the default state; tokens arrive last-to-first, so closing tokens are seen before their openers. */ + { + token.value.match!( + (CloseSParen sp) + { + this.state = State.parsingLinkOrAbbrOrText; + auto node = new AstNode(null, AstInconclusive()); + this.currentNode.addChild(node); + this.pushNode(node); + }, + (CloseCParen cp) + { + this.currentNode.children[$-1].value.match!( /* NOTE(review): assumes currentNode already has a child; the Style handler below checks the length first */ + (AstString str) + { + this.state = State.parsingHeader; + }, + (_) { auto node = new AstNode(null, AstText(")")); this.currentNode.addChild(node); } /* keep the parenthesis as text, like the H fallback below; the node used to be built and then dropped */ + ); + }, + (CapitalH ch) + { + this.currentNode.children[$-1].value.match!( + (AstString str) + { + auto node = new AstNode(null, AstHeader(3)); + node.addChild(this.currentNode.children[$-1]); + this.currentNode.addChild(node); + }, + (_) + { + auto node = new AstNode(null, AstText("H")); + this.currentNode.addChild(node); + } + ); + }, + (Style st) + { + auto node = new AstNode( + null, + AstStyle(st.style) + ); + + bool addAsText = true; + if(this.currentNode.children.length) + { + this.currentNode.children[$-1].value.match!( + (AstString str) + { + addAsText = false; + node.addChild(this.currentNode.children[$-1]); + this.currentNode.addChild(node); + }, + (ref AstNumber number) + { + if(st.style != StringStyle.strike) + return; + + addAsText = false; + number.value *= -1; /* a strike marker next to a number acts as a minus sign */ + }, + (_) {} + ); + } + + if(addAsText) + { + // this.currentNode.addChild(node); + // TODO: + } + }, + (OpenQuote oq) + { + enforce(this.stringNest-- > 0, "Unterminated string."); + this.currentNode.value.match!( + (AstString _) + { + this.popCurrentNode(); + }, + (_){ throw new Exception("Expected currentNode to be a string."); } + ); + }, + (Text te) + { + auto me = new AstNode( + null, + AstText(te.text) + ); +
if(!this.handleInconclusiveNode || this.semanticInconclusive(me) == WasConsumed.no) + this.currentNode.addChild(me); + }, + (CloseQuote cq) + { + auto node = new AstNode(null, AstString()); + if(!this.handleInconclusiveNode|| this.semanticInconclusive(node) == WasConsumed.no) + this.currentNode.addChild(node); + this.stringNest++; /* tokens are visited in reverse, so a closing quote is what opens a string node */ + this.pushNode(node); + }, + (Dot dot) + { + this.currentNode.addChild(new AstNode(null, AstDot())); + }, + (WhiteSpace ws) + { + this.currentNode.addChild(new AstNode(null, AstWhiteSpace())); + }, + (NewLine nl) + { + auto nextLine = new AstNode(null, AstNewLine()); + this.currentNode.value.match!( + (AstNewLine _) + { + if(this.currentNode.children.length) + { + this.currentNode.children[$-1].value.match!( + (AstBlock block) /* the most recently added child is a block marker: wrap the other children of this line in a block node of that type */ + { + AstNode* wrapperNode = new AstNode(); + final switch(block.type) with(BlockType) + { + case leftAlignReciprocal: + case none: return; + case leftAlign: wrapperNode.value = AstBlock(leftAlign); break; + case rightAlign: wrapperNode.value = AstBlock(rightAlign); break; + case justify: wrapperNode.value = AstBlock(justify); break; + case centerAlign: wrapperNode.value = AstBlock(centerAlign); break; + case quote: wrapperNode.value = AstBlock(quote); break; + } + this.currentNode.children.length--; /* drop the marker itself */ + while(this.currentNode.children.length) + wrapperNode.addChild(this.currentNode.children[0]); /* addChild detaches the child from currentNode, so this loop drains the list */ + this.currentNode.addChild(wrapperNode); + }, + (AstDot dot) /* the most recently added child is a dot: the line becomes a list item */ + { + auto wrapperNode = new AstNode(null, AstListItem()); + this.currentNode.children.length--; + while(this.currentNode.children.length) + wrapperNode.addChild(this.currentNode.children[0]); + this.currentNode.addChild(wrapperNode); + }, + (_){} + ); + } + this.popCurrentNode(); + this.currentNode.addChild(nextLine); + this.pushNode(nextLine); + }, + (AstBlock _) + { + this.popCurrentNode(); + }, + (_){ this.currentNode.addChild(nextLine); } + ); + }, + (Block block) + { + if(block.type != BlockType.leftAlignReciprocal) + { + bool addByItself = true; +
if(this.currentNode.children.length) + { + addByItself = this.currentNode.children[$-1].value.match!( + (AstString str) /* the marker wraps the most recently added child when that child is a string */ + { + auto node = new AstNode(null, AstBlock(block.type)); + node.addChild(this.currentNode.children[$-1]); + this.currentNode.addChild(node); + return false; + }, + (_){ return true; } + ); + } + + if(addByItself) + this.currentNode.addChild(new AstNode(null, AstBlock(block.type))); /* bare marker; the NewLine handler turns it into a wrapper later */ + return; + } + + auto node = new AstNode(null, AstBlock(block.type)); /* leftAlignReciprocal opens a block that becomes the current node */ + this.currentNode.addChild(node); + this.pushNode(node); + }, + (Number num) + { + this.currentNode.addChild(new AstNode(null, AstNumber(num.value))); + }, + (Code code) + { + this.currentNode.addChild(new AstNode(null, AstCode(code.text))); + }, + (_) + { + import std.conv : to; + this.currentNode.addChild(new AstNode(null, AstJunk(token.to!string, "Unhandled/Unexpected token"))); + } + ); + if(this.handleInconclusiveNode) + this.semanticInconclusive(null); /* no prefix node was found for the pending group; resolve it without one */ + } + + void finish() /* Resolves a still-pending inconclusive node, then feeds one synthetic NewLine token. */ + { + if(this.handleInconclusiveNode) + this.semanticInconclusive(null); + this.visit(Token(NewLine())); // Fixes a few corner cases that rely on NewLine's logic.
+ } + + bool handleInconclusiveNode() /* True when an inconclusive node awaits resolution and no string is currently open. */ + { + return this.semanticInconclusiveNodeStack.length > 0 && this.stringNest == 0; + } + + bool foundHeaderCount = false; + bool foundHeaderOpenParen = false; + void visitHeader(Token token) /* Header sub-state machine; tokens arrive in reverse, so it sees the size first, then an optional sign, then the opening parenthesis and finally the H. */ + { + import std.conv : to; + if(!foundHeaderCount) + { + token.value.match!( + (Number num) + { + auto node = new AstNode(null, AstHeader(num.value)); + node.addChild(this.currentNode.children[$-1]); // Add the AstString + this.currentNode.addChild(node); + foundHeaderCount = true; + }, + (_) + { + this.currentNode.addChild(new AstNode(null, AstJunk(token.to!string, "Expected a number when parsing header size."))); /* NOTE(review): after this branch the last child is an AstJunk, yet the later phases assert that it is an AstHeader — confirm malformed headers cannot reach them */ + foundHeaderCount = true; + } + ); + } + else if(!foundHeaderOpenParen) + { + const transition = token.value.match!( + (Plus pl){ return false; }, + (Style st) + { + if(st.style != StringStyle.strike) + { + this.currentNode.addChild(new AstNode(null, AstJunk(token.to!string, "Unexpected token when parsing header size sign."))); + return false; + } + + // God I'd kill to just be able to go "value.as!AstHeader" instead of this.
+ this.currentNode.children[$-1].value.match!( + (ref AstHeader h){ h.isNegative = true; }, + (_){assert(false);} + ); + return false; + }, + (OpenCParen op){ return true; }, + (_) + { + this.currentNode.addChild(new AstNode(null, AstJunk(token.to!string, "Expected a '(' when parsing header size."))); + return true; + } + ); + foundHeaderOpenParen = transition; + } + else + { + token.value.match!( + (CapitalH h) + { + this.currentNode.children[$-1].value.match!( + (ref AstHeader h) + { + if(h.isNegative) /* NOTE(review): a minus sign yields three plus the size and no sign yields three minus the size — confirm against the pqmarkup header rules */ + h.size += 3; + else + h.size = 3 - h.size; + }, + (_){assert(false);} + ); + }, + (_) + { + this.currentNode.addChild(new AstNode(null, AstJunk(token.to!string, "Expected a 'H' when parsing header."))); + } + ); + + foundHeaderCount = false; /* header finished, well-formed or not: reset the sub-state and go back to the default state */ + foundHeaderOpenParen = false; + this.state = State.default_; + } + } + + void visitTheUniverse(Token token) /* Handles tokens inside a square-bracket group; everything except the brackets themselves is forwarded to visitDefault. */ + { + // *Thankfully* you can't embed links into other links, otherwise we'd have a *very* fun time. + // Future me: Oh god, you can. + token.value.match!( + (CloseSParen csp) + { + auto node = new AstNode(null, AstInconclusive()); + this.currentNode.addChild(node); + this.pushNode(node); + }, + (OpenSParen osp) + { + this.semanticInconclusiveNodeStack ~= this.currentNode; /* the group is complete; queue it until its prefix node is known */ + this.popCurrentNode(); + + this.currentNode.value.match!( + (AstInconclusive _){ }, /* still inside an outer group: stay in this state */ + (_){ this.state = State.default_; } + ); + }, + (_) + { + this.visitDefault(token); + } + ); + } + + AstNode*[] semanticInconclusiveNodeStack; + WasConsumed semanticInconclusive(AstNode* prefixNode) /* Resolves the innermost pending inconclusive node into a link, abbreviation, link reference or bracketed text; returns whether prefixNode was taken as its text. */ + { + auto semanticInconclusiveNode = semanticInconclusiveNodeStack[$-1]; + + // First, see if the node prefixing the inconclusive one is some text, because that means it's for the inconclusive node.
+ WasConsumed result; /* stays at its default, WasConsumed.no, when prefixNode is null */ + if(prefixNode) + { + result = prefixNode.value.match!( + (ref node) + { + static if(!is(typeof(node) == AstText) && !is(typeof(node) == AstString)) + static assert(false); // Jank like this is why I prefer language solutions over library solutions :( + /* the static assert makes this handler fail to compile for every payload type except AstText and AstString, so match uses the catch-all for the rest */ + semanticInconclusiveNode.value.match!( + (ref AstInconclusive link){ link.textNode = prefixNode; }, + (_){assert(false);} + ); + return WasConsumed.yes; + }, + (_) => WasConsumed.no + ); + } + + // Extract the text node so we can preserve it. + AstNode* textNode; + semanticInconclusiveNode.value.match!( + (ref AstInconclusive link){ textNode = link.textNode; }, + (_){assert(false);} + ); + + // Now, we have the fun of trying to figure whatever the frick this node represents. + // Also, keep in mind we're going from bottom to top, so children[$-1] is actually children[0] lexically. + if(semanticInconclusiveNode.children.length >= 1) + { + semanticInconclusiveNode.children[$-1].value.match!( + (AstText text) + { + // link or text. Appears to be determined by the format of text.text's value + if(isLink(text.text) || isLocalLink(text.text)) + { + semanticInconclusiveNode.value = AstLink( + textNode, + text.text, + isLocalLink(text.text) + ); + } + else + semanticInconclusiveNode.value = AstText("["~text.text~"]"); /* not a link: put the brackets back and keep it as literal text */ + semanticInconclusiveNode.children.length--; + }, + (AstString str) + { + // probably an abbreviation.
+ semanticInconclusiveNode.value = AstAbbr(textNode, semanticInconclusiveNode.children[$-1]); + semanticInconclusiveNode.children.length--; + }, + (AstNumber number) + { + semanticInconclusiveNode.value = AstLinkRef(textNode, number.value); + semanticInconclusiveNode.children.length--; + }, + (_){ /* Stay inconclusive */ } + ); + } + + semanticInconclusiveNodeStack.length--; /* the node is popped whether or not it could be classified */ + return result; + } +} + +bool isLocalLink(string text) /* True when text starts with "./". */ +{ + import std.algorithm : startsWith; + return text.startsWith("./"); +} + +bool isLink(string text) /* True when text contains "://" anywhere. */ +{ + import std.algorithm : canFind; + return text.canFind("://"); +} \ No newline at end of file diff --git a/d/pqmarkup-lite/source/tokens.d b/d/pqmarkup-lite/source/tokens.d new file mode 100644 index 0000000..e5145c2 --- /dev/null +++ b/d/pqmarkup-lite/source/tokens.d @@ -0,0 +1,221 @@ +module tokens; + +import std; + +enum StringStyle +{ + none, + bold, + underline, + strike, + italic, + superset, + subset +} + +enum BlockType +{ + none, + leftAlign, + leftAlignReciprocal, + rightAlign, + centerAlign, + justify, + quote +} + +enum Q_LEFT = '‘'; +enum Q_RIGHT = '’'; +immutable STYLES = /* (marker character, style, short name); the third element looks like an HTML tag name and is not read anywhere in the visible code — TODO confirm its purpose */ +[ + tuple('*', StringStyle.bold, "b"), + tuple('_', StringStyle.underline, "u"), + tuple('-', StringStyle.strike, "s"), + tuple('~', StringStyle.italic, "i"), + tuple('\\', StringStyle.subset, "sub"), + tuple('/', StringStyle.superset, "sup"), +]; + +struct Operator /* A single character and the token value the lexer emits for it. */ +{ + dchar ch; + TokenValue value; +} + +immutable OPERATORS = /* Lexer.next generates one switch case per entry */ +[ + Operator(Q_LEFT, TokenValue(OpenQuote())), + Operator(Q_RIGHT, TokenValue(CloseQuote())), + Operator('H', TokenValue(CapitalH())), + Operator('\n', TokenValue(NewLine(1))), + Operator('(', TokenValue(OpenCParen())), + Operator(')', TokenValue(CloseCParen())), + Operator('+', TokenValue(Plus())), + Operator('[', TokenValue(OpenSParen())), + Operator(']', TokenValue(CloseSParen())), + Operator('.', TokenValue(Dot())), + Operator('0', TokenValue(Zero())), +]; + +bool isStyleChar(dchar ch) /* True when ch is the marker character of an entry in STYLES. */ +{ + switch(ch) + { + static
foreach(style; STYLES) + case style[0]: return true; + + default: return false; + } +} +/// +unittest +{ + assert('_'.isStyleChar); + assert(!'!'.isStyleChar); +} + +auto getStyleInfo(dchar ch) /* Returns the STYLES entry for ch; taking front of the filtered range assumes ch is a style character. */ +{ + return STYLES.filter!(s => s[0] == ch).front; +} + +alias TokenValue = SumType!( /* every kind of value a Token can carry */ + Style, + OpenQuote, + CloseQuote, + CapitalH, + OpenCParen, + CloseCParen, + Number, + OpenSParen, + CloseSParen, + Zero, + LeftAlign, + RightAlign, + CenterAlign, + JustifyAlign, + Block, + NewLine, + WhiteSpace, + EOF, + Text, + Plus, + Dot, + Junk, + Code +); + +struct Token /* A token value plus its start and end offsets in the source text; both offsets default to 0. */ +{ + TokenValue value; + size_t start; + size_t end; + + this(T)(T value, size_t s = 0, size_t e = 0) + { + this.value = value; + this.start = s; + this.end = e; + } +} + +struct Style +{ + StringStyle style; +} + +struct OpenQuote +{ +} + +struct CloseQuote +{ +} + +struct CapitalH +{ +} + +struct OpenCParen +{ +} + +struct CloseCParen +{ +} + +struct Number +{ + int value; +} + +struct OpenSParen +{ +} + +struct CloseSParen +{ +} + +struct Zero +{ +} + +struct LeftAlign /* the four *Align structs are not produced by the visible lexer, which emits Block instead */ +{ +} + +struct RightAlign +{ +} + +struct CenterAlign +{ +} + +struct JustifyAlign +{ +} + +struct Block +{ + BlockType type; +} + +struct NewLine +{ + size_t count; +} + +struct WhiteSpace +{ + size_t count; +} + +struct EOF +{ +} + +struct Text +{ + string text; +} + +struct Plus +{ +} + +struct Dot +{ +} + +struct Junk +{ + string text; + string message; +} + +struct Code +{ + string text; +} \ No newline at end of file diff --git a/d/pqmarkup-lite/tests.txt b/d/pqmarkup-lite/tests.txt new file mode 100644 index 0000000..d8337db --- /dev/null +++ b/d/pqmarkup-lite/tests.txt @@ -0,0 +1,56 @@ +> Quote +>‘Quote2’ + (())
Quote
+
Quote2
+| + +|>[http://address]:‘Quoted text.’ (())
http://address:
+Quoted text.
| + +|>[http://another-address][-1]:‘Quoted text.’ +>[-1]:‘Another quoted text.’ (())
http://another-address:
+Quoted text.
+
Another quoted text.
| + +|>‘Author's name’[http://address]:‘Quoted text.’ (())
Author's name:
+Quoted text.
| + +|>‘Author's name’:‘Quoted text.’ (())
Author's name:
+Quoted text.
| + +|‘Quoted text.’:‘Author's name’< (())
Quoted text.
+
Author's name
| + +|>‘Как люди думают. Дмитрий Чернышев. 2015. 304с.’:‘[[[стр.89:]]]...’ (())
Как люди думают. Дмитрий Чернышев. 2015. 304с.:
+...
| + +|>‘>‘Автор против nullable-типов?’ +Да. Адрес, указывающий на незаконный участок памяти, сам незаконен.’ (())
Автор против nullable-типов?
+Да. Адрес, указывающий на незаконный участок памяти, сам незаконен.
| + +|>‘> Автор против nullable-типов? +Да. Адрес, указывающий на незаконный участок памяти, сам незаконен.’ (())
Автор против nullable-типов?
+Да. Адрес, указывающий на незаконный участок памяти, сам незаконен.
| + +|‘понимание [[[процесса]]] разбора [[[разметки]]] человеком’[‘говоря проще: приходится [[[гораздо]]] меньше думать о том, будет это работать или не будет, а просто пишешь в соответствии с чёткими/простыми/логичными правилами, и всё’] (()) понимание разбора человеком| + +|. unordered +. list (()) • unordered
+• list| + + +|A +``` +let s2 = str + .lowercaseString + .replace("hello", withString: "goodbye") +``` +B +C (()) A
+
+let s2 = str
+        .lowercaseString
+        .replace("hello", withString: "goodbye")
+
+B
+C \ No newline at end of file