-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathScanner.cpp
192 lines (159 loc) · 5.59 KB
/
Scanner.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#include <utility>
#include <assert.h>
//
// Created by cybex on 2019/05/03.
//
#include "Scanner.h"
#include "Parser.h"
// Creates a scanner with an empty sentence and the read position at the start.
Scanner::Scanner() : _sentence(), curPos(0) {
}
Scanner::Scanner(std::string sentence) : _sentence(std::move(sentence)), curPos(0) {
}
int Scanner::buildTokenList(std::string sentence) {
_sentence = std::move(sentence);
return buildTokenList();
}
int Scanner::buildTokenList() {
while (curPos < _sentence.length()) {
// Get next token
std::string curToken = buildNextToken();
// Get next token type
TokenType tokenType = findType(curToken);
// Check for invalid identifiers
if (tokenType == TokenType::InvalidIdentifier
|| tokenType == TokenType::InvalidToken) {
if (_VERBOSITY >= 2) {
if (tokenType == InvalidIdentifier) {
fprintf(stderr, ANSI_COLOR_RED "Invalid Token \'%s\'\n" ANSI_COLOR_RESET, curToken.data());
} else {
fprintf(stderr, ANSI_COLOR_RED "Failed!\nError Parsing Tokens. Check your code for syntax errors." ANSI_COLOR_RESET);
}
}
return 1;
}
// Language allows for spaces, but the spaces are not added to the token list as they serve no purpose.
if (tokenType == TokenType::SpaceToken) {
continue;
}
// Here, the assumption is the tokentype is valid and sane. We add it to the tokenlist
tokenList.emplace_back(Token(curToken, tokenType));
if (_VERBOSITY >= 2) {
fprintf(stdout, ANSI_COLOR_YELLOW "\t | %s\t->\t%s\n" ANSI_COLOR_RESET, curToken.data(),
Token::tokenDesc(tokenType).data());
}
}
// All tokens processed here and are sane.
return 0;
}
std::string Scanner::buildNextToken() {
// Check if we are within our limits
if (curPos >= _sentence.length()) {
return "";
}
// Get initial values
std::string token = std::string(1, _sentence[curPos++]);
TokenType type = findType(token);
TokenType nextType;
// Loop over string until either a token mismatch is found or string has compeleted.
while (curPos < _sentence.length()) {
// Peek ahead. See if the next character type matches the current
std::string readahead = std::string(1, _sentence[curPos]);
if (type == DeclVarToken && readahead == "=")
nextType = type = findType(token + readahead);
else
nextType = findType(readahead);
// If it does, increment current pos and add to token
if (type == nextType) {
curPos++;
token += readahead;
} else {
// We got a new token type, this means we have reached the end of the current token definition
break;
}
}
return token;
}
// Classifies a token spelling.
//
// Punctuation is recognised from the first character only:
//   '('                -> LParToken
//   ')'                -> RParToken
//   '+', '-', '*', '/' -> OperaterToken
//   '~'                -> DeclConstToken
//   ':'                -> DeclVarToken when it is the whole spelling,
//                         AssignVarToken when the second character is '=',
//                         InvalidToken otherwise.
// A spelling that is exactly one space is SpaceToken.
// Anything else must consist solely of ASCII letters and digits, otherwise it
// is InvalidIdentifier. Digits are accepted in any position, so "var123",
// "1var" and "123" all pass this check (as does the empty string).
// Accepted spellings are compared case-insensitively against the keywords
// IF, THEN, ELSE, LET, IN, VAR and CONST; everything else is IdentifierToken.
TokenType Scanner::findType(const std::string &_spelling) {
    switch (_spelling[0]) {
        // Left bracket
        case '(':
            return TokenType::LParToken;
        // Right bracket
        case ')':
            return TokenType::RParToken;
        // Operators
        case '+':
        case '-':
        case '*':
        case '/':
            return TokenType::OperaterToken;
        // Const assignment
        case '~':
            return TokenType::DeclConstToken;
        // Variable declaration or assignment, depending on the second character
        case ':':
            if (_spelling.length() == 1) {
                return TokenType::DeclVarToken;
            }
            return (_spelling[1] == '=') ? TokenType::AssignVarToken
                                         : TokenType::InvalidToken;
        default:
            break;
    }

    // A lone space is reported so the caller can skip it.
    if (_spelling == " ") {
        return TokenType::SpaceToken;
    }

    // Compiled once: this function runs for every input character, and
    // building a std::regex on each call is expensive.
    static const std::regex alnumOnly("^[a-zA-Z0-9]{0,}$");
    if (!std::regex_match(_spelling, alnumOnly)) {
        return TokenType::InvalidIdentifier;
    }

    // Keywords are matched case-insensitively via their upper-case form.
    struct Keyword {
        const char *word;
        TokenType type;
    };
    static const Keyword keywords[] = {
        {"IF",    TokenType::IfToken},
        {"THEN",  TokenType::ThenToken},
        {"ELSE",  TokenType::ElseToken},
        {"LET",   TokenType::LetToken},
        {"IN",    TokenType::InToken},
        {"VAR",   TokenType::VarToken},
        {"CONST", TokenType::ConstToken},
    };

    const std::string upper = toUpper(_spelling);
    for (const Keyword &keyword : keywords) {
        if (upper == keyword.word) {
            return keyword.type;
        }
    }

    return TokenType::IdentifierToken;
}
std::vector<Token> Scanner::getTokenList() {
return tokenList;
}
// Returns a copy of str with every character converted to upper case using a
// default-constructed std::locale.
std::string Scanner::toUpper(const std::string &str) {
    std::locale activeLocale;
    std::string upper(str);
    for (std::string::size_type idx = 0; idx < upper.size(); ++idx) {
        upper[idx] = std::toupper(upper[idx], activeLocale);
    }
    return upper;
}
// Nothing to release explicitly; members clean up after themselves.
Scanner::~Scanner() = default;