|
/**
 * Every category of token the lexer can emit.
 *
 * Note that `REGISTER` doubles as the catch-all for any identifier that is
 * not a known instruction mnemonic.
 */
type TokenType =
  | 'INSTRUCTION'
  | 'REGISTER'
  | 'NUMBER'
  | 'COMMA'
  | 'SEGMENT'
  | 'ENDS'
  | 'LABEL'
  | 'INCLUDE'
  | 'PROCESSOR_DIRECTIVE'
  | 'MODEL_DIRECTIVE'
  | 'STACK_DIRECTIVE'
  | 'DATA_DIRECTIVE'
  | 'CODE_DIRECTIVE';
| 1 | +// 使用typescript 将汇编语言解析成AST,注释使用英语,支持段定义,包括简单段定义,支持include命令,支持宏和子程序,支持label,支持字符串,支持.386 这样的命令,如果代码有问题,请支持输出代码的问题,包括问题类型,位置。 |
3 | 2 |
|
/** One lexical unit produced by the tokenizer. */
interface Token {
  /** Category of the token. */
  type: TokenType;
  /**
   * Text carried by the token. For most tokens this is the matched source
   * text; for `LABEL` it is the name without the colon, and for `INCLUDE`,
   * `MODEL_DIRECTIVE` and `STACK_DIRECTIVE` it is the directive's argument.
   */
  value: string;
  /** Offset into the source text at which the token was matched. */
  position: number;
}
10 | | - |
/** Any node that can appear at statement level in the tree. */
type ASTNode =
  | InstructionNode
  | SegmentNode
  | LabelNode
  | IncludeNode
  | ProcessorDirectiveNode
  | ModelDirectiveNode
  | StackDirectiveNode
  | DataDirectiveNode
  | CodeDirectiveNode;

/** An instruction together with its operands, in source order. */
interface InstructionNode {
  type: 'INSTRUCTION';
  /** Mnemonic exactly as written in the source. */
  name: string;
  operands: (RegisterNode | NumberNode | LabelReferenceNode)[];
}

/** Operand that names a register (or any other plain identifier). */
interface RegisterNode {
  type: 'REGISTER';
  name: string;
}

/** Numeric literal operand. */
interface NumberNode {
  type: 'NUMBER';
  value: number;
}

/** A named segment and the statements collected while it was open. */
interface SegmentNode {
  type: 'SEGMENT';
  name: string;
  instructions: ASTNode[];
}

/** Definition of a label. */
interface LabelNode {
  type: 'LABEL';
  name: string;
  /** Source offset of the defining token; quoted in duplicate-label errors. */
  position: number;
}

/** Operand that refers to a label by name. */
interface LabelReferenceNode {
  type: 'LABEL_REFERENCE';
  name: string;
}

/** An INCLUDE statement and the tree parsed from the included content. */
interface IncludeNode {
  type: 'INCLUDE';
  filename: string;
  ast: ASTNode[];
}

/** A dot-directive such as `.386`; `directive` holds the text as written. */
interface ProcessorDirectiveNode {
  type: 'PROCESSOR_DIRECTIVE';
  directive: string;
}

/** `.MODEL` directive with its memory-model argument. */
interface ModelDirectiveNode {
  type: 'MODEL_DIRECTIVE';
  model: string;
}

/** `.STACK` directive with its size argument. */
interface StackDirectiveNode {
  type: 'STACK_DIRECTIVE';
  size: number;
}

/** `.DATA` directive; carries no payload. */
interface DataDirectiveNode {
  type: 'DATA_DIRECTIVE';
}

/** `.CODE` directive; carries no payload. */
interface CodeDirectiveNode {
  type: 'CODE_DIRECTIVE';
}
87 | | - |
/** A problem found while tokenizing or parsing. */
interface ErrorInfo {
  /** Problem category, e.g. `'SyntaxError'` or `'LabelError'`. */
  type: string;
  /** Source offset the problem is attributed to. */
  position: number;
  /** Human-readable description of the problem. */
  message: string;
}
94 | | - |
/**
 * Upper-case mnemonics that are treated as instructions by default.
 * Lookups upper-case the candidate first, so matching is case-insensitive.
 */
const commonInstructions = [
  // Data movement, arithmetic and comparison
  'MOV',
  'ADD',
  'SUB',
  'MUL',
  'DIV',
  'INC',
  'DEC',
  'CMP',
  // Unconditional and conditional jumps
  'JMP',
  'JE',
  'JNE',
  'JG',
  'JGE',
  'JL',
  'JLE',
  // Stack and subroutine control
  'PUSH',
  'POP',
  'CALL',
  'RET',
];
101 | | - |
/**
 * Placeholder for loading the content of an included source file.
 *
 * No file-system access is wired in: the function ignores `filename` and
 * always yields an empty string. Replace the body with real file reading
 * before relying on INCLUDE handling.
 *
 * @param filename Name of the file to load (currently unused).
 * @returns The file content; always `''` in this stub.
 */
function readFile(filename: string): string {
  const placeholderContent = '';
  return placeholderContent;
}
108 | | - |
/**
 * Lexical analyzer: turns assembly source text into a flat token list.
 *
 * Recognised input:
 * - `name:`                      -> LABEL (value is the name without the colon)
 * - `SEGMENT` / `ENDS`           -> keyword tokens (case-insensitive)
 * - `INCLUDE <file>`             -> INCLUDE (value is the file name)
 * - `.MODEL <model>`             -> MODEL_DIRECTIVE (value is the model)
 * - `.STACK <decimal size>`      -> STACK_DIRECTIVE (value is the size text)
 * - `.DATA` / `.CODE`            -> DATA_DIRECTIVE / CODE_DIRECTIVE
 * - any other `.xxx` (e.g. .386) -> PROCESSOR_DIRECTIVE
 * - known mnemonics              -> INSTRUCTION; other identifiers -> REGISTER
 * - decimal digits               -> NUMBER; `,` -> COMMA
 * - `;` to end of line           -> comment, skipped
 *
 * Directive and INCLUDE arguments must be on the same line as the keyword.
 * Anything else is reported in `errors` instead of being silently skipped.
 *
 * @param input Assembly source text.
 * @returns The tokens in source order plus any lexical problems found.
 *          `position` is always the offset of the token's first character.
 */
export function tokenize(input: string): { tokens: Token[]; errors: ErrorInfo[] } {
  const tokens: Token[] = [];
  const errors: ErrorInfo[] = [];

  // Sticky (`y`) patterns match only at `lastIndex`, so the scanner can never
  // jump over text it does not understand. The alternatives cover every
  // character class: whitespace, comment, identifier (+ optional colon),
  // number, comma, dot-directive, and finally a run of unsupported characters.
  const lexeme =
    /(\s+)|(;[^\n]*)|([A-Za-z_][A-Za-z0-9_]*)(:?)|(\d+)|(,)|(\.[A-Za-z0-9]+)|(\.|[^\sA-Za-z0-9_,;.]+)/y;
  const inlineBlank = /[^\S\n]+/y; // whitespace that does not end the line
  const directiveArgument = /[^\s,;]+/y;
  const includeTarget = /\S+/y;

  let cursor = 0;

  // Reads one argument that follows on the same line, advancing the cursor
  // only when an argument is actually present.
  const readArgument = (shape: RegExp): string | undefined => {
    inlineBlank.lastIndex = cursor;
    if (inlineBlank.exec(input) === null) {
      return undefined;
    }
    shape.lastIndex = inlineBlank.lastIndex;
    const found = shape.exec(input);
    if (found === null) {
      return undefined;
    }
    cursor = shape.lastIndex;
    return found[0];
  };

  while (cursor < input.length) {
    const position = cursor;
    lexeme.lastIndex = cursor;
    const match = lexeme.exec(input);
    if (match === null) {
      // Defensive only: the alternatives above are exhaustive.
      errors.push({
        type: 'LexicalError',
        position,
        message: `Unexpected character '${input[cursor]}'`
      });
      cursor += 1;
      continue;
    }
    cursor = lexeme.lastIndex;
    const [text, blank, comment, word, colon, digits, comma, directive] = match;

    if (blank !== undefined || comment !== undefined) {
      continue;
    }

    if (word !== undefined) {
      if (colon === ':') {
        tokens.push({ type: 'LABEL', value: word, position });
        continue;
      }
      // Keywords are checked before the generic identifier fallback.
      switch (word.toUpperCase()) {
        case 'SEGMENT':
          tokens.push({ type: 'SEGMENT', value: word, position });
          break;
        case 'ENDS':
          tokens.push({ type: 'ENDS', value: word, position });
          break;
        case 'INCLUDE': {
          const filename = readArgument(includeTarget);
          if (filename === undefined) {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected file name after INCLUDE directive'
            });
          } else {
            tokens.push({ type: 'INCLUDE', value: filename, position });
          }
          break;
        }
        default:
          tokens.push({
            type: isInstruction(word) ? 'INSTRUCTION' : 'REGISTER',
            value: word,
            position
          });
      }
      continue;
    }

    if (digits !== undefined) {
      tokens.push({ type: 'NUMBER', value: digits, position });
      continue;
    }

    if (comma !== undefined) {
      tokens.push({ type: 'COMMA', value: comma, position });
      continue;
    }

    if (directive !== undefined) {
      switch (directive.toUpperCase()) {
        case '.MODEL': {
          const model = readArgument(directiveArgument);
          if (model === undefined) {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected model type after .MODEL directive'
            });
          } else {
            tokens.push({ type: 'MODEL_DIRECTIVE', value: model, position });
          }
          break;
        }
        case '.STACK': {
          const size = readArgument(directiveArgument);
          // Decimal digits only: the parser converts the size with base 10.
          if (size === undefined || !/^\d+$/.test(size)) {
            errors.push({
              type: 'SyntaxError',
              position,
              message: 'Expected a valid number for stack size after .STACK directive'
            });
          } else {
            tokens.push({ type: 'STACK_DIRECTIVE', value: size, position });
          }
          break;
        }
        case '.DATA':
          tokens.push({ type: 'DATA_DIRECTIVE', value: directive, position });
          break;
        case '.CODE':
          tokens.push({ type: 'CODE_DIRECTIVE', value: directive, position });
          break;
        default:
          // `.386` and every other dot-directive.
          tokens.push({ type: 'PROCESSOR_DIRECTIVE', value: directive, position });
      }
      continue;
    }

    // Only the "unsupported characters" alternative is left.
    errors.push({
      type: 'LexicalError',
      position,
      message: `Unexpected character(s) '${text}'`
    });
  }

  return { tokens, errors };
}
175 | | - |
/**
 * Tells whether `value` is a known instruction mnemonic.
 *
 * @param value Candidate text; it is upper-cased before the comparison.
 * @param instructions Mnemonics to compare against (defaults to the common set).
 * @returns `true` when the upper-cased candidate appears in `instructions`.
 */
function isInstruction(value: string, instructions = commonInstructions): boolean {
  const mnemonic = value.toUpperCase();
  return instructions.some((known) => known === mnemonic);
}
180 | | - |
/**
 * Syntax analyzer: builds an AST from a token list.
 *
 * Statements that appear between `SEGMENT <name>` and `ENDS` are collected in
 * that segment's `instructions`; everything else goes to the root list. The
 * `.MODEL`, `.STACK`, `.DATA` and `.CODE` directives are always placed at the
 * root, even while a segment is open.
 *
 * Problems are collected rather than thrown:
 * - `SyntaxError`: missing segment name, `ENDS` without `SEGMENT`, a token that
 *   cannot start a statement, or a segment left open at the end of input.
 * - `LabelError`: a label defined more than once.
 * Errors from included files are prefixed with the file name.
 *
 * @param tokens Tokens in source order.
 * @returns The root nodes plus every problem found.
 */
export function parse(tokens: Token[]): { ast: ASTNode[]; errors: ErrorInfo[] } {
  const ast: ASTNode[] = [];
  const errors: ErrorInfo[] = [];
  const labelMap = new Map<string, LabelNode>();
  let currentSegment: SegmentNode | null = null;

  // Appends a statement to the open segment, or to the root when none is open.
  const emit = (node: ASTNode): void => {
    if (currentSegment) {
      currentSegment.instructions.push(node);
    } else {
      ast.push(node);
    }
  };

  // Builds a syntax error attributed to the given token.
  const syntaxError = (at: Token, message: string): ErrorInfo => ({
    type: 'SyntaxError',
    position: at.position,
    message
  });

  // Marks errors as originating from an included file.
  const fromInclude = (filename: string, found: ErrorInfo[]): ErrorInfo[] =>
    found.map(err => ({
      ...err,
      message: `In included file ${filename}: ${err.message}`
    }));

  let index = 0;
  while (index < tokens.length) {
    const token = tokens[index];
    index += 1;

    switch (token.type) {
      case 'SEGMENT': {
        // Peek first: a token that is not a name must stay in the stream so
        // it is still parsed as its own statement.
        const nameToken = tokens[index];
        if (nameToken === undefined || nameToken.type !== 'REGISTER') {
          errors.push(syntaxError(token, 'Expected segment name after SEGMENT keyword'));
          break;
        }
        index += 1;
        currentSegment = {
          type: 'SEGMENT',
          name: nameToken.value,
          instructions: []
        };
        ast.push(currentSegment);
        break;
      }
      case 'ENDS': {
        if (currentSegment === null) {
          errors.push(syntaxError(token, 'ENDS keyword without corresponding SEGMENT'));
        } else {
          currentSegment = null;
        }
        break;
      }
      case 'LABEL': {
        const earlier = labelMap.get(token.value);
        if (earlier !== undefined) {
          errors.push({
            type: 'LabelError',
            position: token.position,
            message: `Label '${token.value}' is already defined at position ${earlier.position}`
          });
          break;
        }
        const labelNode: LabelNode = {
          type: 'LABEL',
          name: token.value,
          position: token.position
        };
        labelMap.set(token.value, labelNode);
        emit(labelNode);
        break;
      }
      case 'INSTRUCTION': {
        const operands: (RegisterNode | NumberNode | LabelReferenceNode)[] = [];
        // Operands run until the first token that is not a register, number or
        // comma. A LABEL token is a definition (`name:`) that starts the next
        // statement, so it ends the operand list instead of joining it.
        while (index < tokens.length) {
          const next = tokens[index];
          if (next.type === 'REGISTER') {
            operands.push({ type: 'REGISTER', name: next.value });
          } else if (next.type === 'NUMBER') {
            operands.push({ type: 'NUMBER', value: parseInt(next.value, 10) });
          } else if (next.type !== 'COMMA') {
            break;
          }
          index += 1;
        }
        emit({
          type: 'INSTRUCTION',
          name: token.value,
          operands
        });
        break;
      }
      case 'INCLUDE': {
        const filename = token.value;
        const lexed = tokenize(readFile(filename));
        errors.push(...fromInclude(filename, lexed.errors));
        const parsed = parse(lexed.tokens);
        errors.push(...fromInclude(filename, parsed.errors));
        emit({
          type: 'INCLUDE',
          filename,
          ast: parsed.ast
        });
        break;
      }
      case 'PROCESSOR_DIRECTIVE': {
        emit({
          type: 'PROCESSOR_DIRECTIVE',
          directive: token.value
        });
        break;
      }
      case 'MODEL_DIRECTIVE': {
        ast.push({
          type: 'MODEL_DIRECTIVE',
          model: token.value
        });
        break;
      }
      case 'STACK_DIRECTIVE': {
        ast.push({
          type: 'STACK_DIRECTIVE',
          size: parseInt(token.value, 10)
        });
        break;
      }
      case 'DATA_DIRECTIVE': {
        ast.push({ type: 'DATA_DIRECTIVE' });
        break;
      }
      case 'CODE_DIRECTIVE': {
        ast.push({ type: 'CODE_DIRECTIVE' });
        break;
      }
      default: {
        // REGISTER, NUMBER and COMMA cannot start a statement; report them
        // rather than dropping source text without a trace.
        errors.push(syntaxError(token, `Unexpected token '${token.value}'`));
      }
    }
  }

  if (currentSegment) {
    errors.push({
      type: 'SyntaxError',
      position: tokens[tokens.length - 1]?.position ?? 0,
      message: 'Unclosed segment'
    });
  }

  return { ast, errors };
}
0 commit comments