Skip to content

Commit 8811054

Browse files
committed
lexer, untested and waiting for parser
1 parent b78058c commit 8811054

File tree

10 files changed

+272
-135
lines changed

10 files changed

+272
-135
lines changed

clone

Lines changed: 0 additions & 3 deletions
This file was deleted.

const/opcodes.h

Lines changed: 0 additions & 60 deletions
This file was deleted.

const/strings.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,22 @@ const char help[] = version "\
3535

3636
#define ERROR "\e[0;31merror:\e[0m"
3737
#define HELPCMD "\e[0;32mgasm help\e[0m"
38+
#define HELPDIAG " will bring up the help dialog\n"
3839

3940
const char cmd_help[] = "help";
4041
const char cmd_version[] = "version";
4142
const char cmd_license[] = "license";
4243
const char cmd_contributors[] = "contributors";
43-
const char badargs[] = ERROR " invalid arguments\nrunning " HELPCMD " will bring up the help dialog\n";
44+
const char noargs[] = ERROR " given no inputs, expected two\nrunning " HELPCMD HELPDIAG;
45+
const char badargs[] = ERROR " invalid arguments\nrunning " HELPCMD HELPDIAG;
4446
/* Print the `notice` text immediately followed by the `license` text and a trailing newline to stdout. */
void printLicense() { printf("%s%s\n", notice, license); }
4547

48+
const char asm_error[] = ERROR " \e[0;35m\"%s\" \e[0mln %d, col %d\n%s\n";
4649
const char asm_expected_instruction[] = "expected an instruction\n";
4750
const char asm_wrong_type[] = "given argument of wrong type\n";
48-
51+
const char asm_redefinition[] = "redefinition of symbol\n";
52+
const char asm_lexerstuck[] = "lexer stuck\n";
53+
const char asm_string_hanging[] = "expected string to be closed\n";
54+
const char asm_number_multidecimal[] = "number contains multiple decimal points\n";
55+
const char asm_number_badcharacter[] = "number contains invalid character\n";
56+
const char asm_symbol_badcharacter[] = "invalid symbol definition or unknown character\n";

gasm.c

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,17 @@
1-
#include <stdio.h>
2-
#include <stdlib.h>
3-
#include <string.h>
4-
#include <stdint.h>
5-
#include <stdbool.h>
6-
#include <sys/stat.h>
71
#include "include/libgyb.h"
82
#include "include/libgyb.c"
93
#include "const/license.h"
104
#include "const/strings.h"
11-
#include "const/opcodes.h"
125
#include "src/gasm.h"
136
#include "src/load.c"
147
#include "src/assemble.c"
8+
#include "src/lexer.h"
159
#include "src/lexer.c"
1610
#include "src/parser.c"
1711
#include "src/emit.c"
1812

1913
int main (int argc, char **argv) {
20-
if (argc == 1) { printf(badargs); exit(1); }
14+
if (argc == 1) { printf(noargs); exit(1); }
2115
if (argc == 2) {
2216
if ( argv[1][0]=='-' ) {
2317
switch(argv[1][1]) {

src/assemble.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
void assembler_error(token_t token, parse_t *context, const char *error) {
2-
printf("\e[0;31merror:\e[0;33m%s\e[0m, ln %d col %d\n%s",
3-
context->source,
4-
token.ln, token.col, error
1+
/*
 * Report a fatal assembler diagnostic and terminate the process.
 *
 * Prints the asm_error format (file name, line, column, message) to stdout
 * using the global position trackers, then exits with status 1.
 * This function never returns.
 *
 * error: message text appended after the location line; NULL prints nothing.
 */
void assembler_error(const char *error) {
    /* assembler_filename starts out NULL; passing NULL to %s is undefined */
    const char *file = assembler_filename != NULL ? assembler_filename : "(unknown)";
    const char *message = error != NULL ? error : "";

    /* the counters are unsigned but the format uses %d, so convert explicitly */
    printf(asm_error, file, (int)assembler_ln, (int)assembler_col, message);
    exit(1);
}
76

87
int assemble(char *filename, int srcn, char **srcs){
9-
gybfile_t object = bytecode_new();
10-
int i; for (i=0; i<srcn; i++) {
11-
parse_t local = gasm_load(srcs[i]);
12-
}
13-
return bytecode_save(filename, object);
8+
gybfile_t object = gyb_bytecode_new();
9+
int i; for (i=0; i<srcn; i++) { gasm_load(&object, srcs[i]); }
10+
return gyb_bytecode_save(filename, object);
1411
}

src/gasm.h

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,16 @@ typedef struct {
1111
} token_t;
1212

1313
typedef struct {
14-
unsigned int size;
14+
int size; int index;
1515
token_t *tokens;
1616
} lexicon_t;
1717

18-
typedef struct {
19-
char *source;
20-
symboltable_t symtable;
21-
lexicon_t lexicon;
22-
} parse_t;
18+
void gasm_load(gybfile_t *context, char *filename);
19+
void parser(gybfile_t *context, lexicon_t lexicon);
20+
lexicon_t lexer(char *stream, int size);
2321

24-
parse_t gasm_import(parse_t *parent, parse_t *child);
25-
parse_t gasm_load(char *filename);
26-
lexicon_t lexer(char *stream, unsigned int size);
27-
parse_t parser(lexicon_t lexicon);
22+
unsigned char assembler_section = 0;
23+
unsigned char assembler_datasize = 0;
24+
unsigned int assembler_ln;
25+
unsigned int assembler_col;
26+
char *assembler_filename = NULL;

src/lexer.c

Lines changed: 153 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,159 @@
1-
token_t lexer_consume(char *stream, int max){
2-
// Data types, ex. bytes
1+
/*
 * Append `token` to the end of `lexicon`, growing the token buffer as needed.
 *
 * lexicon->size is the buffer capacity in BYTES (it is the value handed to
 * realloc); lexicon->index is the number of tokens already stored and is
 * advanced by one on every push. A NULL token buffer is treated as empty
 * regardless of what `size` holds.
 *
 * On allocation failure a message is written to stderr and the process exits
 * with status 1.
 */
void lexicon_push(lexicon_t *lexicon, token_t token) {
    size_t needed = sizeof(token_t) * ((size_t)lexicon->index + 1);
    size_t capacity = 0;
    if ( lexicon->tokens != NULL && lexicon->size > 0 ) { capacity = (size_t)lexicon->size; }

    if ( capacity < needed ) {
        /* double until the new token fits; a zero capacity cannot be doubled */
        size_t grown = capacity != 0 ? capacity : 16 * sizeof(token_t);
        while ( grown < needed ) { grown *= 2; }

        /* keep the old pointer until realloc is known to have succeeded */
        token_t *moved = realloc(lexicon->tokens, grown);
        if ( moved == NULL ) {
            fputs("error: out of memory while growing the token list\n", stderr);
            exit(1);
        }
        lexicon->tokens = moved;
        lexicon->size = (int)grown;
    }

    lexicon->tokens[lexicon->index] = token;
    lexicon->index++; /* without this every push landed in the same slot */
}
7+
8+
int len_nonwsp(char *stream, int max) {
9+
int i; for(i=0;i<max;i++) {
10+
switch(stream[i]){ case ' ': case '\r': case '\t': case ',': case '\n': return i; }}
11+
}
312

4-
// Keywords: registers, constants, et cetera
13+
int len_whitespace(char *stream, int max) {
14+
int i; for(i=0;i<max;i++) {
15+
switch(stream[i]){
16+
case ' ': case '\r': case '\t': case ',':
17+
assembler_col++;
18+
break;
19+
case '\n':
20+
assembler_ln++;
21+
assembler_col=0;
22+
break;
23+
default: return i;
24+
}}
25+
}
26+
27+
int len_string(char *stream, int max) {
28+
char c; int i=0;
29+
switch(stream[0]) {
30+
case '`': case '"': case '\'': c = stream[0]; break;
31+
default: return 0; }
32+
while(stream[i]!=0) {
33+
if ( stream[i] == c ) { return i+1; }
34+
i++; }
35+
// if we never get the ending quote, error
36+
assembler_error(asm_string_hanging);
37+
}
538

6-
// Symbolic Names
39+
int len_number(char *stream, int max) {
40+
bool hex=false; bool octal=false; int decimal=0;
41+
int i; for(i=0;i<max;i++) {
42+
if ( i == 0 && stream[i] == '-' ) { continue; }
43+
if ( i == 0 && stream[i] == '0' ) { continue; }
44+
if ( i == 1 && stream[i] == 'x' ) { hex = true; continue; }
45+
if ( i == 1 && stream[i] == 'o' ) { octal = true; continue; }
46+
if ( stream[i] <= '9' && stream[i] >= '0' ) { continue; }
47+
if ( stream[i] <= 'f' && stream[i] >= 'a' && hex == true ) { continue; }
48+
if ( stream[i] <= 'F' && stream[i] >= 'A' && hex == true ) { continue; }
49+
if ( hex == false && octal == false && stream[i] == '.' ) { decimal++; continue; }
50+
} return i;
51+
}
52+
53+
int value_number(char *stream, int max) {
54+
int base = 10; int cursor = 0; bool negative=false;
55+
int value = 0; int decimal = 0; int point = -1;
56+
if ( max > 2 ) { if (stream[0] == '0') {
57+
if ( stream[1] == 'x' ) { cursor = 2; base = 16; }
58+
if ( stream[1] == 'o' ) { cursor = 2; base = 8; }
59+
}}
60+
while(cursor < max) {
61+
if ( point == -1 ) { // integers
62+
if ( cursor == 0 && stream[cursor] == '-' ) { negative = true; cursor++; continue; }
63+
if ( stream[cursor] <= '0'-1+base && stream[cursor] >= '0' ) { value *= base; value += stream[cursor] - '0'; cursor++; continue; }
64+
if ( stream[cursor] <= 'f' && stream[cursor] >= 'a' && base == 16) { value *= base; value += stream[cursor] - 'a'+10; cursor++; continue; }
65+
if ( stream[cursor] <= 'F' && stream[cursor] >= 'A' && base == 16) { value *= base; value += stream[cursor] - 'A'+10; cursor++; continue; }
66+
if ( stream[cursor] == '.' && base == 10 ) { point = 1; continue; }
67+
} else {
68+
if ( stream[cursor] <= '9' && stream[cursor] >= '0' ) { decimal *= base; point*=10; decimal += stream[cursor] - '0'; cursor++; continue; }
69+
if ( stream[cursor] == '.' ) { assembler_error(asm_number_multidecimal); }
70+
}
71+
assembler_error(asm_number_badcharacter); // if we aren't doing a continue && aren't breaking out, then something is wrong
72+
}
73+
if ( point != -1 ) {
74+
float fvalue = ( value ) + ( decimal / point );
75+
memcpy(&fvalue, &value, sizeof(float));
76+
}
77+
return value;
78+
}
79+
80+
int len_symbol(char *stream, int max) {
81+
int i; for(i=0;i<max;i++){
82+
if ( stream[i] >= 'a' && stream[i] <= 'z' ) { continue; }
83+
if ( stream[i] >= 'A' && stream[i] <= 'Z' ) { continue; }
84+
if ( stream[i] >= '0' && stream[i] <= '9' && i != 0 ) { continue; }
85+
if ( stream[i] == '_' || stream[i] == '$' ) { continue; }
86+
} return i;
87+
}
788

8-
// Instructions
89+
unsigned int lexer_fetch(lexicon_t *lexicon, char *stream, int max){
90+
int len; int i;
91+
// skip over whitespace
92+
len=len_whitespace(stream, max); while( len !=0 ) { stream+=len; max-=len; len=len_whitespace(stream, max); }
93+
// if NULL, break
94+
if ( stream[0]==0 ) { return 0; }
95+
// prep token
96+
token_t result = { .ln = assembler_ln, .col = assembler_col, };
97+
// check string
98+
len = len_string(stream, max); if ( len != 0 ) {
99+
result.type = parsetype_data;
100+
result.string = stream + 1; // skip the first quote
101+
result.value = assembler_datasize;
102+
stream[len-1] = '\0'; // && null terminate it
103+
lexicon_push(lexicon, result);
104+
return len;
105+
}
106+
// check numbers
107+
len = len_number(stream, max); if ( len != 0 ) {
108+
result.type = parsetype_data;
109+
result.string = stream;
110+
result.value = value_number(stream, len);
111+
stream[len+1] = '\0';
112+
lexicon_push(lexicon, result);
113+
return len + 1;
114+
}
115+
// check for sections
116+
for(i=0;i<4;i++) {
117+
if ( strncmp(section_names[i].name, stream, len_nonwsp(stream, max))==0 ) {
118+
result.type = parsetype_macro; result.string = section_names[i].name; result.value = section_names[i].args;
119+
stream[len] = '\0'; lexicon_push(lexicon, result); return len+1;
120+
}}
121+
// check for data sizes
122+
for(i=0;i<4;i++) {
123+
if ( strncmp(datasizes[i].name, stream, len_nonwsp(stream, max))==0 ) {
124+
result.type = parsetype_size; result.string = section_names[i].name; result.value = section_names[i].args;
125+
stream[len] = '\0'; lexicon_push(lexicon, result); return len+1;
126+
}}
127+
// check for syscalls
128+
for(i=0;i<255;i++) {
129+
if ( syscalls[i].type != 'S' ) { continue; }
130+
if ( strncmp(syscalls[i].name, stream, len_nonwsp(stream, max))==0 ) {
131+
result.type = parsetype_sys; result.string = syscalls[i].name; result.value = syscalls[i].args;
132+
stream[len] = '\0'; lexicon_push(lexicon, result); return len+1;
133+
}}
134+
// check for keywords
135+
for(i=0;i<255;i++) {
136+
if ( keywords[i].type != 'I' ) { continue; }
137+
if ( strncmp(keywords[i].name, stream, len_nonwsp(stream, max))==0 ) {
138+
result.type = parsetype_sys; result.string = keywords[i].name; result.value = keywords[i].args;
139+
stream[len] = '\0'; lexicon_push(lexicon, result); return len+1;
140+
}}
141+
// if none of those things, we're probably looking at a symbol
142+
len = len_symbol(stream, max); i = len_nonwsp(stream, max);
143+
if ( len != i ) { assembler_error(asm_symbol_badcharacter); }
144+
// make a note of the section its in
145+
if ( assembler_section == section_executable ) { result.type = parsetype_label; }
146+
else { result.type = parsetype_address; }
147+
// redefinition checks happen during parsing, not lexing
148+
result.string = stream; stream[len] = '\0'; result.value = symbolhash(stream);
149+
lexicon_push(lexicon, result); return len+1;
9150
}
10151

11-
lexicon_t lexer(char *stream, unsigned int size){
12-
lexicon_t yield;
13-
int cursor=0; while(cursor < size) {
14-
token_t token = lexer_consume(stream + cursor, size - cursor);
15-
cursor += strlen(token.string);
16-
} return yield;
152+
lexicon_t lexer(char *stream, int streamlen){
153+
lexicon_t lexicon;
154+
unsigned int cursor=0; while(cursor < streamlen) {
155+
unsigned int n = lexer_fetch(&lexicon, stream + cursor, streamlen - cursor);
156+
if ( n == 0) { assembler_error(asm_lexerstuck); exit(1); }
157+
cursor += n; assembler_ln += n;
158+
} return lexicon;
17159
}

0 commit comments

Comments
 (0)