1
- token_t lexer_consume (char * stream , int max ){
2
- // Data types, ex. bytes
1
+ void lexicon_push (lexicon_t * lexicon , token_t token ) {
2
+ if ( lexicon -> size <= sizeof (token_t )* (lexicon -> index ) ) {
3
+ lexicon -> size *= 2 ;
4
+ lexicon -> tokens = realloc (lexicon -> tokens , lexicon -> size );
5
+ } memcpy (& lexicon -> tokens [lexicon -> index ], & token , sizeof (token_t ));
6
+ }
7
+
8
// Length of the run of non-delimiter characters at the start of `stream`,
// looking at no more than `max` characters.
// Delimiters are space, '\r', '\t', ',', '\n' and the NUL terminator.
// Returns `max` (or 0 when max <= 0) if no delimiter is found in the window.
int len_nonwsp(char *stream, int max) {
    int i;
    for (i = 0; i < max; i++) {
        switch (stream[i]) {
        case ' ': case '\r': case '\t': case ',': case '\n': case '\0':
            return i;
        default:
            break;
        }
    }
    // No delimiter inside the window: the whole window is one token.
    return i;
}
3
12
4
- // Keywords: registers, constants, et cetera
13
+ int len_whitespace (char * stream , int max ) {
14
+ int i ; for (i = 0 ;i < max ;i ++ ) {
15
+ switch (stream [i ]){
16
+ case ' ' : case '\r' : case '\t' : case ',' :
17
+ assembler_col ++ ;
18
+ break ;
19
+ case '\n' :
20
+ assembler_ln ++ ;
21
+ assembler_col = 0 ;
22
+ break ;
23
+ default : return i ;
24
+ }}
25
+ }
26
+
27
+ int len_string (char * stream , int max ) {
28
+ char c ; int i = 0 ;
29
+ switch (stream [0 ]) {
30
+ case '`' : case '"' : case '\'' : c = stream [0 ]; break ;
31
+ default : return 0 ; }
32
+ while (stream [i ]!= 0 ) {
33
+ if ( stream [i ] == c ) { return i + 1 ; }
34
+ i ++ ; }
35
+ // if we never get the ending quote, error
36
+ assembler_error (asm_string_hanging );
37
+ }
5
38
6
- // Symbolic Names
39
// Length of the numeric literal at the start of `stream`, looking at no more
// than `max` characters. Accepted forms:
//   - an optional leading '-'
//   - a "0x" prefix (hex digits 0-9, a-f, A-F follow) or a "0o" prefix
//   - decimal digits, with '.' allowed when no prefix was seen
// Scanning stops at the first character that fits none of these rules.
// Returns 0 when the text contains no digit at all (e.g. a bare "-" or a
// symbol), so callers can use a zero result to mean "not a number".
// Range and multiple-'.' validation are left to value_number.
int len_number(char *stream, int max) {
    bool hex = false;
    bool octal = false;
    int digits = 0;
    int i;

    for (i = 0; i < max; i++) {
        char c = stream[i];
        if (i == 0 && c == '-') { continue; }
        // A base prefix is only valid directly after a leading '0'.
        if (i == 1 && stream[0] == '0' && c == 'x') { hex = true; continue; }
        if (i == 1 && stream[0] == '0' && c == 'o') { octal = true; continue; }
        if (c >= '0' && c <= '9') { digits++; continue; }
        if (hex && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) { digits++; continue; }
        if (!hex && !octal && c == '.') { continue; }
        break; // first character that cannot belong to a number
    }
    return digits > 0 ? i : 0;
}
52
+
53
+ int value_number (char * stream , int max ) {
54
+ int base = 10 ; int cursor = 0 ; bool negative = false;
55
+ int value = 0 ; int decimal = 0 ; int point = -1 ;
56
+ if ( max > 2 ) { if (stream [0 ] == '0' ) {
57
+ if ( stream [1 ] == 'x' ) { cursor = 2 ; base = 16 ; }
58
+ if ( stream [1 ] == 'o' ) { cursor = 2 ; base = 8 ; }
59
+ }}
60
+ while (cursor < max ) {
61
+ if ( point == -1 ) { // integers
62
+ if ( cursor == 0 && stream [cursor ] == '-' ) { negative = true; cursor ++ ; continue ; }
63
+ if ( stream [cursor ] <= '0' - 1 + base && stream [cursor ] >= '0' ) { value *= base ; value += stream [cursor ] - '0' ; cursor ++ ; continue ; }
64
+ if ( stream [cursor ] <= 'f' && stream [cursor ] >= 'a' && base == 16 ) { value *= base ; value += stream [cursor ] - 'a' + 10 ; cursor ++ ; continue ; }
65
+ if ( stream [cursor ] <= 'F' && stream [cursor ] >= 'A' && base == 16 ) { value *= base ; value += stream [cursor ] - 'A' + 10 ; cursor ++ ; continue ; }
66
+ if ( stream [cursor ] == '.' && base == 10 ) { point = 1 ; continue ; }
67
+ } else {
68
+ if ( stream [cursor ] <= '9' && stream [cursor ] >= '0' ) { decimal *= base ; point *=10 ; decimal += stream [cursor ] - '0' ; cursor ++ ; continue ; }
69
+ if ( stream [cursor ] == '.' ) { assembler_error (asm_number_multidecimal ); }
70
+ }
71
+ assembler_error (asm_number_badcharacter ); // if we aren't doing a continue && aren't breaking out, then something is wrong
72
+ }
73
+ if ( point != -1 ) {
74
+ float fvalue = ( value ) + ( decimal / point );
75
+ memcpy (& fvalue , & value , sizeof (float ));
76
+ }
77
+ return value ;
78
+ }
79
+
80
// Length of the symbol name at the start of `stream`, looking at no more than
// `max` characters. A symbol is made of letters, '_' and '$'; digits are
// allowed anywhere except the first position.
// Returns 0 if the first character cannot start a symbol.
int len_symbol(char *stream, int max) {
    int i;
    for (i = 0; i < max; i++) {
        char c = stream[i];
        if (c >= 'a' && c <= 'z') { continue; }
        if (c >= 'A' && c <= 'Z') { continue; }
        if (c >= '0' && c <= '9' && i != 0) { continue; }
        if (c == '_' || c == '$') { continue; }
        break; // first character that is not part of a symbol
    }
    return i;
}
7
88
8
- // Instructions
89
+ unsigned int lexer_fetch (lexicon_t * lexicon , char * stream , int max ){
90
+ int len ; int i ;
91
+ // skip over whitespace
92
+ len = len_whitespace (stream , max ); while ( len != 0 ) { stream += len ; max -= len ; len = len_whitespace (stream , max ); }
93
+ // if NULL, break
94
+ if ( stream [0 ]== 0 ) { return 0 ; }
95
+ // prep token
96
+ token_t result = { .ln = assembler_ln , .col = assembler_col , };
97
+ // check string
98
+ len = len_string (stream , max ); if ( len != 0 ) {
99
+ result .type = parsetype_data ;
100
+ result .string = stream + 1 ; // skip the first quote
101
+ result .value = assembler_datasize ;
102
+ stream [len - 1 ] = '\0' ; // && null terminate it
103
+ lexicon_push (lexicon , result );
104
+ return len ;
105
+ }
106
+ // check numbers
107
+ len = len_number (stream , max ); if ( len != 0 ) {
108
+ result .type = parsetype_data ;
109
+ result .string = stream ;
110
+ result .value = value_number (stream , len );
111
+ stream [len + 1 ] = '\0' ;
112
+ lexicon_push (lexicon , result );
113
+ return len + 1 ;
114
+ }
115
+ // check for sections
116
+ for (i = 0 ;i < 4 ;i ++ ) {
117
+ if ( strncmp (section_names [i ].name , stream , len_nonwsp (stream , max ))== 0 ) {
118
+ result .type = parsetype_macro ; result .string = section_names [i ].name ; result .value = section_names [i ].args ;
119
+ stream [len ] = '\0' ; lexicon_push (lexicon , result ); return len + 1 ;
120
+ }}
121
+ // check for data sizes
122
+ for (i = 0 ;i < 4 ;i ++ ) {
123
+ if ( strncmp (datasizes [i ].name , stream , len_nonwsp (stream , max ))== 0 ) {
124
+ result .type = parsetype_size ; result .string = section_names [i ].name ; result .value = section_names [i ].args ;
125
+ stream [len ] = '\0' ; lexicon_push (lexicon , result ); return len + 1 ;
126
+ }}
127
+ // check for syscalls
128
+ for (i = 0 ;i < 255 ;i ++ ) {
129
+ if ( syscalls [i ].type != 'S' ) { continue ; }
130
+ if ( strncmp (syscalls [i ].name , stream , len_nonwsp (stream , max ))== 0 ) {
131
+ result .type = parsetype_sys ; result .string = syscalls [i ].name ; result .value = syscalls [i ].args ;
132
+ stream [len ] = '\0' ; lexicon_push (lexicon , result ); return len + 1 ;
133
+ }}
134
+ // check for keywords
135
+ for (i = 0 ;i < 255 ;i ++ ) {
136
+ if ( keywords [i ].type != 'I' ) { continue ; }
137
+ if ( strncmp (keywords [i ].name , stream , len_nonwsp (stream , max ))== 0 ) {
138
+ result .type = parsetype_sys ; result .string = keywords [i ].name ; result .value = keywords [i ].args ;
139
+ stream [len ] = '\0' ; lexicon_push (lexicon , result ); return len + 1 ;
140
+ }}
141
+ // if none of those things, we're probably looking at a symbol
142
+ len = len_symbol (stream , max ); i = len_nonwsp (stream , max );
143
+ if ( len != i ) { assembler_error (asm_symbol_badcharacter ); }
144
+ // make a note of the section its in
145
+ if ( assembler_section == section_executable ) { result .type = parsetype_label ; }
146
+ else { result .type = parsetype_address ; }
147
+ // redefinition checks happen during parsing, not lexing
148
+ result .string = stream ; stream [len ] = '\0' ; result .value = symbolhash (stream );
149
+ lexicon_push (lexicon , result ); return len + 1 ;
9
150
}
10
151
11
- lexicon_t lexer (char * stream , unsigned int size ){
12
- lexicon_t yield ;
13
- int cursor = 0 ; while (cursor < size ) {
14
- token_t token = lexer_consume (stream + cursor , size - cursor );
15
- cursor += strlen (token .string );
16
- } return yield ;
152
+ lexicon_t lexer (char * stream , int streamlen ){
153
+ lexicon_t lexicon ;
154
+ unsigned int cursor = 0 ; while (cursor < streamlen ) {
155
+ unsigned int n = lexer_fetch (& lexicon , stream + cursor , streamlen - cursor );
156
+ if ( n == 0 ) { assembler_error (asm_lexerstuck ); exit (1 ); }
157
+ cursor += n ; assembler_ln += n ;
158
+ } return lexicon ;
17
159
}
0 commit comments