// Character-class and small helper macros.
// Every macro argument is parenthesized so that an arbitrary expression
// (e.g. `p + 1` or `a | b`) expands with the intended precedence.
// NOTE: arguments may still be evaluated more than once; do not pass
// expressions with side effects.
#define IS_LETTER(C) (((C) >= 'a' && (C) <= 'z') || ((C) >= 'A' && (C) <= 'Z'))
#define IS_DIGIT(C) ((C) >= '0' && (C) <= '9')
// Upper-case hex digits only ('0'-'9', 'A'-'F').
#define IS_HEX(C) (IS_DIGIT(C) || ((C) >= 'A' && (C) <= 'F'))
#define IS_WHITESPACE(C) ((C) == ' ' || (C) == '\t' || (C) == '\n' || (C) == '\r')
// True when the two characters at P are A followed by B.
#define IS_PUNCT(P, A, B) (*(P) == (A) && (P)[1] == (B))
#define IS_TYPE(KIND) ((KIND) >= KW_int && (KIND) <= KW_void)
// Round x up to the next multiple of 4.
#define ALIGN(x) (((x) + 3) & -4)
// @TODO: refactor error
// Print a printf-style message and terminate with exit status 1.
#define COMPILE_ERROR(...) { printf(__VA_ARGS__); exit(1); }
#define PUSH(REG, VAL) instruction(Push | ((REG) << 24), VAL)
#define POP(REG) instruction(Pop | ((REG) << 8), 0)
// Access field ATTRIB of call record IDX in the flat g_calls array.
#define CALL_ATTRIB(IDX, ATTRIB) g_calls[((IDX) * CallSize) + (ATTRIB)]
// Pack an opcode and three operands into one int, one byte position each:
// op in bits 0-7, dest in 8-15, src1 in 16-23, src2 in 24-31.
// Operands are parenthesized so expressions such as `a | b` shift as a whole.
#define OP(op, dest, src1, src2) ((op) | ((dest) << 8) | ((src1) << 16) | ((src2) << 24))

#define MAX_PRINF_ARGS (8)
#define CHUNK_SIZE (1 << 27)
#define MAX_SCOPE (128)
#define MAX_CALLS (1024)
4445
// Enumerators count up from 0 (Undefined == 0 ... Const == 5).
enum {
    Undefined,
    Global,
    Param,
    Local,
    Func,
    Const
};

// Numbering starts at 1 (EAX == 1 ... IMME == 7).
enum {
    EAX = 1,
    EBX,
    ECX,
    EDX,
    ESP,
    EBP,
    IMME
};
@@ -70,8 +71,7 @@ int strlen(char* p) {
7071}
7172#pragma endregion utils
7273
73- #pragma region token
74-
74+ //---------------------------------- TOKEN ----------------------------------//
7575enum {
7676 _TK_START = 128 , // 0-127 is reserved for ascii
7777 TK_INT , // int
@@ -111,6 +111,7 @@ enum {
111111};
112112
113113// @TODO: implement struct. Use enum and array to mimic array of struct for now
// Access field ATTRIB of token IDX in the flat g_token_buffer array; each
// token occupies _TkFieldCount consecutive ints. Usable as an lvalue.
// ATTRIB is parenthesized so expression arguments index correctly.
#define GET_TK_FIELD(IDX, ATTRIB) (g_token_buffer[((IDX) * _TkFieldCount) + (ATTRIB)])
114115enum {
115116 TkFieldKind ,
116117 TkFieldValue , // store the value of token if char or int
@@ -121,19 +122,16 @@ enum {
121122 _TkFieldCount ,
122123};
123124
124- int * g_token_buffer , // global int array to hold token information
125- g_token_idx ; // global index of current token
126-
127- #define GET_TK_FIELD (IDX , ATTRIB ) (g_token_buffer[((IDX) * _TkFieldCount) + ATTRIB])
125+ int * g_token_buffer ; // global int array to hold token information
128126
129127void check_if_token_keyword (int token_idx ) {
130128 char * keywords = "int\0 char\0 void\0 break\0 continue\0"
131129 "else\0 enum\0 if\0 return\0 while\0 "
132130 "printf\0 fopen\0 fgetc\0 calloc\0 memset\0 "
133131 "exit\0 " ;
134132
135- int start = GET_TK_FIELD (token_idx , TkFieldBegin );
136- int token_len = GET_TK_FIELD (token_idx , TkFieldEnd ) - start ;
133+ char * start = GET_TK_FIELD (token_idx , TkFieldBegin );
134+ int token_len = ( char * ) GET_TK_FIELD (token_idx , TkFieldEnd ) - start ;
137135
138136 int idx = 0 ;
139137 while (idx < (_KW_END - KW_int )) {
@@ -148,12 +146,8 @@ void check_if_token_keyword(int token_idx) {
148146 return ;
149147}
150148
151- #pragma endregion token
152-
153149// @TODO: refactor
154-
155150char * g_ram , * g_src ;
156-
157151int g_reserved , g_bss ,
158152 g_tkIter ,
159153 * g_syms , g_symCnt ,
@@ -163,114 +157,98 @@ int g_reserved, g_bss,
163157 g_scopeId , * g_scopes , g_scopeCnt ,
164158 * g_calls , g_callCnt ;
165159
166- void lex () {
160+ //---------------------------------- PARSER ----------------------------------//
// Translate the character that follows a backslash into the character it
// denotes. `letter` is the character after '\\'; `ln` is the source line,
// used only for the diagnostic. Unknown escapes are reported through
// COMPILE_ERROR.
int parse_escape_sequence(int letter, int ln) {
    int decoded = 0; // also the result for '\0'

    if (letter == 'n') { decoded = '\n'; }
    else if (letter == 'r') { decoded = '\r'; }
    else if (letter == 't') { decoded = '\t'; }
    else if (letter == '\\') { decoded = '\\'; }
    else if (letter == '\'') { decoded = '\''; }
    else if (letter == '"') { decoded = '"'; }
    else if (letter != '0') {
        COMPILE_ERROR("error:%d: unknown escape sequence '\\%c'\n", ln, letter);
    }

    return decoded;
}
173+
174+ int lex (char * p ) {
175+ int token_idx = 0 ;
167176 int ln = 1 ;
168- char * p = g_src ;
169177 while (* p ) {
170178 if (* p == '#' || (* p == '/' && p [1 ] == '/' )) { // handle '#' and comment '//'
171- while (* p && * p != 10 ) ++ p ;
179+ while (* p && * p != '\n' ) ++ p ;
172180 } else if (IS_WHITESPACE (* p )) { // handle whitespace
173- ln += (* p == 10 ); ++ p ;
181+ ln += (* p == '\n' ); ++ p ;
174182 } else {
175- GET_TK_FIELD (g_token_idx , TkFieldLine ) = ln ;
176- GET_TK_FIELD (g_token_idx , TkFieldBegin ) = p ;
183+ GET_TK_FIELD (token_idx , TkFieldLine ) = ln ;
184+ GET_TK_FIELD (token_idx , TkFieldBegin ) = ( int ) p ;
177185
178186 if (IS_LETTER (* p ) || * p == '_' ) { // handle token or keyword
179- GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_IDENT ;
187+ GET_TK_FIELD (token_idx , TkFieldKind ) = TK_IDENT ;
180188 ++ p ;
181189 while (IS_LETTER (* p ) || IS_DIGIT (* p ) || * p == '_' ) {
182190 ++ p ;
183191 }
184- GET_TK_FIELD (g_token_idx , TkFieldEnd ) = p ;
185- check_if_token_keyword (g_token_idx );
186- g_token_idx += 1 ;
192+ GET_TK_FIELD (token_idx , TkFieldEnd ) = ( int ) p ;
193+ check_if_token_keyword (token_idx );
194+ token_idx += 1 ;
187195 } else if (* p == '0' && p [1 ] == 'x' ) { // handle hex number
188- GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_INT ;
196+ GET_TK_FIELD (token_idx , TkFieldKind ) = TK_INT ;
189197 int result = 0 ;
190198 p += 2 ; while (IS_HEX (* p )) {
191199 result = (result << 4 ) + ((* p < 'A' ) ? (* p - '0' ) : (* p - 55 ));
192200 ++ p ;
193201 }
194- GET_TK_FIELD (g_token_idx , TkFieldValue ) = result ;
195- GET_TK_FIELD (g_token_idx ++ , TkFieldEnd ) = p ;
202+ GET_TK_FIELD (token_idx , TkFieldValue ) = result ;
203+ GET_TK_FIELD (token_idx ++ , TkFieldEnd ) = p ;
196204 } else if (IS_DIGIT (* p )) { // handle decimal number
197- GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_INT ;
205+ GET_TK_FIELD (token_idx , TkFieldKind ) = TK_INT ;
198206 int result = 0 ;
199207 while (IS_DIGIT (* p )) { result = result * 10 + (* p - '0' ); ++ p ; }
200- GET_TK_FIELD (g_token_idx , TkFieldValue ) = result ;
201- GET_TK_FIELD (g_token_idx ++ , TkFieldEnd ) = p ;
208+ GET_TK_FIELD (token_idx , TkFieldValue ) = result ;
209+ GET_TK_FIELD (token_idx ++ , TkFieldEnd ) = p ;
202210 } else if (* p == '"' ) { // handle string
203- GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_STRING ;
211+ GET_TK_FIELD (token_idx , TkFieldKind ) = TK_STRING ;
204212 ++ p ; while (* p != '"' ) { ++ p ; };
205- GET_TK_FIELD (g_token_idx ++ , TkFieldEnd ) = ++ p ;
206- } else if (* p == 39 ) { // ascii '''
207- GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_CHAR ;
208- GET_TK_FIELD (g_token_idx , TkFieldValue ) = p [1 ];
209- GET_TK_FIELD (g_token_idx ++ , TkFieldEnd ) = (p += 3 );
213+ GET_TK_FIELD (token_idx ++ , TkFieldEnd ) = ++ p ;
214+ } else if (* p == '\'' ) {
215+ // @TODO: handle escape
216+ GET_TK_FIELD (token_idx , TkFieldKind ) = TK_CHAR ;
217+ int v = * (++ p ); // skip opening '
218+ if (v == '\\' ) {
219+ v = parse_escape_sequence (* (++ p ), ln );
220+ }
221+ GET_TK_FIELD (token_idx , TkFieldValue ) = v ;
222+ GET_TK_FIELD (token_idx ++ , TkFieldEnd ) = (p += 2 ); // skip char and closing '
210223 } else {
211- GET_TK_FIELD (g_token_idx , TkFieldKind ) = * p ;
224+ GET_TK_FIELD (token_idx , TkFieldKind ) = * p ;
212225
213- if (IS_PUNCT (p , '=' , '=' )) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_EQ ; ++ p ; }
214- else if (IS_PUNCT (p , '!' , '=' )) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_NE ; ++ p ; }
215- else if (IS_PUNCT (p , '&' , '&' )) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_AND ; ++ p ; }
216- else if (IS_PUNCT (p , '|' , '|' )) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_OR ; ++ p ; }
226+ if (IS_PUNCT (p , '=' , '=' )) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_EQ ; ++ p ; }
227+ else if (IS_PUNCT (p , '!' , '=' )) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_NE ; ++ p ; }
228+ else if (IS_PUNCT (p , '&' , '&' )) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_AND ; ++ p ; }
229+ else if (IS_PUNCT (p , '|' , '|' )) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_OR ; ++ p ; }
217230 else if (* p == '+' ) {
218- if (p [1 ] == '+' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_INC ; ++ p ; }
219- else if (p [1 ] == '=' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_ADD_ASSIGN ; ++ p ; }
231+ if (p [1 ] == '+' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_INC ; ++ p ; }
232+ else if (p [1 ] == '=' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_ADD_ASSIGN ; ++ p ; }
220233 } else if (* p == '-' ) {
221- if (p [1 ] == '-' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_DEC ; ++ p ; }
222- else if (p [1 ] == '=' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_SUB_ASSIGN ; ++ p ; }
234+ if (p [1 ] == '-' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_DEC ; ++ p ; }
235+ else if (p [1 ] == '=' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_SUB_ASSIGN ; ++ p ; }
223236 } else if (* p == '>' ) {
224- if (p [1 ] == '=' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_GE ; ++ p ; }
225- else if (p [1 ] == '>' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_RSHIFT ; ++ p ; }
237+ if (p [1 ] == '=' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_GE ; ++ p ; }
238+ else if (p [1 ] == '>' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_RSHIFT ; ++ p ; }
226239 } else if (* p == '<' ) {
227- if (p [1 ] == '=' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_LE ; ++ p ; }
228- else if (p [1 ] == '<' ) { GET_TK_FIELD (g_token_idx , TkFieldKind ) = TK_LSHIFT ; ++ p ; }
240+ if (p [1 ] == '=' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_LE ; ++ p ; }
241+ else if (p [1 ] == '<' ) { GET_TK_FIELD (token_idx , TkFieldKind ) = TK_LSHIFT ; ++ p ; }
229242 }
230243
231- GET_TK_FIELD (g_token_idx ++ , TkFieldEnd ) = ++ p ;
232- }
233- }
234- }
235- return ;
236- }
237-
238- // debug
239- void dump_tokens () {
240- printf ("-------- lex --------\n" );
241- int indent = 0 , i = 0 , ln = 0 ;
242- while (i < g_token_idx ) {
243- int tkln = GET_TK_FIELD (i , TkFieldLine );
244- int kind = GET_TK_FIELD (i , TkFieldKind );
245- int start = GET_TK_FIELD (i , TkFieldBegin );
246- int end = GET_TK_FIELD (i , TkFieldEnd );
247- int len = end - start ;
248- if (kind == '{' ) { indent += 1 ; }
249- else if (kind == '}' ) { indent -= 1 ; }
250- if (ln != tkln ) {
251- printf ("\n%-3d:%.*s" , tkln , indent * 4 , " " );
252- ln = tkln ;
253- }
254- char * names = "Int Char Void Break Cont Else Enum If "
255- "Ret While Print Fopen Fgetc CallocMemsetExit " ;
256- printf ("%.*s" , len , start );
257- if (kind >= KW_int ) {
258- printf ("{" );
259- char * p = names + 6 * (kind - KW_int ); int ii = 0 ;
260- while (ii < 6 ) {
261- if (* p == ' ' ) break ;
262- printf ("%c" , * p );
263- ++ ii ; ++ p ;
244+ GET_TK_FIELD (token_idx ++ , TkFieldEnd ) = ++ p ;
264245 }
265- printf ("}" );
266246 }
267- printf (" " );
268- ++ i ;
269247 }
270- printf ("\n" );
271- return ;
248+ return token_idx ;
272249}
273250
251+ //--------------------------------- CODEGEN ----------------------------------//
274252void enter_scope () {
275253 if (g_scopeCnt >= MAX_SCOPE ) {
276254 panic ("scope overflow" );
@@ -343,11 +321,8 @@ int primary_expr() {
343321 int i = 1 ;
344322 while (i < len ) {
345323 int c = start [i ];
346- if (c == 92 ) { // '\'
347- c = start [i += 1 ];
348- if (c == 'n' ) { c = 10 ; }
349- else if (c == '0' ) { c = 0 ; }
350- else { COMPILE_ERROR ("error:%d: unknown escape sequence '%c'\n" , ln , c ); }
324+ if (c == '\\' ) {
325+ c = parse_escape_sequence (start [i += 1 ], ln );
351326 }
352327 * ((char * )g_bss ++ ) = c ;
353328 ++ i ;
@@ -977,10 +952,10 @@ void obj() {
977952 return ;
978953}
979954
980- void gen (int argc , char * * argv ) {
955+ void gen (int argc , char * * argv , int token_count ) {
981956 enter_scope ();
982957
983- while (g_tkIter < g_token_idx ) {
958+ while (g_tkIter < token_count ) {
984959 obj ();
985960 }
986961
@@ -1102,16 +1077,18 @@ void dump_code() {
11021077 return ;
11031078}
11041079
1080+ #define FATAL_ERROR (fmt , ...) { printf("c.c: \033[31mfatal error\033[0m: " fmt "\ncompilation terminated.\n", ##__VA_ARGS__); exit(1); }
1081+
11051082int main (int argc , char * * argv ) {
11061083 // @TODO: better error handling
11071084 if (argc == 1 ) {
1108- printf ( "%s: fatal error: no input files\n compilation terminated." , * argv );
1085+ FATAL_ERROR ( "no input files" );
11091086 return 1 ;
11101087 }
11111088
11121089 void * fp = fopen (argv [1 ], "r" );
11131090 if (!fp ) {
1114- printf ("%s: fatal error: %s : No such file or directory\n compilation terminated." , * argv , * (argv + 1 ));
1091+ FATAL_ERROR ("%s: No such file or directory" , * (argv + 1 ));
11151092 return 1 ;
11161093 }
11171094
@@ -1140,10 +1117,10 @@ int main(int argc, char **argv) {
11401117 g_src [src_len ] = 0 ;
11411118
11421119 // lexing
1143- lex ();
1120+ int token_count = lex (g_src );
11441121
11451122 // code generation
1146- gen (argc - 1 , argv + 1 );
1123+ gen (argc - 1 , argv + 1 , token_count );
11471124
11481125 // run
11491126 g_regs = g_ram + g_reserved - 4 * IMME ;
0 commit comments