diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5784791..c283b17 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,4 +31,4 @@ jobs: with: name: cc - name: Run Tests - run: python -u .\run_tests.py + run: python -u scripts/run_tests.py diff --git a/.gitignore b/.gitignore index 403f911..0f0cfc8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ build/ +__pycache__/ *.exe *.actual -*.expect \ No newline at end of file +*.expect +*.pyc +tmp.c diff --git a/README.md b/README.md index 819ff69..e2855bc 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,19 @@ ```shell $ cmake -S . -B build -A Win32 $ cmake --build build --config Debug -# then copy c.c.exe to root ``` -## Shell +## Self Hosting +```shell +$ py -u scripts/preprocess.py # This preprocesses the source code, because c.c doesn't have a preprocessor +$ ./c hello.c # hello world +$ ./c c.c c.c hello.c # self hosting +``` +## Testing ```shell -$ sh build.sh -$ ./c hello.c -$ ./c c.c hello.c -$ ./c c.c c.c hello.c -$ ./c c.c c.c c.c hello.c -$ sh run-test.sh +$ # copy c.c.exe to root dir +$ py -u scripts/run_tests.py ``` This project is a tiny C interpreter inspired by [c4](https://github.com/rswier/c4). 
diff --git a/build.sh b/build.sh deleted file mode 100644 index 868c76b..0000000 --- a/build.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/bash -echo "//< this file is generated by build.sh" | cat > c.c -echo "#include " | cat >> c.c -echo "#include " | cat >> c.c -gcc -E -DPREPROC c-source.c > tmp.c || exit 1 -cat tmp.c | sed '/^#/d' tmp.c | sed '/^\s*$/d' | cat >> c.c -rm tmp.c -gcc -m32 -Wno-int-conversion -Wno-format -Wno-sign-compare -Wno-builtin-declaration-mismatch -Wno-implicit-function-declaration c.c -o c || exit 1 diff --git a/c-source.c b/c-source.c index 0cdb7a8..d6f1bd9 100644 --- a/c-source.c +++ b/c-source.c @@ -16,7 +16,7 @@ #define IS_HEX(C) (IS_DIGIT(C) || (C >= 'A' && C <= 'F')) #define IS_WHITESPACE(C) (C == ' ' || C == 9 || C == 10 || C == 13) #define IS_PUNCT(P, A, B) (*P == A && P[1] == B) -#define IS_TYPE(KIND) (KIND >= Int && KIND <= Void) +#define IS_TYPE(KIND) (KIND >= KW_int && KIND <= KW_void) #define ALIGN(x) ((x + 3) & -4) #define COMPILE_ERROR(...) { printf(__VA_ARGS__); exit(1); } #define PUSH(REG, VAL) instruction(Push | (REG << 24), VAL) @@ -29,8 +29,8 @@ #define CALL(ENTRY) instruction(Call, ENTRY); #define LOADB(DEST, SRC) instruction(Load | (DEST << 8) | (SRC << 16), 1) #define LOADW(DEST, SRC) instruction(Load | (DEST << 8) | (SRC << 16), 4) -#define CHAR_PTR (0xFF0000 | Char) -#define VOID_PTR (0xFF0000 | Void) +#define CHAR_PTR (0xFF0000 | KW_char) +#define VOID_PTR (0xFF0000 | KW_void) #define IS_PTR(TYPE) (0xFF0000 & TYPE) #define SYM_ATTRIB(IDX, ATTRIB) g_syms[((IDX) * SymSize) + ATTRIB] #define OP_ATTRIB(IDX, ATTRIB) g_ops[((IDX) * OpSize) + ATTRIB] @@ -42,29 +42,12 @@ #define MAX_SCOPE 128 #define MAX_CALLS 1024 -enum /* TokenKind */ { - _TkOffset = 128, // 0-127 is reserved for ascii - CInt, Id, CStr, CChar, - TkNeq, TkEq, TkGe, TkLe, - TkAddTo, TkSubFrom, TkInc, TkDec, TkAnd, TkOr, LShift, RShift, - _KeywordStart, - Int, Char, Void, - Break, Cont, Else, Enum, If, Return, While, - Printf, Fopen, Fgetc, Malloc, Memset, 
Exit, - _KeywordEnd, - Add, Sub, Mul, Div, Rem, - Mov, Push, Pop, Load, Save, - Neq, Eq, Gt, Ge, Lt, Le, And, Or, - Not, Ret, Jz, Jnz, Jump, Call, - _BreakStub, _ContStub }; enum { Undefined, Global, Param, Local, Func, Const }; enum { EAX = 1, EBX, ECX, EDX, ESP, EBP, IMME }; enum { TkIdx, Scope, DType, Storage, Address, SymSize }; enum { OpCode, Imme, OpSize }; enum { /* TkIndex = 0, */ InsIdx = 1, CallSize }; -char *g_ram, *g_src; - #pragma region utils void panic(char* fmt) { printf("[panic] %s\n", fmt); @@ -87,38 +70,77 @@ int strlen(char* p) { } #pragma endregion utils -// @TODO: implement struct. Use enum and array to mimic array of struct for now #pragma region token + +enum { + _TK_START = 128, // 0-127 is reserved for ascii + TK_INT, // int + TK_IDENT, // identifier + TK_STRING, // c string + TK_CHAR, // char + + TK_NE, // != + TK_EQ, // == + TK_GE, // >= + TK_LE, // <= + + TK_ADD_ASSIGN, // += + TK_SUB_ASSIGN, // -= + TK_INC, // ++ + TK_DEC, // -- + TK_AND, // && + TK_OR, // || + TK_LSHIFT, // << + TK_RSHIFT, // >> + + _KW_START, // keywords + + KW_int, KW_char, KW_void, KW_break, KW_continue, + KW_else, KW_enum, KW_if, KW_return, KW_while, + KW_printf, KW_fopen, KW_fgetc, KW_calloc, KW_memset, + KW_exit, + + _KW_END, + + // @TODO: refactor the following, because they are opcode + Add, Sub, Mul, Div, Rem, + Mov, Push, Pop, Load, Save, + Neq, Eq, Gt, Ge, Lt, Le, And, Or, + Not, Ret, Jz, Jnz, Jump, Call, + _BreakStub, _ContStub +}; + +// @TODO: implement struct. 
Use enum and array to mimic array of struct for now enum { - TokenKind, - TokenValue, // store the value of token if char or int - TokenLine, // current line of a token - TokenBegin, - TokenEnd, + TkFieldKind, + TkFieldValue, // store the value of token if char or int + TkFieldLine, // current line of a token + TkFieldBegin, + TkFieldEnd, - _TokenEnumCount, + _TkFieldCount, }; int* g_token_buffer, // global int array to hold token information g_token_idx; // global index of current token -#define GET_TK_FIELD(IDX, ATTRIB) (g_token_buffer[((IDX) * _TokenEnumCount) + ATTRIB]) +#define GET_TK_FIELD(IDX, ATTRIB) (g_token_buffer[((IDX) * _TkFieldCount) + ATTRIB]) void check_if_token_keyword(int token_idx) { char* keywords = "int\0 char\0 void\0 break\0 continue\0" "else\0 enum\0 if\0 return\0 while\0 " - "printf\0 fopen\0 fgetc\0 malloc\0 memset\0 " + "printf\0 fopen\0 fgetc\0 calloc\0 memset\0 " "exit\0 "; - int start = GET_TK_FIELD(token_idx, TokenBegin); - int token_len = GET_TK_FIELD(token_idx, TokenEnd) - start; + int start = GET_TK_FIELD(token_idx, TkFieldBegin); + int token_len = GET_TK_FIELD(token_idx, TkFieldEnd) - start; int idx = 0; - while (idx < (_KeywordEnd - Int)) { + while (idx < (_KW_END - KW_int)) { char* kw = keywords + (idx * 9); // a keyword is at most 8 char, plus '\0' int keyword_len = strlen(kw); if (keyword_len == token_len && streq(start, kw, 8)) { - GET_TK_FIELD(token_idx, TokenKind) = Int + idx; + GET_TK_FIELD(token_idx, TkFieldKind) = KW_int + idx; break; } ++idx; @@ -130,6 +152,8 @@ void check_if_token_keyword(int token_idx) { // @TODO: refactor +char *g_ram, *g_src; + int g_reserved, g_bss, g_tkIter, *g_syms, g_symCnt, @@ -148,63 +172,63 @@ void lex() { } else if (IS_WHITESPACE(*p)) { // handle whitespace ln += (*p == 10); ++p; } else { - GET_TK_FIELD(g_token_idx, TokenLine) = ln; - GET_TK_FIELD(g_token_idx, TokenBegin) = p; + GET_TK_FIELD(g_token_idx, TkFieldLine) = ln; + GET_TK_FIELD(g_token_idx, TkFieldBegin) = p; if (IS_LETTER(*p) || 
*p == '_') { // handle token or keyword - GET_TK_FIELD(g_token_idx, TokenKind) = Id; + GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_IDENT; ++p; while (IS_LETTER(*p) || IS_DIGIT(*p) || *p == '_') { ++p; } - GET_TK_FIELD(g_token_idx, TokenEnd) = p; + GET_TK_FIELD(g_token_idx, TkFieldEnd) = p; check_if_token_keyword(g_token_idx); g_token_idx += 1; } else if (*p == '0' && p[1] == 'x') { // handle hex number - GET_TK_FIELD(g_token_idx, TokenKind) = CInt; + GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_INT; int result = 0; p += 2; while(IS_HEX(*p)) { result = (result << 4) + ((*p < 'A') ? (*p - '0') : (*p - 55)); ++p; } - GET_TK_FIELD(g_token_idx, TokenValue) = result; - GET_TK_FIELD(g_token_idx++, TokenEnd) = p; + GET_TK_FIELD(g_token_idx, TkFieldValue) = result; + GET_TK_FIELD(g_token_idx++, TkFieldEnd) = p; } else if (IS_DIGIT(*p)) { // handle decimal number - GET_TK_FIELD(g_token_idx, TokenKind) = CInt; + GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_INT; int result = 0; while (IS_DIGIT(*p)) { result = result * 10 + (*p - '0'); ++p; } - GET_TK_FIELD(g_token_idx, TokenValue) = result; - GET_TK_FIELD(g_token_idx++, TokenEnd) = p; + GET_TK_FIELD(g_token_idx, TkFieldValue) = result; + GET_TK_FIELD(g_token_idx++, TkFieldEnd) = p; } else if (*p == '"') { // handle string - GET_TK_FIELD(g_token_idx, TokenKind) = CStr; + GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_STRING; ++p; while (*p != '"') { ++p; }; - GET_TK_FIELD(g_token_idx++, TokenEnd) = ++p; + GET_TK_FIELD(g_token_idx++, TkFieldEnd) = ++p; } else if (*p == 39) { // ascii ''' - GET_TK_FIELD(g_token_idx, TokenKind) = CChar; - GET_TK_FIELD(g_token_idx, TokenValue) = p[1]; - GET_TK_FIELD(g_token_idx++, TokenEnd) = (p += 3); + GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_CHAR; + GET_TK_FIELD(g_token_idx, TkFieldValue) = p[1]; + GET_TK_FIELD(g_token_idx++, TkFieldEnd) = (p += 3); } else { - GET_TK_FIELD(g_token_idx, TokenKind) = *p; + GET_TK_FIELD(g_token_idx, TkFieldKind) = *p; - if (IS_PUNCT(p, '=', '=')) { 
GET_TK_FIELD(g_token_idx, TokenKind) = TkEq; ++p; } - else if (IS_PUNCT(p, '!', '=')) { GET_TK_FIELD(g_token_idx, TokenKind) = TkNeq; ++p; } - else if (IS_PUNCT(p, '&', '&')) { GET_TK_FIELD(g_token_idx, TokenKind) = TkAnd; ++p; } - else if (IS_PUNCT(p, '|', '|')) { GET_TK_FIELD(g_token_idx, TokenKind) = TkOr; ++p; } + if (IS_PUNCT(p, '=', '=')) { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_EQ; ++p; } + else if (IS_PUNCT(p, '!', '=')) { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_NE; ++p; } + else if (IS_PUNCT(p, '&', '&')) { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_AND; ++p; } + else if (IS_PUNCT(p, '|', '|')) { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_OR; ++p; } else if (*p == '+') { - if (p[1] == '+') { GET_TK_FIELD(g_token_idx, TokenKind) = TkInc; ++p; } - else if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TokenKind) = TkAddTo; ++p; } + if (p[1] == '+') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_INC; ++p; } + else if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_ADD_ASSIGN; ++p; } } else if (*p == '-') { - if (p[1] == '-') { GET_TK_FIELD(g_token_idx, TokenKind) = TkDec; ++p; } - else if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TokenKind) = TkSubFrom; ++p; } + if (p[1] == '-') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_DEC; ++p; } + else if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_SUB_ASSIGN; ++p; } } else if (*p == '>') { - if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TokenKind) = TkGe; ++p; } - else if (p[1] == '>') { GET_TK_FIELD(g_token_idx, TokenKind) = RShift; ++p; } + if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_GE; ++p; } + else if (p[1] == '>') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_RSHIFT; ++p; } } else if (*p == '<') { - if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TokenKind) = TkLe; ++p; } - else if (p[1] == '<') { GET_TK_FIELD(g_token_idx, TokenKind) = LShift; ++p; } + if (p[1] == '=') { GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_LE; ++p; } + else if (p[1] == '<') { 
GET_TK_FIELD(g_token_idx, TkFieldKind) = TK_LSHIFT; ++p; } } - GET_TK_FIELD(g_token_idx++, TokenEnd) = ++p; + GET_TK_FIELD(g_token_idx++, TkFieldEnd) = ++p; } } } @@ -216,10 +240,10 @@ void dump_tokens() { printf("-------- lex --------\n"); int indent = 0, i = 0, ln = 0; while (i < g_token_idx) { - int tkln = GET_TK_FIELD(i, TokenLine); - int kind = GET_TK_FIELD(i, TokenKind); - int start = GET_TK_FIELD(i, TokenBegin); - int end = GET_TK_FIELD(i, TokenEnd); + int tkln = GET_TK_FIELD(i, TkFieldLine); + int kind = GET_TK_FIELD(i, TkFieldKind); + int start = GET_TK_FIELD(i, TkFieldBegin); + int end = GET_TK_FIELD(i, TkFieldEnd); int len = end - start; if (kind == '{') { indent += 1; } else if (kind == '}') { indent -= 1; } @@ -228,11 +252,11 @@ void dump_tokens() { ln = tkln; } char* names = "Int Char Void Break Cont Else Enum If " - "Ret While Print Fopen Fgetc MallocMemsetExit "; + "Ret While Print Fopen Fgetc CallocMemsetExit "; printf("%.*s", len, start); - if (kind >= Int) { + if (kind >= KW_int) { printf("{"); - char *p = names + 6 * (kind - Int); int ii = 0; + char *p = names + 6 * (kind - KW_int); int ii = 0; while (ii < 6) { if (*p == ' ') break; printf("%c", *p); @@ -271,25 +295,25 @@ void exit_scope() { } int expect(int kind) { - if (GET_TK_FIELD(g_tkIter, TokenKind) != kind) { - int start = GET_TK_FIELD(g_tkIter, TokenBegin), end = GET_TK_FIELD(g_tkIter, TokenEnd); + if (GET_TK_FIELD(g_tkIter, TkFieldKind) != kind) { + int start = GET_TK_FIELD(g_tkIter, TkFieldBegin), end = GET_TK_FIELD(g_tkIter, TkFieldEnd); COMPILE_ERROR("error:%d: expected token '%c'(%d), got '%.*s'\n", - GET_TK_FIELD(g_tkIter, TokenLine), kind < 128 ? kind : ' ', kind, end - start, start); + GET_TK_FIELD(g_tkIter, TkFieldLine), kind < 128 ? 
kind : ' ', kind, end - start, start); } return g_tkIter++; } int expect_type() { - int base_type = GET_TK_FIELD(g_tkIter, TokenKind); + int base_type = GET_TK_FIELD(g_tkIter, TkFieldKind); if (IS_TYPE(base_type)) { ++g_tkIter; int ptr = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } + while (GET_TK_FIELD(g_tkIter, TkFieldKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } return (ptr << 16) | base_type; } - int start = GET_TK_FIELD(g_tkIter, TokenBegin), end = GET_TK_FIELD(g_tkIter, TokenEnd); - COMPILE_ERROR("error:%d: expected type specifier, got '%.*s'\n", GET_TK_FIELD(g_tkIter, TokenLine), end - start, start); + int start = GET_TK_FIELD(g_tkIter, TkFieldBegin), end = GET_TK_FIELD(g_tkIter, TkFieldEnd); + COMPILE_ERROR("error:%d: expected type specifier, got '%.*s'\n", GET_TK_FIELD(g_tkIter, TkFieldLine), end - start, start); } void instruction(int op, int imme) { @@ -301,18 +325,18 @@ void instruction(int op, int imme) { int primary_expr() { int tkIdx = g_tkIter++; - char* start = GET_TK_FIELD(tkIdx, TokenBegin); - char* end = GET_TK_FIELD(tkIdx, TokenEnd); - int ln = GET_TK_FIELD(tkIdx, TokenLine); - int kind = GET_TK_FIELD(tkIdx, TokenKind); - int value = GET_TK_FIELD(tkIdx, TokenValue); + char* start = GET_TK_FIELD(tkIdx, TkFieldBegin); + char* end = GET_TK_FIELD(tkIdx, TkFieldEnd); + int ln = GET_TK_FIELD(tkIdx, TkFieldLine); + int kind = GET_TK_FIELD(tkIdx, TkFieldKind); + int value = GET_TK_FIELD(tkIdx, TkFieldValue); int len = end - start; - if (kind == CInt || kind == CChar) { + if (kind == TK_INT || kind == TK_CHAR) { MOV(EAX, IMME, value); - return Int; + return KW_int; } - if (kind == CStr) { + if (kind == TK_STRING) { MOV(EAX, IMME, g_bss); while (1) { len = len - 1; @@ -329,10 +353,10 @@ int primary_expr() { ++i; } - if (GET_TK_FIELD(g_tkIter, TokenKind) != CStr) break; - start = GET_TK_FIELD(g_tkIter, TokenBegin); - end = GET_TK_FIELD(g_tkIter, TokenEnd); - ln = GET_TK_FIELD(g_tkIter, TokenLine); 
+ if (GET_TK_FIELD(g_tkIter, TkFieldKind) != TK_STRING) break; + start = GET_TK_FIELD(g_tkIter, TkFieldBegin); + end = GET_TK_FIELD(g_tkIter, TkFieldEnd); + ln = GET_TK_FIELD(g_tkIter, TkFieldLine); len = end - start; ++g_tkIter; } @@ -348,11 +372,11 @@ int primary_expr() { return data_type; } - if (kind == Id) { - if (GET_TK_FIELD(g_tkIter, TokenKind) == '(') { + if (kind == TK_IDENT) { + if (GET_TK_FIELD(g_tkIter, TkFieldKind) == '(') { ++g_tkIter; int argc = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) != ')') { + while (GET_TK_FIELD(g_tkIter, TkFieldKind) != ')') { if (argc > 0) expect(','); assign_expr(); PUSH(EAX, 0); @@ -365,13 +389,13 @@ int primary_expr() { CALL(0); if (argc) { ADD(ESP, ESP, IMME, argc << 2); } expect(')'); - return Int; + return KW_int; } int address = 0, type = Undefined, data_type = 0, i = g_symCnt - 1; while (i >= 0) { int tmp = SYM_ATTRIB(i, TkIdx); - char *tmpstart = GET_TK_FIELD(tmp, TokenBegin), *tmpend = GET_TK_FIELD(tmp, TokenEnd); + char *tmpstart = GET_TK_FIELD(tmp, TkFieldBegin), *tmpend = GET_TK_FIELD(tmp, TkFieldEnd); if (len == (tmpend - tmpstart) && streq(start, tmpstart, len)) { address = SYM_ATTRIB(i, Address); type = SYM_ATTRIB(i, Storage); @@ -388,7 +412,7 @@ int primary_expr() { } if (type == Const) { MOV(EAX, IMME, address); - return Int; + return KW_int; } if (type == Undefined) { COMPILE_ERROR("error:%d: '%.*s' undeclared\n", ln, len, start); @@ -399,10 +423,10 @@ int primary_expr() { return data_type; } - if (kind == Printf) { + if (kind == KW_printf) { expect('('); int argc = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) != ')') { + while (GET_TK_FIELD(g_tkIter, TkFieldKind) != ')') { if (argc > 0) expect(','); assign_expr(); PUSH(EAX, 0); @@ -410,17 +434,17 @@ int primary_expr() { } if (argc > MAX_PRINF_ARGS) panic("printf supports at most %d args"); SUB(ESP, ESP, IMME, (MAX_PRINF_ARGS - argc) << 2); - instruction(Printf, argc); + instruction(KW_printf, argc); ADD(ESP, ESP, IMME, MAX_PRINF_ARGS << 2); 
expect(')'); - return Int; + return KW_int; } - int paramCnt = 0, ret = Void, i = 0; - if (kind == Fopen) { paramCnt = 2; ret = VOID_PTR; } - else if (kind == Fgetc) { paramCnt = 1; ret = Int; } - else if (kind == Malloc) { paramCnt = 1; ret = VOID_PTR; } - else if (kind == Exit) { paramCnt = 1; ret = Void; } + int paramCnt = 0, ret = KW_void, i = 0; + if (kind == KW_fopen) { paramCnt = 2; ret = VOID_PTR; } + else if (kind == KW_fgetc) { paramCnt = 1; ret = KW_int; } + else if (kind == KW_calloc) { paramCnt = 2; ret = VOID_PTR; } + else if (kind == KW_exit) { paramCnt = 1; ret = KW_void; } else { COMPILE_ERROR("error:%d: expected expression, got '%.*s'\n", ln, len, start); } expect('('); @@ -438,8 +462,8 @@ int primary_expr() { int post_expr() { int data_type = primary_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind); - int ln = GET_TK_FIELD(g_tkIter, TokenLine); + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind); + int ln = GET_TK_FIELD(g_tkIter, TkFieldLine); if (kind == '[') { ++g_tkIter; if (!IS_PTR(data_type)) { @@ -458,12 +482,12 @@ int post_expr() { else { LOADW(EAX, EAX); } expect(']'); data_type = ((data_type >> 8) & 0xFF0000) | ((data_type & 0xFFFF)); - } else if (kind == TkInc || kind == TkDec) { + } else if (kind == TK_INC || kind == TK_DEC) { ++g_tkIter; LOADW(EAX, EDX); MOV(EBX, EAX, 0); int value = (IS_PTR(data_type) && data_type != CHAR_PTR) ? 4 : 1; - int op = kind == TkInc ? Add : Sub; + int op = kind == TK_INC ? 
Add : Sub; instruction(OP(op, EBX, EBX, IMME), value); SAVEW(EDX, EBX); } else { @@ -474,8 +498,8 @@ int post_expr() { } int unary_expr() { - int kind = GET_TK_FIELD(g_tkIter, TokenKind); - int ln = GET_TK_FIELD(g_tkIter, TokenLine); + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind); + int ln = GET_TK_FIELD(g_tkIter, TkFieldLine); if (kind == '!') { ++g_tkIter; int data_type = unary_expr(); @@ -505,12 +529,12 @@ int unary_expr() { else LOADW(EAX, EDX); return ((data_type >> 8) & 0xFF0000) | (0xFFFF & data_type); } - if (kind == TkInc || kind == TkDec) { + if (kind == TK_INC || kind == TK_DEC) { ++g_tkIter; int data_type = unary_expr(); LOADW(EAX, EDX); int value = (IS_PTR(data_type) && data_type != CHAR_PTR) ? 4 : 1; - int op = kind == TkInc ? Add : Sub; + int op = kind == TK_INC ? Add : Sub; instruction(OP(op, EAX, EAX, IMME), value); SAVEW(EDX, EAX); return data_type; @@ -519,8 +543,8 @@ int unary_expr() { } int cast_expr() { - if (GET_TK_FIELD(g_tkIter, TokenKind) == '(') { - int kind = GET_TK_FIELD(g_tkIter + 1, TokenKind); + if (GET_TK_FIELD(g_tkIter, TkFieldKind) == '(') { + int kind = GET_TK_FIELD(g_tkIter + 1, TkFieldKind); if (IS_TYPE(kind)) { ++g_tkIter; // skip '(' int data_type = expect_type(); @@ -536,7 +560,7 @@ int cast_expr() { int mul_expr() { int data_type = cast_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind), opcode; + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind), opcode; if (kind == '*') opcode = Mul; else if (kind == '/') opcode = Div; else if (kind == '%') opcode = Rem; @@ -554,7 +578,7 @@ int mul_expr() { int add_expr() { int data_type = mul_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind), opcode; + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind), opcode; if (kind == '+') opcode = Add; else if (kind == '-') opcode = Sub; else break; @@ -563,12 +587,12 @@ int add_expr() { int rhs = mul_expr(); if (IS_PTR(data_type) && IS_PTR(rhs)) { if (data_type != rhs) { - COMPILE_ERROR("error:%d: type mismatch", 
GET_TK_FIELD(g_tkIter, TokenLine)); + COMPILE_ERROR("error:%d: type mismatch", GET_TK_FIELD(g_tkIter, TkFieldLine)); } if (data_type != CHAR_PTR) { panic("TODO: handle subtraction other than char* - char*"); } - data_type = Int; + data_type = KW_int; } POP(EBX); if (IS_PTR(data_type) && data_type != CHAR_PTR) { MUL(EAX, EAX, IMME, 4); } @@ -581,8 +605,8 @@ int add_expr() { int shift_expr() { int data_type = add_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind); - if (kind != LShift && kind != RShift) break; + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind); + if (kind != TK_LSHIFT && kind != TK_RSHIFT) break; ++g_tkIter; PUSH(EAX, 0); add_expr(); @@ -596,14 +620,14 @@ int shift_expr() { int relation_expr() { int data_type = shift_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind), opcode; - if (kind == TkNeq) opcode = Neq; - else if (kind == TkEq) opcode = Eq; + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind), opcode; + if (kind == TK_NE) opcode = Neq; + else if (kind == TK_EQ) opcode = Eq; else if (kind == '<') opcode = Lt; else if (kind == '>') opcode = Gt; - else if (kind == TkGe) opcode = Ge; + else if (kind == TK_GE) opcode = Ge; else if (kind == '<') opcode = Lt; - else if (kind == TkLe) opcode = Le; + else if (kind == TK_LE) opcode = Le; else break; ++g_tkIter; PUSH(EAX, 0); @@ -617,7 +641,7 @@ int relation_expr() { int bit_expr() { int data_type = relation_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind), opcode; + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind), opcode; if (kind == '&') opcode = And; else if (kind == '|') opcode = Or; else break; @@ -633,9 +657,9 @@ int bit_expr() { int logical_expr() { int data_type = bit_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind), opcode; - if (kind == TkAnd) opcode = Jz; - else if (kind == TkOr) opcode = Jnz; + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind), opcode; + if (kind == TK_AND) opcode = Jz; + else if (kind == TK_OR) opcode = Jnz; else 
break; ++g_tkIter; @@ -652,17 +676,17 @@ int logical_expr() { int assign_expr() { int data_type = logical_expr(); while (1) { - int kind = GET_TK_FIELD(g_tkIter, TokenKind); + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind); if (kind == '=') { ++g_tkIter; PUSH(EDX, 0); logical_expr(); POP(EDX); - instruction(OP(Save, EDX, EAX, 0), data_type == Char ? 1 : 4); + instruction(OP(Save, EDX, EAX, 0), data_type == KW_char ? 1 : 4); continue; } - if (kind == TkAddTo) { + if (kind == TK_ADD_ASSIGN) { ++g_tkIter; PUSH(EDX, 0); relation_expr(); @@ -674,7 +698,7 @@ int assign_expr() { continue; } - if (kind == TkSubFrom) { + if (kind == TK_SUB_ASSIGN) { ++g_tkIter; PUSH(EDX, 0); relation_expr(); @@ -708,7 +732,7 @@ int assign_expr() { int expr() { int type = assign_expr(); - while (GET_TK_FIELD(g_tkIter, TokenKind) == ',') { + while (GET_TK_FIELD(g_tkIter, TkFieldKind) == ',') { g_tkIter += 1; type = assign_expr(); } @@ -716,9 +740,9 @@ int expr() { } void stmt() { - int kind = GET_TK_FIELD(g_tkIter, TokenKind); - if (kind == Return) { - if (GET_TK_FIELD(++g_tkIter, TokenKind) != ';') { assign_expr(); } + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind); + if (kind == KW_return) { + if (GET_TK_FIELD(++g_tkIter, TkFieldKind) != ';') { assign_expr(); } MOV(ESP, EBP, 0); POP(EBP); instruction(Ret, 0); @@ -726,7 +750,7 @@ void stmt() { return; } - if (kind == If) { + if (kind == KW_if) { // eax == 0; goto L1 | eax == 0; goto L1 // ... | ... // goto L2 | L1: ... @@ -738,7 +762,7 @@ void stmt() { instruction(Jz, 0); stmt(); - if (GET_TK_FIELD(g_tkIter, TokenKind) != Else) { + if (GET_TK_FIELD(g_tkIter, TkFieldKind) != KW_else) { OP_ATTRIB(goto_L1, Imme) = g_opCnt; return; } @@ -752,7 +776,7 @@ void stmt() { return; } - if (kind == While) { + if (kind == KW_while) { // CONT: ... // eax == 0; goto BREAK // ... 
@@ -776,14 +800,14 @@ void stmt() { return; } - if (kind == Break) { + if (kind == KW_break) { ++g_tkIter; instruction(_BreakStub, 0); expect(';'); return; } - if (kind == Cont) { + if (kind == KW_continue) { ++g_tkIter; instruction(_ContStub, 0); expect(';'); @@ -794,8 +818,8 @@ void stmt() { enter_scope(); ++g_tkIter; int restore = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) != '}') { - kind = GET_TK_FIELD(g_tkIter, TokenKind); + while (GET_TK_FIELD(g_tkIter, TkFieldKind) != '}') { + kind = GET_TK_FIELD(g_tkIter, TkFieldKind); if (IS_TYPE(kind)) { ++g_tkIter; int base_type = kind, varNum = 0; @@ -805,8 +829,8 @@ void stmt() { } int ptr = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } - int id = expect(Id), prev = g_symCnt - 1; + while (GET_TK_FIELD(g_tkIter, TkFieldKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } + int id = expect(TK_IDENT), prev = g_symCnt - 1; SYM_ATTRIB(g_symCnt, Address) = 4; if (prev >= 0 && SYM_ATTRIB(prev, Storage) == Local) { @@ -819,7 +843,7 @@ void stmt() { SYM_ATTRIB(g_symCnt, DType) = (ptr << 16) | base_type; SUB(ESP, ESP, IMME, 4); - if (GET_TK_FIELD(g_tkIter, TokenKind) == '=') { + if (GET_TK_FIELD(g_tkIter, TkFieldKind) == '=') { ++g_tkIter; assign_expr(); SUB(EDX, EBP, IMME, SYM_ATTRIB(g_symCnt, Address)); @@ -827,7 +851,7 @@ void stmt() { } ++restore, ++varNum, ++g_symCnt; - if (GET_TK_FIELD(g_tkIter, TokenKind) == ';') { break; } + if (GET_TK_FIELD(g_tkIter, TkFieldKind) == ';') { break; } } ++g_tkIter; @@ -854,27 +878,27 @@ void stmt() { // an object could be a global variable, an enum or a function void obj() { - int kind = GET_TK_FIELD(g_tkIter, TokenKind); - if (kind == Enum) { + int kind = GET_TK_FIELD(g_tkIter, TkFieldKind); + if (kind == KW_enum) { ++g_tkIter; expect('{'); int val = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) != '}') { - int idx = expect(Id); + while (GET_TK_FIELD(g_tkIter, TkFieldKind) != '}') { + int idx = expect(TK_IDENT); SYM_ATTRIB(g_symCnt, 
TkIdx) = idx; SYM_ATTRIB(g_symCnt, Storage) = Const; - SYM_ATTRIB(g_symCnt, DType) = Int; + SYM_ATTRIB(g_symCnt, DType) = KW_int; SYM_ATTRIB(g_symCnt, Scope) = g_scopes[g_scopeCnt - 1]; - if (GET_TK_FIELD(g_tkIter, TokenKind) == '=') { + if (GET_TK_FIELD(g_tkIter, TkFieldKind) == '=') { ++g_tkIter; - idx = expect(CInt); - val = GET_TK_FIELD(idx, TokenValue); + idx = expect(TK_INT); + val = GET_TK_FIELD(idx, TkFieldValue); } SYM_ATTRIB(g_symCnt++, Address) = val++; - if (GET_TK_FIELD(g_tkIter, TokenKind) == '}') { break; } + if (GET_TK_FIELD(g_tkIter, TkFieldKind) == '}') { break; } expect(','); } ++g_tkIter; @@ -882,24 +906,24 @@ void obj() { return; } - int ln = GET_TK_FIELD(g_tkIter, TokenLine); - int start = GET_TK_FIELD(g_tkIter, TokenBegin); - int end = GET_TK_FIELD(g_tkIter++, TokenEnd); + int ln = GET_TK_FIELD(g_tkIter, TkFieldLine); + int start = GET_TK_FIELD(g_tkIter, TkFieldBegin); + int end = GET_TK_FIELD(g_tkIter++, TkFieldEnd); if (!IS_TYPE(kind)) { COMPILE_ERROR("error:%d: unexpected token '%.*s'\n", ln, end - start, start); } - while (GET_TK_FIELD(g_tkIter, TokenKind) != ';') { + while (GET_TK_FIELD(g_tkIter, TkFieldKind) != ';') { int data_type = kind, ptr = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) == '*') { + while (GET_TK_FIELD(g_tkIter, TkFieldKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } data_type = (ptr << 16) | data_type; - int id = expect(Id); + int id = expect(TK_IDENT); - if (GET_TK_FIELD(g_tkIter, TokenKind) != '(') { + if (GET_TK_FIELD(g_tkIter, TkFieldKind) != '(') { SYM_ATTRIB(g_symCnt, Storage) = Global; SYM_ATTRIB(g_symCnt, TkIdx) = id; SYM_ATTRIB(g_symCnt, Scope) = g_scopes[g_scopeCnt - 1]; @@ -907,11 +931,11 @@ void obj() { *((int*)g_bss) = 0; SYM_ATTRIB(g_symCnt++, Address) = g_bss; g_bss += 4; - if (GET_TK_FIELD(g_tkIter, TokenKind) != ';') { expect(','); } + if (GET_TK_FIELD(g_tkIter, TkFieldKind) != ';') { expect(','); } continue; } - if (streq("main", GET_TK_FIELD(id, TokenBegin), 4)) { + if (streq("main", 
GET_TK_FIELD(id, TkFieldBegin), 4)) { g_entry = g_opCnt; } else { SYM_ATTRIB(g_symCnt, Storage) = Func; @@ -924,13 +948,13 @@ void obj() { enter_scope(); expect('('); int argCnt = 0, i = 1; - while (GET_TK_FIELD(g_tkIter, TokenKind) != ')') { + while (GET_TK_FIELD(g_tkIter, TkFieldKind) != ')') { if (argCnt > 0) { expect(','); } int data_type = expect_type(); int ptr = 0; - while (GET_TK_FIELD(g_tkIter, TokenKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } + while (GET_TK_FIELD(g_tkIter, TkFieldKind) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } data_type = (ptr << 16) | data_type; - SYM_ATTRIB(g_symCnt, TkIdx) = expect(Id); + SYM_ATTRIB(g_symCnt, TkIdx) = expect(TK_IDENT); SYM_ATTRIB(g_symCnt, Scope) = g_scopes[g_scopeCnt - 1]; SYM_ATTRIB(g_symCnt, DType) = data_type; SYM_ATTRIB(g_symCnt++, Storage) = Param; @@ -963,16 +987,16 @@ void gen(int argc, char** argv) { int i = 0; while (i < g_callCnt) { int idx = CALL_ATTRIB(i, TkIdx); - int start = GET_TK_FIELD(idx, TokenBegin); - int end = GET_TK_FIELD(idx, TokenEnd); - int ln = GET_TK_FIELD(idx, TokenLine); + int start = GET_TK_FIELD(idx, TkFieldBegin); + int end = GET_TK_FIELD(idx, TkFieldEnd); + int ln = GET_TK_FIELD(idx, TkFieldLine); int len = end - start; int found = 0, j = 0; while (j < g_symCnt) { if (SYM_ATTRIB(j, Storage) == Func) { int funcIdx = SYM_ATTRIB(j, TkIdx); - if (streq(start, GET_TK_FIELD(funcIdx, TokenBegin), len)) { + if (streq(start, GET_TK_FIELD(funcIdx, TkFieldBegin), len)) { found = 1; /// NOTE: potential error here? 
OP_ATTRIB(CALL_ATTRIB(i, InsIdx), Imme) = SYM_ATTRIB(j, Address); @@ -1051,9 +1075,9 @@ void dump_code() { printf(" or %.*s, %.*s, %.*s\n", REG2STR(dest), REG2STR(src1), REG2STR(src2)); } else if (op == Not) { printf(" not %.*s\n", REG2STR(dest)); - } else if (op == LShift) { + } else if (op == TK_LSHIFT) { printf(" lshift %.*s, %.*s, %.*s\n", REG2STR(dest), REG2STR(src1), REG2STR(src2)); - } else if (op == RShift) { + } else if (op == TK_RSHIFT) { printf(" rshift %.*s, %.*s, %.*s\n", REG2STR(dest), REG2STR(src1), REG2STR(src2)); } else if (op == Push) { if (src2 == IMME) printf(" push %d(0x%08X)\n", imme, imme); @@ -1067,8 +1091,8 @@ void dump_code() { } else if (op == Jump || op == Jz || op == Jnz || op == Call) { char* opstr = op == Jump ? "jmp" : op == Jz ? "jz" : op == Jnz ? "jnz" : "call"; printf(" %s %d\n", opstr, imme); - } else if (op == Printf || op == Fopen || op == Fgetc || op == Malloc || op == Exit) { - char* opstr = op == Printf ? "printf" : op == Fopen ? "fopen" : op == Fgetc ? "fgetc" : op == Malloc ? "malloc" : "exit"; + } else if (op == KW_printf || op == KW_fopen || op == KW_fgetc || op == KW_calloc || op == KW_exit) { + char* opstr = op == KW_printf ? "printf" : op == KW_fopen ? "fopen" : op == KW_fgetc ? "fgetc" : op == KW_calloc ? 
"calloc" : "exit"; printf(" %s\n", opstr); } else { panic("invalid op code"); @@ -1079,16 +1103,17 @@ void dump_code() { } int main(int argc, char **argv) { - if (argc < 2) { - printf("Usage: %s file [args...]\n", *argv); - return 1; - } + // @TODO: better error handling + if (argc == 1) { + printf("%s: fatal error: no input files\n compilation terminated.", *argv); + return 1; + } - void* fp = fopen(argv[1], "r"); - if (!fp) { - printf("file '%s' does not exist\n", argv[1]); - return 1; - } + void* fp = fopen(argv[1], "r"); + if (!fp) { + printf("%s: fatal error: %s : No such file or directory\n compilation terminated.", *argv, *(argv + 1)); + return 1; + } g_reserved = 2 * CHUNK_SIZE * argc; g_ram = calloc(g_reserved, 1); // @TODO: support calloc @@ -1096,7 +1121,7 @@ int main(int argc, char **argv) { // memory layout // | instructions | global variables | ... script memory ... | stack | int src_reserved = 1 << 18; - int tk_reserved = 4 * _TokenEnumCount * (src_reserved >> 2); + int tk_reserved = 4 * _TkFieldCount * (src_reserved >> 2); int sym_reserved = 4 * SymSize * (tk_reserved >> 8); int opcode_reserved = 4 * OpSize * (src_reserved >> 3); int scope_reserved = 4 * MAX_SCOPE; @@ -1183,8 +1208,8 @@ int main(int argc, char **argv) { else if (op == And) { g_regs[dest] = g_regs[src1] & value; } else if (op == Or) { g_regs[dest] = g_regs[src1] | value; } else if (op == Not) { g_regs[dest] = !g_regs[dest]; } - else if (op == LShift) { g_regs[dest] = g_regs[src1] << value; } - else if (op == RShift) { g_regs[dest] = g_regs[src1] >> value; } + else if (op == TK_LSHIFT) { g_regs[dest] = g_regs[src1] << value; } + else if (op == TK_RSHIFT) { g_regs[dest] = g_regs[src1] >> value; } else if (op == Save) { if (imme == 4) *((int*)g_regs[dest]) = g_regs[src1]; else *((char*)g_regs[dest]) = g_regs[src1]; @@ -1193,18 +1218,18 @@ int main(int argc, char **argv) { if (imme == 4) g_regs[dest] = *((int*)g_regs[src1]); else g_regs[dest] = *((char*)g_regs[src1]); } - else if (op == 
Printf) { + else if (op == KW_printf) { int* p = g_regs[ESP]; printf((char*)(p[7]), p[6], p[5], p[4], p[3], p[2], p[1], p[0]); - } else if (op == Fgetc) { + } else if (op == KW_fgetc) { int* p = g_regs[ESP]; g_regs[EAX] = fgetc((void*)(p[0])); - } else if (op == Fopen) { + } else if (op == KW_fopen) { int* p = g_regs[ESP]; g_regs[EAX] = fopen((char*)(p[1]), (char*)(p[0])); - } else if (op == Malloc) { - g_regs[EAX] = g_ram + CHUNK_SIZE; - } else if (op == Exit) { + } else if (op == KW_calloc) { + g_regs[EAX] = g_ram + CHUNK_SIZE; // @HACK: not really allocating desired amount of memory + } else if (op == KW_exit) { g_regs[EAX] = *((int*)g_regs[ESP]); break; } else { panic("Invalid op code"); } diff --git a/c.c b/c.c index 9cf064b..1348a47 100644 --- a/c.c +++ b/c.c @@ -1,38 +1,20 @@ -//< this file is generated by build.sh -#include -#include -enum { _TkOffset = 128, - CInt, Id, CStr, CChar, - TkNeq, TkEq, TkGe, TkLe, - TkAddTo, TkSubFrom, TkInc, TkDec, TkAnd, TkOr, LShift, RShift, - _KeywordStart, Int, Char, Void, - Break, Cont, Else, Enum, If, Return, While, - Printf, Fopen, Fgetc, Malloc, Memset, Exit, - _KeywordEnd, - Add, Sub, Mul, Div, Rem, - Mov, Push, Pop, Load, Save, - Neq, Eq, Gt, Ge, Lt, Le, And, Or, - Not, Ret, Jz, Jnz, Jump, Call, - _BreakStub, _ContStub }; +# 0 "c-source.c" +# 0 "" +# 0 "" +# 1 "c-source.c" +# 45 "c-source.c" enum { Undefined, Global, Param, Local, Func, Const }; enum { EAX = 1, EBX, ECX, EDX, ESP, EBP, IMME }; -enum { Kind, Value, Ln, Start, End, TokenSize }; enum { TkIdx, Scope, DType, Storage, Address, SymSize }; enum { OpCode, Imme, OpSize }; enum { InsIdx = 1, CallSize }; -char *g_ram, *g_src; -int g_reserved, g_bss, - *g_tks, g_tkCnt, g_tkIter, - *g_syms, g_symCnt, - *g_ops, g_opCnt, - *g_regs, - g_entry, - g_scopeId, *g_scopes, g_scopeCnt, - *g_calls, g_callCnt; + +#pragma region utils void panic(char* fmt) { printf("[panic] %s\n", fmt); exit(1); } + int streq(char* p1, char* p2, int len) { while (len--) { if (*p1 == 0 || *p2 == 
0) return 1; @@ -41,95 +23,188 @@ int streq(char* p1, char* p2, int len) { } return 1; } + int strlen(char* p) { int len = 0; while (*p++) { len += 1; } return len; } +#pragma endregion utils + +#pragma region token + +enum { + _TK_START = 128, + TK_INT, + TK_IDENT, + TK_STRING, + TK_CHAR, + + TK_NE, + TK_EQ, + TK_GE, + TK_LE, + + TK_ADD_ASSIGN, + TK_SUB_ASSIGN, + TK_INC, + TK_DEC, + TK_AND, + TK_OR, + TK_LSHIFT, + TK_RSHIFT, + + _KW_START, + + KW_int, KW_char, KW_void, KW_break, KW_continue, + KW_else, KW_enum, KW_if, KW_return, KW_while, + KW_printf, KW_fopen, KW_fgetc, KW_calloc, KW_memset, + KW_exit, + + _KW_END, + + + Add, Sub, Mul, Div, Rem, + Mov, Push, Pop, Load, Save, + Neq, Eq, Gt, Ge, Lt, Le, And, Or, + Not, Ret, Jz, Jnz, Jump, Call, + _BreakStub, _ContStub +}; + + +enum { + TkFieldKind, + TkFieldValue, + TkFieldLine, + TkFieldBegin, + TkFieldEnd, + + _TkFieldCount, +}; + +int* g_token_buffer, + g_token_idx; + + + +void check_if_token_keyword(int token_idx) { + char* keywords = "int\0 char\0 void\0 break\0 continue\0" + "else\0 enum\0 if\0 return\0 while\0 " + "printf\0 fopen\0 fgetc\0 calloc\0 memset\0 " + "exit\0 "; + + int start = (g_token_buffer[((token_idx) * _TkFieldCount) + TkFieldBegin]); + int token_len = (g_token_buffer[((token_idx) * _TkFieldCount) + TkFieldEnd]) - start; + + int idx = 0; + while (idx < (_KW_END - KW_int)) { + char* kw = keywords + (idx * 9); + int keyword_len = strlen(kw); + if (keyword_len == token_len && streq(start, kw, 8)) { + (g_token_buffer[((token_idx) * _TkFieldCount) + TkFieldKind]) = KW_int + idx; + break; + } + ++idx; + } + return; +} + +#pragma endregion token + + + +char *g_ram, *g_src; + +int g_reserved, g_bss, + g_tkIter, + *g_syms, g_symCnt, + *g_ops, g_opCnt, + *g_regs, + g_entry, + g_scopeId, *g_scopes, g_scopeCnt, + *g_calls, g_callCnt; + void lex() { - int ln = 1, range = _KeywordEnd - _KeywordStart - 1; + int ln = 1; char *p = g_src; - while (*p) { - if (*p == '#' || (*p == '/' && p[1] == '/')) { while 
(*p && *p != 10) ++p; } - else if ((*p == ' ' || *p == 9 || *p == 10 || *p == 13)) { ln += (*p == 10); ++p; } - else { - g_tks[((g_tkCnt) * TokenSize) + Ln] = ln; - g_tks[((g_tkCnt) * TokenSize) + Start] = p; + while (*p) { + if (*p == '#' || (*p == '/' && p[1] == '/')) { + while (*p && *p != 10) ++p; + } else if ((*p == ' ' || *p == 9 || *p == 10 || *p == 13)) { + ln += (*p == 10); ++p; + } else { + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldLine]) = ln; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldBegin]) = p; + if (((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) || *p == '_') { - g_tks[((g_tkCnt) * TokenSize) + Kind] = Id; - ++p; while (((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) || (*p >= '0' && *p <= '9') || *p == '_') { ++p; } - g_tks[((g_tkCnt) * TokenSize) + End] = p; - char *keywords = "int\0 char\0 void\0 break\0 continue\0" - "else\0 enum\0 if\0 return\0 while\0 " - "printf\0 fopen\0 fgetc\0 malloc\0 memset\0 " - "exit\0 "; - int i = 0, start = g_tks[((g_tkCnt) * TokenSize) + Start], len = g_tks[((g_tkCnt) * TokenSize) + End] - start; - while (i < range) { - char* kw = keywords + (i * 9); - int kwlen = strlen(kw); - if (kwlen == len && streq(start, kw, 8)) { - g_tks[((g_tkCnt) * TokenSize) + Kind] = _KeywordStart + i + 1; - break; - } - ++i; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_IDENT; + ++p; + while (((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) || (*p >= '0' && *p <= '9') || *p == '_') { + ++p; } - g_tkCnt += 1; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldEnd]) = p; + check_if_token_keyword(g_token_idx); + g_token_idx += 1; } else if (*p == '0' && p[1] == 'x') { - g_tks[((g_tkCnt) * TokenSize) + Kind] = CInt; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_INT; int result = 0; p += 2; while(((*p >= '0' && *p <= '9') || (*p >= 'A' && *p <= 'F'))) { result = (result << 4) + ((*p < 'A') ? 
(*p - '0') : (*p - 55)); ++p; } - g_tks[((g_tkCnt) * TokenSize) + Value] = result; - g_tks[((g_tkCnt++) * TokenSize) + End] = p; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldValue]) = result; + (g_token_buffer[((g_token_idx++) * _TkFieldCount) + TkFieldEnd]) = p; } else if ((*p >= '0' && *p <= '9')) { - g_tks[((g_tkCnt) * TokenSize) + Kind] = CInt; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_INT; int result = 0; while ((*p >= '0' && *p <= '9')) { result = result * 10 + (*p - '0'); ++p; } - g_tks[((g_tkCnt) * TokenSize) + Value] = result; - g_tks[((g_tkCnt++) * TokenSize) + End] = p; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldValue]) = result; + (g_token_buffer[((g_token_idx++) * _TkFieldCount) + TkFieldEnd]) = p; } else if (*p == '"') { - g_tks[((g_tkCnt) * TokenSize) + Kind] = CStr; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_STRING; ++p; while (*p != '"') { ++p; }; - g_tks[((g_tkCnt++) * TokenSize) + End] = ++p; + (g_token_buffer[((g_token_idx++) * _TkFieldCount) + TkFieldEnd]) = ++p; } else if (*p == 39) { - g_tks[((g_tkCnt) * TokenSize) + Kind] = CChar; - g_tks[((g_tkCnt) * TokenSize) + Value] = p[1]; - g_tks[((g_tkCnt++) * TokenSize) + End] = (p += 3); + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_CHAR; + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldValue]) = p[1]; + (g_token_buffer[((g_token_idx++) * _TkFieldCount) + TkFieldEnd]) = (p += 3); } else { - g_tks[((g_tkCnt) * TokenSize) + Kind] = *p; - if ((*p == '=' && p[1] == '=')) { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkEq; ++p; } - else if ((*p == '!' 
&& p[1] == '=')) { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkNeq; ++p; } - else if ((*p == '&' && p[1] == '&')) { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkAnd; ++p; } - else if ((*p == '|' && p[1] == '|')) { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkOr; ++p; } + (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = *p; + + if ((*p == '=' && p[1] == '=')) { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_EQ; ++p; } + else if ((*p == '!' && p[1] == '=')) { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_NE; ++p; } + else if ((*p == '&' && p[1] == '&')) { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_AND; ++p; } + else if ((*p == '|' && p[1] == '|')) { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_OR; ++p; } else if (*p == '+') { - if (p[1] == '+') { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkInc; ++p; } - else if (p[1] == '=') { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkAddTo; ++p; } + if (p[1] == '+') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_INC; ++p; } + else if (p[1] == '=') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_ADD_ASSIGN; ++p; } } else if (*p == '-') { - if (p[1] == '-') { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkDec; ++p; } - else if (p[1] == '=') { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkSubFrom; ++p; } + if (p[1] == '-') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_DEC; ++p; } + else if (p[1] == '=') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_SUB_ASSIGN; ++p; } } else if (*p == '>') { - if (p[1] == '=') { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkGe; ++p; } - else if (p[1] == '>') { g_tks[((g_tkCnt) * TokenSize) + Kind] = RShift; ++p; } + if (p[1] == '=') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_GE; ++p; } + else if (p[1] == '>') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_RSHIFT; ++p; } 
} else if (*p == '<') { - if (p[1] == '=') { g_tks[((g_tkCnt) * TokenSize) + Kind] = TkLe; ++p; } - else if (p[1] == '<') { g_tks[((g_tkCnt) * TokenSize) + Kind] = LShift; ++p; } + if (p[1] == '=') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_LE; ++p; } + else if (p[1] == '<') { (g_token_buffer[((g_token_idx) * _TkFieldCount) + TkFieldKind]) = TK_LSHIFT; ++p; } } - g_tks[((g_tkCnt++) * TokenSize) + End] = ++p; + + (g_token_buffer[((g_token_idx++) * _TkFieldCount) + TkFieldEnd]) = ++p; } } } return; } + + void dump_tokens() { printf("-------- lex --------\n"); int indent = 0, i = 0, ln = 0; - while (i < g_tkCnt) { - int tkln = g_tks[((i) * TokenSize) + Ln]; - int kind = g_tks[((i) * TokenSize) + Kind]; - int start = g_tks[((i) * TokenSize) + Start]; - int end = g_tks[((i) * TokenSize) + End]; + while (i < g_token_idx) { + int tkln = (g_token_buffer[((i) * _TkFieldCount) + TkFieldLine]); + int kind = (g_token_buffer[((i) * _TkFieldCount) + TkFieldKind]); + int start = (g_token_buffer[((i) * _TkFieldCount) + TkFieldBegin]); + int end = (g_token_buffer[((i) * _TkFieldCount) + TkFieldEnd]); int len = end - start; if (kind == '{') { indent += 1; } else if (kind == '}') { indent -= 1; } @@ -138,11 +213,11 @@ void dump_tokens() { ln = tkln; } char* names = "Int Char Void Break Cont Else Enum If " - "Ret While Print Fopen Fgetc MallocMemsetExit "; + "Ret While Print Fopen Fgetc CallocMemsetExit "; printf("%.*s", len, start); - if (kind >= Int) { + if (kind >= KW_int) { printf("{"); - char *p = names + 6 * (kind - Int); int ii = 0; + char *p = names + 6 * (kind - KW_int); int ii = 0; while (ii < 6) { if (*p == ' ') break; printf("%c", *p); @@ -156,62 +231,73 @@ void dump_tokens() { printf("\n"); return; } + void enter_scope() { if (g_scopeCnt >= 128) { panic("scope overflow"); } + g_scopes[g_scopeCnt++] = ++g_scopeId; return; } + void exit_scope() { if (g_scopeCnt <= 0) { panic("scope overflow"); } + int i = g_symCnt - 1; while (g_syms[((i) * 
SymSize) + Scope] == g_scopes[g_scopeCnt - 1]) { --g_symCnt, --i; } + --g_scopeCnt; return; } + int expect(int kind) { - if (g_tks[((g_tkIter) * TokenSize) + Kind] != kind) { - int start = g_tks[((g_tkIter) * TokenSize) + Start], end = g_tks[((g_tkIter) * TokenSize) + End]; - { printf("error:%d: expected token '%c'(%d), got '%.*s'\n", g_tks[((g_tkIter) * TokenSize) + Ln], kind < 128 ? kind : ' ', kind, end - start, start); exit(1); } - ; + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != kind) { + int start = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldBegin]), end = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldEnd]); + { printf("error:%d: expected token '%c'(%d), got '%.*s'\n", (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine]), kind < 128 ? kind : ' ', kind, end - start, start); exit(1); } + ; } return g_tkIter++; } + int expect_type() { - int base_type = g_tks[((g_tkIter) * TokenSize) + Kind]; - if ((base_type >= Int && base_type <= Void)) { + int base_type = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + if ((base_type >= KW_int && base_type <= KW_void)) { ++g_tkIter; int ptr = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } return (ptr << 16) | base_type; } - int start = g_tks[((g_tkIter) * TokenSize) + Start], end = g_tks[((g_tkIter) * TokenSize) + End]; - { printf("error:%d: expected type specifier, got '%.*s'\n", g_tks[((g_tkIter) * TokenSize) + Ln], end - start, start); exit(1); }; + + int start = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldBegin]), end = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldEnd]); + { printf("error:%d: expected type specifier, got '%.*s'\n", (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine]), end - start, start); exit(1); }; } + void instruction(int op, int imme) { 
g_ops[((g_opCnt) * OpSize) + OpCode] = op; g_ops[((g_opCnt) * OpSize) + Imme] = imme; ++g_opCnt; return; } + int primary_expr() { int tkIdx = g_tkIter++; - char* start = g_tks[((tkIdx) * TokenSize) + Start]; - char* end = g_tks[((tkIdx) * TokenSize) + End]; - int ln = g_tks[((tkIdx) * TokenSize) + Ln]; - int kind = g_tks[((tkIdx) * TokenSize) + Kind]; - int value = g_tks[((tkIdx) * TokenSize) + Value]; + char* start = (g_token_buffer[((tkIdx) * _TkFieldCount) + TkFieldBegin]); + char* end = (g_token_buffer[((tkIdx) * _TkFieldCount) + TkFieldEnd]); + int ln = (g_token_buffer[((tkIdx) * _TkFieldCount) + TkFieldLine]); + int kind = (g_token_buffer[((tkIdx) * _TkFieldCount) + TkFieldKind]); + int value = (g_token_buffer[((tkIdx) * _TkFieldCount) + TkFieldValue]); int len = end - start; - if (kind == CInt || kind == CChar) { + if (kind == TK_INT || kind == TK_CHAR) { instruction(Mov | (EAX << 8) | (IMME << 24), value); - return Int; + return KW_int; } - if (kind == CStr) { + + if (kind == TK_STRING) { instruction(Mov | (EAX << 8) | (IMME << 24), g_bss); while (1) { len = len - 1; @@ -227,43 +313,50 @@ int primary_expr() { *((char*)g_bss++) = c; ++i; } - if (g_tks[((g_tkIter) * TokenSize) + Kind] != CStr) break; - start = g_tks[((g_tkIter) * TokenSize) + Start]; - end = g_tks[((g_tkIter) * TokenSize) + End]; - ln = g_tks[((g_tkIter) * TokenSize) + Ln]; + + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != TK_STRING) break; + start = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldBegin]); + end = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldEnd]); + ln = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine]); len = end - start; ++g_tkIter; } + *((char*)g_bss++) = 0; g_bss = ((g_bss + 3) & -4); - return (0xFF0000 | Char); + return (0xFF0000 | KW_char); } + if (kind == '(') { int data_type = expr(); expect(')'); return data_type; } - if (kind == Id) { - if (g_tks[((g_tkIter) * TokenSize) + Kind] == '(') { + + if (kind == TK_IDENT) { + 
if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '(') { ++g_tkIter; int argc = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] != ')') { + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != ')') { if (argc > 0) expect(','); assign_expr(); instruction(Push | (EAX << 24), 0); ++argc; } + g_calls[((g_callCnt) * CallSize) + TkIdx] = tkIdx; g_calls[((g_callCnt++) * CallSize) + InsIdx] = g_opCnt; + instruction(Call, 0);; if (argc) { instruction(Add | (ESP << 8) | (ESP << 16) | (IMME << 24), (argc << 2)); } expect(')'); - return Int; + return KW_int; } + int address = 0, type = Undefined, data_type = 0, i = g_symCnt - 1; while (i >= 0) { int tmp = g_syms[((i) * SymSize) + TkIdx]; - char *tmpstart = g_tks[((tmp) * TokenSize) + Start], *tmpend = g_tks[((tmp) * TokenSize) + End]; + char *tmpstart = (g_token_buffer[((tmp) * _TkFieldCount) + TkFieldBegin]), *tmpend = (g_token_buffer[((tmp) * _TkFieldCount) + TkFieldEnd]); if (len == (tmpend - tmpstart) && streq(start, tmpstart, len)) { address = g_syms[((i) * SymSize) + Address]; type = g_syms[((i) * SymSize) + Storage]; @@ -272,6 +365,7 @@ int primary_expr() { } --i; } + if (type == Global) { instruction(Mov | (EDX << 8) | (IMME << 24), address); instruction(Load | (EAX << 8) | (EDX << 16), 4); @@ -279,19 +373,21 @@ int primary_expr() { } if (type == Const) { instruction(Mov | (EAX << 8) | (IMME << 24), address); - return Int; + return KW_int; } if (type == Undefined) { { printf("error:%d: '%.*s' undeclared\n", ln, len, start); exit(1); }; } + instruction(Sub | (EDX << 8) | (EBP << 16) | (IMME << 24), (address)); instruction(Load | (EAX << 8) | (EDX << 16), 4); return data_type; } - if (kind == Printf) { + + if (kind == KW_printf) { expect('('); int argc = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] != ')') { + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != ')') { if (argc > 0) expect(','); assign_expr(); instruction(Push | (EAX << 24), 0); @@ -299,17 
+395,19 @@ int primary_expr() { } if (argc > 8) panic("printf supports at most %d args"); instruction(Sub | (ESP << 8) | (ESP << 16) | (IMME << 24), ((8 - argc) << 2)); - instruction(Printf, argc); + instruction(KW_printf, argc); instruction(Add | (ESP << 8) | (ESP << 16) | (IMME << 24), (8 << 2)); expect(')'); - return Int; + return KW_int; } - int paramCnt = 0, ret = Void, i = 0; - if (kind == Fopen) { paramCnt = 2; ret = (0xFF0000 | Void); } - else if (kind == Fgetc) { paramCnt = 1; ret = Int; } - else if (kind == Malloc) { paramCnt = 1; ret = (0xFF0000 | Void); } - else if (kind == Exit) { paramCnt = 1; ret = Void; } + + int paramCnt = 0, ret = KW_void, i = 0; + if (kind == KW_fopen) { paramCnt = 2; ret = (0xFF0000 | KW_void); } + else if (kind == KW_fgetc) { paramCnt = 1; ret = KW_int; } + else if (kind == KW_calloc) { paramCnt = 2; ret = (0xFF0000 | KW_void); } + else if (kind == KW_exit) { paramCnt = 1; ret = KW_void; } else { { printf("error:%d: expected expression, got '%.*s'\n", ln, len, start); exit(1); }; } + expect('('); while (i < paramCnt) { if (i++ > 0) { expect(','); } @@ -321,11 +419,12 @@ int primary_expr() { expect(')'); return ret; } + int post_expr() { int data_type = primary_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind]; - int ln = g_tks[((g_tkIter) * TokenSize) + Ln]; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + int ln = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine]); if (kind == '[') { ++g_tkIter; if (!(0xFF0000 & data_type)) { @@ -333,7 +432,7 @@ int post_expr() { } instruction(Push | (EAX << 24), 0); assign_expr(); - int is_charptr = data_type == (0xFF0000 | Char); + int is_charptr = data_type == (0xFF0000 | KW_char); if (!is_charptr) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); } @@ -344,12 +443,12 @@ int post_expr() { else { instruction(Load | (EAX << 8) | (EAX << 16), 4); } expect(']'); data_type = ((data_type >> 8) & 0xFF0000) | ((data_type & 
0xFFFF)); - } else if (kind == TkInc || kind == TkDec) { + } else if (kind == TK_INC || kind == TK_DEC) { ++g_tkIter; instruction(Load | (EAX << 8) | (EDX << 16), 4); instruction(Mov | (EBX << 8) | (EAX << 24), 0); - int value = ((0xFF0000 & data_type) && data_type != (0xFF0000 | Char)) ? 4 : 1; - int op = kind == TkInc ? Add : Sub; + int value = ((0xFF0000 & data_type) && data_type != (0xFF0000 | KW_char)) ? 4 : 1; + int op = kind == TK_INC ? Add : Sub; instruction(((op) | (EBX << 8) | (EBX << 16) | (IMME << 24)), value); instruction(Save | (EDX << 8) | (EBX << 16), 4);; } else { @@ -358,9 +457,10 @@ int post_expr() { } return data_type; } + int unary_expr() { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind]; - int ln = g_tks[((g_tkIter) * TokenSize) + Ln]; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + int ln = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine]); if (kind == '!') { ++g_tkIter; int data_type = unary_expr(); @@ -384,27 +484,29 @@ int unary_expr() { if (!(0xFF0000 & data_type)) { { printf("error:%d: attempted to dereference a non-pointer type 0x%X\n", ln, data_type); exit(1); }; } + instruction(Mov | (EDX << 8) | (EAX << 24), 0); - if (data_type == (0xFF0000 | Char)) instruction(Load | (EAX << 8) | (EDX << 16), 1); + if (data_type == (0xFF0000 | KW_char)) instruction(Load | (EAX << 8) | (EDX << 16), 1); else instruction(Load | (EAX << 8) | (EDX << 16), 4); return ((data_type >> 8) & 0xFF0000) | (0xFFFF & data_type); } - if (kind == TkInc || kind == TkDec) { + if (kind == TK_INC || kind == TK_DEC) { ++g_tkIter; int data_type = unary_expr(); instruction(Load | (EAX << 8) | (EDX << 16), 4); - int value = ((0xFF0000 & data_type) && data_type != (0xFF0000 | Char)) ? 4 : 1; - int op = kind == TkInc ? Add : Sub; + int value = ((0xFF0000 & data_type) && data_type != (0xFF0000 | KW_char)) ? 4 : 1; + int op = kind == TK_INC ? 
Add : Sub; instruction(((op) | (EAX << 8) | (EAX << 16) | (IMME << 24)), value); instruction(Save | (EDX << 8) | (EAX << 16), 4);; return data_type; } return post_expr(); } + int cast_expr() { - if (g_tks[((g_tkIter) * TokenSize) + Kind] == '(') { - int kind = g_tks[((g_tkIter + 1) * TokenSize) + Kind]; - if ((kind >= Int && kind <= Void)) { + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '(') { + int kind = (g_token_buffer[((g_tkIter + 1) * _TkFieldCount) + TkFieldKind]); + if ((kind >= KW_int && kind <= KW_void)) { ++g_tkIter; int data_type = expect_type(); expect(')'); @@ -412,12 +514,14 @@ int cast_expr() { return data_type; } } + return unary_expr(); } + int mul_expr() { int data_type = cast_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind], opcode; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]), opcode; if (kind == '*') opcode = Mul; else if (kind == '/') opcode = Div; else if (kind == '%') opcode = Rem; @@ -428,12 +532,14 @@ int mul_expr() { instruction(Pop | (EBX << 8), 0); instruction(((opcode) | (EAX << 8) | (EBX << 16) | (EAX << 24)), 0); } + return data_type; } + int add_expr() { int data_type = mul_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind], opcode; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]), opcode; if (kind == '+') opcode = Add; else if (kind == '-') opcode = Sub; else break; @@ -442,24 +548,26 @@ int add_expr() { int rhs = mul_expr(); if ((0xFF0000 & data_type) && (0xFF0000 & rhs)) { if (data_type != rhs) { - { printf("error:%d: type mismatch", g_tks[((g_tkIter) * TokenSize) + Ln]); exit(1); }; + { printf("error:%d: type mismatch", (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine])); exit(1); }; } - if (data_type != (0xFF0000 | Char)) { + if (data_type != (0xFF0000 | KW_char)) { panic("TODO: handle subtraction other than char* - char*"); } - data_type = Int; + data_type = KW_int; } instruction(Pop | (EBX << 8), 
0); - if ((0xFF0000 & data_type) && data_type != (0xFF0000 | Char)) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); } + if ((0xFF0000 & data_type) && data_type != (0xFF0000 | KW_char)) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); } instruction(((opcode) | (EAX << 8) | (EBX << 16) | (EAX << 24)), 0); } + return data_type; } + int shift_expr() { int data_type = add_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind]; - if (kind != LShift && kind != RShift) break; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + if (kind != TK_LSHIFT && kind != TK_RSHIFT) break; ++g_tkIter; instruction(Push | (EAX << 24), 0); add_expr(); @@ -468,17 +576,19 @@ int shift_expr() { } return data_type; } + + int relation_expr() { int data_type = shift_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind], opcode; - if (kind == TkNeq) opcode = Neq; - else if (kind == TkEq) opcode = Eq; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]), opcode; + if (kind == TK_NE) opcode = Neq; + else if (kind == TK_EQ) opcode = Eq; else if (kind == '<') opcode = Lt; else if (kind == '>') opcode = Gt; - else if (kind == TkGe) opcode = Ge; + else if (kind == TK_GE) opcode = Ge; else if (kind == '<') opcode = Lt; - else if (kind == TkLe) opcode = Le; + else if (kind == TK_LE) opcode = Le; else break; ++g_tkIter; instruction(Push | (EAX << 24), 0); @@ -488,10 +598,11 @@ int relation_expr() { } return data_type; } + int bit_expr() { int data_type = relation_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind], opcode; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]), opcode; if (kind == '&') opcode = And; else if (kind == '|') opcode = Or; else break; @@ -503,13 +614,15 @@ int bit_expr() { } return data_type; } + int logical_expr() { int data_type = bit_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind], opcode; - if 
(kind == TkAnd) opcode = Jz; - else if (kind == TkOr) opcode = Jnz; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]), opcode; + if (kind == TK_AND) opcode = Jz; + else if (kind == TK_OR) opcode = Jnz; else break; + ++g_tkIter; int skip = g_opCnt; instruction(opcode, 0); @@ -517,42 +630,47 @@ int logical_expr() { g_ops[((skip) * OpSize) + Imme] = g_opCnt; continue; } + return data_type; } + int assign_expr() { int data_type = logical_expr(); while (1) { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind]; + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); if (kind == '=') { ++g_tkIter; instruction(Push | (EDX << 24), 0); logical_expr(); instruction(Pop | (EDX << 8), 0); - instruction(((Save) | (EDX << 8) | (EAX << 16) | (0 << 24)), data_type == Char ? 1 : 4); + instruction(((Save) | (EDX << 8) | (EAX << 16) | (0 << 24)), data_type == KW_char ? 1 : 4); continue; } - if (kind == TkAddTo) { + + if (kind == TK_ADD_ASSIGN) { ++g_tkIter; instruction(Push | (EDX << 24), 0); relation_expr(); instruction(Pop | (EDX << 8), 0); instruction(Load | (EBX << 8) | (EDX << 16), 4); - if ((0xFF0000 & data_type) && data_type != (0xFF0000 | Char)) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); } + if ((0xFF0000 & data_type) && data_type != (0xFF0000 | KW_char)) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); } instruction(Add | (EAX << 8) | (EBX << 16) | (EAX << 24), (0)); instruction(Save | (EDX << 8) | (EAX << 16), 4);; continue; } - if (kind == TkSubFrom) { + + if (kind == TK_SUB_ASSIGN) { ++g_tkIter; instruction(Push | (EDX << 24), 0); relation_expr(); instruction(Pop | (EDX << 8), 0); instruction(Load | (EBX << 8) | (EDX << 16), 4); - if ((0xFF0000 & data_type) && data_type != (0xFF0000 | Char)) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); } + if ((0xFF0000 & data_type) && data_type != (0xFF0000 | KW_char)) { instruction(Mul | (EAX << 8) | (EAX << 16) | (IMME << 24), (4)); 
} instruction(Sub | (EAX << 8) | (EBX << 16) | (EAX << 24), (0)); instruction(Save | (EDX << 8) | (EAX << 16), 4);; continue; } + if (kind == '?') { ++g_tkIter; int goto_L1 = g_opCnt; @@ -566,38 +684,50 @@ int assign_expr() { g_ops[((goto_L2) * OpSize) + Imme] = g_opCnt; continue; } + break; } + return data_type; } + int expr() { int type = assign_expr(); - while (g_tks[((g_tkIter) * TokenSize) + Kind] == ',') { + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == ',') { g_tkIter += 1; type = assign_expr(); } return type; } + void stmt() { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind]; - if (kind == Return) { - if (g_tks[((++g_tkIter) * TokenSize) + Kind] != ';') { assign_expr(); } + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + if (kind == KW_return) { + if ((g_token_buffer[((++g_tkIter) * _TkFieldCount) + TkFieldKind]) != ';') { assign_expr(); } instruction(Mov | (ESP << 8) | (EBP << 24), 0); instruction(Pop | (EBP << 8), 0); instruction(Ret, 0); expect(';'); return; } - if (kind == If) { + + if (kind == KW_if) { + + + + + ++g_tkIter; expect('('); expr(); expect(')'); int goto_L1 = g_opCnt; instruction(Jz, 0); stmt(); - if (g_tks[((g_tkIter) * TokenSize) + Kind] != Else) { + + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != KW_else) { g_ops[((goto_L1) * OpSize) + Imme] = g_opCnt; return; } + ++g_tkIter; int goto_L2 = g_opCnt; instruction(Jump, g_opCnt + 1); @@ -606,7 +736,13 @@ void stmt() { g_ops[((goto_L2) * OpSize) + Imme] = g_opCnt; return; } - if (kind == While) { + + if (kind == KW_while) { + + + + + int label_cont = g_opCnt; ++g_tkIter; expect('('); expr(); expect(')'); @@ -624,110 +760,131 @@ void stmt() { } return; } - if (kind == Break) { + + if (kind == KW_break) { ++g_tkIter; instruction(_BreakStub, 0); expect(';'); return; } - if (kind == Cont) { + + if (kind == KW_continue) { ++g_tkIter; instruction(_ContStub, 0); expect(';'); return; } + if (kind == '{') { enter_scope(); 
++g_tkIter; int restore = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] != '}') { - kind = g_tks[((g_tkIter) * TokenSize) + Kind]; - if ((kind >= Int && kind <= Void)) { + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != '}') { + kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + if ((kind >= KW_int && kind <= KW_void)) { ++g_tkIter; int base_type = kind, varNum = 0; while (1) { if (varNum > 0) { expect(','); } + int ptr = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } - int id = expect(Id), prev = g_symCnt - 1; + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } + int id = expect(TK_IDENT), prev = g_symCnt - 1; g_syms[((g_symCnt) * SymSize) + Address] = 4; + if (prev >= 0 && g_syms[((prev) * SymSize) + Storage] == Local) { g_syms[((g_symCnt) * SymSize) + Address] += g_syms[((prev) * SymSize) + Address]; } + g_syms[((g_symCnt) * SymSize) + Storage] = Local; g_syms[((g_symCnt) * SymSize) + TkIdx] = id; g_syms[((g_symCnt) * SymSize) + Scope] = g_scopes[g_scopeCnt - 1]; g_syms[((g_symCnt) * SymSize) + DType] = (ptr << 16) | base_type; + instruction(Sub | (ESP << 8) | (ESP << 16) | (IMME << 24), (4)); - if (g_tks[((g_tkIter) * TokenSize) + Kind] == '=') { + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '=') { ++g_tkIter; assign_expr(); instruction(Sub | (EDX << 8) | (EBP << 16) | (IMME << 24), (g_syms[((g_symCnt) * SymSize) + Address])); instruction(Save | (EDX << 8) | (EAX << 16), 4);; } + ++restore, ++varNum, ++g_symCnt; - if (g_tks[((g_tkIter) * TokenSize) + Kind] == ';') { break; } + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == ';') { break; } } + ++g_tkIter; } else { stmt(); } } ++g_tkIter; + if (restore) { instruction(Add | (ESP << 8) | (ESP << 16) | (IMME << 24), (restore << 2)); } exit_scope(); return; } + if (kind == ';') { ++g_tkIter; return; } + 
expr(); expect(';'); return; } + + void obj() { - int kind = g_tks[((g_tkIter) * TokenSize) + Kind]; - if (kind == Enum) { + int kind = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]); + if (kind == KW_enum) { ++g_tkIter; expect('{'); int val = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] != '}') { - int idx = expect(Id); + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != '}') { + int idx = expect(TK_IDENT); g_syms[((g_symCnt) * SymSize) + TkIdx] = idx; g_syms[((g_symCnt) * SymSize) + Storage] = Const; - g_syms[((g_symCnt) * SymSize) + DType] = Int; + g_syms[((g_symCnt) * SymSize) + DType] = KW_int; g_syms[((g_symCnt) * SymSize) + Scope] = g_scopes[g_scopeCnt - 1]; - if (g_tks[((g_tkIter) * TokenSize) + Kind] == '=') { + + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '=') { ++g_tkIter; - idx = expect(CInt); - val = g_tks[((idx) * TokenSize) + Value]; + idx = expect(TK_INT); + val = (g_token_buffer[((idx) * _TkFieldCount) + TkFieldValue]); } + g_syms[((g_symCnt++) * SymSize) + Address] = val++; - if (g_tks[((g_tkIter) * TokenSize) + Kind] == '}') { break; } + + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '}') { break; } expect(','); } ++g_tkIter; expect(';'); return; } - int ln = g_tks[((g_tkIter) * TokenSize) + Ln]; - int start = g_tks[((g_tkIter) * TokenSize) + Start]; - int end = g_tks[((g_tkIter++) * TokenSize) + End]; - if (!(kind >= Int && kind <= Void)) { + + int ln = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldLine]); + int start = (g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldBegin]); + int end = (g_token_buffer[((g_tkIter++) * _TkFieldCount) + TkFieldEnd]); + if (!(kind >= KW_int && kind <= KW_void)) { { printf("error:%d: unexpected token '%.*s'\n", ln, end - start, start); exit(1); }; } - while (g_tks[((g_tkIter) * TokenSize) + Kind] != ';') { + + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != ';') { int data_type = kind, ptr = 
0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] == '*') { + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } data_type = (ptr << 16) | data_type; - int id = expect(Id); - if (g_tks[((g_tkIter) * TokenSize) + Kind] != '(') { + + int id = expect(TK_IDENT); + + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != '(') { g_syms[((g_symCnt) * SymSize) + Storage] = Global; g_syms[((g_symCnt) * SymSize) + TkIdx] = id; g_syms[((g_symCnt) * SymSize) + Scope] = g_scopes[g_scopeCnt - 1]; @@ -735,10 +892,11 @@ void obj() { *((int*)g_bss) = 0; g_syms[((g_symCnt++) * SymSize) + Address] = g_bss; g_bss += 4; - if (g_tks[((g_tkIter) * TokenSize) + Kind] != ';') { expect(','); } + if ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != ';') { expect(','); } continue; } - if (streq("main", g_tks[((id) * TokenSize) + Start], 4)) { + + if (streq("main", (g_token_buffer[((id) * _TkFieldCount) + TkFieldBegin]), 4)) { g_entry = g_opCnt; } else { g_syms[((g_symCnt) * SymSize) + Storage] = Func; @@ -747,16 +905,17 @@ void obj() { g_syms[((g_symCnt) * SymSize) + Scope] = g_scopes[g_scopeCnt - 1]; g_syms[((g_symCnt++) * SymSize) + Address] = g_opCnt; } + enter_scope(); expect('('); int argCnt = 0, i = 1; - while (g_tks[((g_tkIter) * TokenSize) + Kind] != ')') { + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) != ')') { if (argCnt > 0) { expect(','); } int data_type = expect_type(); int ptr = 0; - while (g_tks[((g_tkIter) * TokenSize) + Kind] == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } + while ((g_token_buffer[((g_tkIter) * _TkFieldCount) + TkFieldKind]) == '*') { ptr = (ptr << 8) | 0xFF; ++g_tkIter; } data_type = (ptr << 16) | data_type; - g_syms[((g_symCnt) * SymSize) + TkIdx] = expect(Id); + g_syms[((g_symCnt) * SymSize) + TkIdx] = expect(TK_IDENT); g_syms[((g_symCnt) * SymSize) + Scope] = g_scopes[g_scopeCnt - 1]; g_syms[((g_symCnt) * SymSize) + DType] = data_type; 
g_syms[((g_symCnt++) * SymSize) + Storage] = Param; @@ -767,6 +926,8 @@ void obj() { g_syms[((g_symCnt - i) * SymSize) + Address] = -((i + 1) << 2); ++i; } + + instruction(Push | (EBP << 24), 0); instruction(Mov | (EBP << 8) | (ESP << 24), 0); stmt(); @@ -776,36 +937,45 @@ void obj() { ++g_tkIter; return; } + void gen(int argc, char** argv) { enter_scope(); - while (g_tkIter < g_tkCnt) { + + while (g_tkIter < g_token_idx) { obj(); } + int i = 0; while (i < g_callCnt) { int idx = g_calls[((i) * CallSize) + TkIdx]; - int start = g_tks[((idx) * TokenSize) + Start]; - int end = g_tks[((idx) * TokenSize) + End]; - int ln = g_tks[((idx) * TokenSize) + Ln]; + int start = (g_token_buffer[((idx) * _TkFieldCount) + TkFieldBegin]); + int end = (g_token_buffer[((idx) * _TkFieldCount) + TkFieldEnd]); + int ln = (g_token_buffer[((idx) * _TkFieldCount) + TkFieldLine]); int len = end - start; + int found = 0, j = 0; while (j < g_symCnt) { if (g_syms[((j) * SymSize) + Storage] == Func) { int funcIdx = g_syms[((j) * SymSize) + TkIdx]; - if (streq(start, g_tks[((funcIdx) * TokenSize) + Start], len)) { + if (streq(start, (g_token_buffer[((funcIdx) * _TkFieldCount) + TkFieldBegin]), len)) { found = 1; + g_ops[((g_calls[((i) * CallSize) + InsIdx]) * OpSize) + Imme] = g_syms[((j) * SymSize) + Address]; break; } } ++j; } + if (!found) { { printf("error:%d: unknown reference to call %.*s\n", ln, len, start); exit(1); }; } ++i; } + exit_scope(); + + int argptr = g_bss, it = 0; char** argStart = g_bss; char* stringStart = argStart + argc; @@ -819,15 +989,23 @@ void gen(int argc, char** argv) { *stringStart++ = 0; ++g_bss, ++it; } + g_bss = ((g_bss + 3) & -4); + + int entry = g_opCnt; instruction(Push | (IMME << 24), argc); instruction(Push | (IMME << 24), argptr); instruction(Call, g_entry);; + g_entry = entry; + return; } + + void dump_code() { + printf("-------- code --------\n"); char* regs = " eaxebxecxedxespebp"; int pc = 0; @@ -858,9 +1036,9 @@ void dump_code() { printf(" or %.*s, 
%.*s, %.*s\n", 3, regs + 3 * dest, 3, regs + 3 * src1, 3, regs + 3 * src2); } else if (op == Not) { printf(" not %.*s\n", 3, regs + 3 * dest); - } else if (op == LShift) { + } else if (op == TK_LSHIFT) { printf(" lshift %.*s, %.*s, %.*s\n", 3, regs + 3 * dest, 3, regs + 3 * src1, 3, regs + 3 * src2); - } else if (op == RShift) { + } else if (op == TK_RSHIFT) { printf(" rshift %.*s, %.*s, %.*s\n", 3, regs + 3 * dest, 3, regs + 3 * src1, 3, regs + 3 * src2); } else if (op == Push) { if (src2 == IMME) printf(" push %d(0x%08X)\n", imme, imme); @@ -874,8 +1052,8 @@ void dump_code() { } else if (op == Jump || op == Jz || op == Jnz || op == Call) { char* opstr = op == Jump ? "jmp" : op == Jz ? "jz" : op == Jnz ? "jnz" : "call"; printf(" %s %d\n", opstr, imme); - } else if (op == Printf || op == Fopen || op == Fgetc || op == Malloc || op == Exit) { - char* opstr = op == Printf ? "printf" : op == Fopen ? "fopen" : op == Fgetc ? "fgetc" : op == Malloc ? "malloc" : "exit"; + } else if (op == KW_printf || op == KW_fopen || op == KW_fgetc || op == KW_calloc || op == KW_exit) { + char* opstr = op == KW_printf ? "printf" : op == KW_fopen ? "fopen" : op == KW_fgetc ? "fgetc" : op == KW_calloc ? 
"calloc" : "exit"; printf(" %s\n", opstr); } else { panic("invalid op code"); @@ -884,38 +1062,54 @@ void dump_code() { } return; } + int main(int argc, char **argv) { - if (argc < 2) { - printf("Usage: %s file [args...]\n", *argv); - return 1; - } - void* fp = fopen(argv[1], "r"); - if (!fp) { - printf("file '%s' does not exist\n", argv[1]); - return 1; - } + + if (argc == 1) { + printf("%s: fatal error: no input files\n compilation terminated.", *argv); + return 1; + } + + void* fp = fopen(argv[1], "r"); + if (!fp) { + printf("%s: fatal error: %s : No such file or directory\n compilation terminated.", *argv, *(argv + 1)); + return 1; + } + g_reserved = 2 * (1 << 27) * argc; - g_ram = malloc(g_reserved); + g_ram = calloc(g_reserved, 1); + + + int src_reserved = 1 << 18; - int tk_reserved = 4 * TokenSize * (src_reserved >> 2); + int tk_reserved = 4 * _TkFieldCount * (src_reserved >> 2); int sym_reserved = 4 * SymSize * (tk_reserved >> 8); int opcode_reserved = 4 * OpSize * (src_reserved >> 3); int scope_reserved = 4 * 128; int call_reserved = 4 * CallSize * 1024; g_src = g_ram + (g_reserved - src_reserved); - g_tks = g_ram + (g_reserved - src_reserved - tk_reserved); + g_token_buffer = g_ram + (g_reserved - src_reserved - tk_reserved); g_syms = g_ram + (g_reserved - src_reserved - tk_reserved - sym_reserved); g_scopes = g_ram + (g_reserved - src_reserved - tk_reserved - sym_reserved - scope_reserved); g_calls = g_ram + (g_reserved - src_reserved - tk_reserved - sym_reserved - scope_reserved - call_reserved); g_bss = g_ram + opcode_reserved; g_ops = g_ram; + + int src_len = 0, c; while ((c = fgetc(fp)) != -1) { g_src[src_len++] = c; } g_src[src_len] = 0; + + lex(); + + gen(argc - 1, argv + 1); + + g_regs = g_ram + g_reserved - 4 * IMME; g_regs[ESP] = g_ram + g_reserved - 4 * IMME; + int pc = g_entry; while (pc < g_opCnt) { int op = g_ops[((pc) * OpSize) + OpCode]; @@ -925,32 +1119,39 @@ int main(int argc, char **argv) { int src2 = (op & 0xFF000000) >> 24; int value 
= src2 == IMME ? imme : g_regs[src2]; op = op & 0xFF; + if (op == Call) { g_regs[ESP] -= 4; *((int*)g_regs[ESP]) = pc + 1; pc = imme; continue; } + if ((op == Jump)) { pc = imme; continue; } + if (op == Jz) { if (g_regs[EAX] == 0) pc = imme; else pc = pc + 1; continue; } + if (op == Jnz) { if (g_regs[EAX]) pc = imme; else pc = pc + 1; continue; } + if (op == Ret) { pc = *((int*)g_regs[ESP]); g_regs[ESP] += 4; continue; } + pc = pc + 1; + if (op == Mov) { g_regs[dest] = value; } else if (op == Push) { g_regs[ESP] -= 4; *((int*)g_regs[ESP]) = value; } else if (op == Pop) { g_regs[dest] = *((int*)g_regs[ESP]); g_regs[ESP] += 4; } @@ -968,8 +1169,8 @@ int main(int argc, char **argv) { else if (op == And) { g_regs[dest] = g_regs[src1] & value; } else if (op == Or) { g_regs[dest] = g_regs[src1] | value; } else if (op == Not) { g_regs[dest] = !g_regs[dest]; } - else if (op == LShift) { g_regs[dest] = g_regs[src1] << value; } - else if (op == RShift) { g_regs[dest] = g_regs[src1] >> value; } + else if (op == TK_LSHIFT) { g_regs[dest] = g_regs[src1] << value; } + else if (op == TK_RSHIFT) { g_regs[dest] = g_regs[src1] >> value; } else if (op == Save) { if (imme == 4) *((int*)g_regs[dest]) = g_regs[src1]; else *((char*)g_regs[dest]) = g_regs[src1]; @@ -978,18 +1179,18 @@ int main(int argc, char **argv) { if (imme == 4) g_regs[dest] = *((int*)g_regs[src1]); else g_regs[dest] = *((char*)g_regs[src1]); } - else if (op == Printf) { + else if (op == KW_printf) { int* p = g_regs[ESP]; printf((char*)(p[7]), p[6], p[5], p[4], p[3], p[2], p[1], p[0]); - } else if (op == Fgetc) { + } else if (op == KW_fgetc) { int* p = g_regs[ESP]; g_regs[EAX] = fgetc((void*)(p[0])); - } else if (op == Fopen) { + } else if (op == KW_fopen) { int* p = g_regs[ESP]; g_regs[EAX] = fopen((char*)(p[1]), (char*)(p[0])); - } else if (op == Malloc) { + } else if (op == KW_calloc) { g_regs[EAX] = g_ram + (1 << 27); - } else if (op == Exit) { + } else if (op == KW_exit) { g_regs[EAX] = *((int*)g_regs[ESP]); 
break; } else { panic("Invalid op code"); } diff --git a/run_tests.py b/run_tests.py deleted file mode 100644 index 0815f3b..0000000 --- a/run_tests.py +++ /dev/null @@ -1,118 +0,0 @@ -import subprocess -import sys -import difflib -from pathlib import Path - -def run_command(args, stdout_path=None): - """ - Run a command. - If stdout_path is given, redirect stdout to that file. - Returns the process return code. - """ - if stdout_path is not None: - with open(stdout_path, "w", encoding="utf-8", newline="") as f: - proc = subprocess.run(args, stdout=f, stderr=subprocess.PIPE, text=True) - else: - proc = subprocess.run(args, stderr=subprocess.PIPE, text=True) - - if proc.stderr: - sys.stderr.write(proc.stderr) - - return proc.returncode - - -def testcase(c_file: Path) -> bool: - """ - Run a single test case for the given C file. - Returns True if passed, False if failed. - """ - name = c_file.stem - print(f"Running test [{name}]...") - - # 1) Compile with gcc - print(f"Compiling {c_file} with gcc") - ret = run_command(["gcc", str(c_file)]) - if ret != 0: - print("ERROR: gcc compilation failed.") - return False - - # 2) Run c.c.exe and capture output -> name.actual - # (translation of: .\c.c.exe "%file%" ".\test\%name%.c" > "%name%.actual") - actual_path = Path(f"{name}.actual") - test_c_path = Path("test") / f"{name}.c" - ret = run_command( - [str(Path("c.c.exe")), str(c_file), str(test_c_path)], - stdout_path=actual_path, - ) - if ret != 0: - print(f"ERROR: c.c.exe failed for {c_file}") - return False - - # 3) Run a.exe and capture output -> name.expect - # (translation of: .\a "..\test\%name%.c" > "%name%.expect") - # Using .\test\ instead of ..\test\, since we are in project root. 
- expect_path = Path(f"{name}.expect") - ret = run_command( - [str(Path("a.exe")), str(test_c_path)], - stdout_path=expect_path, - ) - if ret != 0: - print(f"ERROR: a.exe failed for {c_file}") - return False - - # 4) Compare outputs (fc /N /W "%name%.expect" "%name%.actual" >nul) - try: - actual_text = actual_path.read_text(encoding="utf-8") - expect_text = expect_path.read_text(encoding="utf-8") - except OSError as e: - print(f"ERROR: Failed to read output files for test [{name}]: {e}") - return False - - RED = "\033[31m" - GREEN = "\033[32m" - RESET = "\033[0m" - - if actual_text == expect_text: - print(f"{GREEN}Test [{name}] passed.{RESET}") - return True - else: - print(f"{RED}ERROR: Test [{name}] failed.{RESET}") - - diff = difflib.unified_diff( - expect_text.splitlines(keepends=True), - actual_text.splitlines(keepends=True), - fromfile=f"{name}.expect", - tofile=f"{name}.actual", - lineterm="") - - print("".join(diff)) - return False - - -def main() -> int: - # Main loop over .\test\*.c - test_dir = Path("test") - if not test_dir.is_dir(): - print("ERROR: test directory 'test' not found.", file=sys.stderr) - return 1 - - c_files = sorted(test_dir.glob("*.c")) - if not c_files: - print("No test .c files found in 'test' directory.") - return 0 - - for c_file in c_files: - if not testcase(c_file): - # On first failure, mimic `exit /b 1` - return 1 - - # @TODO: make sure to clean up generated even if tests fail - for pattern in ("*.actual", "*.expect"): - for file in Path(".").glob(pattern): - file.unlink() - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/preprocess.py b/scripts/preprocess.py new file mode 100644 index 0000000..8f7fe56 --- /dev/null +++ b/scripts/preprocess.py @@ -0,0 +1,20 @@ +import subprocess + +from pathlib import Path +from utils import Log, run_command, rename + +def main(): + # 1. 
Create c.c with initial lines + with open('c.c', 'w', encoding='utf-8') as f: + f.write('#include \n') + f.write('#include \n') + + # 2. Preprocess c-source.c -> tmp.c + run_command(['gcc', '-E', '-DPREPROC', 'c-source.c', '-o', 'tmp.c']) + + # 3. Rename tmp.c -> c.c + rename(Path('tmp.c'), Path('c.c')) + Log.ok('Preprocessing completed.') + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/scripts/run_tests.py b/scripts/run_tests.py new file mode 100644 index 0000000..34a65b2 --- /dev/null +++ b/scripts/run_tests.py @@ -0,0 +1,83 @@ +import sys +import difflib +from pathlib import Path + +from utils import Log, run_command + +def testcase(c_file: Path) -> bool: + ''' + Run a single test case for the given C file. + Returns True if passed, False if failed. + ''' + name = c_file.stem + Log.info(f'Running test [{name}]...') + + # 1. Compile with gcc + ret = run_command(['gcc', str(c_file)]) + if ret != 0: + Log.error('ERROR: gcc compilation failed.') + return False + + # 2. Run a.exe and capture output -> name.expect + actual_path = Path(f'{name}.actual') + expect_path = Path(f'{name}.expect') + + ret = run_command( + [str(Path('a.exe')), str(c_file)], + stdout_path=expect_path, + ) + if ret != 0: + Log.error(f'a.exe failed for {c_file}') + return False + + # 3. Run c.c.exe and capture output -> name.actual + ret = run_command([str(Path('c.c.exe')), str(c_file)], stdout_path=actual_path) + + if ret != 0: + Log.error(f'c.c failed for {c_file}') + return False + + # 4. 
Compare outputs + try: + actual_text = actual_path.read_text(encoding='utf-8') + expect_text = expect_path.read_text(encoding='utf-8') + except OSError as e: + Log.error(f'Failed to read output files for test [{name}]: {e}') + return False + + if actual_text == expect_text: + Log.ok(f'Test [{name}] passed.') + return True + else: + diff = difflib.unified_diff( + expect_text.splitlines(keepends=True), + actual_text.splitlines(keepends=True), + fromfile=f'{name}.expect', + tofile=f'{name}.actual', + lineterm='') + + Log.error(f'Test [{name}] failed.\n{"".join(diff)}') + return False + +def main() -> int: + test_dir = 'test' + + c_files = sorted(Path(test_dir).glob('*.c')) + if not c_files: + Log.error(f'No test .c files found in "{test_dir}" directory.') + return 0 + + for c_file in c_files: + if not testcase(c_file): + return 1 + + # @TODO: make sure to clean up generated even if tests fail + for pattern in ('*.actual', '*.expect'): + for file in Path('.').glob(pattern): + file.unlink() + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/utils.py b/scripts/utils.py new file mode 100644 index 0000000..f51b57f --- /dev/null +++ b/scripts/utils.py @@ -0,0 +1,55 @@ +import sys +import subprocess +from pathlib import Path + +class Log: + COLOR_RESET = '\033[0m' + COLOR_GREEN = '\033[32m' + COLOR_YELLOW = '\033[33m' + COLOR_RED = '\033[31m' + COLOR_BOLD_RED = '\033[1;31m' + + @staticmethod + def info(msg: str): + print(msg) + + @staticmethod + def ok(msg: str): + print(f'{Log.COLOR_GREEN}[ OK ] {msg}{Log.COLOR_RESET}') + + @staticmethod + def warn(msg: str): + print(f'{Log.COLOR_YELLOW}[WARN] {msg}{Log.COLOR_RESET}') + + @staticmethod + def error(msg: str): + # @TODO: throw exception instead of exiting? + print(f'{Log.COLOR_RED}[ERR ] {msg}{Log.COLOR_RESET}', file=sys.stderr) + +def run_command(args, stdout_path=None): + ''' + Run a command. + If stdout_path is given, redirect stdout to that file. + Returns the process return code. 
+ ''' + + Log.info(f'Running command: {" ".join(args)}') + if stdout_path is not None: + with open(stdout_path, 'w', encoding='utf-8', newline='') as f: + proc = subprocess.run(args, stdout=f, stderr=subprocess.PIPE, text=True) + else: + proc = subprocess.run(args, stderr=subprocess.PIPE, text=True) + + if proc.stderr: + sys.stderr.write(proc.stderr) + + return proc.returncode + +def rename(src: Path, dst: Path): + ''' + Rename a file from src to dst, overwriting dst if it exists. + ''' + if dst.exists(): + Log.warn(f'Overwriting existing file: {dst}') + dst.unlink() + src.rename(dst) diff --git a/test/cast.c b/test/cast.c index b88e962..85a6bf9 100644 --- a/test/cast.c +++ b/test/cast.c @@ -2,7 +2,7 @@ #include int main() { - char* p = malloc(4096); + char* p = calloc(4096, 1); char* src = "hello there one more time"; int i = 0; while (src[i]) { diff --git a/test/readfile.c b/test/readfile.c deleted file mode 100644 index 8f803d8..0000000 --- a/test/readfile.c +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include - -int main(int argc, char** argv) { - if (argc < 2) { - printf("Usage: %s file [args...]\n", *argv); - return 1; - } - - void* fp = fopen(*(argv + 1), "r"); - if (!fp) { - printf("file '%s' does not exist\n", *(argv + 1)); - return 1; - } - - char* p = malloc(4096); - char* pp = p; - - int c; - while ((c = fgetc(fp)) != -1) { - *pp = c; - pp += 1; - } - *pp = 0; - - printf("%s\n", p); - - return 0; -} -