Modula-dev
diff --git a/‎clone
Lines changed: 0 additions & 3 deletions b/‎clone
Lines changed: 0 additions & 3 deletions
diff --git a/‎const/opcodes.h
Lines changed: 0 additions & 60 deletions b/‎const/opcodes.h
Lines changed: 0 additions & 60 deletions
diff --git a/‎const/strings.h
Lines changed: 10 additions & 2 deletions b/‎const/strings.h
Lines changed: 10 additions & 2 deletions
diff --git a/‎gasm.c
Lines changed: 2 additions & 8 deletions b/‎gasm.c
Lines changed: 2 additions & 8 deletions
diff --git a/‎src/assemble.c
Lines changed: 6 additions & 9 deletions b/‎src/assemble.c
Lines changed: 6 additions & 9 deletions
diff --git a/‎src/gasm.h
Lines changed: 9 additions & 10 deletions b/‎src/gasm.h
Lines changed: 9 additions & 10 deletions
diff --git a/‎src/lexer.c
Lines changed: 153 additions & 11 deletions b/‎src/lexer.c
Lines changed: 153 additions & 11 deletions
@@ -35,14 +35,22 @@ const char help[] = version "\
 
 #define ERROR "\e[0;31merror:\e[0m"
 #define HELPCMD "\e[0;32mgasm help\e[0m"
+#define HELPDIAG " will bring up the help dialog\n"
 
 const char cmd_help[] = "help";
 const char cmd_version[] = "version";
 const char cmd_license[] = "license";
 const char cmd_contributors[] = "contributors";
-const char badargs[] = ERROR " invalid arguments\nrunning " HELPCMD " will bring up the help dialog\n";
+const char noargs[] = ERROR " given no inputs, expected two\nrunning " HELPCMD HELPDIAG;
+const char badargs[] = ERROR " invalid arguments\nrunning " HELPCMD HELPDIAG;
 void printLicense() { printf("%s%s\n", notice, license); }
 
+const char asm_error[] = ERROR " \e[0;35m\"%s\" \e[0mln %d, col %d\n%s\n";
 const char asm_expected_instruction[] = "expected an instruction\n";
 const char asm_wrong_type[] = "given argument of wrong type\n";
-
+const char asm_redefinition[] = "redefinition of symbol\n";
+const char asm_lexerstuck[] = "lexer stuck\n";
+const char asm_string_hanging[] = "expected string to be closed\n";
+const char asm_number_multidecimal[] = "number contains multiple decimal points\n";
+const char asm_number_badcharacter[] = "number contains invalid character\n";
+const char asm_symbol_badcharacter[] = "invalid symbol definition or unknown character\n";
@@ -1,23 +1,17 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <sys/stat.h>
 #include "include/libgyb.h"
 #include "include/libgyb.c"
 #include "const/license.h"
 #include "const/strings.h"
-#include "const/opcodes.h"
 #include "src/gasm.h"
 #include "src/load.c"
 #include "src/assemble.c"
+#include "src/lexer.h"
 #include "src/lexer.c"
 #include "src/parser.c"
 #include "src/emit.c"
 
 int main (int argc, char **argv) {
-    if (argc == 1) { printf(badargs); exit(1); }
+    if (argc == 1) { printf(noargs); exit(1); }
     if (argc == 2) {
         if ( argv[1][0]=='-' ) {
         switch(argv[1][1]) {
 
@@ -1,14 +1,11 @@
-void assembler_error(token_t token, parse_t *context, const char *error) {
-    printf("\e[0;31merror:\e[0;33m%s\e[0m, ln %d col %d\n%s",
-        context->source,
-        token.ln, token.col, error
+void assembler_error(const char *error) {
+    printf(asm_error,
+        assembler_filename, assembler_ln, assembler_col, error
     ); exit(1);
 }
 
 int assemble(char *filename, int srcn, char **srcs){
-    gybfile_t object = bytecode_new();
-    int i; for (i=0; i<srcn; i++) {
-        parse_t local = gasm_load(srcs[i]);
-    }
-    return bytecode_save(filename, object);
+    gybfile_t object = gyb_bytecode_new();
+    int i; for (i=0; i<srcn; i++) { gasm_load(&object, srcs[i]); }
+    return gyb_bytecode_save(filename, object);
 }
@@ -11,17 +11,16 @@ typedef struct {
 } token_t;
 
 typedef struct {
-    unsigned int size;
+    int size; int index;
     token_t *tokens;
 } lexicon_t;
 
-typedef struct {
-    char *source;
-    symboltable_t symtable;
-    lexicon_t lexicon;
-} parse_t;
+void gasm_load(gybfile_t *context, char *filename);
+void parser(gybfile_t *context, lexicon_t lexicon);
+lexicon_t lexer(char *stream, int size);
 
-parse_t gasm_import(parse_t *parent, parse_t *child);
-parse_t gasm_load(char *filename);
-lexicon_t lexer(char *stream, unsigned int size);
-parse_t parser(lexicon_t lexicon);
+unsigned char   assembler_section = 0;
+unsigned char   assembler_datasize = 0;
+unsigned int    assembler_ln;
+unsigned int    assembler_col;
+char           *assembler_filename = NULL;
@@ -1,17 +1,159 @@
-token_t lexer_consume(char *stream, int max){
-    // Data types, ex. bytes
+/*
+ * Append a copy of `token` to `lexicon`, growing the token buffer on demand.
+ *
+ * lexicon->size  is the buffer capacity in BYTES (it is handed to realloc as-is).
+ * lexicon->index is the slot the next token is written to; it is advanced here so
+ *                that successive pushes no longer overwrite each other.
+ *
+ * The lexicon must have been initialised by the caller (a NULL `tokens` with
+ * `size` 0 is accepted). Exits the process if memory cannot be obtained.
+ */
+void lexicon_push(lexicon_t *lexicon, token_t token) {
+    // room is needed for index+1 tokens, not just index
+    size_t needed = sizeof(token_t) * ((size_t)lexicon->index + 1);
+    if ( lexicon->tokens == NULL || (size_t)lexicon->size < needed ) {
+        // doubling 0 stays 0, so start from a small non-zero capacity
+        size_t grown = lexicon->size > 0 ? (size_t)lexicon->size : sizeof(token_t) * 16;
+        while ( grown < needed ) { grown *= 2; }
+        // keep the old pointer until realloc is known to have succeeded
+        token_t *moved = realloc(lexicon->tokens, grown);
+        if ( moved == NULL ) { perror("lexicon_push"); exit(1); }
+        lexicon->tokens = moved;
+        lexicon->size = (int)grown;
+    }
+    lexicon->tokens[lexicon->index] = token;
+    lexicon->index++;
+}
+
+/*
+ * Length of the run of non-delimiter bytes at the start of `stream`.
+ *
+ * Delimiters are space, '\r', '\t', ',' and '\n'; a NUL byte also ends the run.
+ * At most `max` bytes are examined. When no delimiter is found the whole
+ * window is the token and `max` is returned (the original fell off the end
+ * of the function without returning a value in that case).
+ */
+int len_nonwsp(char *stream, int max) {
+    int i; for(i=0;i<max;i++) {
+        switch(stream[i]){
+            case ' ': case '\r': case '\t': case ',': case '\n': case '\0':
+                return i;
+            default:
+                break;
+        }
+    }
+    return i;
+}
 
-    // Keywords: registers, constants, et cetera
+/*
+ * Length of the run of separator bytes at the start of `stream`, examining at
+ * most `max` bytes. Space, '\r', '\t' and ',' each advance assembler_col;
+ * '\n' advances assembler_ln and resets assembler_col to 0.
+ *
+ * Returns the number of separator bytes consumed; when the whole window is
+ * separators that is `max` (the original had no return statement for that case).
+ */
+int len_whitespace(char *stream, int max) {
+    int i; for(i=0;i<max;i++) {
+        switch(stream[i]){
+            case ' ': case '\r': case '\t': case ',':
+                assembler_col++;
+                break;
+            case '\n':
+                assembler_ln++;
+                assembler_col=0;
+                break;
+            default: return i;
+        }
+    }
+    return i;
+}
+
+/*
+ * Length of the quoted string literal at the start of `stream`, including both
+ * quote characters, or 0 when `stream` does not start with a quote.
+ *
+ * A string may be opened with a backtick, a double quote or a single quote and
+ * is closed by the same character. At most `max` bytes are examined and a NUL
+ * byte ends the search. A string that is never closed is reported through
+ * assembler_error(asm_string_hanging).
+ */
+int len_string(char *stream, int max) {
+    char quote;
+    switch(stream[0]) {
+        case '`': case '"': case '\'': quote = stream[0]; break;
+        default: return 0; }
+    // start after the opening quote; starting at 0 makes the opener match itself
+    int i; for(i=1; i<max && stream[i]!=0; i++) {
+        if ( stream[i] == quote ) { return i+1; }
+    }
+    // if we never get the ending quote, error
+    assembler_error(asm_string_hanging);
+    return 0; // assembler_error exits; this keeps every path returning a value
+}
 
-    // Symbolic Names
+/*
+ * Length of the numeric literal at the start of `stream`, or 0 when the text
+ * there is not a number. At most `max` bytes are examined.
+ *
+ * Accepted shape: an optional leading '-', an optional "0x" (hex) or "0o"
+ * (octal) prefix, then digits; hex also takes a-f / A-F, and plain decimal
+ * numbers may contain '.'. Scanning stops at the first byte that does not fit
+ * (the original loop had no exit and always returned `max`). A lone '-' or
+ * '.' contains no digit and is therefore not a number.
+ *
+ * This only measures the literal; digit-range and multiple-'.' validation is
+ * left to value_number.
+ */
+int len_number(char *stream, int max) {
+    bool hex=false; bool octal=false; int digits=0;
+    int i; for(i=0;i<max;i++) {
+        char c = stream[i];
+        if ( i == 0 && c == '-' ) { continue; }
+        // a radix prefix only counts directly after a leading zero
+        if ( i == 1 && stream[0] == '0' && c == 'x' ) { hex = true; continue; }
+        if ( i == 1 && stream[0] == '0' && c == 'o' ) { octal = true; continue; }
+        if ( c <= '9' && c >= '0' ) { digits++; continue; }
+        if ( c <= 'f' && c >= 'a' && hex == true ) { digits++; continue; }
+        if ( c <= 'F' && c >= 'A' && hex == true ) { digits++; continue; }
+        if ( hex == false && octal == false && c == '.' ) { continue; }
+        break; // first byte that cannot belong to a number
+    }
+    return digits > 0 ? i : 0;
+}
+
+/*
+ * Convert the `max`-byte numeric token at `stream` to its value.
+ *
+ * Integers: optional leading '-', optional "0x" / "0o" prefix (only recognised
+ * when the token is longer than two bytes), then digits of that base.
+ * Decimals: a base-10 token containing one '.' is evaluated as a float and the
+ * float's bit pattern is returned in the int (copied with memcpy).
+ * NOTE(review): that assumes sizeof(float) == sizeof(int) - confirm for the targets.
+ *
+ * Errors go through assembler_error: asm_number_multidecimal for a second '.',
+ * asm_number_badcharacter for any other byte that does not fit.
+ *
+ * Fixes over the previous version: the '.' is now consumed (it used to be seen
+ * again and reported as a second decimal point), the fraction is computed in
+ * floating point, the float is copied INTO the result instead of being
+ * overwritten by it, the sign is applied, and base 16 no longer accepts the
+ * bytes ':'..'?' as digits.
+ */
+int value_number(char *stream, int max) {
+    int base = 10; int cursor = 0; bool negative=false;
+    int value = 0; bool fractional = false;
+    float fraction = 0.0f; float scale = 1.0f;
+    if ( max > 2 && stream[0] == '0' ) {
+        if ( stream[1] == 'x' ) { cursor = 2; base = 16; }
+        if ( stream[1] == 'o' ) { cursor = 2; base = 8; }
+    }
+    // '0'..'9' cover at most ten digit values, whatever the base
+    char top_digit = (char)('0' - 1 + (base < 10 ? base : 10));
+    while(cursor < max) {
+        char c = stream[cursor];
+        if ( !fractional ) { // integers
+            if ( cursor == 0 && c == '-' ) { negative = true; cursor++; continue; }
+            if ( c >= '0' && c <= top_digit ) { value = value * base + (c - '0'); cursor++; continue; }
+            if ( c >= 'a' && c <= 'f' && base == 16 ) { value = value * base + (c - 'a' + 10); cursor++; continue; }
+            if ( c >= 'A' && c <= 'F' && base == 16 ) { value = value * base + (c - 'A' + 10); cursor++; continue; }
+            if ( c == '.' && base == 10 ) { fractional = true; cursor++; continue; }
+        } else { // digits after the decimal point
+            if ( c >= '0' && c <= '9' ) { scale /= 10.0f; fraction += (float)(c - '0') * scale; cursor++; continue; }
+            if ( c == '.' ) { assembler_error(asm_number_multidecimal); }
+        }
+        assembler_error(asm_number_badcharacter); // nothing above accepted this byte
+    }
+    if ( fractional ) {
+        float fvalue = (float)value + fraction;
+        if ( negative ) { fvalue = -fvalue; }
+        memcpy(&value, &fvalue, sizeof(float));
+        return value;
+    }
+    return negative ? -value : value;
+}
+
+/*
+ * Length of the symbol name at the start of `stream`, examining at most `max`
+ * bytes. A symbol is made of ASCII letters, '_' and '$'; digits are allowed
+ * anywhere except the first position. Returns 0 when the first byte cannot
+ * start a symbol.
+ *
+ * The scan stops at the first byte that is not part of a symbol (the original
+ * loop had no exit and always returned `max`).
+ */
+int len_symbol(char *stream, int max) {
+    int i; for(i=0;i<max;i++){
+        char c = stream[i];
+        if ( c >= 'a' && c <= 'z' ) { continue; }
+        if ( c >= 'A' && c <= 'Z' ) { continue; }
+        if ( c >= '0' && c <= '9' && i != 0 ) { continue; }
+        if ( c == '_' || c == '$' ) { continue; }
+        break;
+    }
+    return i;
+}
 
-    // Instructions
+/* True when the `len`-byte token at `stream` is exactly `name` (not merely a prefix of it). */
+static bool lexer_token_is(const char *name, const char *stream, int len) {
+    return name != NULL && len > 0
+        && strncmp(name, stream, (size_t)len) == 0 && name[len] == '\0';
+}
+
+/*
+ * Finish the `len`-byte token at `stream`: advance the column counter, replace
+ * the delimiter that follows the token with a NUL so the token is a C string,
+ * and return how many bytes were used up (token plus delimiter).
+ * A newline that gets overwritten is counted here, because len_whitespace
+ * will never see it afterwards. When the token reaches the end of the window
+ * nothing is written.
+ * NOTE(review): that last case assumes the buffer is already NUL-terminated
+ * past `max` - confirm against gasm_load.
+ */
+static unsigned int lexer_close_token(char *stream, int len, int max) {
+    assembler_col += (unsigned int)len;
+    if ( len >= max ) { return (unsigned int)len; }
+    if ( stream[len] == '\n' ) { assembler_ln++; assembler_col = 0; }
+    else { assembler_col++; }
+    stream[len] = '\0';
+    return (unsigned int)len + 1;
+}
+
+/*
+ * Lex one token from the start of `stream` (at most `max` bytes) and push it
+ * onto `lexicon`.
+ *
+ * Returns the number of bytes consumed, INCLUDING any leading whitespace that
+ * was skipped, so the caller can advance its cursor by the return value.
+ * Returns 0 only when nothing could be consumed (empty window or a NUL byte
+ * right at the start).
+ *
+ * Token kinds are tried in this order: quoted string, number, section name,
+ * data size, syscall, keyword, and finally a symbol. `stream` is modified in
+ * place: token ends are overwritten with NUL so token strings can point into it.
+ */
+unsigned int lexer_fetch(lexicon_t *lexicon, char *stream, int max){
+    int len; int i; int skipped = 0;
+    // skip over whitespace, remembering how much so the caller's cursor stays in step
+    while ( max > 0 && (len = len_whitespace(stream, max)) != 0 ) {
+        stream += len; max -= len; skipped += len;
+    }
+    // end of input: report only the whitespace that was consumed
+    if ( max <= 0 || stream[0]==0 ) { return (unsigned int)skipped; }
+    // prep token
+    token_t result = { .ln = assembler_ln, .col = assembler_col, };
+    // check string
+    len = len_string(stream, max); if ( len != 0 ) {
+        result.type = parsetype_data;
+        result.string = stream + 1; // skip the first quote
+        result.value = assembler_datasize;
+        stream[len-1] = '\0'; // && null terminate it (overwrites the closing quote)
+        assembler_col += (unsigned int)len;
+        lexicon_push(lexicon, result);
+        return (unsigned int)(skipped + len);
+    }
+    // every remaining token kind runs up to the next delimiter
+    len = len_nonwsp(stream, max);
+    // check numbers; value_number validates the whole token, so "12abc" is an error
+    if ( len_number(stream, max) != 0 ) {
+        result.type = parsetype_data;
+        result.string = stream;
+        result.value = value_number(stream, len);
+        lexicon_push(lexicon, result);
+        return (unsigned int)skipped + lexer_close_token(stream, len, max);
+    }
+    // check for sections
+    for(i=0;i<4;i++) {
+    if ( lexer_token_is(section_names[i].name, stream, len) ) {
+        result.type = parsetype_macro; result.string = section_names[i].name; result.value = section_names[i].args;
+        lexicon_push(lexicon, result);
+        return (unsigned int)skipped + lexer_close_token(stream, len, max);
+    }}
+    // check for data sizes (previously copied name/args from section_names)
+    for(i=0;i<4;i++) {
+    if ( lexer_token_is(datasizes[i].name, stream, len) ) {
+        result.type = parsetype_size; result.string = datasizes[i].name; result.value = datasizes[i].args;
+        lexicon_push(lexicon, result);
+        return (unsigned int)skipped + lexer_close_token(stream, len, max);
+    }}
+    // check for syscalls
+    for(i=0;i<255;i++) {
+    if ( syscalls[i].type != 'S' ) { continue; }
+    if ( lexer_token_is(syscalls[i].name, stream, len) ) {
+        result.type = parsetype_sys; result.string = syscalls[i].name; result.value = syscalls[i].args;
+        lexicon_push(lexicon, result);
+        return (unsigned int)skipped + lexer_close_token(stream, len, max);
+    }}
+    // check for keywords
+    // NOTE(review): keywords are tagged parsetype_sys exactly like syscalls; confirm
+    // whether a dedicated parse type was intended here.
+    for(i=0;i<255;i++) {
+    if ( keywords[i].type != 'I' ) { continue; }
+    if ( lexer_token_is(keywords[i].name, stream, len) ) {
+        result.type = parsetype_sys; result.string = keywords[i].name; result.value = keywords[i].args;
+        lexicon_push(lexicon, result);
+        return (unsigned int)skipped + lexer_close_token(stream, len, max);
+    }}
+    // if none of those things, we're probably looking at a symbol:
+    // the whole token must consist of symbol characters
+    if ( len_symbol(stream, max) != len ) { assembler_error(asm_symbol_badcharacter); }
+    // make a note of the section its in
+    if ( assembler_section == section_executable ) { result.type = parsetype_label; }
+    else { result.type = parsetype_address; }
+    // redefinition checks happen during parsing, not lexing
+    unsigned int used = lexer_close_token(stream, len, max); // terminate before hashing
+    result.string = stream; result.value = symbolhash(stream);
+    lexicon_push(lexicon, result); return (unsigned int)skipped + used;
+}
 
-lexicon_t lexer(char *stream, unsigned int size){
-    lexicon_t yield;
-    int cursor=0; while(cursor < size) {
-        token_t token = lexer_consume(stream + cursor, size - cursor);
-        cursor += strlen(token.string);
-    }   return yield;
+/* True when the next `max` bytes of `stream` hold nothing but separators up to a NUL or the end. */
+static bool lexer_rest_is_blank(const char *stream, int max) {
+    for (int i = 0; i < max; i++) {
+        switch (stream[i]) {
+            case ' ': case '\r': case '\t': case ',': case '\n': break;
+            case '\0': return true;
+            default: return false;
+        }
+    }
+    return true;
+}
+
+/*
+ * Tokenise `streamlen` bytes of `stream` and return the resulting lexicon.
+ *
+ * The lexicon starts with a real allocation (`size` is a byte count, matching
+ * what lexicon_push hands to realloc) instead of uninitialised fields.
+ * `stream` is modified in place by lexer_fetch. Line and column tracking is
+ * left to the functions that actually consume characters; the byte count
+ * returned by lexer_fetch is no longer added to the line number.
+ *
+ * When lexer_fetch consumes nothing, a blank remainder is treated as the end
+ * of input; anything else is reported as asm_lexerstuck through
+ * assembler_error. Exits the process if the initial allocation fails.
+ */
+lexicon_t lexer(char *stream, int streamlen){
+    lexicon_t lexicon = { .size = (int)(sizeof(token_t) * 16), .index = 0, .tokens = NULL };
+    lexicon.tokens = malloc((size_t)lexicon.size);
+    if ( lexicon.tokens == NULL ) { perror("lexer"); exit(1); }
+    int cursor=0; while(cursor < streamlen) {
+        unsigned int n = lexer_fetch(&lexicon, stream + cursor, streamlen - cursor);
+        if ( n == 0 ) {
+            // nothing left but trailing separators: normal end of input
+            if ( lexer_rest_is_blank(stream + cursor, streamlen - cursor) ) { break; }
+            assembler_error(asm_lexerstuck);
+        }
+        cursor += (int)n;
+    }   return lexicon;
+}