Skip to content

Commit b78058c

Browse files
committed
Init, todo lexer and parser
0 parents  commit b78058c

14 files changed

+2892
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
out/*
2+
include/*

LICENSE

+619
Large diffs are not rendered by default.

build

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# bytes license LICENSE const/license.h
# The contents of out/ are git-ignored, so the directory may not exist in a
# fresh checkout; create it before asking gcc to write into it.
mkdir -p out
gcc -o out/gasm gasm.c -static

clone

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Download the libgyb header and implementation into ./include.
mkdir -p include
base=https://raw.githubusercontent.com/Modula-dev/gyb/refs/heads/main/lib
for ext in h c; do
    wget -O "./include/libgyb.$ext" "$base/libgyb.$ext"
done

const/license.h

+2,029
Large diffs are not rendered by default.

const/opcodes.h

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
 * Argument-type tags. Each tag is a single character; the `args` strings of
 * the opcode table in this file are spelled with these same characters.
 */
enum {
    argtype_instruction = 'I', // USED BY LEXER ONLY
    argtype_register    = 'R',
    argtype_label       = 'L',
    argtype_address     = 'A',
    argtype_constant    = 'C',
    argtype_any         = '*',
    argtype_optional    = '?'
};
8+
9+
/*
 * One row of the opcode table.
 *   args: argument-type characters, one per operand, NUL terminated
 *         (the longest signature in the table has four operands).
 *   name: assembly mnemonic, NUL terminated (the longest mnemonics,
 *         "interrupt" and "immediate", are nine characters).
 */
typedef struct {
    unsigned char args[6]; // NULL terminated list
    char name[10];
} operation_t;

/*
 * Opcode table, indexed by opcode byte. Slots without an entry are
 * zero-filled, so an unused opcode has an empty name.
 *
 * The table has 256 slots so that every possible byte value is a valid index.
 *
 * "inc" and "dec" previously reused the designators 0xA5 and 0xA6, which
 * silently replaced "left" and "right" (a later designator for the same
 * index wins). They now occupy the next free slots after "flip".
 * NOTE(review): 0xAB/0xAC were chosen as the next free values in the 0xA_
 * group - confirm against the bytecode specification.
 */
const operation_t operations[256] = {
    [0x80] = { "RRRR", "interrupt" },
    [0x81] = { "****", "syscall" },

    [0xA0] = { "RR", "compare" },
    [0xA1] = { "RR", "add" },
    [0xA2] = { "RR", "sub" },
    [0xA3] = { "RR", "mul" },   // x86: mul cl
    [0xA4] = { "RR", "div" },   // x86: div cl
    [0xA5] = { "RR", "left" },  // x86: shl cl
    [0xA6] = { "RR", "right" }, // x86: shr cl
    [0xA7] = { "RR", "and" },
    [0xA8] = { "RR", "or" },
    [0xA9] = { "RR", "xor" },
    [0xAA] = { "R", "flip" },
    [0xAB] = { "R", "inc" },
    [0xAC] = { "R", "dec" },

    [0x50] = { "R", "push" },
    [0x51] = { "R", "pop" },

    [0xBF] = { "L", "goto" },
    [0xB1] = { "L", "if" },
    [0xB2] = { "L", "not" },
    [0xB3] = { "L", "lt" },
    [0xB4] = { "L", "gt" },
    [0xB5] = { "L", "le" },
    [0xB6] = { "L", "ge" },

    [0xF8] = { "R", "fcast" },  // cvttss2si rax, xmm0
    [0xF9] = { "R", "icast" },  // cvtsi2sd xmm0, rax
    [0xFA] = { "R", "fmov" },
    [0xFB] = { "R", "imov" },   // movd xmm0, rax
    [0xF1] = { "RR", "fadd" },  // addsd
    [0xF2] = { "RR", "fsub" },  // subsd
    [0xF3] = { "RR", "fmul" },  // mulsd
    [0xF4] = { "RR", "fdiv" },  // divsd

    [0x90] = { "RR", "move" },      // mov
    [0x91] = { "RA", "load" },      // mov
    [0x92] = { "RA", "store" },     // mov
    [0x93] = { "RC", "immediate" }  // mov
};
52+
53+
/*
 * Register mnemonics. Every entry is two characters followed by the
 * terminating NUL, which exactly fills the three-byte row.
 */
const char registernames[][3] = {
    "ar",
    "br",
    "cr",
    "dr",
    "sr", // stack pointer
    "sb"  // stack base pointer
};

const/strings.h

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/* String fragments spliced into the longer messages via literal concatenation. */
#define GPLV3Link "https://www.gnu.org/licenses/gpl-3.0.en.html"
#define copyright "Copyright (c) 2025 Modula.dev"
#define version "gasm 0.0.0\n"

/* One contributor per line. */
const char contributors[] =
    "John Alex (Modula) <Modula.dev>\n";

/* Version line, copyright line and the short GPL notice. */
const char notice[] =
    version
    copyright "\n"
    "This program is free software: you can redistribute it and/or modify\n"
    "it under the terms of the GNU General Public License as published by\n"
    "the Free Software Foundation, only under version 3 of this license.\n"
    "\n"
    "This program is distributed in the hope that it will be useful,\n"
    "but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
    "\n";
15+
16+
const char help[] = version "\
17+
\e[0;34mSYNOPSIS\e[0m\n\
18+
\e[0;32m\
19+
gasm <dest> [sources]\n\
20+
gasm license\n\
21+
\e[0;34mDESCRIPTION\e[0m\n\
22+
\e[0;32m\
23+
creates garter bytecode from garter assembly code\n\
24+
\e[0;34mLICENSE\e[0m\n\
25+
\e[0;32m\
26+
GPLv3, \
27+
\e[0;35m" GPLV3Link "\n\
28+
\e[0m\
29+
Put briefly; you may redistribute and/or modify this software\n\
30+
under the terms of the GPLv3 License as published by \n\
31+
the Free Software Foundation if and only if \n\
32+
the license and our copyright notice is included:\n\
33+
\e[0;31m\"" copyright "\"\
34+
\e[0m\n\0";
35+
36+
/*
 * Coloured message fragments. The escape character is spelled \033 because
 * \e is a compiler extension rather than standard C; the bytes are identical.
 */
#define ERROR "\033[0;31merror:\033[0m"
#define HELPCMD "\033[0;32mgasm help\033[0m"

/* Command words accepted as the single command-line argument. */
const char cmd_help[] = "help";
const char cmd_version[] = "version";
const char cmd_license[] = "license";
const char cmd_contributors[] = "contributors";

/* Shown when the command line cannot be interpreted. */
const char badargs[] =
    ERROR " invalid arguments\n"
    "running " HELPCMD " will bring up the help dialog\n";
44+
/*
 * Prints the short notice followed by the license text and a final newline
 * to stdout.
 *
 * Declared with (void) so the function has a real prototype and calls that
 * pass arguments are diagnosed.
 * NOTE(review): `license` is defined in const/license.h, which is not visible
 * here; this assumes it is a NUL-terminated string - confirm.
 */
void printLicense(void) { printf("%s%s\n", notice, license); }
45+
46+
/* Assembler diagnostic texts; each one ends with a newline. */
const char asm_expected_instruction[] =
    "expected an instruction\n";
const char asm_wrong_type[] =
    "given argument of wrong type\n";
48+

gasm.c

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <stdint.h>
5+
#include <stdbool.h>
6+
#include <sys/stat.h>
7+
#include "include/libgyb.h"
8+
#include "include/libgyb.c"
9+
#include "const/license.h"
10+
#include "const/strings.h"
11+
#include "const/opcodes.h"
12+
#include "src/gasm.h"
13+
#include "src/load.c"
14+
#include "src/assemble.c"
15+
#include "src/lexer.c"
16+
#include "src/parser.c"
17+
#include "src/emit.c"
18+
19+
int main (int argc, char **argv) {
20+
if (argc == 1) { printf(badargs); exit(1); }
21+
if (argc == 2) {
22+
if ( argv[1][0]=='-' ) {
23+
switch(argv[1][1]) {
24+
case 'h': printf(help); exit(0);
25+
case 'v': printf(version); exit(0);
26+
case 'l': printLicense(); exit(0);
27+
case 'c': printf(contributors); exit(0);
28+
default: printf(badargs); exit(1);
29+
}}
30+
if ( strncmp(argv[1], cmd_help, sizeof(cmd_help))==0 ) { printf(help); exit(0); }
31+
if ( strncmp(argv[1], cmd_version, sizeof(cmd_version))==0 ) { printf(version); exit(0); }
32+
if ( strncmp(argv[1], cmd_license, sizeof(cmd_license))==0 ) { printLicense(); exit(0); }
33+
if ( strncmp(argv[1], cmd_contributors, sizeof(cmd_version))==0 ) { printf(contributors); exit(0); }
34+
printf(badargs); exit(1);
35+
}
36+
assemble(argv[1], argc-2, argv + 2);
37+
}

src/assemble.c

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
/*
 * Reports an assembler error and terminates the program with exit status 1.
 *
 *   token:   token at which the error was detected; its ln/col are printed
 *   context: parse state; its `source` string is printed after "error:"
 *   error:   message text, printed verbatim on the following line
 *
 * ln and col are unsigned int, so they are formatted with %u. The escape
 * character is spelled \033 because \e is not standard C.
 */
void assembler_error(token_t token, parse_t *context, const char *error) {
    printf("\033[0;31merror:\033[0;33m%s\033[0m, ln %u col %u\n%s",
        context->source,
        token.ln, token.col,
        error);
    exit(1);
}
7+
8+
int assemble(char *filename, int srcn, char **srcs){
9+
gybfile_t object = bytecode_new();
10+
int i; for (i=0; i<srcn; i++) {
11+
parse_t local = gasm_load(srcs[i]);
12+
}
13+
return bytecode_save(filename, object);
14+
}

src/emit.c

Whitespace-only changes.

src/gasm.h

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
 * Scalar aliases used by the assembler.
 * NOTE(review): these are macros rather than typedefs. glibc's <sys/types.h>
 * already declares a typedef named register_t, so turning that one into a
 * typedef would likely clash - verify before changing.
 */
#define register_t  unsigned char
#define label_t     uint32_t  // symbol hash, must be in E section
#define constant_t  uint32_t  // symbol hash, must be constant-type
#define address_t   uint32_t  // symbol hash, any section allowed
#define symbol_t    uint32_t  // symbol hash, any allowed
6+
7+
/* A single lexical token together with its position in the source. */
typedef struct {
    unsigned int ln;     // line number
    unsigned int col;    // column number
    unsigned char type;
    unsigned int value;  // symbol, const, whatever
    char *string;        // null terminated
} token_t;
12+
13+
typedef struct {
14+
unsigned int size;
15+
token_t *tokens;
16+
} lexicon_t;
17+
18+
typedef struct {
19+
char *source;
20+
symboltable_t symtable;
21+
lexicon_t lexicon;
22+
} parse_t;
23+
24+
parse_t gasm_import(parse_t *parent, parse_t *child);
25+
parse_t gasm_load(char *filename);
26+
lexicon_t lexer(char *stream, unsigned int size);
27+
parse_t parser(lexicon_t lexicon);

src/lexer.c

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
token_t lexer_consume(char *stream, int max){
2+
// Data types, ex. bytes
3+
4+
// Keywords: registers, constants, et cetera
5+
6+
// Symbolic Names
7+
8+
// Instructions
9+
}
10+
11+
lexicon_t lexer(char *stream, unsigned int size){
12+
lexicon_t yield;
13+
int cursor=0; while(cursor < size) {
14+
token_t token = lexer_consume(stream + cursor, size - cursor);
15+
cursor += strlen(token.string);
16+
} return yield;
17+
}

src/load.c

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
 * Imports `child` into `parent`.
 *
 * The merge itself is not implemented yet (see the placeholders below). So
 * that callers always receive a defined value, the function currently
 * returns a copy of `*parent` unchanged, or a zero-initialised parse when
 * `parent` is NULL. Previously control fell off the end of the function.
 * NOTE(review): returning the parent is a placeholder choice - confirm the
 * intended result once the merge is written.
 */
parse_t gasm_import(parse_t *parent, parse_t *child) {
    (void)child;

    // Clone in the symbols

    // Clone in the lexicon

    if (parent == NULL) {
        parse_t empty = {0};
        return empty;
    }
    return *parent;
}
6+
parse_t gasm_load(char *filename) {
7+
// Read and perform lexical analysis
8+
unsigned int source_length = file_size(filename);
9+
char *buffer = malloc(sizeof(char)*source_length);
10+
file_read(filename, buffer, source_length);
11+
lexicon_t lexicon = lexer(buffer, source_length);
12+
free(buffer);
13+
// Then parse
14+
parse_t yield = parser(lexicon);
15+
yield.source = filename;
16+
return yield;
17+
}

src/parser.c

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
int parser_consume(parse_t *state, token_t *stream, int depth){
2+
// Sections and other high-level state commands
3+
4+
// Definitions; bytes, symbols, et
5+
6+
// System Call instructions
7+
8+
// Regular assembly instructions
9+
}
10+
11+
parse_t parser(lexicon_t lexicon) {
12+
parse_t yield; int cursor;
13+
while(cursor < lexicon.size) {
14+
int step = parser_consume(&yield, lexicon.tokens + cursor, lexicon.size-cursor);
15+
cursor += step;
16+
} return yield;
17+
}

0 commit comments

Comments
 (0)