remedyvm

A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

commit 8c54354b4a11a01b6a9b6078bdbaa39317452feb
parent d895fb0ec2da821721e23ce508c0ee2165fc3ec1
Author: Ethan Long <edl@disroot.org>
Date:   Sun, 18 Jun 2023 00:22:51 -0600

bruh

Diffstat:
M implementations/C/src/remcc.c | 296 +++++++++++++++++++++----------------------------------------------------------
A implementations/C/src/remcc.h | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D implementations/C/tests/lextest.c | 105 -------------------------------------------------------------------------------
3 files changed, 190 insertions(+), 325 deletions(-)

diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c @@ -1,118 +1,19 @@ #include <assert.h> #include <ctype.h> #include <errno.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -/* Data types */ -typedef enum { - NOP = 0, - // Arithmetic - ADD, - SUB, - MUL, - DIV, - // Logical & bit - AND, - OR, - XOR, - NOT, - SHIFTL, - SHIFTR_L, - SHIFTR_A, - // Memory & registers - MOVE, - SWAP, - PUSH, - POP, - PEEK, - LOAD, - STORE, - // Control flow - JUMP, - CALL, - RETURN -} opcode_t; - -typedef enum { - GT = 0, - GEQ, - LT, - LEQ, - EQ, - NEQ, - POS, - NEG -} conditional_t; - -typedef enum { - ARG, - TEMP, - RET -} REG_TYPE; - -typedef struct { - REG_TYPE type; - uint64_t num; -} reg_t; - -typedef enum { - REG, - IMM -} OPER_TYPE; - -typedef struct { - OPER_TYPE type; - union { - reg_t reg; - uint64_t imm; - } val; -} oper_t; - -typedef struct { - opcode_t opcode; - conditional_t cond; - oper_t dest; - oper_t temp_1; - oper_t temp_2; -} inst_t; - -// The maximum number of characters representing a token -#define MAX_TOK 256 - -typedef enum { - OPCODE, - COND, - OPER, - ERR -} TOKEN_TYPE; - -typedef enum { - LEX_ERROR -} error_t; - -typedef struct { - TOKEN_TYPE type; - union { - opcode_t opcode; - conditional_t cond; - oper_t operand; - error_t error; - } val; -} token_t; +#include "remcc.h" + /* Function prototypes */ int usage(char *arg0); -token_t *lexer(FILE *stream); -token_t lex(char *tok); -inst_t *parse(token_t *tokens); -uint8_t *byte_compile(inst_t *instructions); void write_bytecode(FILE *stream, uint8_t *bytecode); /* Implementation: */ -/*int main(int argc, char **argv) { +int main(int argc, char **argv) { char *input_fname = NULL, *output_fname = NULL; FILE *input_f = NULL, *output_f = NULL; token_t *prog_tokens = NULL; @@ -141,12 +42,12 @@ void write_bytecode(FILE *stream, uint8_t *bytecode); prog_tokens = lexer(input_f); prog_insts = parse(prog_tokens); - prog_bytecode = byte_compile(prog_insts); + 
prog_bytecode = byte_compile_prog(prog_insts); write_bytecode(output_f, prog_bytecode); return 0; - }*/ +} int usage(char *arg0) { fprintf(stderr, "Usage: %s input.rasm output.rin\n", arg0); @@ -191,14 +92,56 @@ void write_bytecode(FILE *stream, uint8_t *bytecode) { assert(NULL == "write_bytecode not yet implemented"); } +inline token_t opcode_token(opcode_t opcode) { + return (token_t) { + .type = OPCODE, + .val.opcode = opcode + }; +} + +inline token_t cond_token(conditional_t cond) { + return (token_t) { + .type = COND, + .val.cond = cond + }; +} + +inline token_t oper_token(oper_t oper) { + return (token_t) { + .type = OPER, + .val.operand = oper + }; +} + +inline token_t error_token(error_t error) { + return (token_t) { + .type = ERR, + .val.error = error + }; +} + +inline oper_t reg_operand(REG_TYPE type, uint64_t num) { + return (oper_t) { + .type = REG, + .val.reg = { + .type = type, + .num = num + } + }; +} + +inline oper_t imm_operand(uint64_t immediate) { + return (oper_t) { + .type = IMM, + .val.imm = immediate + }; +} + // My homemade lexer, it's a bit filthy but it'll do for now token_t lex(char *tok_str) { // TODO: Operands // TODO: Case-Insensitive - token_t err = { - .type = ERR, - .val.error = LEX_ERROR, - }; + token_t err = error_token(LEX_ERROR); switch (tok_str[0]) { case 'a': @@ -207,146 +150,83 @@ token_t lex(char *tok_str) { if (errno) { break; } - return (token_t) { - .type = OPER, - .val.operand = { - .type = REG, - .val.reg = { - .type = ARG, - .num = n - } - } - }; + return oper_token(reg_operand(ARG, n)); } else if (strcmp(tok_str + 1, "dd") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = ADD - }; + return opcode_token(ADD); } else if (strcmp(tok_str + 1, "nd") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = AND - }; + return opcode_token(AND); } break; case 'c': // The only C instruction is call if (strcmp(tok_str + 1, "all") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = CALL - }; + return 
opcode_token(CALL); } break; case 'd': // The only D instruction is div if (strcmp(tok_str + 1, "iv") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = DIV - }; + return opcode_token(DIV); } break; case 'e': if (strcmp(tok_str + 1, "q") == 0) { - return (token_t) { - .type = COND, - .val.cond = EQ - }; + return cond_token(EQ); } break; case 'g': if (strcmp(tok_str + 1, "t") == 0) { - return (token_t) { - .type = COND, - .val.cond = GT - }; + return cond_token(GT); } else if (strcmp(tok_str + 1, "eq") == 0) { - return (token_t) { - .type = COND, - .val.cond = GEQ - }; + return cond_token(GEQ); } break; case 'j': // The only J instruction is jump if (strcmp(tok_str + 1, "ump") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = JUMP - }; + return opcode_token(JUMP); } break; case 'l': // The only L instruction is load if (strcmp(tok_str + 1, "t") == 0) { - return (token_t) { - .type = COND, - .val.cond = LT - }; + return cond_token(LT); } else if (strcmp(tok_str + 1, "eq") == 0) { - return (token_t) { - .type = COND, - .val.cond = LEQ - }; + return cond_token(LEQ); } else if (strcmp(tok_str + 1, "oad") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = LOAD - }; + return opcode_token(LOAD); } break; case 'm': if (strcmp(tok_str + 1, "ul") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = MUL - }; + return opcode_token(MUL); } else if (strcmp(tok_str + 1, "ove") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = MOVE - }; + return opcode_token(MOVE); } break; case 'n': if (strcmp(tok_str + 1, "op") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = NOP - }; + return opcode_token(NOP); } else if (strcmp(tok_str + 1, "ot") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = NOT - }; + return opcode_token(NOT); } else if (strcmp(tok_str + 1, "eq") == 0) { - return (token_t) { - .type = COND, - .val.cond = NEQ - }; + return cond_token(NEQ); } else if (strcmp(tok_str + 1, "eg") == 0) { - 
return (token_t) { - .type = COND, - .val.cond = NEG - }; + return cond_token(NEG); } break; case 'o': if (strcmp(tok_str + 1, "r") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = OR - }; + return opcode_token(OR); } break; @@ -354,33 +234,21 @@ token_t lex(char *tok_str) { switch (tok_str[1]) { case 'u': if (strcmp(tok_str + 2, "sh") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = PUSH - }; + return opcode_token(PUSH); } break; case 'o': if (strcmp(tok_str + 2, "p") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = POP - }; + return opcode_token(POP); } else if (strcmp(tok_str + 2, "s") == 0) { - return (token_t) { - .type = COND, - .val.cond = POS - }; + return cond_token(POS); } break; case 'e': if (strcmp(tok_str + 2, "ek") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = PEEK - }; + return opcode_token(PEEK); } break; @@ -391,25 +259,13 @@ token_t lex(char *tok_str) { case 'r': if (strcmp(tok_str + 1, "eturn") == 0) { - return (token_t) { - .type = OPCODE, - .val.opcode = RETURN - }; + return opcode_token(RETURN); } else if (isdigit(tok_str[1])) { unsigned long n = strtoul(tok_str + 1, NULL, 10); if (errno) { break; } - return (token_t) { - .type = OPER, - .val.operand = { - .type = REG, - .val.reg = { - .type = RET, - .num = n - } - } - }; + return oper_token(reg_operand(RET, n)); } break; diff --git a/implementations/C/src/remcc.h b/implementations/C/src/remcc.h @@ -0,0 +1,114 @@ +#include <stdint.h> + +/* Data types */ +typedef enum { + NOP = 0, + // Arithmetic + ADD, + SUB, + MUL, + DIV, + // Logical & bit + AND, + OR, + XOR, + NOT, + SHIFTL, + SHIFTR_L, + SHIFTR_A, + // Memory & registers + MOVE, + SWAP, + PUSH, + POP, + PEEK, + LOAD, + STORE, + // Control flow + JUMP, + CALL, + RETURN +} opcode_t; + +typedef enum { + GT = 0, + GEQ, + LT, + LEQ, + EQ, + NEQ, + POS, + NEG +} conditional_t; + +typedef enum { + ARG, + TEMP, + RET +} REG_TYPE; + +typedef struct { + REG_TYPE type; + uint64_t num; +} 
reg_t; + +typedef enum { + REG, + IMM +} OPER_TYPE; + +typedef struct { + OPER_TYPE type; + union { + reg_t reg; + uint64_t imm; + } val; +} oper_t; + +typedef struct { + opcode_t opcode; + conditional_t cond; + oper_t dest; + oper_t temp_1; + oper_t temp_2; +} inst_t; + +// The maximum number of characters representing a token +#define MAX_TOK 256 + +typedef enum { + OPCODE, + COND, + OPER, + ERR +} TOKEN_TYPE; + +typedef enum { + LEX_ERROR +} error_t; + +typedef struct { + TOKEN_TYPE type; + union { + opcode_t opcode; + conditional_t cond; + oper_t operand; + error_t error; + } val; +} token_t; + +/* Function definitions */ +token_t opcode_token(opcode_t opcode); +token_t cond_token(conditional_t cond); +token_t oper_token(oper_t oper); +token_t error_token(error_t error); + +oper_t reg_operand(REG_TYPE type, uint64_t num); +oper_t imm_operand(uint64_t immediate); + +token_t *lexer(FILE *stream); +token_t lex(char *tok); +inst_t *parse(token_t *tokens); +uint8_t *byte_compile_prog(inst_t *instructions); +uint8_t *byte_compile_inst(inst_t instruction); + diff --git a/implementations/C/tests/lextest.c b/implementations/C/tests/lextest.c @@ -1,105 +0,0 @@ -// Test if the assembler lexer is functioning as expected -#import "../src/remcc.c" - -typedef enum { - PASS, - FAIL -} RESULT; - -typedef struct { - RESULT state; - union { - char *result; - char *error; - } val; -} result_t; - -result_t test_lex(char *tok, opcode_t expect); -result_t test_lexer(FILE *stream); - -int main(int argc, char **argv) { - result_t result; - - char *lex_tests[] = { - // Instructions tests - "nop", - "add", - "sub", - "mul", - "div", - "and", - "or", - "xor", - "not", - "shiftl", - "shiftr(l)", - "shiftr(a)", - "move", - "swap", - "push", - "pop", - "peek", - "load", - "store", - "jump", - "call", - "return", - NULL - }; - opcode_t lex_expects[] = { - NOP, - ADD, - SUB, - MUL, - DIV, - AND, - OR, - XOR, - NOT, - SHIFTL, - SHIFTR_L, - SHIFTR_A, - MOVE, - SWAP, - PUSH, - POP, - PEEK, - 
LOAD, - STORE, - JUMP, - CALL, - RETURN - }; - - //FILE *stream_tests[] = {0}; - - for (int i = 0; lex_tests[i] != NULL; i++) { - switch ((result = test_lex(lex_tests[i], lex_expects[i])).state) { - case PASS: - printf("We have a success!\n"); - printf("Result: %s\n", result.val.result); - break; - case FAIL: - fprintf(stderr, "Dumbledore dies\n"); - fprintf(stderr, "Error: %s\n", result.val.error); - break; - } - } - //test_lexer(); - return 0; -} - -result_t test_lex(char *tok, opcode_t expect) { - token_t res = lex(tok); - if (res.type == OPCODE && res.val.opcode == expect) { - return (result_t) { - .state = PASS, - .val.result = "Success!" - }; - } else { - return (result_t) { - .state = FAIL, - .val.error = "Didn't get what we expected!" - }; - } -}