remedyvm

A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

commit d895fb0ec2da821721e23ce508c0ee2165fc3ec1
parent ec3e1757ac50cd9d692354ee5f7cb54bd50b75c5
Author: Ethan Long <edl@disroot.org>
Date:   Sun, 18 Jun 2023 14:53:49 +1000

Gaming. Wrote some tests and completed the lexer
Still need test cases for non-instruction tokens

Diffstat:
M implementations/C/src/remcc.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A implementations/C/tests/lextest.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 474 insertions(+), 12 deletions(-)

diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c @@ -1,4 +1,6 @@ #include <assert.h> +#include <ctype.h> +#include <errno.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -45,29 +47,72 @@ typedef enum { NEG } conditional_t; +typedef enum { + ARG, + TEMP, + RET +} REG_TYPE; + +typedef struct { + REG_TYPE type; + uint64_t num; +} reg_t; + +typedef enum { + REG, + IMM +} OPER_TYPE; + +typedef struct { + OPER_TYPE type; + union { + reg_t reg; + uint64_t imm; + } val; +} oper_t; + typedef struct { opcode_t opcode; conditional_t cond; - uint64_t operand_1; - uint64_t operand_2; - uint64_t operand_3; + oper_t dest; + oper_t temp_1; + oper_t temp_2; } inst_t; -typedef union { - opcode_t opcode; - conditional_t cond; - uint64_t operand; +// The maximum number of characters representing a token +#define MAX_TOK 256 + +typedef enum { + OPCODE, + COND, + OPER, + ERR +} TOKEN_TYPE; + +typedef enum { + LEX_ERROR +} error_t; + +typedef struct { + TOKEN_TYPE type; + union { + opcode_t opcode; + conditional_t cond; + oper_t operand; + error_t error; + } val; } token_t; /* Function prototypes */ int usage(char *arg0); -token_t *tokenise(FILE *stream); +token_t *lexer(FILE *stream); +token_t lex(char *tok); inst_t *parse(token_t *tokens); uint8_t *byte_compile(inst_t *instructions); void write_bytecode(FILE *stream, uint8_t *bytecode); /* Implementation: */ -int main(int argc, char **argv) { +/*int main(int argc, char **argv) { char *input_fname = NULL, *output_fname = NULL; FILE *input_f = NULL, *output_f = NULL; token_t *prog_tokens = NULL; @@ -94,25 +139,44 @@ int main(int argc, char **argv) { return 1; } - prog_tokens = tokenise(input_f); + prog_tokens = lexer(input_f); prog_insts = parse(prog_tokens); prog_bytecode = byte_compile(prog_insts); write_bytecode(output_f, prog_bytecode); return 0; -} + }*/ int usage(char *arg0) { fprintf(stderr, "Usage: %s input.rasm output.rin\n", arg0); return 1; } -token_t *tokenise(FILE *stream) { 
+token_t *lexer(FILE *stream) { assert(NULL == "tokenise not yet implemented"); + + char buf[MAX_TOK] = {0}; + size_t i, j = 0, tok_arr_size = 100; + token_t *tokens = calloc(tok_arr_size, sizeof(token_t)); + + while (!feof(stream)) { + for (i = 0; i < MAX_TOK && (buf[i] = getc(stream)) != ' '; i++); + buf[i] = '\0'; + + tokens[j] = lex(buf); + + j++; + if (j >= tok_arr_size) { + tok_arr_size *= 2; + tokens = realloc(tokens, tok_arr_size); + } + } + return NULL; } + inst_t *parse(token_t *tokens) { assert(NULL == "parse not yet implemented"); return NULL; @@ -126,3 +190,296 @@ uint8_t *byte_compile(inst_t *instructions) { void write_bytecode(FILE *stream, uint8_t *bytecode) { assert(NULL == "write_bytecode not yet implemented"); } + +// My homemade lexer, it's a bit filthy but it'll do for now +token_t lex(char *tok_str) { + // TODO: Operands + // TODO: Case-Insensitive + token_t err = { + .type = ERR, + .val.error = LEX_ERROR, + }; + + switch (tok_str[0]) { + case 'a': + if (isdigit(tok_str[1])) { + unsigned long n = strtoul(tok_str + 1, NULL, 10); + if (errno) { + break; + } + return (token_t) { + .type = OPER, + .val.operand = { + .type = REG, + .val.reg = { + .type = ARG, + .num = n + } + } + }; + } else if (strcmp(tok_str + 1, "dd") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = ADD + }; + } else if (strcmp(tok_str + 1, "nd") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = AND + }; + } + break; + + case 'c': + // The only C instruction is call + if (strcmp(tok_str + 1, "all") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = CALL + }; + } + break; + + case 'd': + // The only D instruction is div + if (strcmp(tok_str + 1, "iv") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = DIV + }; + } + break; + + case 'e': + if (strcmp(tok_str + 1, "q") == 0) { + return (token_t) { + .type = COND, + .val.cond = EQ + }; + } + break; + + case 'g': + if (strcmp(tok_str + 1, "t") == 0) { + return (token_t) { + .type = 
COND, + .val.cond = GT + }; + } else if (strcmp(tok_str + 1, "eq") == 0) { + return (token_t) { + .type = COND, + .val.cond = GEQ + }; + } + break; + + case 'j': + // The only J instruction is jump + if (strcmp(tok_str + 1, "ump") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = JUMP + }; + } + break; + + case 'l': + // The only L instruction is load + if (strcmp(tok_str + 1, "t") == 0) { + return (token_t) { + .type = COND, + .val.cond = LT + }; + } else if (strcmp(tok_str + 1, "eq") == 0) { + return (token_t) { + .type = COND, + .val.cond = LEQ + }; + } else if (strcmp(tok_str + 1, "oad") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = LOAD + }; + } + break; + + case 'm': + if (strcmp(tok_str + 1, "ul") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = MUL + }; + } else if (strcmp(tok_str + 1, "ove") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = MOVE + }; + } + break; + + case 'n': + if (strcmp(tok_str + 1, "op") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = NOP + }; + } else if (strcmp(tok_str + 1, "ot") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = NOT + }; + } else if (strcmp(tok_str + 1, "eq") == 0) { + return (token_t) { + .type = COND, + .val.cond = NEQ + }; + } else if (strcmp(tok_str + 1, "eg") == 0) { + return (token_t) { + .type = COND, + .val.cond = NEG + }; + } + break; + + case 'o': + if (strcmp(tok_str + 1, "r") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = OR + }; + } + break; + + case 'p': + switch (tok_str[1]) { + case 'u': + if (strcmp(tok_str + 2, "sh") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = PUSH + }; + } + break; + + case 'o': + if (strcmp(tok_str + 2, "p") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = POP + }; + } else if (strcmp(tok_str + 2, "s") == 0) { + return (token_t) { + .type = COND, + .val.cond = POS + }; + } + break; + + case 'e': + if (strcmp(tok_str + 2, "ek") == 0) { + return 
(token_t) { + .type = OPCODE, + .val.opcode = PEEK + }; + } + break; + + default: + break; + } + break; + + case 'r': + if (strcmp(tok_str + 1, "eturn") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = RETURN + }; + } else if (isdigit(tok_str[1])) { + unsigned long n = strtoul(tok_str + 1, NULL, 10); + if (errno) { + break; + } + return (token_t) { + .type = OPER, + .val.operand = { + .type = REG, + .val.reg = { + .type = RET, + .num = n + } + } + }; + } + break; + + case 's': + // FIXME: Filthy + if (strcmp(tok_str + 1, "ub") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = SUB + }; + } else if (strcmp(tok_str + 1, "wap") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = SWAP + }; + } else if (strcmp(tok_str + 1, "tore") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = STORE + }; + } else if (strcmp(tok_str + 1, "hiftl") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = SHIFTL + }; + } else if (strcmp(tok_str + 1, "hiftr(l)") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = SHIFTR_L + }; + } else if (strcmp(tok_str + 1, "hiftr(a)") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = SHIFTR_A + }; + } + break; + + case 't': + if (isdigit(tok_str[1])) { + unsigned long n = strtoul(tok_str + 1, NULL, 10); + if (errno) { + break; + } + return (token_t) { + .type = OPER, + .val.operand = { + .type = REG, + .val.reg = { + .type = TEMP, + .num = n + } + } + }; + } + break; + + case 'x': + if (strcmp(tok_str + 1, "or") == 0) { + return (token_t) { + .type = OPCODE, + .val.opcode = XOR + }; + } + break; + + default: + break; + } + + fprintf(stderr, "Unknown token: %s\n", tok_str); + return err; +} diff --git a/implementations/C/tests/lextest.c b/implementations/C/tests/lextest.c @@ -0,0 +1,105 @@ +// Test if the assembler lexer is functioning as expected +#import "../src/remcc.c" + +typedef enum { + PASS, + FAIL +} RESULT; + +typedef struct { + RESULT state; + union { + char 
*result; + char *error; + } val; +} result_t; + +result_t test_lex(char *tok, opcode_t expect); +result_t test_lexer(FILE *stream); + +int main(int argc, char **argv) { + result_t result; + + char *lex_tests[] = { + // Instructions tests + "nop", + "add", + "sub", + "mul", + "div", + "and", + "or", + "xor", + "not", + "shiftl", + "shiftr(l)", + "shiftr(a)", + "move", + "swap", + "push", + "pop", + "peek", + "load", + "store", + "jump", + "call", + "return", + NULL + }; + opcode_t lex_expects[] = { + NOP, + ADD, + SUB, + MUL, + DIV, + AND, + OR, + XOR, + NOT, + SHIFTL, + SHIFTR_L, + SHIFTR_A, + MOVE, + SWAP, + PUSH, + POP, + PEEK, + LOAD, + STORE, + JUMP, + CALL, + RETURN + }; + + //FILE *stream_tests[] = {0}; + + for (int i = 0; lex_tests[i] != NULL; i++) { + switch ((result = test_lex(lex_tests[i], lex_expects[i])).state) { + case PASS: + printf("We have a success!\n"); + printf("Result: %s\n", result.val.result); + break; + case FAIL: + fprintf(stderr, "Dumbledore dies\n"); + fprintf(stderr, "Error: %s\n", result.val.error); + break; + } + } + //test_lexer(); + return 0; +} + +result_t test_lex(char *tok, opcode_t expect) { + token_t res = lex(tok); + if (res.type == OPCODE && res.val.opcode == expect) { + return (result_t) { + .state = PASS, + .val.result = "Success!" + }; + } else { + return (result_t) { + .state = FAIL, + .val.error = "Didn't get what we expected!" + }; + } +}