remedyvm

A toy RISC virtual machine inspired by Bell Lab's `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

commit ec3e1757ac50cd9d692354ee5f7cb54bd50b75c5
parent d1c4af22120775e40f06afe5cb81435adc02eb6b
Author: Ethan Long <edl@disroot.org>
Date:   Fri, 16 Jun 2023 01:03:25 +1000

Some minor progress!
It is time to sleep and get ready for Spaceport now.
Also I need to mark COMP2300 assigments oops I forgor.

Diffstat:
Mdoc/specs.ms | 161+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
Mdoc/specs.pdf | 0
Aimplementations/C/examples/fib.rasm | 22++++++++++++++++++++++
Aimplementations/C/examples/fib_recursive.rasm | 26++++++++++++++++++++++++++
Mimplementations/C/nobuild.c | 92+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Aimplementations/C/src/remcc.c | 128+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aimplementations/C/src/remvm_interp.c | 17+++++++++++++++++
7 files changed, 427 insertions(+), 19 deletions(-)

diff --git a/doc/specs.ms b/doc/specs.ms @@ -1,4 +1,4 @@ -\" Macros used: +\" Document specific macros: .de AL . IP . B1 @@ -19,7 +19,7 @@ . R .. -\" Document start: +\" Actual Document start: .TL RemedyVM Specifications @@ -43,7 +43,7 @@ Instructions & Syntax All instructions follow the following general pattern: .DS I .CW -inst <CND> <DST> <SRC> +inst.<CND> <DST> <SRC> .DE Where .CW <CND> @@ -93,7 +93,7 @@ The instruction copies/moves the value stored in one temporary to another temporary. .SH 4 Usage -.AL "move <CND> <DST> <SRC>" "move <CND> y x" "y <- <CND> ? x : y" +.AL "move.<CND> <DST> <SRC>" "move.<CND> y x" "y :=.<CND> ? x : y" .NH 3 .CW swap @@ -103,7 +103,7 @@ The instruction swaps the value stored in one temporary with the value in another temporary. .SH 4 Usage -.AL "swap <CND> <TEMP1> <TEMP2>" "swap <CND> x y" "if CND { x <-> y }" +.AL "swap.<CND> <TEMP1> <TEMP2>" "swap.<CND> x y" "if CND { (x, y) := (y, x) }" .NH 3 .CW push @@ -113,7 +113,7 @@ The instruction pushes the specified temporary onto the stack. .SH 4 Usage -.AL "push <CND> <SRC>" "push <CND> x" "if CND { sp <- sp + size(x), stack <- x : stack }" +.AL "push.<CND> <SRC>" "push.<CND> x" "if CND { sp := sp + size(x), stack := x : stack }" .NH 3 .CW pop @@ -123,7 +123,18 @@ The instruction pops from the top of the stack into the specified temporary. .SH 4 Usage -.AL "pop <CND> <DEST>" "pop <CND> x" "if CND { sp <- sp - size(x), (x, stack) <- (x, xs) in stack(x:xs) }" +.AL "pop.<CND> <DEST>" "pop.<CND> x" "if CND { sp := sp - size(x), (x, stack) := (x, xs) in stack(x:xs) }" + +.NH 3 +.CW peek +.PP +The +.CW peek +instruction peeks at the top of the stack, copying the value on the top of the stack into the specified temporary. +This is essentially a pop without lowering the stack. +.SH 4 +Usage +.AL "peek.<CND> <DEST>" "peek.<CND> x" "x := CND ? head(stack)" .NH 3 .CW load @@ -136,7 +147,7 @@ into the specified .CW <TEMP> . .SH 4 Usage -.AL "load <CND> <TEMP> <MEMADDR>" "store <CND> y x" "CND ? y <- Mem(x)" +.AL "load.<CND> <TEMP> <MEMADDR>" "store.<CND> y x" "CND ? y := Mem(x)" .NH 3 .CW store @@ -149,7 +160,7 @@ into the memory pointed to by .CW <MEMADDR> . .SH 4 Usage -.AL "store <CND> <TEMP> <MEMADDR>" "store <CND> y x" "CND ? Mem(x) <- y" +.AL "store.<CND> <TEMP> <MEMADDR>" "store.<CND> y x" "CND ? Mem(x) := y" .B NOTE: the order of the store instruction is reversed to the regular load & other typical instructions. @@ -165,7 +176,7 @@ The instruction adds two temporaries and puts the result in a third. .SH 4 Usage -.AL "add <CND> <DEST> <TEMP> <TEMP>" "add <CND> z x y" "z <- CND ? x + y" +.AL "add.<CND> <DEST> <TEMP> <TEMP>" "add.<CND> z x y" "z := CND ? x + y" .NH 3 .CW sub @@ -180,7 +191,7 @@ and puts the result in .CW <DEST> . .SH 4 Usage -.AL "sub <CND> <DEST> <TEMP1> <TEMP2>" "sub <CND> z x y" "z <- CND ? x - y" +.AL "sub.<CND> <DEST> <TEMP1> <TEMP2>" "sub.<CND> z x y" "z := CND ? x - y" .NH 3 .CW mul @@ -191,7 +202,7 @@ instruction multiplies two temporaries together, putting the result in .CW <DEST> . .SH 4 Usage -.AL "mul <CND> <DEST> <TEMP> <TEMP>" "mul <CND> z x y" "z <- CND ? x * y" +.AL "mul.<CND> <DEST> <TEMP> <TEMP>" "mul.<CND> z x y" "z := CND ? x * y" .NH 3 .CW div @@ -206,7 +217,7 @@ and puts the result in .CW <DEST> . .SH 4 Usage -.AL "div <CND> <DEST> <TEMP1> <TEMP2>" "div <CND> z x y" "z <- (CND && y != 0) ? x / y" +.AL "div.<CND> <DEST> <TEMP1> <TEMP2>" "div.<CND> z x y" "z := (CND && y != 0) ? x / y" .B NOTE: if .CW <TEMP2> @@ -223,25 +234,145 @@ Bit Manipulation & Logical Operations .NH 3 .CW shiftl +.PP +The +.CW shiftl +instruction shifts a temporary +.CW <TEMP1> +left by +.CW <TEMP2> +into the temporary +.CW <DEST> . +.SH 4 +Usage +.AL "shiftl.<CND> <DEST> <TEMP1> <TEMP2>" "shiftl.<CND> z x y" "z := CND ? x << y" .NH 3 -.CW shiftr +.CW shiftr(T) +.PP +The +.CW shiftr(T) +instruction shifts a temporary +.CW <TEMP1> +right by +.CW <TEMP2> +into the temporary +.CW <DEST> . + +The +.CW T +parameter takes either +.CW l +(logical), or +.CW a +(arithmetic), and determines whether or not the instruction is an arithmetic or logical shift right. +.SH 4 +Usage +.AL "shiftr(T).<CND> <DEST> <TEMP1> <TEMP2>" "shiftr(l).<CND> z x y" "z := CND ? x >> y" +.AL "shiftr(T).<CND> <DEST> <TEMP1> <TEMP2>" "shiftr(a).<CND> z x y" "z := CND ? x \(ti>> y" .NH 3 .CW and +.PP +The +.CW and +instruction performs a bitwise AND on +.CW <TEMP1> +and +.CW <TEMP2> +and puts the result in +.CW <DEST> . +.SH 4 +Usage +.AL "and.<CND> <DEST> <TEMP1> <TEMP2>" "and.<CND> z x y" "z := CND ? x & y" .NH 3 .CW or +.PP +The +.CW or +instruction performs a bitwise OR on +.CW <TEMP1> +and +.CW <TEMP2> +and puts the result in +.CW <DEST> . +.SH 4 +Usage +.AL "or.<CND> <DEST> <TEMP1> <TEMP2>" "and.<CND> z x y" "z := CND ? x | y" .NH 3 .CW xor +.PP +The +.CW xor +instruction performs a bitwise XOR (eXclusive OR) on +.CW <TEMP1> +and +.CW <TEMP2> +and puts the result in +.CW <DEST> . +.SH 4 +Usage +.AL "xor.<CND> <DEST> <TEMP1> <TEMP2>" "xor.<CND> z x y" "z := CND ? x ^ y" .NH 3 .CW not - +.PP +The +.CW not +instruction performs a bitwise NOT on +.CW <SRC> +and puts the result in +.CW <DEST> . +.SH 4 +Usage +.AL "not.<CND> <DEST> <SRC>" "xor.<CND> y x" "y := CND ? !x" .NH 2 Control Flow .NH 3 .CW jump +.PP +The +.CW jump +instruction perfoms a jump to +.CW <LOCATION> , +changing the program counter to point to that location. +.SH 4 +Usage +.AL "jump.<CND> <LOCATION>" "jump.<CND> x" "if CND { goto x; }" + +.NH 3 +.CW call +.PP +The +.CW call +instruction performs a jump to +.CW <LOCATION> , +pushing the current program counter to the stack so that the callee can return to the caller. +.PP +.B NOTE: +All +.CW call s +should always be +.CW return ed +to, as a +.CW call +pushes to a stack. +.SH 4 +Usage +.AL "call.<CND> <LOCATION>" "call.<CND> x" "if CND {x(<ARGS>)}" + +.NH 3 +.CW return +.PP +The +.CW return +instruction performs a jump back to wherever the last +.CW call +was. +.SH 4 +Usage +.AL "return.<CND>" "return.<CND>" "if CND { return }" diff --git a/doc/specs.pdf b/doc/specs.pdf Binary files differ. diff --git a/implementations/C/examples/fib.rasm b/implementations/C/examples/fib.rasm @@ -0,0 +1,22 @@ +; NOTE: This implementation could probably still be improved, but it will be far +; faster than fib_recursive, as it utilizes the register architecture more +; efficiently. It also uses less memory & scales O(1) with respect to memory. + +; Arg: a0 := n +; Returns: r0 := fib(n) +fib: + push [t0, t1, t2] + sub a0, a0, 3 ; a0 := a0 - 2 <=> m := n - 2 + move t0, 0 ; t0 := 0 <=> i := 0 + + move t1, 1 ; t1 := 1 <=> fib(0) = 1 + move t2, 1 ; t2 := 1 <=> fib(1) = 1 +fib_start: + add t2, t1, t2 ; t2 := t1 + t2 <=> fib(i + 2) = fib(i) + fib(i + 1) + swap t2, t1 ; (t1, t2) := (t2, t1) + add t0, t0, 1 ; t0 := t0 + 1 <=> i++ + cmp t0, a0 ; t0 vs a0? <=> i < m ? + jump.lt fib_start ; <=> if (i < m) { goto fib_start } + move r0, t1 ; r0 := t1 <=> ret := fib(n) ; as i + 2 = n + pop [t0, t1, t2] + return diff --git a/implementations/C/examples/fib_recursive.rasm b/implementations/C/examples/fib_recursive.rasm @@ -0,0 +1,26 @@ +; NOTE: This is probably the most inneficient implementation of the fibonacci +; function, the memory usage grows as O(2^n). It IS however a good stack test :) + +; Arg: a0 := n +; Returns: r0 := fib(n) +fib_rec: + cmp a0 1 ; a0 vs 1? <=> n <= 1? + jump.leq fib_base_case ; <=> if (n <= 1) { return 1 } + + push t0 + + push a0 ; stack := (a0 : stack) <=> stack := (n : stack) + sub a0, a0, 1 ; a1 := a0 - 1 <=> (n - 1) + call fib_rec ; <=> fib(n - 1) + move t0, r0 ; t0 := r0 <=> t0 := fib(n - 1) + + pop a0 ; a0 := head(stack) <=> n + sub a0, a0, 2 ; a0 := a0 - 2 <=> (n - 2) + call fib_rec ; <=> fib(n - 2) + add r0, r0, t0 ; r0 := r0 + t0 <=> ret := fib(n - 1) + fib(n - 2) + + pop t0 + return ; <=> return ret +fib_base_case: ; if (n <= 1) { return 1 } + move r0, 1 ; <=> ret := 1 + return ; <=> return ret diff --git a/implementations/C/nobuild.c b/implementations/C/nobuild.c @@ -1,15 +1,99 @@ +/* + I DID NOT MAKE THIS BUILD SYSTEM!! + All credit to Tsoding/Rexim, go check out the nobuild repo: + https://github.com/tsoding/nobuild + + Usage: + # Compile the build system + cc -o nobuild nobuild.c + # Compile the program + ./nobuild + */ #define NOBUILD_IMPLEMENTATION #include "./nobuild.h" #define CFLAGS "-Wall", "-Wextra", "-std=c99", "-pedantic" -void build_vm() { +void build_target(const char *target) { + Cstr source_path = PATH("src", target); + Cstr dest_path = NOEXT(PATH("bin", target)); + + CMD("cc", CFLAGS, "-o", dest_path, source_path); +} + +void build(void) { + FOREACH_FILE_IN_DIR(target, "src", { + if (ENDS_WITH(target, ".c")) { + build_target(target); + } + }); +} - CMD("cc", CFLAGS, "-o", "bin/interp", "src/interp.c"); +void print_chain(const Chain *chain) { + INFO("input: %s", chain->input_filepath); + INFO("output: %s", chain->output_filepath); + FOREACH_ARRAY(Cmd, cmd, chain->cmds, { + INFO("cmd: %s", cmd_show(*cmd)); + }); } -int main() { - build_vm(); +void byte_compile_example(const char *example) { + Cstr source_path = PATH("examples", example); + Cstr dest_path = CONCAT(NOEXT(source_path), ".rin"); + + CMD("bin/remcc", source_path, dest_path); +} + +void byte_compile_examples(void) { + FOREACH_FILE_IN_DIR(example, "examples", { + if (ENDS_WITH(example, ".rasm")) { + byte_compile_example(example); + } + }); +} + +void run_example(const char *example) { + Cstr path = PATH("examples", example); + + CMD("bin/vm_interp", path); +} + +void run_examples(void) { + FOREACH_FILE_IN_DIR(example, "examples", { + if (ENDS_WITH(example, ".rin")) { + run_example(example); + } + }); +} + +void clean(void) { + FOREACH_FILE_IN_DIR(bin, "bin", { + if (!ENDS_WITH(bin, ".")) { + CMD("rm", PATH("bin", bin)); + } + }); + + FOREACH_FILE_IN_DIR(bytecode, "examples", { + if (ENDS_WITH(bytecode, ".rin")) { + CMD("rm", PATH("examples", bytecode)); + } + }); + + CMD("rm", "nobuild.old"); +} + +int main(int argc, char **argv) { + GO_REBUILD_URSELF(argc, argv); + + if (argc == 2 && strcmp(argv[1], "clean") == 0) { // Cleanup builds + clean(); + return 0; + } // else + + build(); + + byte_compile_examples(); + run_examples(); return 0; } diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c @@ -0,0 +1,128 @@ +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Data types */ +typedef enum { + NOP = 0, + // Arithmetic + ADD, + SUB, + MUL, + DIV, + // Logical & bit + AND, + OR, + XOR, + NOT, + SHIFTL, + SHIFTR_L, + SHIFTR_A, + // Memory & registers + MOVE, + SWAP, + PUSH, + POP, + PEEK, + LOAD, + STORE, + // Control flow + JUMP, + CALL, + RETURN +} opcode_t; + +typedef enum { + GT = 0, + GEQ, + LT, + LEQ, + EQ, + NEQ, + POS, + NEG +} conditional_t; + +typedef struct { + opcode_t opcode; + conditional_t cond; + uint64_t operand_1; + uint64_t operand_2; + uint64_t operand_3; +} inst_t; + +typedef union { + opcode_t opcode; + conditional_t cond; + uint64_t operand; +} token_t; + +/* Function prototypes */ +int usage(char *arg0); +token_t *tokenise(FILE *stream); +inst_t *parse(token_t *tokens); +uint8_t *byte_compile(inst_t *instructions); +void write_bytecode(FILE *stream, uint8_t *bytecode); + +/* Implementation: */ +int main(int argc, char **argv) { + char *input_fname = NULL, *output_fname = NULL; + FILE *input_f = NULL, *output_f = NULL; + token_t *prog_tokens = NULL; + inst_t *prog_insts = NULL; + uint8_t *prog_bytecode = NULL; + + if (argc <= 2) { + return usage(argv[0]); + } else if (argc == 3) { + // output and input specified + input_fname = argv[1]; + output_fname = argv[2]; + } else { + return usage(argv[0]); + } + + const char *open_err = "Unable to open file"; + if ((input_f = fopen(input_fname, "r")) == NULL) { + perror(open_err); + return 1; + } + if ((output_f = fopen(output_fname, "w")) == NULL) { + perror(open_err); + return 1; + } + + prog_tokens = tokenise(input_f); + prog_insts = parse(prog_tokens); + prog_bytecode = byte_compile(prog_insts); + + write_bytecode(output_f, prog_bytecode); + + return 0; +} + +int usage(char *arg0) { + fprintf(stderr, "Usage: %s input.rasm output.rin\n", arg0); + return 1; +} + +token_t *tokenise(FILE *stream) { + assert(NULL == "tokenise not yet implemented"); + return NULL; +} + +inst_t *parse(token_t *tokens) { + assert(NULL == "parse not yet implemented"); + return NULL; +} + +uint8_t *byte_compile(inst_t *instructions) { + assert(NULL == "byte_compile not yet implemented"); + return NULL; +} + +void write_bytecode(FILE *stream, uint8_t *bytecode) { + assert(NULL == "write_bytecode not yet implemented"); +} diff --git a/implementations/C/src/remvm_interp.c b/implementations/C/src/remvm_interp.c @@ -0,0 +1,17 @@ +#include <stdio.h> +#include <stdint.h> + +int usage(char *arg0); + +int main(int argc, char **argv) { + if (argc < 2) { + return usage(argv[0]); + } + + return 0; +} + +int usage(char *arg0) { + fprintf(stderr, "USAGE: %s <FILE>\n", arg0); + return 1; +}