remedyvm

A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

commit 25914a6e52fd416a35b4e40b4414e0cd776abe8b
parent 4ace8a25056cb8c2d39ee34033fa415c7ea3718d
Author: Ethan Long <ethandavidlong@gmail.com>
Date:   Fri, 14 Jun 2024 18:52:18 +1000

Decided I'm going to try a different project structure

Diffstat:
M.gitignore | 2+-
Aimplementations/C/examples/fib.rin | 0
Mimplementations/C/src/remcc.c | 162++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
Mimplementations/C/src/remcc.h | 31++++++++++++++++++++++++-------
Aimplementations/rust/Cargo.toml | 18++++++++++++++++++
Aimplementations/rust/src/lib/parse.rs | 27+++++++++++++++++++++++++++
Aimplementations/rust/src/lib/remedy.rs | 1+
Aimplementations/rust/src/main.rs | 3+++
8 files changed, 223 insertions(+), 21 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1 +1 @@ - +.DS_Store diff --git a/implementations/C/examples/fib.rin b/implementations/C/examples/fib.rin diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c @@ -10,15 +10,15 @@ /* Function prototypes */ int usage(char *arg0); -void write_bytecode(FILE *stream, uint8_t *bytecode); +void write_bytecode(FILE *stream, codevec_t bytecode); /* Implementation: */ int main(int argc, char **argv) { char *input_fname = NULL, *output_fname = NULL; FILE *input_f = NULL, *output_f = NULL; - token_t *prog_tokens = NULL; - inst_t *prog_insts = NULL; - uint8_t *prog_bytecode = NULL; + tokenvec_t prog_tokens; + instvec_t prog_insts; + codevec_t prog_bytecode; if (argc <= 2) { return usage(argv[0]); @@ -54,8 +54,8 @@ int usage(char *arg0) { return 1; } -token_t *lexer(FILE *stream) { - assert(NULL == "tokenise not yet implemented"); +tokenvec_t lexer(FILE *stream) { + assert(NULL == "lexer not yet implemented"); char buf[MAX_TOK] = {0}; size_t i, j = 0, tok_arr_size = 100; @@ -74,22 +74,158 @@ token_t *lexer(FILE *stream) { } } - return NULL; + return (tokenvec_t) { + .tokens = NULL, + .num = 0 + }; } -inst_t *parse(token_t *tokens) { +instvec_t parse(tokenvec_t tokens) { assert(NULL == "parse not yet implemented"); - return NULL; + + return (instvec_t) { + .insts = NULL, + .num = 0 + }; +} + +uint64_t byte_compile_inst(inst_t instruction) { + assert(NULL == "byte_compile_inst not yet implemented"); + // Opcodes are the first 5 bits of an instruction + uint8_t opcode = (uint8_t) instruction.opcode; + // The last three bits should be zero, else our opcode is too big! 
+ assert(opcode >> 5 == 0); + + uint8_t cond = (uint8_t) instruction.cond; + // The last 5 bits of the conditional should be zero + assert(cond >> 3 == 0); + + uint8_t first_byte = (opcode << 3) + cond; + + // The dest is a register with address up to 22 bits + // Plus 2 bits for the register type + uint32_t dest = instruction.dest.num; + // The last 10 bits of this should be zero + assert(dest >> 22 == 0); + + uint8_t reg_type = (uint8_t) instruction.dest.type; + // the last 6 bits should be 0, we only want a 2 bit register identifier + assert(reg_type >> 2 == 0); + + uint32_t next_24_bits = (reg_type << 23) + dest; + + // What comes next depends on the type of instruction that we're dealing with, + // some instructions only take 2 other operand, some take 3. (including the dest) + // TODO: Implement inst_num_operands + uint32_t next_32_bits; + switch (inst_num_operands(instruction.opcode)) { + case 0: + case 1: + next_32_bits = 0; + break; + case 2: { + // The temp is some 31 bit number + // Plus 1 bit to differentiate between an immediate vs a register + // Where 2 bits are dedicated for the register type if it's a register + oper_t temp = instruction.temp_1; + + if (temp.type == IMM) { + uint32_t imm = temp.val.imm; + assert(imm >> 31 == 0); // final bit must not be used + next_32_bits = (1 << 31) + imm; + } else if (temp.type == REG) { + uint32_t reg_num = temp.val.reg.num; + uint8_t reg_type = temp.val.reg.type; + + assert(reg_num >> 29 == 0); // final 3 bits must not be used in the register number + assert(reg_type >> 2 == 0); // only the first 2 bits can be used of the reg type + next_32_bits = (0 << 31) + (reg_type << 30) + reg_num; + } else { + // We should never be here + assert(NULL == "We should never be in a situation where the temporary is neither an immediate, nor a register."); + next_32_bits = 0; + } + break; + } + case 3: + assert(NULL == "Instructions with 3 operands not yet implemented"); + // temp_1 is some 15 bit register addr or 15 bit 
immediate + // Plus 1 bit allocated to differentiate an immediate vs a register + oper_t temp_1 = instruction.temp_1; + + + // temp_2 is some 15 bit register addr or 15 bit immediate + // plus 1 bit allocated to differentiate an immediate vs register + break; + default: + // We should never be here + assert(NULL == "Invalid number of operands associated with an instruction"); + break; + } + + + // Different instructions will have different operand sizes + return 0; } -uint8_t *byte_compile(inst_t *instructions) { - assert(NULL == "byte_compile not yet implemented"); - return NULL; +int inst_num_operands(opcode_t op) { + int ret = 0; + switch (op) { + // 0 operand instructions + case NOP: + case RETURN: + ret = 0; + break; + // 1 operand instructions + case PUSH: + case POP: + case PEEK: + case JUMP: + case CALL: + ret = 1; + break; + // 2 operand instructions + case NOT: + case MOVE: + case SWAP: + case LOAD: + case STORE: + ret = 2; + break; + // 3 operand instructions + case ADD: + case SUB: + case MUL: + case DIV: + case AND: + case OR: + case XOR: + case SHIFTL: + case SHIFTR_L: + case SHIFTR_A: + ret = 3; + break; + default: + ret = -1; + } + return ret; +} + +codevec_t byte_compile_prog(instvec_t instructions) { + uint64_t *program = calloc(instructions.num, sizeof(uint64_t)); + for (size_t i = 0; i < instructions.num; i++) { + program[i] = byte_compile_inst(instructions.insts[i]); + } + return (codevec_t) { + .data = program, + .num = instructions.num + }; } -void write_bytecode(FILE *stream, uint8_t *bytecode) { +void write_bytecode(FILE *stream, codevec_t bytecode) { assert(NULL == "write_bytecode not yet implemented"); + return; } inline token_t opcode_token(opcode_t opcode) { diff --git a/implementations/C/src/remcc.h b/implementations/C/src/remcc.h @@ -49,7 +49,7 @@ typedef enum { typedef struct { REG_TYPE type; - uint64_t num; + uint32_t num; } reg_t; typedef enum { @@ -61,14 +61,14 @@ typedef struct { OPER_TYPE type; union { reg_t reg; - uint64_t imm; 
+ uint32_t imm; } val; } oper_t; typedef struct { opcode_t opcode; conditional_t cond; - oper_t dest; + reg_t dest; oper_t temp_1; oper_t temp_2; } inst_t; @@ -97,6 +97,21 @@ typedef struct { } val; } token_t; +typedef struct { + token_t *tokens; + size_t num; +} tokenvec_t; + +typedef struct { + inst_t *insts; + size_t num; +} instvec_t; + +typedef struct { + uint64_t *data; + size_t num; +} codevec_t; + /* Function definitions */ token_t opcode_token(opcode_t opcode); token_t cond_token(conditional_t cond); @@ -106,9 +121,11 @@ token_t error_token(error_t error); oper_t reg_operand(REG_TYPE type, uint64_t num); oper_t imm_operand(uint64_t immediate); -token_t *lexer(FILE *stream); +tokenvec_t lexer(FILE *stream); token_t lex(char *tok); -inst_t *parse(token_t *tokens); -uint8_t *byte_compile_prog(inst_t *instructions); -uint8_t *byte_compile_inst(inst_t instruction); +instvec_t parse(tokenvec_t tokens); +codevec_t byte_compile_prog(instvec_t instructions); +uint64_t byte_compile_inst(inst_t instruction); + +int inst_num_operands(opcode_t op); diff --git a/implementations/rust/Cargo.toml b/implementations/rust/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "remedyvm-rust" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[lib] +name = "remedyvm" +path = "src/lib/remedy.rs" +test = false +bench = false +doc = false +proc-macro = false +crate-type = "lib" +required-features = [] diff --git a/implementations/rust/src/lib/parse.rs b/implementations/rust/src/lib/parse.rs @@ -0,0 +1,27 @@ +enum Token { + Nop, + // Arithmetic + Add, + Sub, + Mul, + Div, // Logical & bit + And, + Or, + Xor, + Not, + ShiftL, + ShiftRLogical, + ShiftRArithmatic, + // Memory & registers + Move, + Swap, + Push, + Pop, + Peek, + Load, + Store, + // Control flow + Jump, + Call, + Return, +} diff --git a/implementations/rust/src/lib/remedy.rs b/implementations/rust/src/lib/remedy.rs @@ 
-0,0 +1 @@ +pub mod parse; diff --git a/implementations/rust/src/main.rs b/implementations/rust/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +}