commit 25914a6e52fd416a35b4e40b4414e0cd776abe8b
parent 4ace8a25056cb8c2d39ee34033fa415c7ea3718d
Author: Ethan Long <ethandavidlong@gmail.com>
Date: Fri, 14 Jun 2024 18:52:18 +1000
Decided I'm going to try a different project structure
Diffstat:
8 files changed, 223 insertions(+), 21 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1 +1 @@
-
+.DS_Store
diff --git a/implementations/C/examples/fib.rin b/implementations/C/examples/fib.rin
diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c
@@ -10,15 +10,15 @@
/* Function prototypes */
int usage(char *arg0);
-void write_bytecode(FILE *stream, uint8_t *bytecode);
+void write_bytecode(FILE *stream, codevec_t bytecode);
/* Implementation: */
int main(int argc, char **argv) {
char *input_fname = NULL, *output_fname = NULL;
FILE *input_f = NULL, *output_f = NULL;
- token_t *prog_tokens = NULL;
- inst_t *prog_insts = NULL;
- uint8_t *prog_bytecode = NULL;
+ tokenvec_t prog_tokens;
+ instvec_t prog_insts;
+ codevec_t prog_bytecode;
if (argc <= 2) {
return usage(argv[0]);
@@ -54,8 +54,8 @@ int usage(char *arg0) {
return 1;
}
-token_t *lexer(FILE *stream) {
- assert(NULL == "tokenise not yet implemented");
+tokenvec_t lexer(FILE *stream) {
+ assert(NULL == "lexer not yet implemented");
char buf[MAX_TOK] = {0};
size_t i, j = 0, tok_arr_size = 100;
@@ -74,22 +74,158 @@ token_t *lexer(FILE *stream) {
}
}
- return NULL;
+ return (tokenvec_t) {
+ .tokens = NULL,
+ .num = 0
+ };
}
-inst_t *parse(token_t *tokens) {
+/*
+ * Intended to turn a token vector into an instruction vector.
+ * Not implemented yet: the assert below always fails when assertions are
+ * enabled; with NDEBUG defined an empty vector (NULL, 0) is returned.
+ */
+instvec_t parse(tokenvec_t tokens) {
assert(NULL == "parse not yet implemented");
- return NULL;
+
+ return (instvec_t) {
+ .insts = NULL,
+ .num = 0
+ };
+}
+
+/*
+ * Encode a single instruction into one 64-bit bytecode word.
+ *
+ * Work in progress: the leading assert aborts when assertions are enabled,
+ * and the fields computed below are not yet merged into the result, so the
+ * function currently always returns 0.
+ *
+ * Fields computed so far:
+ *   first_byte   - 5-bit opcode in bits 3-7, 3-bit condition in bits 0-2
+ *   next_24_bits - 2-bit destination register type in bits 22-23,
+ *                  22-bit destination register number in bits 0-21
+ *   next_32_bits - second operand of a 2-operand instruction:
+ *                  bit 31 set   -> 31-bit immediate in bits 0-30
+ *                  bit 31 clear -> register type in bits 29-30,
+ *                                  register number in bits 0-28
+ */
+uint64_t byte_compile_inst(inst_t instruction) {
+    assert(NULL == "byte_compile_inst not yet implemented");
+
+    // Opcodes occupy 5 bits, so the top three bits of the byte must be clear.
+    uint8_t opcode = (uint8_t) instruction.opcode;
+    assert(opcode >> 5 == 0);
+
+    // Conditions occupy 3 bits, so the top five bits of the byte must be clear.
+    uint8_t cond = (uint8_t) instruction.cond;
+    assert(cond >> 3 == 0);
+
+    uint8_t first_byte = (uint8_t) ((opcode << 3) | cond);
+
+    // The destination is a register: a 22-bit number plus a 2-bit type.
+    uint32_t dest = instruction.dest.num;
+    assert(dest >> 22 == 0);
+
+    uint8_t dest_type = (uint8_t) instruction.dest.type;
+    assert(dest_type >> 2 == 0);
+
+    // The type sits directly above the 22-bit number so the pair fills exactly
+    // 24 bits; a shift of 23 would push the type's high bit out of the field.
+    uint32_t next_24_bits = ((uint32_t) dest_type << 22) | dest;
+
+    // What comes next depends on how many operands the instruction takes
+    // (counting the destination): some take two, some take three.
+    // Start from 0 so every path leaves a defined value behind.
+    uint32_t next_32_bits = 0;
+    switch (inst_num_operands(instruction.opcode)) {
+    case 0:
+    case 1:
+        break;
+    case 2: {
+        // One flag bit (bit 31) tells an immediate from a register. An
+        // immediate uses the remaining 31 bits; a register spends 2 of them
+        // on its type and the other 29 on its number.
+        oper_t temp = instruction.temp_1;
+
+        if (temp.type == IMM) {
+            uint32_t imm = temp.val.imm;
+            assert(imm >> 31 == 0); // bit 31 is reserved for the flag
+            // Shift an unsigned 1: `1 << 31` overflows a 32-bit signed int.
+            next_32_bits = ((uint32_t) 1 << 31) | imm;
+        } else if (temp.type == REG) {
+            uint32_t src_num = temp.val.reg.num;
+            uint8_t src_type = (uint8_t) temp.val.reg.type;
+
+            assert(src_num >> 29 == 0); // the number must fit in 29 bits
+            assert(src_type >> 2 == 0); // the type must fit in 2 bits
+            // Bit 31 stays clear to mark a register, so the type goes in
+            // bits 29-30; shifting by 30 would overwrite the flag bit.
+            next_32_bits = ((uint32_t) src_type << 29) | src_num;
+        } else {
+            // We should never be here
+            assert(NULL == "We should never be in a situation where the temporary is neither an immediate, nor a register.");
+        }
+        break;
+    }
+    case 3:
+        assert(NULL == "Instructions with 3 operands not yet implemented");
+        // TODO: temp_1 and temp_2 are each a 15-bit register address or a
+        // 15-bit immediate, plus 1 bit to tell the two apart.
+        break;
+    default:
+        // We should never be here
+        assert(NULL == "Invalid number of operands associated with an instruction");
+        break;
+    }
+
+    // TODO: merge first_byte, next_24_bits and next_32_bits into the result.
+    // Until then the fields are only validated, not emitted.
+    (void) first_byte;
+    (void) next_24_bits;
+    (void) next_32_bits;
+    return 0;
}
-uint8_t *byte_compile(inst_t *instructions) {
- assert(NULL == "byte_compile not yet implemented");
- return NULL;
+/*
+ * Report how many operands an opcode takes.
+ * Returns 0, 1, 2 or 3 for the opcodes listed below and -1 for any other
+ * value.
+ */
+int inst_num_operands(opcode_t op) {
+    switch (op) {
+    // No operands
+    case NOP:
+    case RETURN:
+        return 0;
+
+    // One operand
+    case PUSH:
+    case POP:
+    case PEEK:
+    case JUMP:
+    case CALL:
+        return 1;
+
+    // Two operands
+    case NOT:
+    case MOVE:
+    case SWAP:
+    case LOAD:
+    case STORE:
+        return 2;
+
+    // Three operands
+    case ADD:
+    case SUB:
+    case MUL:
+    case DIV:
+    case AND:
+    case OR:
+    case XOR:
+    case SHIFTL:
+    case SHIFTR_L:
+    case SHIFTR_A:
+        return 3;
+
+    // Anything else is not a known opcode
+    default:
+        return -1;
+    }
+}
+
+/*
+ * Encode every instruction of a program with byte_compile_inst, producing
+ * one 64-bit word per instruction in a freshly allocated array.
+ *
+ * The returned .data comes from calloc, so the caller releases it with
+ * free(). An empty program or a failed allocation yields
+ * { .data = NULL, .num = 0 }, so .num never claims more words than .data
+ * actually holds.
+ */
+codevec_t byte_compile_prog(instvec_t instructions) {
+    codevec_t code = { .data = NULL, .num = 0 };
+
+    // Nothing to encode; also avoids the implementation-defined calloc(0, n).
+    if (instructions.num == 0) {
+        return code;
+    }
+
+    uint64_t *program = calloc(instructions.num, sizeof *program);
+    if (program == NULL) {
+        // Out of memory: report an empty vector instead of writing through NULL.
+        return code;
+    }
+
+    for (size_t i = 0; i < instructions.num; i++) {
+        program[i] = byte_compile_inst(instructions.insts[i]);
+    }
+
+    code.data = program;
+    code.num = instructions.num;
+    return code;
}
-void write_bytecode(FILE *stream, uint8_t *bytecode) {
+/*
+ * Intended to write the compiled program to `stream`.
+ * Not implemented yet: the assert below always fails when assertions are
+ * enabled; with NDEBUG defined the function returns without writing anything.
+ */
+void write_bytecode(FILE *stream, codevec_t bytecode) {
assert(NULL == "write_bytecode not yet implemented");
+ return;
}
inline token_t opcode_token(opcode_t opcode) {
diff --git a/implementations/C/src/remcc.h b/implementations/C/src/remcc.h
@@ -49,7 +49,7 @@ typedef enum {
typedef struct {
REG_TYPE type;
- uint64_t num;
+ /* Register number; the encoder in remcc.c asserts tighter limits per field
+ * (22 bits for a destination, 29 bits for a register operand). */
+ uint32_t num;
} reg_t;
typedef enum {
@@ -61,14 +61,14 @@ typedef struct {
OPER_TYPE type;
union {
reg_t reg;
- uint64_t imm;
+ uint32_t imm;
} val;
} oper_t;
typedef struct {
opcode_t opcode;
conditional_t cond;
- oper_t dest;
+ /* The destination is always a register, so it cannot hold an immediate. */
+ reg_t dest;
oper_t temp_1;
oper_t temp_2;
} inst_t;
@@ -97,6 +97,21 @@ typedef struct {
} val;
} token_t;
+/* Token array paired with a count; returned by lexer and consumed by parse.
+ * NOTE(review): `num` is presumably the number of entries in `tokens` -
+ * confirm once lexer fills it in. */
+typedef struct {
+ token_t *tokens;
+ size_t num;
+} tokenvec_t;
+
+/* Instruction array with its length: byte_compile_prog reads insts[0] up to
+ * insts[num - 1]. */
+typedef struct {
+ inst_t *insts;
+ size_t num;
+} instvec_t;
+
+/* Compiled program: `num` 64-bit words, one per instruction, in a buffer
+ * that byte_compile_prog allocates with calloc. */
+typedef struct {
+ uint64_t *data;
+ size_t num;
+} codevec_t;
+
/* Function definitions */
token_t opcode_token(opcode_t opcode);
token_t cond_token(conditional_t cond);
@@ -106,9 +121,11 @@ token_t error_token(error_t error);
+/* NOTE(review): reg_t.num and oper_t.val.imm are now uint32_t, but these two
+ * constructors still take uint64_t - confirm whether they should be narrowed
+ * or should reject values that do not fit. */
oper_t reg_operand(REG_TYPE type, uint64_t num);
oper_t imm_operand(uint64_t immediate);
-token_t *lexer(FILE *stream);
+/* The stages chain by type: lexer yields a tokenvec_t, parse turns that into
+ * an instvec_t, and byte_compile_prog turns that into a codevec_t. */
+tokenvec_t lexer(FILE *stream);
token_t lex(char *tok);
-inst_t *parse(token_t *tokens);
-uint8_t *byte_compile_prog(inst_t *instructions);
-uint8_t *byte_compile_inst(inst_t instruction);
+instvec_t parse(tokenvec_t tokens);
+codevec_t byte_compile_prog(instvec_t instructions);
+/* Encodes one instruction as a single 64-bit word. */
+uint64_t byte_compile_inst(inst_t instruction);
+
+/* Operand count for an opcode (0-3), or -1 if the opcode is not recognised. */
+int inst_num_operands(opcode_t op);
diff --git a/implementations/rust/Cargo.toml b/implementations/rust/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "remedyvm-rust"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+# Library target rooted at a non-default path (Cargo's default is src/lib.rs).
+[lib]
+name = "remedyvm"
+path = "src/lib/remedy.rs"
+test = false
+bench = false
+doc = false
+proc-macro = false
+# `crate-type` takes an array of strings; a bare string is rejected by Cargo.
+crate-type = ["lib"]
+required-features = []
diff --git a/implementations/rust/src/lib/parse.rs b/implementations/rust/src/lib/parse.rs
@@ -0,0 +1,27 @@
+/// One variant per instruction mnemonic, mirroring the opcodes of the C
+/// implementation (NOP, ADD, ... RETURN).
+// NOTE(review): `ShiftRArithmatic` is misspelled ("Arithmetic"); rename it
+// before other code starts depending on the variant name.
+enum Token {
+ Nop,
+ // Arithmetic
+ Add,
+ Sub,
+ Mul,
+ Div,
+ // Logical & bitwise
+ And,
+ Or,
+ Xor,
+ Not,
+ ShiftL,
+ ShiftRLogical,
+ ShiftRArithmatic,
+ // Memory & registers
+ Move,
+ Swap,
+ Push,
+ Pop,
+ Peek,
+ Load,
+ Store,
+ // Control flow
+ Jump,
+ Call,
+ Return,
+}
diff --git a/implementations/rust/src/lib/remedy.rs b/implementations/rust/src/lib/remedy.rs
@@ -0,0 +1 @@
+pub mod parse;
diff --git a/implementations/rust/src/main.rs b/implementations/rust/src/main.rs
@@ -0,0 +1,3 @@
+// Placeholder entry point: prints a fixed greeting and does not use the
+// library crate.
+fn main() {
+ println!("Hello, world!");
+}