remedyvm

A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

commit 10ad44f7ea9e892f13cd32cd7eaa7728372f2e7d
parent 95ab7b1fefc308baeacaeda507c3ac1af1a5c0fd
Author: Ethan Long <ethandavidlong@gmail.com>
Date:   Sat, 15 Jun 2024 20:04:34 +1000

Began work on implementing the assembler in Rust

I had already made some progress in C, but I feel there will be less
hassle if I do this in Rust, even if it is a little slower.

Diffstat:
ACargo.lock | 7+++++++
MCargo.toml | 2+-
Csrc/lib/remedy.rs -> src/lib/asm/mod.rs | 0
Asrc/lib/asm/parse.rs | 662+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/lib/parse.rs | 27---------------------------
Msrc/lib/remedy.rs | 2+-
6 files changed, 671 insertions(+), 29 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "remedyvm-rust" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml @@ -14,5 +14,5 @@ test = false bench = false doc = false proc-macro = false -crate-type = "lib" +crate-type = ["lib"] required-features = [] diff --git a/src/lib/remedy.rs b/src/lib/asm/mod.rs diff --git a/src/lib/asm/parse.rs b/src/lib/asm/parse.rs @@ -0,0 +1,662 @@ +use std::error::Error; +use std::fmt::{Display, Formatter as FmtFmt, Result as FResult}; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::str::FromStr; + +#[derive(Debug)] +enum ParseError { + TokeniseError(String), + ParseError(String), +} + +impl Display for ParseError { + fn fmt(&self, f: &mut FmtFmt<'_>) -> FResult { + write!(f, "[ERROR]")?; + match self { + Self::TokeniseError(str) => { + write!(f, " Tokenisation Error: {}", str) + } + Self::ParseError(str) => { + write!(f, " Parsing Error: {}", str) + } + } + } +} + +impl Error for ParseError {} + +#[derive(Copy, Clone)] +enum Conditional { + Eq, + Neq, + Gt, + Geq, + Lt, + Leq, + Z, +} + +impl Display for Conditional { + fn fmt(&self, f: &mut FmtFmt<'_>) -> FResult { + match self { + Conditional::Eq => write!(f, "eq"), + Conditional::Neq => write!(f, "neq"), + Conditional::Gt => write!(f, "gt"), + Conditional::Geq => write!(f, "geq"), + Conditional::Lt => write!(f, "lt"), + Conditional::Leq => write!(f, "leq"), + Conditional::Z => write!(f, "z"), + } + } +} + +impl FromStr for Conditional { + type Err = ParseError; + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "eq" => Ok(Conditional::Eq), + "neq" => Ok(Conditional::Neq), + "gt" => Ok(Conditional::Gt), + "geq" => Ok(Conditional::Geq), + "lt" => Ok(Conditional::Lt), + "leq" => Ok(Conditional::Leq), + "z" => Ok(Conditional::Z), + s => Err(ParseError::TokeniseError(format!( + "{} is not a valid conditional", + s, 
+ ))), + } + } +} + +impl TryFrom<&Token> for Conditional { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Cond(c) => Ok(*c), + t => Err(ParseError::ParseError(format!( + "{} is not a valid conditional", + t + ))), + } + } +} + +#[derive(Clone)] +enum Token { + // Instructions + Nop, + // Arithmetic + Add, + Sub, + Mul, + Div, + // Logical & bit + And, + Or, + Xor, + Not, + ShiftL, + ShiftRLogical, + ShiftRArithmetic, + // Memory & registers + Move, + Swap, + Push, + Pop, + Peek, + Load, + Store, + RegImm, // Put an immediate value in a register, imm <DST> <IMM> + // Control flow + Jump, + Call, + Return, + // Arguments / Immediates + Cond(Conditional), + SectionLabel(String), + Label(String), + // Numbers + Int(i128), + //Float(f64), + // Registers + Register(u128), +} + +impl Display for Token { + fn fmt(&self, f: &mut FmtFmt<'_>) -> FResult { + match self { + Token::Nop => write!(f, "nop"), + // Arithmetic + Token::Add => write!(f, "add"), + Token::Sub => write!(f, "sub"), + Token::Mul => write!(f, "mul"), + Token::Div => write!(f, "div"), + // Logical & bit + Token::And => write!(f, "and"), + Token::Or => write!(f, "or"), + Token::Xor => write!(f, "xor"), + Token::Not => write!(f, "not"), + Token::ShiftL => write!(f, "shiftl"), + Token::ShiftRLogical => write!(f, "shiftrl"), + Token::ShiftRArithmetic => write!(f, "shiftra"), + // Memory & registers + Token::Move => write!(f, "move"), + Token::Swap => write!(f, "swap"), + Token::Push => write!(f, "push"), + Token::Pop => write!(f, "pop"), + Token::Peek => write!(f, "peek"), + Token::Load => write!(f, "load"), + Token::Store => write!(f, "store"), + Token::RegImm => write!(f, "imm"), + // Control flow + Token::Jump => write!(f, "jump"), + Token::Call => write!(f, "call"), + Token::Return => write!(f, "return"), + // Immediates + Token::Cond(c) => write!(f, "{}", c), + Token::SectionLabel(l) => write!(f, "{}:", l), + Token::Label(l) => write!(f, "{}", 
l), + // Numbers + Token::Int(i) => write!(f, "{}", i), + //Token::Float(fl) => write!(f, "{}", fl), + // Registers + Token::Register(r) => write!(f, "r{}", r), + } + } +} + +// FIXME Check conditionals are all g +impl FromStr for Token { + type Err = ParseError; + fn from_str(s: &str) -> Result<Self, Self::Err> { + match Conditional::from_str(s) { + Ok(c) => return Ok(Token::Cond(c)), + _ => (), + }; + match s { + "nop" => Ok(Token::Nop), + // Arithmetic + "add" => Ok(Token::Add), + "sub" => Ok(Token::Sub), + "mul" => Ok(Token::Mul), + "div" => Ok(Token::Div), + // Logical & bit + "and" => Ok(Token::And), + "or" => Ok(Token::Or), + "xor" => Ok(Token::Xor), + "not" => Ok(Token::Not), + "shiftl" => Ok(Token::ShiftL), + "shiftrl" => Ok(Token::ShiftRLogical), + "shiftra" => Ok(Token::ShiftRArithmetic), + // Memory & registers + "move" => Ok(Token::Move), + "swap" => Ok(Token::Swap), + "push" => Ok(Token::Push), + "pop" => Ok(Token::Pop), + "peek" => Ok(Token::Peek), + "load" => Ok(Token::Load), + "store" => Ok(Token::Store), + "imm" => Ok(Token::RegImm), + // Control flow + "jump" => Ok(Token::Jump), + "call" => Ok(Token::Call), + "return" => Ok(Token::Return), + _ => { + let i = s.parse::<i128>(); + // FIXME: Add binary and hex int parsing + match i { + Ok(int) => return Ok(Token::Int(int)), + _ => (), + } + /*let f = s.parse::<f64>(); + match f { + Ok(float) => return Ok(Token::Float(float)), + _ => (), + }*/ + let mut cs = s.chars(); + match cs.next() { + Some('r') => { + let rest = cs.as_str(); + let reg_n = rest.parse::<u128>(); + match reg_n { + Ok(n) => Ok(Token::Register(n)), + _ => Err(ParseError::TokeniseError(format!( + "{} is not a valid register number", + rest + ))), + } + } + _ => match cs.last() { + Some(':') => Ok(Token::SectionLabel(s.to_owned())), + _ => Ok(Token::Label(s.to_owned())), + }, + } + } + } + } +} + +impl Token { + fn tokenise_line(s: &str) -> Result<Vec<Token>, ParseError> { + s.split(|c| c == ' ' || c == '\t') + .map(|s| 
Token::from_str(s)) + .collect() + } + + fn tokenise(source: &str) -> Result<Vec<Vec<Token>>, ParseError> { + source.lines().map(Token::tokenise_line).collect() + } + + fn tokenise_file(fname: &str) -> Result<Vec<Vec<Token>>, ParseError> { + let file = File::open(fname).map_err(|e| ParseError::TokeniseError(e.to_string()))?; + let reader = BufReader::new(file); + reader + .lines() + .filter_map(|r| r.ok()) + .map(|l| Token::tokenise_line(&l)) + .collect() + } +} + +#[derive(Debug, Copy, Clone)] +enum RegOp { + Move, + Swap, +} + +impl TryFrom<&Token> for RegOp { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Move => Ok(RegOp::Move), + Token::Swap => Ok(RegOp::Swap), + _ => Err(ParseError::ParseError(format!( + "{} is not a valid op on registers", + tok + ))), + } + } +} + +#[derive(Debug, Copy, Clone)] +enum StackOp { + Push, + Pop, + Peek, +} + +impl TryFrom<&Token> for StackOp { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Push => Ok(StackOp::Push), + Token::Pop => Ok(StackOp::Pop), + Token::Peek => Ok(StackOp::Peek), + _ => Err(ParseError::ParseError(format!( + "{} is not a valid op on the stack", + tok + ))), + } + } +} + +#[derive(Debug, Copy, Clone)] +enum MemOp { + Load, + Store, +} + +impl TryFrom<&Token> for MemOp { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Load => Ok(MemOp::Load), + Token::Store => Ok(MemOp::Store), + _ => Err(ParseError::ParseError(format!( + "{} is not a valid memory op", + tok + ))), + } + } +} + +#[derive(Debug, Copy, Clone)] +enum ArithOp { + Add, + Sub, + Mul, + Div, +} + +impl TryFrom<&Token> for ArithOp { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Add => Ok(ArithOp::Add), + Token::Sub => Ok(ArithOp::Sub), + Token::Mul => Ok(ArithOp::Mul), + Token::Div => Ok(ArithOp::Div), + _ => 
Err(ParseError::ParseError(format!( + "{} is not a valid memory op", + tok + ))), + } + } +} + +#[derive(Debug, Copy, Clone)] +enum LogOp { + And, + Or, + Xor, + ShiftL, + ShiftRLogical, + ShiftRArithmetic, +} + +impl TryFrom<&Token> for LogOp { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::And => Ok(LogOp::And), + Token::Or => Ok(LogOp::Or), + Token::Xor => Ok(LogOp::Xor), + Token::ShiftL => Ok(LogOp::ShiftL), + Token::ShiftRLogical => Ok(LogOp::ShiftRLogical), + Token::ShiftRArithmetic => Ok(LogOp::ShiftRArithmetic), + _ => Err(ParseError::ParseError(format!( + "{} is not a valid memory op", + tok + ))), + } + } +} + +enum MemArg { + Label(String), + Register(u128), +} + +impl TryFrom<&Token> for MemArg { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Label(s) => Ok(MemArg::Label(s.to_owned())), + Token::Register(r) => Ok(MemArg::Register(r.to_owned())), + t => Err(ParseError::ParseError(format!( + "{} is not a valid memory address/argument", + t + ))), + } + } +} + +enum ArithArg { + Int(i128), + Register(u128), +} + +impl TryFrom<&Token> for ArithArg { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Int(i) => Ok(ArithArg::Int(*i)), + Token::Register(r) => Ok(ArithArg::Register(*r)), + t => Err(ParseError::ParseError(format!( + "{} is not a valid argument for register arithmetic", + t + ))), + } + } +} + +enum Immediate { + Int(i128), + Label(String), +} + +impl TryFrom<&Token> for Immediate { + type Error = ParseError; + fn try_from(tok: &Token) -> Result<Self, Self::Error> { + match tok { + Token::Int(i) => Ok(Immediate::Int(*i)), + Token::Label(s) => Ok(Immediate::Label(s.to_owned())), + t => Err(ParseError::ParseError(format!( + "{} is not a valid immediate", + t + ))), + } + } +} + +enum Statement { + SectionDecl(String), + ImmediateDecl(String, i128), + // Operations + Nop, + 
ImmRegister(Option<Conditional>, u128, Immediate), + Register(RegOp, Option<Conditional>, u128, u128), + Stack(StackOp, Option<Conditional>, u128), + Memory(MemOp, Option<Conditional>, u128, MemArg), + Arithmetic(ArithOp, Option<Conditional>, u128, ArithArg, ArithArg), + Logical(LogOp, Option<Conditional>, u128, ArithArg, ArithArg), + Not(Option<Conditional>, u128, u128), +} + +impl Statement { + fn get_register(t: &Token) -> Result<u128, ParseError> { + match t { + Token::Register(r) => Ok(*r), + t => Err(ParseError::ParseError(format!( + "{} is not a valid register", + t + ))), + } + } + fn parse_line(l: Vec<Token>) -> Result<Statement, ParseError> { + match l.get(0) { + Some(Token::SectionLabel(s)) => Ok(Statement::SectionDecl(s.to_owned())), + Some(Token::Label(s)) => match &l[1..l.len()] { + [i] => { + let i = match i { + Token::Int(n) => Ok(*n), + t => Err(ParseError::ParseError(format!( + "{} is not a valid immediate to predefine", + t + ))), + }?; + Ok(Statement::ImmediateDecl(s.to_owned(), i)) + } + _ => Err(ParseError::ParseError(format!( + "Cannot construct immediate {} from given tokens", + s + ))), + }, + // Operations + Some(Token::Nop) => Ok(Statement::Nop), + // ImmRegOp + Some(Token::RegImm) => match &l[1..l.len()] { + [cnd, a1, a2] => { + let cond = Conditional::try_from(cnd)?; + + let r = Statement::get_register(a1)?; + + let imm = Immediate::try_from(a2)?; + + Ok(Statement::ImmRegister(Some(cond), r, imm)) + } + [a1, a2] => { + let r = Statement::get_register(a1)?; + + let imm = Immediate::try_from(a2)?; + + Ok(Statement::ImmRegister(None, r, imm)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {}", + Token::RegImm + ))), + }, + // RegOps + Some(t @ Token::Move) | Some(t @ Token::Swap) => { + let op = RegOp::try_from(t)?; + match &l[1..l.len()] { + [cnd, a1, a2] => { + let cond = Conditional::try_from(cnd)?; + let r1 = Statement::get_register(a1)?; + let r2 = Statement::get_register(a2)?; + 
Ok(Statement::Register(op, Some(cond), r1, r2)) + } + [a1, a2] => { + let r1 = Statement::get_register(a1)?; + let r2 = Statement::get_register(a2)?; + Ok(Statement::Register(op, None, r1, r2)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {:?}", + op + ))), + } + } + // StackOps + Some(t @ Token::Push) | Some(t @ Token::Pop) | Some(t @ Token::Peek) => { + let op = StackOp::try_from(t)?; + match &l[1..l.len()] { + [cnd, a] => { + let cond = Conditional::try_from(cnd)?; + let r = Statement::get_register(a)?; + Ok(Statement::Stack(op, Some(cond), r)) + } + [a] => { + let r = Statement::get_register(a)?; + Ok(Statement::Stack(op, None, r)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {:?}", + op + ))), + } + } + // MemoryOps + Some(t @ Token::Load) | Some(t @ Token::Store) => { + let op = MemOp::try_from(t)?; + match &l[1..l.len()] { + [cnd, a1, a2] => { + let cond = Conditional::try_from(cnd)?; + let r = Statement::get_register(a1); + let m = MemArg::try_from(a2)?; + Ok(Statement::Memory(op, Some(cond), r, m)) + } + [a1, a2] => { + let r = Statement::get_register(a1); + let m = MemArg::try_from(a2)?; + Ok(Statement::Memory(op, None, r, m)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {:?}", + op + ))), + } + } + // ArithOps + Some(t @ Token::Add) | Some(t @ Token::Sub) | Some(t @ Token::Mul) + | Some(t @ Token::Div) => { + let op = ArithOp::try_from(t)?; + match &l[1..l.len()] { + [cnd, dst, a1, a2] => { + let cond = Conditional::try_from(cnd)?; + let dst = Statement::get_register(dst)?; + let a1 = ArithArg::try_from(a1)?; + let a2 = ArithArg::try_from(a2)?; + Ok(Statement::Arithmetic(op, Some(cond), dst, a1, a2)) + } + [dst, a1, a2] => { + let dst = Statement::get_register(dst)?; + let a1 = ArithArg::try_from(a1)?; + let a2 = ArithArg::try_from(a2)?; + Ok(Statement::Arithmetic(op, None, dst, a1, a2)) + } + [dst, oth] => { + let dst = 
Statement::get_register(dst)?; + let a1 = ArithArg::Register(dst); + let a2 = ArithArg::try_from(oth)?; + Ok(Statement::Arithmetic(op, None, dst, a1, a2)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {:?}", + op + ))), + } + } + // LogOps + Some(t @ Token::And) + | Some(t @ Token::Or) + | Some(t @ Token::Xor) + | Some(t @ Token::ShiftL) + | Some(t @ Token::ShiftRLogical) + | Some(t @ Token::ShiftRArithmetic) => { + let op = LogOp::try_from(t)?; + match &l[1..l.len()] { + [cnd, dst, a1, a2] => { + let cond = Conditional::try_from(cnd)?; + let dst = Statement::get_register(dst)?; + let a1 = ArithArg::try_from(a1)?; + let a2 = ArithArg::try_from(a2)?; + Ok(Statement::Logical(op, Some(cond), dst, a1, a2)) + } + [dst, a1, a2] => { + let dst = Statement::get_register(dst)?; + let a1 = ArithArg::try_from(a1)?; + let a2 = ArithArg::try_from(a2)?; + Ok(Statement::Logical(op, None, dst, a1, a2)) + } + [dst, oth] => { + let dst = Statement::get_register(dst)?; + let a1 = ArithArg::Register(dst); + let a2 = ArithArg::try_from(oth)?; + Ok(Statement::Logical(op, None, dst, a1, a2)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {:?}", + op + ))), + } + } + // Not + Some(Token::Not) => match &l[1..l.len()] { + [cnd, dst, src] => { + let cond = Conditional::try_from(cnd)?; + let dst = Statement::get_register(dst)?; + let src = Statement::get_register(src)?; + Ok(Statement::Not(Some(cond), dst, src)) + } + [dst, src] => { + let dst = Statement::get_register(dst)?; + let src = Statement::get_register(src)?; + Ok(Statement::Not(None, dst, src)) + } + [src] => { + let src = Statement::get_register(src)?; + Ok(Statement::Not(None, src, src)) + } + _ => Err(ParseError::ParseError(format!( + "Incorrect number of arguments to op {}", + Token::Not + ))), + }, + Some(t) => Err(ParseError::ParseError(format!( + "{} is not a valid token to have at the start of a statement", + t + ))), + None => 
Err(ParseError::ParseError( + "Empty token line given, no valid statement to parse".to_owned(), + )), + } + } + + fn parse(ls: Vec<Vec<Token>>) -> Result<Vec<Statement>, ParseError> { + ls.into_iter().map(Statement::parse_line).collect() + } +} diff --git a/src/lib/parse.rs b/src/lib/parse.rs @@ -1,27 +0,0 @@ -enum Token { - Nop, - // Arithmetic - Add, - Sub, - Mul, - Div, // Logical & bit - And, - Or, - Xor, - Not, - ShiftL, - ShiftRLogical, - ShiftRArithmatic, - // Memory & registers - Move, - Swap, - Push, - Pop, - Peek, - Load, - Store, - // Control flow - Jump, - Call, - Return, -} diff --git a/src/lib/remedy.rs b/src/lib/remedy.rs @@ -1 +1 @@ -pub mod parse; +pub mod asm;