remedyvm

A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

parse.rs (21576B)


      1 use std::error::Error;
      2 use std::fmt::{Display, Formatter as FmtFmt, Result as FResult};
      3 use std::fs::File;
      4 use std::io::{BufRead, BufReader};
      5 use std::str::FromStr;
      6 
      7 #[derive(Debug)]
      8 enum ParseError {
      9     TokeniseError(String),
     10     ParseError(String),
     11 }
     12 
     13 impl Display for ParseError {
     14     fn fmt(&self, f: &mut FmtFmt<'_>) -> FResult {
     15         write!(f, "[ERROR]")?;
     16         match self {
     17             Self::TokeniseError(str) => {
     18                 write!(f, " Tokenisation Error: {}", str)
     19             }
     20             Self::ParseError(str) => {
     21                 write!(f, " Parsing Error: {}", str)
     22             }
     23         }
     24     }
     25 }
     26 
     27 impl Error for ParseError {}
     28 
     29 #[derive(Copy, Clone)]
     30 enum Conditional {
     31     Eq,
     32     Neq,
     33     Gt,
     34     Geq,
     35     Lt,
     36     Leq,
     37     Z,
     38 }
     39 
     40 impl Display for Conditional {
     41     fn fmt(&self, f: &mut FmtFmt<'_>) -> FResult {
     42         match self {
     43             Conditional::Eq => write!(f, "eq"),
     44             Conditional::Neq => write!(f, "neq"),
     45             Conditional::Gt => write!(f, "gt"),
     46             Conditional::Geq => write!(f, "geq"),
     47             Conditional::Lt => write!(f, "lt"),
     48             Conditional::Leq => write!(f, "leq"),
     49             Conditional::Z => write!(f, "z"),
     50         }
     51     }
     52 }
     53 
     54 impl FromStr for Conditional {
     55     type Err = ParseError;
     56     fn from_str(s: &str) -> Result<Self, Self::Err> {
     57         match s {
     58             "eq" => Ok(Conditional::Eq),
     59             "neq" => Ok(Conditional::Neq),
     60             "gt" => Ok(Conditional::Gt),
     61             "geq" => Ok(Conditional::Geq),
     62             "lt" => Ok(Conditional::Lt),
     63             "leq" => Ok(Conditional::Leq),
     64             "z" => Ok(Conditional::Z),
     65             s => Err(ParseError::TokeniseError(format!(
     66                 "{} is not a valid conditional",
     67                 s,
     68             ))),
     69         }
     70     }
     71 }
     72 
     73 impl TryFrom<&Token> for Conditional {
     74     type Error = ParseError;
     75     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
     76         match tok {
     77             Token::Cond(c) => Ok(*c),
     78             t => Err(ParseError::ParseError(format!(
     79                 "{} is not a valid conditional",
     80                 t
     81             ))),
     82         }
     83     }
     84 }
     85 
     86 #[derive(Clone)]
     87 enum Token {
     88     // Instructions
     89     Nop,
     90     // Arithmetic
     91     Add,
     92     Sub,
     93     Mul,
     94     Div,
     95     // Logical & bit
     96     And,
     97     Or,
     98     Xor,
     99     Not,
    100     ShiftL,
    101     ShiftRLogical,
    102     ShiftRArithmetic,
    103     // Memory & registers
    104     Move,
    105     Swap,
    106     Push,
    107     Pop,
    108     Peek,
    109     Load,
    110     Store,
    111     RegImm, // Put an immediate value in a register, imm <DST> <IMM>
    112     // Control flow
    113     Jump,
    114     Call,
    115     Return,
    116     // Arguments / Immediates
    117     Cond(Conditional),
    118     SectionLabel(String),
    119     Label(String),
    120     // Numbers
    121     Int(i128),
    122     //Float(f64),
    123     // Registers
    124     Register(u128),
    125 }
    126 
    127 impl Display for Token {
    128     fn fmt(&self, f: &mut FmtFmt<'_>) -> FResult {
    129         match self {
    130             Token::Nop => write!(f, "nop"),
    131             // Arithmetic
    132             Token::Add => write!(f, "add"),
    133             Token::Sub => write!(f, "sub"),
    134             Token::Mul => write!(f, "mul"),
    135             Token::Div => write!(f, "div"),
    136             // Logical & bit
    137             Token::And => write!(f, "and"),
    138             Token::Or => write!(f, "or"),
    139             Token::Xor => write!(f, "xor"),
    140             Token::Not => write!(f, "not"),
    141             Token::ShiftL => write!(f, "shiftl"),
    142             Token::ShiftRLogical => write!(f, "shiftrl"),
    143             Token::ShiftRArithmetic => write!(f, "shiftra"),
    144             // Memory & registers
    145             Token::Move => write!(f, "move"),
    146             Token::Swap => write!(f, "swap"),
    147             Token::Push => write!(f, "push"),
    148             Token::Pop => write!(f, "pop"),
    149             Token::Peek => write!(f, "peek"),
    150             Token::Load => write!(f, "load"),
    151             Token::Store => write!(f, "store"),
    152             Token::RegImm => write!(f, "imm"),
    153             // Control flow
    154             Token::Jump => write!(f, "jump"),
    155             Token::Call => write!(f, "call"),
    156             Token::Return => write!(f, "return"),
    157             // Immediates
    158             Token::Cond(c) => write!(f, "{}", c),
    159             Token::SectionLabel(l) => write!(f, "{}:", l),
    160             Token::Label(l) => write!(f, "{}", l),
    161             // Numbers
    162             Token::Int(i) => write!(f, "{}", i),
    163             //Token::Float(fl) => write!(f, "{}", fl),
    164             // Registers
    165             Token::Register(r) => write!(f, "r{}", r),
    166         }
    167     }
    168 }
    169 
    170 // FIXME Check conditionals are all g
    171 impl FromStr for Token {
    172     type Err = ParseError;
    173     fn from_str(s: &str) -> Result<Self, Self::Err> {
    174         match Conditional::from_str(s) {
    175             Ok(c) => return Ok(Token::Cond(c)),
    176             _ => (),
    177         };
    178         match s {
    179             "nop" => Ok(Token::Nop),
    180             // Arithmetic
    181             "add" => Ok(Token::Add),
    182             "sub" => Ok(Token::Sub),
    183             "mul" => Ok(Token::Mul),
    184             "div" => Ok(Token::Div),
    185             // Logical & bit
    186             "and" => Ok(Token::And),
    187             "or" => Ok(Token::Or),
    188             "xor" => Ok(Token::Xor),
    189             "not" => Ok(Token::Not),
    190             "shiftl" => Ok(Token::ShiftL),
    191             "shiftrl" => Ok(Token::ShiftRLogical),
    192             "shiftra" => Ok(Token::ShiftRArithmetic),
    193             // Memory & registers
    194             "move" => Ok(Token::Move),
    195             "swap" => Ok(Token::Swap),
    196             "push" => Ok(Token::Push),
    197             "pop" => Ok(Token::Pop),
    198             "peek" => Ok(Token::Peek),
    199             "load" => Ok(Token::Load),
    200             "store" => Ok(Token::Store),
    201             "imm" => Ok(Token::RegImm),
    202             // Control flow
    203             "jump" => Ok(Token::Jump),
    204             "call" => Ok(Token::Call),
    205             "return" => Ok(Token::Return),
    206             _ => {
    207                 let i = s.parse::<i128>();
    208                 // FIXME: Add binary and hex int parsing
    209                 match i {
    210                     Ok(int) => return Ok(Token::Int(int)),
    211                     _ => (),
    212                 }
    213                 /*let f = s.parse::<f64>();
    214                 match f {
    215                     Ok(float) => return Ok(Token::Float(float)),
    216                     _ => (),
    217                 }*/
    218                 let mut cs = s.chars();
    219                 match cs.next() {
    220                     Some('r') => {
    221                         let rest = cs.as_str();
    222                         let reg_n = rest.parse::<u128>();
    223                         match reg_n {
    224                             Ok(n) => Ok(Token::Register(n)),
    225                             _ => Err(ParseError::TokeniseError(format!(
    226                                 "{} is not a valid register number",
    227                                 rest
    228                             ))),
    229                         }
    230                     }
    231                     _ => match cs.last() {
    232                         Some(':') => Ok(Token::SectionLabel(s.to_owned())),
    233                         _ => Ok(Token::Label(s.to_owned())),
    234                     },
    235                 }
    236             }
    237         }
    238     }
    239 }
    240 
    241 impl Token {
    242     fn tokenise_line(s: &str) -> Result<Vec<Token>, ParseError> {
    243         s.split(|c| c == ' ' || c == '\t')
    244             .map(|s| Token::from_str(s))
    245             .collect()
    246     }
    247 
    248     fn tokenise(source: &str) -> Result<Vec<Vec<Token>>, ParseError> {
    249         source.lines().map(Token::tokenise_line).collect()
    250     }
    251 
    252     fn tokenise_file(fname: &str) -> Result<Vec<Vec<Token>>, ParseError> {
    253         let file = File::open(fname).map_err(|e| ParseError::TokeniseError(e.to_string()))?;
    254         let reader = BufReader::new(file);
    255         reader
    256             .lines()
    257             .filter_map(|r| r.ok())
    258             .map(|l| Token::tokenise_line(&l))
    259             .collect()
    260     }
    261 }
    262 
    263 #[derive(Debug, Copy, Clone)]
    264 enum RegOp {
    265     Move,
    266     Swap,
    267 }
    268 
    269 impl TryFrom<&Token> for RegOp {
    270     type Error = ParseError;
    271     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    272         match tok {
    273             Token::Move => Ok(RegOp::Move),
    274             Token::Swap => Ok(RegOp::Swap),
    275             _ => Err(ParseError::ParseError(format!(
    276                 "{} is not a valid op on registers",
    277                 tok
    278             ))),
    279         }
    280     }
    281 }
    282 
    283 #[derive(Debug, Copy, Clone)]
    284 enum StackOp {
    285     Push,
    286     Pop,
    287     Peek,
    288 }
    289 
    290 impl TryFrom<&Token> for StackOp {
    291     type Error = ParseError;
    292     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    293         match tok {
    294             Token::Push => Ok(StackOp::Push),
    295             Token::Pop => Ok(StackOp::Pop),
    296             Token::Peek => Ok(StackOp::Peek),
    297             _ => Err(ParseError::ParseError(format!(
    298                 "{} is not a valid op on the stack",
    299                 tok
    300             ))),
    301         }
    302     }
    303 }
    304 
    305 #[derive(Debug, Copy, Clone)]
    306 enum MemOp {
    307     Load,
    308     Store,
    309 }
    310 
    311 impl TryFrom<&Token> for MemOp {
    312     type Error = ParseError;
    313     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    314         match tok {
    315             Token::Load => Ok(MemOp::Load),
    316             Token::Store => Ok(MemOp::Store),
    317             _ => Err(ParseError::ParseError(format!(
    318                 "{} is not a valid memory op",
    319                 tok
    320             ))),
    321         }
    322     }
    323 }
    324 
    325 #[derive(Debug, Copy, Clone)]
    326 enum ArithOp {
    327     Add,
    328     Sub,
    329     Mul,
    330     Div,
    331 }
    332 
    333 impl TryFrom<&Token> for ArithOp {
    334     type Error = ParseError;
    335     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    336         match tok {
    337             Token::Add => Ok(ArithOp::Add),
    338             Token::Sub => Ok(ArithOp::Sub),
    339             Token::Mul => Ok(ArithOp::Mul),
    340             Token::Div => Ok(ArithOp::Div),
    341             _ => Err(ParseError::ParseError(format!(
    342                 "{} is not a valid memory op",
    343                 tok
    344             ))),
    345         }
    346     }
    347 }
    348 
    349 #[derive(Debug, Copy, Clone)]
    350 enum LogOp {
    351     And,
    352     Or,
    353     Xor,
    354     ShiftL,
    355     ShiftRLogical,
    356     ShiftRArithmetic,
    357 }
    358 
    359 impl TryFrom<&Token> for LogOp {
    360     type Error = ParseError;
    361     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    362         match tok {
    363             Token::And => Ok(LogOp::And),
    364             Token::Or => Ok(LogOp::Or),
    365             Token::Xor => Ok(LogOp::Xor),
    366             Token::ShiftL => Ok(LogOp::ShiftL),
    367             Token::ShiftRLogical => Ok(LogOp::ShiftRLogical),
    368             Token::ShiftRArithmetic => Ok(LogOp::ShiftRArithmetic),
    369             _ => Err(ParseError::ParseError(format!(
    370                 "{} is not a valid memory op",
    371                 tok
    372             ))),
    373         }
    374     }
    375 }
    376 
    377 enum MemArg {
    378     Label(String),
    379     Register(u128),
    380 }
    381 
    382 impl TryFrom<&Token> for MemArg {
    383     type Error = ParseError;
    384     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    385         match tok {
    386             Token::Label(s) => Ok(MemArg::Label(s.to_owned())),
    387             Token::Register(r) => Ok(MemArg::Register(r.to_owned())),
    388             t => Err(ParseError::ParseError(format!(
    389                 "{} is not a valid memory address/argument",
    390                 t
    391             ))),
    392         }
    393     }
    394 }
    395 
    396 enum ArithArg {
    397     Int(i128),
    398     Register(u128),
    399 }
    400 
    401 impl TryFrom<&Token> for ArithArg {
    402     type Error = ParseError;
    403     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    404         match tok {
    405             Token::Int(i) => Ok(ArithArg::Int(*i)),
    406             Token::Register(r) => Ok(ArithArg::Register(*r)),
    407             t => Err(ParseError::ParseError(format!(
    408                 "{} is not a valid argument for register arithmetic",
    409                 t
    410             ))),
    411         }
    412     }
    413 }
    414 
    415 enum Immediate {
    416     Int(i128),
    417     Label(String),
    418 }
    419 
    420 impl TryFrom<&Token> for Immediate {
    421     type Error = ParseError;
    422     fn try_from(tok: &Token) -> Result<Self, Self::Error> {
    423         match tok {
    424             Token::Int(i) => Ok(Immediate::Int(*i)),
    425             Token::Label(s) => Ok(Immediate::Label(s.to_owned())),
    426             t => Err(ParseError::ParseError(format!(
    427                 "{} is not a valid immediate",
    428                 t
    429             ))),
    430         }
    431     }
    432 }
    433 
    434 enum Statement {
    435     SectionDecl(String),
    436     ImmediateDecl(String, i128),
    437     // Operations
    438     Nop,
    439     ImmRegister(Option<Conditional>, u128, Immediate),
    440     Register(RegOp, Option<Conditional>, u128, u128),
    441     Stack(StackOp, Option<Conditional>, u128),
    442     Memory(MemOp, Option<Conditional>, u128, MemArg),
    443     Arithmetic(ArithOp, Option<Conditional>, u128, ArithArg, ArithArg),
    444     Logical(LogOp, Option<Conditional>, u128, ArithArg, ArithArg),
    445     Not(Option<Conditional>, u128, u128),
    446 }
    447 
    448 impl Statement {
    449     fn get_register(t: &Token) -> Result<u128, ParseError> {
    450         match t {
    451             Token::Register(r) => Ok(*r),
    452             t => Err(ParseError::ParseError(format!(
    453                 "{} is not a valid register",
    454                 t
    455             ))),
    456         }
    457     }
    458     fn parse_line(l: Vec<Token>) -> Result<Statement, ParseError> {
    459         match l.get(0) {
    460             Some(Token::SectionLabel(s)) => Ok(Statement::SectionDecl(s.to_owned())),
    461             Some(Token::Label(s)) => match &l[1..l.len()] {
    462                 [i] => {
    463                     let i = match i {
    464                         Token::Int(n) => Ok(*n),
    465                         t => Err(ParseError::ParseError(format!(
    466                             "{} is not a valid immediate to predefine",
    467                             t
    468                         ))),
    469                     }?;
    470                     Ok(Statement::ImmediateDecl(s.to_owned(), i))
    471                 }
    472                 _ => Err(ParseError::ParseError(format!(
    473                     "Cannot construct immediate {} from given tokens",
    474                     s
    475                 ))),
    476             },
    477             // Operations
    478             Some(Token::Nop) => Ok(Statement::Nop),
    479             // ImmRegOp
    480             Some(Token::RegImm) => match &l[1..l.len()] {
    481                 [cnd, a1, a2] => {
    482                     let cond = Conditional::try_from(cnd)?;
    483 
    484                     let r = Statement::get_register(a1)?;
    485 
    486                     let imm = Immediate::try_from(a2)?;
    487 
    488                     Ok(Statement::ImmRegister(Some(cond), r, imm))
    489                 }
    490                 [a1, a2] => {
    491                     let r = Statement::get_register(a1)?;
    492 
    493                     let imm = Immediate::try_from(a2)?;
    494 
    495                     Ok(Statement::ImmRegister(None, r, imm))
    496                 }
    497                 _ => Err(ParseError::ParseError(format!(
    498                     "Incorrect number of arguments to op {}",
    499                     Token::RegImm
    500                 ))),
    501             },
    502             // RegOps
    503             Some(t @ Token::Move) | Some(t @ Token::Swap) => {
    504                 let op = RegOp::try_from(t)?;
    505                 match &l[1..l.len()] {
    506                     [cnd, a1, a2] => {
    507                         let cond = Conditional::try_from(cnd)?;
    508                         let r1 = Statement::get_register(a1)?;
    509                         let r2 = Statement::get_register(a2)?;
    510                         Ok(Statement::Register(op, Some(cond), r1, r2))
    511                     }
    512                     [a1, a2] => {
    513                         let r1 = Statement::get_register(a1)?;
    514                         let r2 = Statement::get_register(a2)?;
    515                         Ok(Statement::Register(op, None, r1, r2))
    516                     }
    517                     _ => Err(ParseError::ParseError(format!(
    518                         "Incorrect number of arguments to op {:?}",
    519                         op
    520                     ))),
    521                 }
    522             }
    523             // StackOps
    524             Some(t @ Token::Push) | Some(t @ Token::Pop) | Some(t @ Token::Peek) => {
    525                 let op = StackOp::try_from(t)?;
    526                 match &l[1..l.len()] {
    527                     [cnd, a] => {
    528                         let cond = Conditional::try_from(cnd)?;
    529                         let r = Statement::get_register(a)?;
    530                         Ok(Statement::Stack(op, Some(cond), r))
    531                     }
    532                     [a] => {
    533                         let r = Statement::get_register(a)?;
    534                         Ok(Statement::Stack(op, None, r))
    535                     }
    536                     _ => Err(ParseError::ParseError(format!(
    537                         "Incorrect number of arguments to op {:?}",
    538                         op
    539                     ))),
    540                 }
    541             }
    542             // MemoryOps
    543             Some(t @ Token::Load) | Some(t @ Token::Store) => {
    544                 let op = MemOp::try_from(t)?;
    545                 match &l[1..l.len()] {
    546                     [cnd, a1, a2] => {
    547                         let cond = Conditional::try_from(cnd)?;
    548                         let r = Statement::get_register(a1);
    549                         let m = MemArg::try_from(a2)?;
    550                         Ok(Statement::Memory(op, Some(cond), r, m))
    551                     }
    552                     [a1, a2] => {
    553                         let r = Statement::get_register(a1);
    554                         let m = MemArg::try_from(a2)?;
    555                         Ok(Statement::Memory(op, None, r, m))
    556                     }
    557                     _ => Err(ParseError::ParseError(format!(
    558                         "Incorrect number of arguments to op {:?}",
    559                         op
    560                     ))),
    561                 }
    562             }
    563             // ArithOps
    564             Some(t @ Token::Add) | Some(t @ Token::Sub) | Some(t @ Token::Mul)
    565             | Some(t @ Token::Div) => {
    566                 let op = ArithOp::try_from(t)?;
    567                 match &l[1..l.len()] {
    568                     [cnd, dst, a1, a2] => {
    569                         let cond = Conditional::try_from(cnd)?;
    570                         let dst = Statement::get_register(dst)?;
    571                         let a1 = ArithArg::try_from(a1)?;
    572                         let a2 = ArithArg::try_from(a2)?;
    573                         Ok(Statement::Arithmetic(op, Some(cond), dst, a1, a2))
    574                     }
    575                     [dst, a1, a2] => {
    576                         let dst = Statement::get_register(dst)?;
    577                         let a1 = ArithArg::try_from(a1)?;
    578                         let a2 = ArithArg::try_from(a2)?;
    579                         Ok(Statement::Arithmetic(op, None, dst, a1, a2))
    580                     }
    581                     [dst, oth] => {
    582                         let dst = Statement::get_register(dst)?;
    583                         let a1 = ArithArg::Register(dst);
    584                         let a2 = ArithArg::try_from(oth)?;
    585                         Ok(Statement::Arithmetic(op, None, dst, a1, a2))
    586                     }
    587                     _ => Err(ParseError::ParseError(format!(
    588                         "Incorrect number of arguments to op {:?}",
    589                         op
    590                     ))),
    591                 }
    592             }
    593             // LogOps
    594             Some(t @ Token::And)
    595             | Some(t @ Token::Or)
    596             | Some(t @ Token::Xor)
    597             | Some(t @ Token::ShiftL)
    598             | Some(t @ Token::ShiftRLogical)
    599             | Some(t @ Token::ShiftRArithmetic) => {
    600                 let op = LogOp::try_from(t)?;
    601                 match &l[1..l.len()] {
    602                     [cnd, dst, a1, a2] => {
    603                         let cond = Conditional::try_from(cnd)?;
    604                         let dst = Statement::get_register(dst)?;
    605                         let a1 = ArithArg::try_from(a1)?;
    606                         let a2 = ArithArg::try_from(a2)?;
    607                         Ok(Statement::Logical(op, Some(cond), dst, a1, a2))
    608                     }
    609                     [dst, a1, a2] => {
    610                         let dst = Statement::get_register(dst)?;
    611                         let a1 = ArithArg::try_from(a1)?;
    612                         let a2 = ArithArg::try_from(a2)?;
    613                         Ok(Statement::Logical(op, None, dst, a1, a2))
    614                     }
    615                     [dst, oth] => {
    616                         let dst = Statement::get_register(dst)?;
    617                         let a1 = ArithArg::Register(dst);
    618                         let a2 = ArithArg::try_from(oth)?;
    619                         Ok(Statement::Logical(op, None, dst, a1, a2))
    620                     }
    621                     _ => Err(ParseError::ParseError(format!(
    622                         "Incorrect number of arguments to op {:?}",
    623                         op
    624                     ))),
    625                 }
    626             }
    627             // Not
    628             Some(Token::Not) => match &l[1..l.len()] {
    629                 [cnd, dst, src] => {
    630                     let cond = Conditional::try_from(cnd)?;
    631                     let dst = Statement::get_register(dst)?;
    632                     let src = Statement::get_register(src)?;
    633                     Ok(Statement::Not(Some(cond), dst, src))
    634                 }
    635                 [dst, src] => {
    636                     let dst = Statement::get_register(dst)?;
    637                     let src = Statement::get_register(src)?;
    638                     Ok(Statement::Not(None, dst, src))
    639                 }
    640                 [src] => {
    641                     let src = Statement::get_register(src)?;
    642                     Ok(Statement::Not(None, src, src))
    643                 }
    644                 _ => Err(ParseError::ParseError(format!(
    645                     "Incorrect number of arguments to op {}",
    646                     Token::Not
    647                 ))),
    648             },
    649             Some(t) => Err(ParseError::ParseError(format!(
    650                 "{} is not a valid token to have at the start of a statement",
    651                 t
    652             ))),
    653             None => Err(ParseError::ParseError(
    654                 "Empty token line given, no valid statement to parse".to_owned(),
    655             )),
    656         }
    657     }
    658 
    659     fn parse(ls: Vec<Vec<Token>>) -> Result<Vec<Statement>, ParseError> {
    660         ls.into_iter().map(Statement::parse_line).collect()
    661     }
    662 }