bruh - remedyvm - A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'

commit 8c54354b4a11a01b6a9b6078bdbaa39317452feb
parent d895fb0ec2da821721e23ce508c0ee2165fc3ec1
Author: Ethan Long <edl@disroot.org>
Date:   Sun, 18 Jun 2023 00:22:51 -0600

bruh

Diffstat:
M implementations/C/src/remcc.c  | 296 +++++++++++++++++++++----------------------------------------------------------
A implementations/C/src/remcc.h  | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D implementations/C/tests/lextest.c  | 105 -------------------------------------------------------------------------------

3 files changed, 190 insertions(+), 325 deletions(-)
diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c
@@ -1,118 +1,19 @@
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
-/* Data types */
-typedef enum {
-  NOP = 0,
-  // Arithmetic
-  ADD,
-  SUB,
-  MUL,
-  DIV,
-  // Logical & bit
-  AND,
-  OR,
-  XOR,
-  NOT,
-  SHIFTL,
-  SHIFTR_L,
-  SHIFTR_A,
-  // Memory & registers
-  MOVE,
-  SWAP,
-  PUSH,
-  POP,
-  PEEK,
-  LOAD,
-  STORE,
-  // Control flow
-  JUMP,
-  CALL,
-  RETURN
-} opcode_t;
-
-typedef enum {
-  GT = 0,
-  GEQ,
-  LT,
-  LEQ,
-  EQ,
-  NEQ,
-  POS,
-  NEG
-} conditional_t;
-
-typedef enum {
-  ARG,
-  TEMP,
-  RET
-} REG_TYPE;
-
-typedef struct {
-  REG_TYPE type;
-  uint64_t num;
-} reg_t;
-
-typedef enum {
-  REG,
-  IMM
-} OPER_TYPE;
-
-typedef struct {
-  OPER_TYPE type;
-  union {
-    reg_t reg;
-    uint64_t imm;
-  } val;
-} oper_t;
-
-typedef struct {
-  opcode_t opcode;
-  conditional_t cond;
-  oper_t dest;
-  oper_t temp_1;
-  oper_t temp_2;
-} inst_t;
-
-// The maximum number of characters representing a token
-#define MAX_TOK 256
-
-typedef enum {
-  OPCODE,
-  COND,
-  OPER,
-  ERR
-} TOKEN_TYPE;
-
-typedef enum {
-  LEX_ERROR
-} error_t;
-
-typedef struct {
-  TOKEN_TYPE type;
-  union {
-    opcode_t opcode;
-    conditional_t cond;
-    oper_t operand;
-    error_t error;
-  } val;
-} token_t;
+#include "remcc.h"
+
 
 /* Function prototypes */
 int usage(char *arg0);
-token_t *lexer(FILE *stream);
-token_t lex(char *tok);
-inst_t *parse(token_t *tokens);
-uint8_t *byte_compile(inst_t *instructions);
 void write_bytecode(FILE *stream, uint8_t *bytecode);
 
 /* Implementation: */
-/*int main(int argc, char **argv) {
+int main(int argc, char **argv) {
   char *input_fname = NULL, *output_fname = NULL;
   FILE *input_f = NULL, *output_f = NULL;
   token_t *prog_tokens = NULL;
@@ -141,12 +42,12 @@ void write_bytecode(FILE *stream, uint8_t *bytecode);
 
   prog_tokens = lexer(input_f);
   prog_insts = parse(prog_tokens);
-  prog_bytecode = byte_compile(prog_insts);
+  prog_bytecode = byte_compile_prog(prog_insts);
 
   write_bytecode(output_f, prog_bytecode);
 
   return 0;
-  }*/
+}
 
 int usage(char *arg0) {
   fprintf(stderr, "Usage: %s input.rasm output.rin\n", arg0);
@@ -191,14 +92,56 @@ void write_bytecode(FILE *stream, uint8_t *bytecode) {
   assert(NULL == "write_bytecode not yet implemented");
 }
 
+inline token_t opcode_token(opcode_t opcode) {
+  return (token_t) {
+    .type = OPCODE,
+    .val.opcode = opcode
+  };
+}
+
+inline token_t cond_token(conditional_t cond) {
+  return (token_t) {
+    .type = COND,
+    .val.cond = cond
+  };
+}
+
+inline token_t oper_token(oper_t oper) {
+  return (token_t) {
+    .type = OPER,
+    .val.operand = oper
+  };
+}
+
+inline token_t error_token(error_t error) {
+  return (token_t) {
+    .type = ERR,
+    .val.error = error
+  };
+}
+
+inline oper_t reg_operand(REG_TYPE type, uint64_t num) {
+  return (oper_t) {
+    .type = REG,
+    .val.reg = {
+      .type = type,
+      .num = num
+    }
+  };
+}
+
+inline oper_t imm_operand(uint64_t immediate) {
+  return (oper_t) {
+    .type = IMM,
+    .val.imm = immediate
+  };
+}
+
 // My homemade lexer, it's a bit filthy but it'll do for now
 token_t lex(char *tok_str) {
   // TODO: Operands
   // TODO: Case-Insensitive
-  token_t err = {
-    .type = ERR,
-    .val.error = LEX_ERROR,
-  };
+  token_t err = error_token(LEX_ERROR);
 
   switch (tok_str[0]) {
   case 'a':
@@ -207,146 +150,83 @@ token_t lex(char *tok_str) {
       if (errno) {
 	break;
       }
-      return (token_t) {
-	.type = OPER,
-	.val.operand = {
-	  .type = REG,
-	  .val.reg = {
-	    .type = ARG,
-	    .num = n
-	  }
-	}
-      };
+      return oper_token(reg_operand(ARG, n));
     } else if (strcmp(tok_str + 1, "dd") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = ADD
-      };
+      return opcode_token(ADD);
     } else if (strcmp(tok_str + 1, "nd") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = AND
-      };
+      return opcode_token(AND);
     }
     break;
 
   case 'c':
     // The only C instruction is call
     if (strcmp(tok_str + 1, "all") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = CALL
-      };
+      return opcode_token(CALL);
     }
     break;
 
   case 'd':
     // The only D instruction is div
     if (strcmp(tok_str + 1, "iv") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = DIV
-      };
+      return opcode_token(DIV);
     }
     break;
 
   case 'e':
     if (strcmp(tok_str + 1, "q") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = EQ
-      };
+      return cond_token(EQ);
     }
     break;
 
   case 'g':
     if (strcmp(tok_str + 1, "t") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = GT
-      };
+      return cond_token(GT);
     } else if (strcmp(tok_str + 1, "eq") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = GEQ
-      };
+      return cond_token(GEQ);
     }
     break;
 
   case 'j':
     // The only J instruction is jump
     if (strcmp(tok_str + 1, "ump") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = JUMP
-      };
+      return opcode_token(JUMP);
     }
     break;
     
   case 'l':
     // The only L instruction is load
     if (strcmp(tok_str + 1, "t") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = LT
-      };
+      return cond_token(LT);
     } else if (strcmp(tok_str + 1, "eq") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = LEQ
-      };
+      return cond_token(LEQ);
     } else if (strcmp(tok_str + 1, "oad") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = LOAD
-      };
+      return opcode_token(LOAD);
     }
     break;
 
   case 'm':
     if (strcmp(tok_str + 1, "ul") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = MUL
-      };
+      return opcode_token(MUL);
     } else if (strcmp(tok_str + 1, "ove") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = MOVE
-      };
+      return opcode_token(MOVE);
     }
     break;
     
   case 'n':
     if (strcmp(tok_str + 1, "op") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = NOP
-      };
+      return opcode_token(NOP);
     } else if (strcmp(tok_str + 1, "ot") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = NOT
-      };
+      return opcode_token(NOT);
     } else if (strcmp(tok_str + 1, "eq") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = NEQ
-      };
+      return cond_token(NEQ);
     } else if (strcmp(tok_str + 1, "eg") == 0) {
-      return (token_t) {
-	.type = COND,
-	.val.cond = NEG
-      };
+      return cond_token(NEG);
     }
     break;
     
   case 'o':
     if (strcmp(tok_str + 1, "r") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = OR
-      };
+      return opcode_token(OR);
     }
     break;
     
@@ -354,33 +234,21 @@ token_t lex(char *tok_str) {
     switch (tok_str[1]) {
     case 'u':
       if (strcmp(tok_str + 2, "sh") == 0) {
-	return (token_t) {
-	  .type = OPCODE,
-	  .val.opcode = PUSH
-	};
+	return opcode_token(PUSH);
       }
       break;
       
     case 'o':
       if (strcmp(tok_str + 2, "p") == 0) {
-	return (token_t) {
-	  .type = OPCODE,
-	  .val.opcode = POP
-	};
+	return opcode_token(POP);
       } else if (strcmp(tok_str + 2, "s") == 0) {
-	return (token_t) {
-	  .type = COND,
-	  .val.cond = POS
-	};
+	return cond_token(POS);
       }
       break;
 
     case 'e':
       if (strcmp(tok_str + 2, "ek") == 0) {
-	return (token_t) {
-	  .type = OPCODE,
-	  .val.opcode = PEEK
-	};
+	return opcode_token(PEEK);
       }
       break;
 
@@ -391,25 +259,13 @@ token_t lex(char *tok_str) {
     
   case 'r':
     if (strcmp(tok_str + 1, "eturn") == 0) {
-      return (token_t) {
-	.type = OPCODE,
-	.val.opcode = RETURN
-      };
+      return opcode_token(RETURN);
     } else if (isdigit(tok_str[1])) {
       unsigned long n = strtoul(tok_str + 1, NULL, 10);
       if (errno) {
 	break;
       }
-      return (token_t) {
-	.type = OPER,
-	.val.operand = {
-	  .type = REG,
-	  .val.reg = {
-	    .type = RET,
-	    .num = n
-	  }
-	}
-      };
+      return oper_token(reg_operand(RET, n));
     }
     break;
     
diff --git a/implementations/C/src/remcc.h b/implementations/C/src/remcc.h
@@ -0,0 +1,114 @@
+#include <stdint.h>
+
+/* Data types */
+typedef enum {
+  NOP = 0,
+  // Arithmetic
+  ADD,
+  SUB,
+  MUL,
+  DIV,
+  // Logical & bit
+  AND,
+  OR,
+  XOR,
+  NOT,
+  SHIFTL,
+  SHIFTR_L,
+  SHIFTR_A,
+  // Memory & registers
+  MOVE,
+  SWAP,
+  PUSH,
+  POP,
+  PEEK,
+  LOAD,
+  STORE,
+  // Control flow
+  JUMP,
+  CALL,
+  RETURN
+} opcode_t;
+
+typedef enum {
+  GT = 0,
+  GEQ,
+  LT,
+  LEQ,
+  EQ,
+  NEQ,
+  POS,
+  NEG
+} conditional_t;
+
+typedef enum {
+  ARG,
+  TEMP,
+  RET
+} REG_TYPE;
+
+typedef struct {
+  REG_TYPE type;
+  uint64_t num;
+} reg_t;
+
+typedef enum {
+  REG,
+  IMM
+} OPER_TYPE;
+
+typedef struct {
+  OPER_TYPE type;
+  union {
+    reg_t reg;
+    uint64_t imm;
+  } val;
+} oper_t;
+
+typedef struct {
+  opcode_t opcode;
+  conditional_t cond;
+  oper_t dest;
+  oper_t temp_1;
+  oper_t temp_2;
+} inst_t;
+
+// The maximum number of characters representing a token
+#define MAX_TOK 256
+
+typedef enum {
+  OPCODE,
+  COND,
+  OPER,
+  ERR
+} TOKEN_TYPE;
+
+typedef enum {
+  LEX_ERROR
+} error_t;
+
+typedef struct {
+  TOKEN_TYPE type;
+  union {
+    opcode_t opcode;
+    conditional_t cond;
+    oper_t operand;
+    error_t error;
+  } val;
+} token_t;
+
+/* Function declarations */
+token_t opcode_token(opcode_t opcode);
+token_t cond_token(conditional_t cond);
+token_t oper_token(oper_t oper);
+token_t error_token(error_t error);
+
+oper_t reg_operand(REG_TYPE type, uint64_t num);
+oper_t imm_operand(uint64_t immediate);
+
+token_t *lexer(FILE *stream);
+token_t lex(char *tok);
+inst_t *parse(token_t *tokens);
+uint8_t *byte_compile_prog(inst_t *instructions);
+uint8_t *byte_compile_inst(inst_t instruction);
+
diff --git a/implementations/C/tests/lextest.c b/implementations/C/tests/lextest.c
@@ -1,105 +0,0 @@
-// Test if the assembler lexer is functioning as expected
-#import "../src/remcc.c"
-
-typedef enum {
-  PASS,
-  FAIL
-} RESULT;
-
-typedef struct {
-  RESULT state;
-  union {
-    char *result;
-    char *error;
-  } val;
-} result_t;
-
-result_t test_lex(char *tok, opcode_t expect);
-result_t test_lexer(FILE *stream);
-
-int main(int argc, char **argv) {
-  result_t result;
-
-  char *lex_tests[] = {
-    // Instructions tests
-    "nop",
-    "add",
-    "sub",
-    "mul",
-    "div",
-    "and",
-    "or",
-    "xor",
-    "not",
-    "shiftl",
-    "shiftr(l)",
-    "shiftr(a)",
-    "move",
-    "swap",
-    "push",
-    "pop",
-    "peek",
-    "load",
-    "store",
-    "jump",
-    "call",
-    "return",
-    NULL
-  };
-  opcode_t lex_expects[] = {
-    NOP,
-    ADD,
-    SUB,
-    MUL,
-    DIV,
-    AND,
-    OR,
-    XOR,
-    NOT,
-    SHIFTL,
-    SHIFTR_L,
-    SHIFTR_A,
-    MOVE,
-    SWAP,
-    PUSH,
-    POP,
-    PEEK,
-    LOAD,
-    STORE,
-    JUMP,
-    CALL,
-    RETURN
-  };
-
-  //FILE *stream_tests[] = {0};
-
-  for (int i = 0; lex_tests[i] != NULL; i++) {
-    switch ((result = test_lex(lex_tests[i], lex_expects[i])).state) {
-    case PASS:
-      printf("We have a success!\n");
-      printf("Result: %s\n", result.val.result);
-      break;
-    case FAIL:
-      fprintf(stderr, "Dumbledore dies\n");
-      fprintf(stderr, "Error: %s\n", result.val.error);
-      break;
-    }
-  }
-  //test_lexer();
-  return 0;
-}
-
-result_t test_lex(char *tok, opcode_t expect) {
-  token_t res = lex(tok);
-  if (res.type == OPCODE && res.val.opcode == expect) {
-    return (result_t) {
-      .state = PASS,
-      .val.result = "Success!"
-    };
-  } else {
-    return (result_t) {
-      .state = FAIL,
-      .val.error = "Didn't get what we expected!"
-    };
-  }
-}

	remedyvm A toy RISC virtual machine inspired by Bell Labs' `dis' and Tsoding's `bm'
	git clone git://git.ethandl.dev/remedyvm
	Log \| Files \| Refs

M	implementations/C/src/remcc.c	\|	296	+++++++++++++++++++++----------------------------------------------------------
A	implementations/C/src/remcc.h	\|	114	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
D	implementations/C/tests/lextest.c	\|	105	-------------------------------------------------------------------------------