commit 8c54354b4a11a01b6a9b6078bdbaa39317452feb
parent d895fb0ec2da821721e23ce508c0ee2165fc3ec1
Author: Ethan Long <edl@disroot.org>
Date: Sun, 18 Jun 2023 00:22:51 -0600
bruh
Diffstat:
3 files changed, 190 insertions(+), 325 deletions(-)
diff --git a/implementations/C/src/remcc.c b/implementations/C/src/remcc.c
@@ -1,118 +1,19 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
-#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-/* Data types */
-typedef enum {
- NOP = 0,
- // Arithmetic
- ADD,
- SUB,
- MUL,
- DIV,
- // Logical & bit
- AND,
- OR,
- XOR,
- NOT,
- SHIFTL,
- SHIFTR_L,
- SHIFTR_A,
- // Memory & registers
- MOVE,
- SWAP,
- PUSH,
- POP,
- PEEK,
- LOAD,
- STORE,
- // Control flow
- JUMP,
- CALL,
- RETURN
-} opcode_t;
-
-typedef enum {
- GT = 0,
- GEQ,
- LT,
- LEQ,
- EQ,
- NEQ,
- POS,
- NEG
-} conditional_t;
-
-typedef enum {
- ARG,
- TEMP,
- RET
-} REG_TYPE;
-
-typedef struct {
- REG_TYPE type;
- uint64_t num;
-} reg_t;
-
-typedef enum {
- REG,
- IMM
-} OPER_TYPE;
-
-typedef struct {
- OPER_TYPE type;
- union {
- reg_t reg;
- uint64_t imm;
- } val;
-} oper_t;
-
-typedef struct {
- opcode_t opcode;
- conditional_t cond;
- oper_t dest;
- oper_t temp_1;
- oper_t temp_2;
-} inst_t;
-
-// The maximum number of characters representing a token
-#define MAX_TOK 256
-
-typedef enum {
- OPCODE,
- COND,
- OPER,
- ERR
-} TOKEN_TYPE;
-
-typedef enum {
- LEX_ERROR
-} error_t;
-
-typedef struct {
- TOKEN_TYPE type;
- union {
- opcode_t opcode;
- conditional_t cond;
- oper_t operand;
- error_t error;
- } val;
-} token_t;
+#include "remcc.h"
+
/* Function prototypes */
int usage(char *arg0);
-token_t *lexer(FILE *stream);
-token_t lex(char *tok);
-inst_t *parse(token_t *tokens);
-uint8_t *byte_compile(inst_t *instructions);
void write_bytecode(FILE *stream, uint8_t *bytecode);
/* Implementation: */
-/*int main(int argc, char **argv) {
+int main(int argc, char **argv) {
char *input_fname = NULL, *output_fname = NULL;
FILE *input_f = NULL, *output_f = NULL;
token_t *prog_tokens = NULL;
@@ -141,12 +42,12 @@ void write_bytecode(FILE *stream, uint8_t *bytecode);
prog_tokens = lexer(input_f);
prog_insts = parse(prog_tokens);
- prog_bytecode = byte_compile(prog_insts);
+ prog_bytecode = byte_compile_prog(prog_insts);
write_bytecode(output_f, prog_bytecode);
return 0;
- }*/
+}
int usage(char *arg0) {
fprintf(stderr, "Usage: %s input.rasm output.rin\n", arg0);
@@ -191,14 +92,56 @@ void write_bytecode(FILE *stream, uint8_t *bytecode) {
assert(NULL == "write_bytecode not yet implemented");
}
+/* Wrap an opcode in a token tagged OPCODE. */
+inline token_t opcode_token(opcode_t opcode) {
+    token_t tok = { .type = OPCODE };
+
+    tok.val.opcode = opcode;
+    return tok;
+}
+
+/* Wrap a conditional in a token tagged COND. */
+inline token_t cond_token(conditional_t cond) {
+    token_t tok = { .type = COND };
+
+    tok.val.cond = cond;
+    return tok;
+}
+
+/* Wrap an operand (register or immediate) in a token tagged OPER. */
+inline token_t oper_token(oper_t oper) {
+    token_t tok = { .type = OPER };
+
+    tok.val.operand = oper;
+    return tok;
+}
+
+inline token_t error_token(error_t error) {
+ return (token_t) {
+ .type = ERR,
+ .val.error = error
+ };
+}
+
+/* Build a REG operand from a register type and a register number. */
+inline oper_t reg_operand(REG_TYPE type, uint64_t num) {
+    reg_t reg = { .type = type, .num = num };
+    oper_t operand = { .type = REG };
+
+    operand.val.reg = reg;
+    return operand;
+}
+
+/* Build an IMM operand holding the given immediate value. */
+inline oper_t imm_operand(uint64_t immediate) {
+    oper_t operand = { .type = IMM };
+
+    operand.val.imm = immediate;
+    return operand;
+}
+
// My homemade lexer, it's a bit filthy but it'll do for now
token_t lex(char *tok_str) {
// TODO: Operands
// TODO: Case-Insensitive
- token_t err = {
- .type = ERR,
- .val.error = LEX_ERROR,
- };
+ token_t err = error_token(LEX_ERROR);
switch (tok_str[0]) {
case 'a':
@@ -207,146 +150,83 @@ token_t lex(char *tok_str) {
if (errno) {
break;
}
- return (token_t) {
- .type = OPER,
- .val.operand = {
- .type = REG,
- .val.reg = {
- .type = ARG,
- .num = n
- }
- }
- };
+ return oper_token(reg_operand(ARG, n));
} else if (strcmp(tok_str + 1, "dd") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = ADD
- };
+ return opcode_token(ADD);
} else if (strcmp(tok_str + 1, "nd") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = AND
- };
+ return opcode_token(AND);
}
break;
case 'c':
// The only C instruction is call
if (strcmp(tok_str + 1, "all") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = CALL
- };
+ return opcode_token(CALL);
}
break;
case 'd':
// The only D instruction is div
if (strcmp(tok_str + 1, "iv") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = DIV
- };
+ return opcode_token(DIV);
}
break;
case 'e':
if (strcmp(tok_str + 1, "q") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = EQ
- };
+ return cond_token(EQ);
}
break;
case 'g':
if (strcmp(tok_str + 1, "t") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = GT
- };
+ return cond_token(GT);
} else if (strcmp(tok_str + 1, "eq") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = GEQ
- };
+ return cond_token(GEQ);
}
break;
case 'j':
// The only J instruction is jump
if (strcmp(tok_str + 1, "ump") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = JUMP
- };
+ return opcode_token(JUMP);
}
break;
case 'l':
// The only L instruction is load
if (strcmp(tok_str + 1, "t") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = LT
- };
+ return cond_token(LT);
} else if (strcmp(tok_str + 1, "eq") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = LEQ
- };
+ return cond_token(LEQ);
} else if (strcmp(tok_str + 1, "oad") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = LOAD
- };
+ return opcode_token(LOAD);
}
break;
case 'm':
if (strcmp(tok_str + 1, "ul") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = MUL
- };
+ return opcode_token(MUL);
} else if (strcmp(tok_str + 1, "ove") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = MOVE
- };
+ return opcode_token(MOVE);
}
break;
case 'n':
if (strcmp(tok_str + 1, "op") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = NOP
- };
+ return opcode_token(NOP);
} else if (strcmp(tok_str + 1, "ot") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = NOT
- };
+ return opcode_token(NOT);
} else if (strcmp(tok_str + 1, "eq") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = NEQ
- };
+ return cond_token(NEQ);
} else if (strcmp(tok_str + 1, "eg") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = NEG
- };
+ return cond_token(NEG);
}
break;
case 'o':
if (strcmp(tok_str + 1, "r") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = OR
- };
+ return opcode_token(OR);
}
break;
@@ -354,33 +234,21 @@ token_t lex(char *tok_str) {
switch (tok_str[1]) {
case 'u':
if (strcmp(tok_str + 2, "sh") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = PUSH
- };
+ return opcode_token(PUSH);
}
break;
case 'o':
if (strcmp(tok_str + 2, "p") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = POP
- };
+ return opcode_token(POP);
} else if (strcmp(tok_str + 2, "s") == 0) {
- return (token_t) {
- .type = COND,
- .val.cond = POS
- };
+ return cond_token(POS);
}
break;
case 'e':
if (strcmp(tok_str + 2, "ek") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = PEEK
- };
+ return opcode_token(PEEK);
}
break;
@@ -391,25 +259,13 @@ token_t lex(char *tok_str) {
case 'r':
if (strcmp(tok_str + 1, "eturn") == 0) {
- return (token_t) {
- .type = OPCODE,
- .val.opcode = RETURN
- };
+ return opcode_token(RETURN);
} else if (isdigit(tok_str[1])) {
unsigned long n = strtoul(tok_str + 1, NULL, 10);
if (errno) {
break;
}
- return (token_t) {
- .type = OPER,
- .val.operand = {
- .type = REG,
- .val.reg = {
- .type = RET,
- .num = n
- }
- }
- };
+ return oper_token(reg_operand(RET, n));
}
break;
diff --git a/implementations/C/src/remcc.h b/implementations/C/src/remcc.h
@@ -0,0 +1,114 @@
+/*
+ * remcc.h - shared data types and function declarations for the remcc
+ * assembler: tokens produced by the lexer, instructions produced by the
+ * parser, and the entry points of each stage.
+ */
+#ifndef REMCC_H
+#define REMCC_H
+
+#include <stdint.h>
+#include <stdio.h> /* FILE, used by lexer() below */
+
+/* Data types */
+
+/* Instruction opcodes, numbered consecutively starting at NOP = 0. */
+typedef enum {
+    NOP = 0,
+    // Arithmetic
+    ADD,
+    SUB,
+    MUL,
+    DIV,
+    // Logical & bit
+    AND,
+    OR,
+    XOR,
+    NOT,
+    SHIFTL,
+    SHIFTR_L,
+    SHIFTR_A,
+    // Memory & registers
+    MOVE,
+    SWAP,
+    PUSH,
+    POP,
+    PEEK,
+    LOAD,
+    STORE,
+    // Control flow
+    JUMP,
+    CALL,
+    RETURN
+} opcode_t;
+
+/* Condition codes; an instruction carries one in inst_t.cond. */
+typedef enum {
+    GT = 0,
+    GEQ,
+    LT,
+    LEQ,
+    EQ,
+    NEQ,
+    POS,
+    NEG
+} conditional_t;
+
+/* Kinds of register an operand can name. */
+typedef enum {
+    ARG,
+    TEMP,
+    RET
+} REG_TYPE;
+
+/* A register reference: its kind plus its number within that kind. */
+typedef struct {
+    REG_TYPE type;
+    uint64_t num;
+} reg_t;
+
+/* Discriminator for oper_t: register or immediate. */
+typedef enum {
+    REG,
+    IMM
+} OPER_TYPE;
+
+/* An operand; `type` tells which member of `val` is meaningful. */
+typedef struct {
+    OPER_TYPE type;
+    union {
+        reg_t reg;      /* valid when type == REG */
+        uint64_t imm;   /* valid when type == IMM */
+    } val;
+} oper_t;
+
+/* One instruction: opcode, condition and three operands. */
+typedef struct {
+    opcode_t opcode;
+    conditional_t cond;
+    oper_t dest;
+    oper_t temp_1;
+    oper_t temp_2;
+} inst_t;
+
+// The maximum number of characters representing a token
+#define MAX_TOK 256
+
+/* Discriminator for token_t. */
+typedef enum {
+    OPCODE,
+    COND,
+    OPER,
+    ERR
+} TOKEN_TYPE;
+
+/* Error codes carried by ERR tokens. */
+typedef enum {
+    LEX_ERROR
+} error_t;
+
+/* A lexed token; `type` tells which member of `val` is meaningful. */
+typedef struct {
+    TOKEN_TYPE type;
+    union {
+        opcode_t opcode;        /* valid when type == OPCODE */
+        conditional_t cond;     /* valid when type == COND */
+        oper_t operand;         /* valid when type == OPER */
+        error_t error;          /* valid when type == ERR */
+    } val;
+} token_t;
+
+/* Function declarations */
+
+/* Token constructors: each returns a token_t tagged for its payload. */
+token_t opcode_token(opcode_t opcode);
+token_t cond_token(conditional_t cond);
+token_t oper_token(oper_t oper);
+token_t error_token(error_t error);
+
+/* Operand constructors. */
+oper_t reg_operand(REG_TYPE type, uint64_t num);
+oper_t imm_operand(uint64_t immediate);
+
+/*
+ * Assembler stages. main() chains them as
+ * lexer -> parse -> byte_compile_prog.
+ * NOTE(review): ownership and termination of the returned arrays are not
+ * visible from this header - confirm against the implementations.
+ */
+token_t *lexer(FILE *stream);
+token_t lex(char *tok);
+inst_t *parse(token_t *tokens);
+uint8_t *byte_compile_prog(inst_t *instructions);
+uint8_t *byte_compile_inst(inst_t instruction);
+
+#endif /* REMCC_H */
diff --git a/implementations/C/tests/lextest.c b/implementations/C/tests/lextest.c
@@ -1,105 +0,0 @@
-// Test if the assembler lexer is functioning as expected
-#import "../src/remcc.c"
-
-typedef enum {
- PASS,
- FAIL
-} RESULT;
-
-typedef struct {
- RESULT state;
- union {
- char *result;
- char *error;
- } val;
-} result_t;
-
-result_t test_lex(char *tok, opcode_t expect);
-result_t test_lexer(FILE *stream);
-
-int main(int argc, char **argv) {
- result_t result;
-
- char *lex_tests[] = {
- // Instructions tests
- "nop",
- "add",
- "sub",
- "mul",
- "div",
- "and",
- "or",
- "xor",
- "not",
- "shiftl",
- "shiftr(l)",
- "shiftr(a)",
- "move",
- "swap",
- "push",
- "pop",
- "peek",
- "load",
- "store",
- "jump",
- "call",
- "return",
- NULL
- };
- opcode_t lex_expects[] = {
- NOP,
- ADD,
- SUB,
- MUL,
- DIV,
- AND,
- OR,
- XOR,
- NOT,
- SHIFTL,
- SHIFTR_L,
- SHIFTR_A,
- MOVE,
- SWAP,
- PUSH,
- POP,
- PEEK,
- LOAD,
- STORE,
- JUMP,
- CALL,
- RETURN
- };
-
- //FILE *stream_tests[] = {0};
-
- for (int i = 0; lex_tests[i] != NULL; i++) {
- switch ((result = test_lex(lex_tests[i], lex_expects[i])).state) {
- case PASS:
- printf("We have a success!\n");
- printf("Result: %s\n", result.val.result);
- break;
- case FAIL:
- fprintf(stderr, "Dumbledore dies\n");
- fprintf(stderr, "Error: %s\n", result.val.error);
- break;
- }
- }
- //test_lexer();
- return 0;
-}
-
-result_t test_lex(char *tok, opcode_t expect) {
- token_t res = lex(tok);
- if (res.type == OPCODE && res.val.opcode == expect) {
- return (result_t) {
- .state = PASS,
- .val.result = "Success!"
- };
- } else {
- return (result_t) {
- .state = FAIL,
- .val.error = "Didn't get what we expected!"
- };
- }
-}