remedyvm

A toy RISC virtual machine inspired by Bell Lab's `dis' and Tsoding's `bm'
git clone git://git.ethandl.dev/remedyvm
Log | Files | Refs

commit 2052a724daca6d1fc71fa71b3872144d87cdd24e
Author: Ethan Long <Ethan.Long@anu.edu.au>
Date:   Wed,  7 Jun 2023 22:55:14 +1000

Initial commit :)

Diffstat:
A.gitignore | 1+
Adoc/Makefile | 2++
Adoc/specs.ms | 18++++++++++++++++++
Adoc/specs.pdf | 0
Aimplementations/C/README.md | 10++++++++++
Aimplementations/C/nobuild.c | 14++++++++++++++
Aimplementations/C/nobuild.h | 1151+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aimplementations/C/remedyVM.c | 6++++++
Aoutline.md | 158+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aoutline.org | 155+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 1515 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ + diff --git a/doc/Makefile b/doc/Makefile @@ -0,0 +1,2 @@ +specs.pdf: specs.ms + groff -mspdf -T pdf specs.ms > specs.pdf diff --git a/doc/specs.ms b/doc/specs.ms @@ -0,0 +1,18 @@ +.TL +RemedyVM Specifications + +.AU +Ethan Long + +.AI +Australian National University + +.AB +RemedyVM is a memory machine (MM) inspired by the +.CW dis +virtual machine by Bell Labs, and Tsoding's +.CW bm . +The instruction set is close to machine code to allow for easy just-in-time (JIT) and ahead-of-time (AOT) compilation. +.AE + +.PP diff --git a/doc/specs.pdf b/doc/specs.pdf Binary files differ. diff --git a/implementations/C/README.md b/implementations/C/README.md @@ -0,0 +1,10 @@ +# C Implementation +This is the C implementation of remedyVM. + +## Building +This uses tsoding's [nobuild](https://github.com/tsoding/nobuild), to compile, run the following: +```console +$ cc ./nobuild.c -o nobuild +$ ./nobuild +``` +This should compile all the required files provided my `nobuild.c` implementation is correct. diff --git a/implementations/C/nobuild.c b/implementations/C/nobuild.c @@ -0,0 +1,14 @@ +#define NOBUILD_IMPLEMENTATION +#include "./nobuild.h" + +#define CFLAGS "-Wall", "-Wextra", "-std=c99", "-pedantic" + +void build_vm() { + CMD("cc", CFLAGS, "-o", "remedyVM", "remedyVM.c"); +} + +int main() { + build_vm(); + + return 0; +} diff --git a/implementations/C/nobuild.h b/implementations/C/nobuild.h @@ -0,0 +1,1151 @@ +#ifndef NOBUILD_H_ +#define NOBUILD_H_ + +#ifndef _WIN32 +# define _POSIX_C_SOURCE 200809L +# include <sys/types.h> +# include <sys/wait.h> +# include <sys/stat.h> +# include <unistd.h> +# include <dirent.h> +# include <fcntl.h> +# define PATH_SEP "/" +typedef pid_t Pid; +typedef int Fd; +#else +# define WIN32_MEAN_AND_LEAN +# include "windows.h" +# include <process.h> +# define PATH_SEP "\\" +typedef HANDLE Pid; +typedef HANDLE Fd; +// minirent.h HEADER BEGIN //////////////////////////////////////// +// Copyright 2021 Alexey Kutepov <reximkut@gmail.com> +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// +// ============================================================ +// +// minirent — 0.0.1 — A subset of dirent interface for Windows. +// +// https://github.com/tsoding/minirent +// +// ============================================================ +// +// ChangeLog (https://semver.org/ is implied) +// +// 0.0.1 First Official Release + +#ifndef MINIRENT_H_ +#define MINIRENT_H_ + +#define WIN32_LEAN_AND_MEAN +#include "windows.h" + +struct dirent { + char d_name[MAX_PATH+1]; +}; + +typedef struct DIR DIR; + +DIR *opendir(const char *dirpath); +struct dirent *readdir(DIR *dirp); +int closedir(DIR *dirp); + +#endif // MINIRENT_H_ +// minirent.h HEADER END //////////////////////////////////////// + +// TODO(#28): use GetLastErrorAsString everywhere on Windows error reporting +LPSTR GetLastErrorAsString(void); + +#endif // _WIN32 + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <errno.h> + +#define FOREACH_ARRAY(type, elem, array, body) \ + for (size_t elem_##index = 0; \ + elem_##index < array.count; \ + ++elem_##index) \ + { \ + type *elem = &array.elems[elem_##index]; \ + body; \ + } + +typedef const char * Cstr; + +int cstr_ends_with(Cstr cstr, Cstr postfix); +#define ENDS_WITH(cstr, postfix) cstr_ends_with(cstr, postfix) + +Cstr cstr_no_ext(Cstr path); +#define NOEXT(path) cstr_no_ext(path) + +typedef struct { + Cstr *elems; + size_t count; +} Cstr_Array; + +Cstr_Array cstr_array_make(Cstr first, ...); +Cstr_Array cstr_array_append(Cstr_Array cstrs, Cstr cstr); +Cstr cstr_array_join(Cstr sep, Cstr_Array cstrs); + +#define JOIN(sep, ...) cstr_array_join(sep, cstr_array_make(__VA_ARGS__, NULL)) +#define CONCAT(...) JOIN("", __VA_ARGS__) +#define PATH(...) JOIN(PATH_SEP, __VA_ARGS__) + +typedef struct { + Fd read; + Fd write; +} Pipe; + +Pipe pipe_make(void); + +typedef struct { + Cstr_Array line; +} Cmd; + +Fd fd_open_for_read(Cstr path); +Fd fd_open_for_write(Cstr path); +void fd_close(Fd fd); +void pid_wait(Pid pid); +Cstr cmd_show(Cmd cmd); +Pid cmd_run_async(Cmd cmd, Fd *fdin, Fd *fdout); +void cmd_run_sync(Cmd cmd); + +typedef struct { + Cmd *elems; + size_t count; +} Cmd_Array; + +// TODO(#1): no way to disable echo in nobuild scripts +// TODO(#2): no way to ignore fails +#define CMD(...) \ + do { \ + Cmd cmd = { \ + .line = cstr_array_make(__VA_ARGS__, NULL) \ + }; \ + INFO("CMD: %s", cmd_show(cmd)); \ + cmd_run_sync(cmd); \ + } while (0) + +typedef enum { + CHAIN_TOKEN_END = 0, + CHAIN_TOKEN_IN, + CHAIN_TOKEN_OUT, + CHAIN_TOKEN_CMD +} Chain_Token_Type; + +// A single token for the CHAIN(...) DSL syntax +typedef struct { + Chain_Token_Type type; + Cstr_Array args; +} Chain_Token; + +// TODO(#17): IN and OUT are already taken by WinAPI +#define IN(path) \ + (Chain_Token) { \ + .type = CHAIN_TOKEN_IN, \ + .args = cstr_array_make(path, NULL) \ + } + +#define OUT(path) \ + (Chain_Token) { \ + .type = CHAIN_TOKEN_OUT, \ + .args = cstr_array_make(path, NULL) \ + } + +#define CHAIN_CMD(...) \ + (Chain_Token) { \ + .type = CHAIN_TOKEN_CMD, \ + .args = cstr_array_make(__VA_ARGS__, NULL) \ + } + +// TODO(#20): pipes do not allow redirecting stderr +typedef struct { + Cstr input_filepath; + Cmd_Array cmds; + Cstr output_filepath; +} Chain; + +Chain chain_build_from_tokens(Chain_Token first, ...); +void chain_run_sync(Chain chain); +void chain_echo(Chain chain); + +// TODO(#15): PIPE does not report where exactly a syntactic error has happened +#define CHAIN(...) \ + do { \ + Chain chain = chain_build_from_tokens(__VA_ARGS__, (Chain_Token) {0}); \ + chain_echo(chain); \ + chain_run_sync(chain); \ + } while(0) + +#ifndef REBUILD_URSELF +# if _WIN32 +# if defined(__GNUC__) +# define REBUILD_URSELF(binary_path, source_path) CMD("gcc", "-o", binary_path, source_path) +# elif defined(__clang__) +# define REBUILD_URSELF(binary_path, source_path) CMD("clang", "-o", binary_path, source_path) +# elif defined(_MSC_VER) +# define REBUILD_URSELF(binary_path, source_path) CMD("cl.exe", source_path) +# endif +# else +# define REBUILD_URSELF(binary_path, source_path) CMD("cc", "-o", binary_path, source_path) +# endif +#endif + +// Go Rebuild Urself™ Technology +// +// How to use it: +// int main(int argc, char** argv) { +// GO_REBUILD_URSELF(argc, argv); +// // actual work +// return 0; +// } +// +// After your added this macro every time you run ./nobuild it will detect +// that you modified its original source code and will try to rebuild itself +// before doing any actual work. So you only need to bootstrap your build system +// once. +// +// The modification is detected by comparing the last modified times of the executable +// and its source code. The same way the make utility usually does it. +// +// The rebuilding is done by using the REBUILD_URSELF macro which you can redefine +// if you need a special way of bootstraping your build system. (which I personally +// do not recommend since the whole idea of nobuild is to keep the process of bootstrapping +// as simple as possible and doing all of the actual work inside of the nobuild) +// +#define GO_REBUILD_URSELF(argc, argv) \ + do { \ + const char *source_path = __FILE__; \ + assert(argc >= 1); \ + const char *binary_path = argv[0]; \ + \ + if (is_path1_modified_after_path2(source_path, binary_path)) { \ + RENAME(binary_path, CONCAT(binary_path, ".old")); \ + REBUILD_URSELF(binary_path, source_path); \ + Cmd cmd = { \ + .line = { \ + .elems = (Cstr*) argv, \ + .count = argc, \ + }, \ + }; \ + INFO("CMD: %s", cmd_show(cmd)); \ + cmd_run_sync(cmd); \ + exit(0); \ + } \ + } while(0) +// The implementation idea is stolen from https://github.com/zhiayang/nabs + +void rebuild_urself(const char *binary_path, const char *source_path); + +int path_is_dir(Cstr path); +#define IS_DIR(path) path_is_dir(path) + +int path_exists(Cstr path); +#define PATH_EXISTS(path) path_exists(path) + +void path_mkdirs(Cstr_Array path); +#define MKDIRS(...) \ + do { \ + Cstr_Array path = cstr_array_make(__VA_ARGS__, NULL); \ + INFO("MKDIRS: %s", cstr_array_join(PATH_SEP, path)); \ + path_mkdirs(path); \ + } while (0) + +void path_rename(Cstr old_path, Cstr new_path); +#define RENAME(old_path, new_path) \ + do { \ + INFO("RENAME: %s -> %s", old_path, new_path); \ + path_rename(old_path, new_path); \ + } while (0) + +void path_rm(Cstr path); +#define RM(path) \ + do { \ + INFO("RM: %s", path); \ + path_rm(path); \ + } while(0) + +#define FOREACH_FILE_IN_DIR(file, dirpath, body) \ + do { \ + struct dirent *dp = NULL; \ + DIR *dir = opendir(dirpath); \ + if (dir == NULL) { \ + PANIC("could not open directory %s: %s", \ + dirpath, strerror(errno)); \ + } \ + errno = 0; \ + while ((dp = readdir(dir))) { \ + const char *file = dp->d_name; \ + body; \ + } \ + \ + if (errno > 0) { \ + PANIC("could not read directory %s: %s", \ + dirpath, strerror(errno)); \ + } \ + \ + closedir(dir); \ + } while(0) + +#if defined(__GNUC__) || defined(__clang__) +// https://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/Function-Attributes.html +#define NOBUILD_PRINTF_FORMAT(STRING_INDEX, FIRST_TO_CHECK) __attribute__ ((format (printf, STRING_INDEX, FIRST_TO_CHECK))) +#else +#define NOBUILD_PRINTF_FORMAT(STRING_INDEX, FIRST_TO_CHECK) +#endif + +void VLOG(FILE *stream, Cstr tag, Cstr fmt, va_list args); +void INFO(Cstr fmt, ...) NOBUILD_PRINTF_FORMAT(1, 2); +void WARN(Cstr fmt, ...) NOBUILD_PRINTF_FORMAT(1, 2); +void ERRO(Cstr fmt, ...) NOBUILD_PRINTF_FORMAT(1, 2); +void PANIC(Cstr fmt, ...) NOBUILD_PRINTF_FORMAT(1, 2); + +char *shift_args(int *argc, char ***argv); + +#endif // NOBUILD_H_ + +//////////////////////////////////////////////////////////////////////////////// + +#ifdef NOBUILD_IMPLEMENTATION + +#ifdef _WIN32 +LPSTR GetLastErrorAsString(void) +{ + // https://stackoverflow.com/questions/1387064/how-to-get-the-error-message-from-the-error-code-returned-by-getlasterror + + DWORD errorMessageId = GetLastError(); + assert(errorMessageId != 0); + + LPSTR messageBuffer = NULL; + + DWORD size = + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, // DWORD dwFlags, + NULL, // LPCVOID lpSource, + errorMessageId, // DWORD dwMessageId, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // DWORD dwLanguageId, + (LPSTR) &messageBuffer, // LPTSTR lpBuffer, + 0, // DWORD nSize, + NULL // va_list *Arguments + ); + + return messageBuffer; +} + +// minirent.h IMPLEMENTATION BEGIN //////////////////////////////////////// +struct DIR { + HANDLE hFind; + WIN32_FIND_DATA data; + struct dirent *dirent; +}; + +DIR *opendir(const char *dirpath) +{ + assert(dirpath); + + char buffer[MAX_PATH]; + snprintf(buffer, MAX_PATH, "%s\\*", dirpath); + + DIR *dir = (DIR*)calloc(1, sizeof(DIR)); + + dir->hFind = FindFirstFile(buffer, &dir->data); + if (dir->hFind == INVALID_HANDLE_VALUE) { + errno = ENOSYS; + goto fail; + } + + return dir; + +fail: + if (dir) { + free(dir); + } + + return NULL; +} + +struct dirent *readdir(DIR *dirp) +{ + assert(dirp); + + if (dirp->dirent == NULL) { + dirp->dirent = (struct dirent*)calloc(1, sizeof(struct dirent)); + } else { + if(!FindNextFile(dirp->hFind, &dirp->data)) { + if (GetLastError() != ERROR_NO_MORE_FILES) { + errno = ENOSYS; + } + + return NULL; + } + } + + memset(dirp->dirent->d_name, 0, sizeof(dirp->dirent->d_name)); + + strncpy( + dirp->dirent->d_name, + dirp->data.cFileName, + sizeof(dirp->dirent->d_name) - 1); + + return dirp->dirent; +} + +int closedir(DIR *dirp) +{ + assert(dirp); + + if(!FindClose(dirp->hFind)) { + errno = ENOSYS; + return -1; + } + + if (dirp->dirent) { + free(dirp->dirent); + } + free(dirp); + + return 0; +} +// minirent.h IMPLEMENTATION END //////////////////////////////////////// +#endif // _WIN32 + +Cstr_Array cstr_array_append(Cstr_Array cstrs, Cstr cstr) +{ + Cstr_Array result = { + .count = cstrs.count + 1 + }; + result.elems = malloc(sizeof(result.elems[0]) * result.count); + memcpy(result.elems, cstrs.elems, cstrs.count * sizeof(result.elems[0])); + result.elems[cstrs.count] = cstr; + return result; +} + +int cstr_ends_with(Cstr cstr, Cstr postfix) +{ + const size_t cstr_len = strlen(cstr); + const size_t postfix_len = strlen(postfix); + return postfix_len <= cstr_len + && strcmp(cstr + cstr_len - postfix_len, postfix) == 0; +} + +Cstr cstr_no_ext(Cstr path) +{ + size_t n = strlen(path); + while (n > 0 && path[n - 1] != '.') { + n -= 1; + } + + if (n > 0) { + char *result = malloc(n); + memcpy(result, path, n); + result[n - 1] = '\0'; + + return result; + } else { + return path; + } +} + +Cstr_Array cstr_array_make(Cstr first, ...) +{ + Cstr_Array result = {0}; + + if (first == NULL) { + return result; + } + + result.count += 1; + + va_list args; + va_start(args, first); + for (Cstr next = va_arg(args, Cstr); + next != NULL; + next = va_arg(args, Cstr)) { + result.count += 1; + } + va_end(args); + + result.elems = malloc(sizeof(result.elems[0]) * result.count); + if (result.elems == NULL) { + PANIC("could not allocate memory: %s", strerror(errno)); + } + result.count = 0; + + result.elems[result.count++] = first; + + va_start(args, first); + for (Cstr next = va_arg(args, Cstr); + next != NULL; + next = va_arg(args, Cstr)) { + result.elems[result.count++] = next; + } + va_end(args); + + return result; +} + +Cstr cstr_array_join(Cstr sep, Cstr_Array cstrs) +{ + if (cstrs.count == 0) { + return ""; + } + + const size_t sep_len = strlen(sep); + size_t len = 0; + for (size_t i = 0; i < cstrs.count; ++i) { + len += strlen(cstrs.elems[i]); + } + + const size_t result_len = (cstrs.count - 1) * sep_len + len + 1; + char *result = malloc(sizeof(char) * result_len); + if (result == NULL) { + PANIC("could not allocate memory: %s", strerror(errno)); + } + + len = 0; + for (size_t i = 0; i < cstrs.count; ++i) { + if (i > 0) { + memcpy(result + len, sep, sep_len); + len += sep_len; + } + + size_t elem_len = strlen(cstrs.elems[i]); + memcpy(result + len, cstrs.elems[i], elem_len); + len += elem_len; + } + result[len] = '\0'; + + return result; +} + +Pipe pipe_make(void) +{ + Pipe pip = {0}; + +#ifdef _WIN32 + // https://docs.microsoft.com/en-us/windows/win32/ProcThread/creating-a-child-process-with-redirected-input-and-output + + SECURITY_ATTRIBUTES saAttr = {0}; + saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); + saAttr.bInheritHandle = TRUE; + + if (!CreatePipe(&pip.read, &pip.write, &saAttr, 0)) { + PANIC("Could not create pipe: %s", GetLastErrorAsString()); + } +#else + Fd pipefd[2]; + if (pipe(pipefd) < 0) { + PANIC("Could not create pipe: %s", strerror(errno)); + } + + pip.read = pipefd[0]; + pip.write = pipefd[1]; +#endif // _WIN32 + + return pip; +} + +Fd fd_open_for_read(Cstr path) +{ +#ifndef _WIN32 + Fd result = open(path, O_RDONLY); + if (result < 0) { + PANIC("Could not open file %s: %s", path, strerror(errno)); + } + return result; +#else + // https://docs.microsoft.com/en-us/windows/win32/fileio/opening-a-file-for-reading-or-writing + SECURITY_ATTRIBUTES saAttr = {0}; + saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); + saAttr.bInheritHandle = TRUE; + + Fd result = CreateFile( + path, + GENERIC_READ, + 0, + &saAttr, + OPEN_EXISTING, + FILE_ATTRIBUTE_READONLY, + NULL); + + if (result == INVALID_HANDLE_VALUE) { + PANIC("Could not open file %s", path); + } + + return result; +#endif // _WIN32 +} + +Fd fd_open_for_write(Cstr path) +{ +#ifndef _WIN32 + Fd result = open(path, + O_WRONLY | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (result < 0) { + PANIC("could not open file %s: %s", path, strerror(errno)); + } + return result; +#else + SECURITY_ATTRIBUTES saAttr = {0}; + saAttr.nLength = sizeof(SECURITY_ATTRIBUTES); + saAttr.bInheritHandle = TRUE; + + Fd result = CreateFile( + path, // name of the write + GENERIC_WRITE, // open for writing + 0, // do not share + &saAttr, // default security + CREATE_NEW, // create new file only + FILE_ATTRIBUTE_NORMAL, // normal file + NULL // no attr. template + ); + + if (result == INVALID_HANDLE_VALUE) { + PANIC("Could not open file %s: %s", path, GetLastErrorAsString()); + } + + return result; +#endif // _WIN32 +} + +void fd_close(Fd fd) +{ +#ifdef _WIN32 + CloseHandle(fd); +#else + close(fd); +#endif // _WIN32 +} + +void pid_wait(Pid pid) +{ +#ifdef _WIN32 + DWORD result = WaitForSingleObject( + pid, // HANDLE hHandle, + INFINITE // DWORD dwMilliseconds + ); + + if (result == WAIT_FAILED) { + PANIC("could not wait on child process: %s", GetLastErrorAsString()); + } + + DWORD exit_status; + if (GetExitCodeProcess(pid, &exit_status) == 0) { + PANIC("could not get process exit code: %lu", GetLastError()); + } + + if (exit_status != 0) { + PANIC("command exited with exit code %lu", exit_status); + } + + CloseHandle(pid); +#else + for (;;) { + int wstatus = 0; + if (waitpid(pid, &wstatus, 0) < 0) { + PANIC("could not wait on command (pid %d): %s", pid, strerror(errno)); + } + + if (WIFEXITED(wstatus)) { + int exit_status = WEXITSTATUS(wstatus); + if (exit_status != 0) { + PANIC("command exited with exit code %d", exit_status); + } + + break; + } + + if (WIFSIGNALED(wstatus)) { + PANIC("command process was terminated by %s", strsignal(WTERMSIG(wstatus))); + } + } + +#endif // _WIN32 +} + +Cstr cmd_show(Cmd cmd) +{ + // TODO(#31): cmd_show does not render the command line properly + // - No string literals when arguments contains space + // - No escaping of special characters + // - Etc. + return cstr_array_join(" ", cmd.line); +} + +Pid cmd_run_async(Cmd cmd, Fd *fdin, Fd *fdout) +{ +#ifdef _WIN32 + // https://docs.microsoft.com/en-us/windows/win32/procthread/creating-a-child-process-with-redirected-input-and-output + + STARTUPINFO siStartInfo; + ZeroMemory(&siStartInfo, sizeof(siStartInfo)); + siStartInfo.cb = sizeof(STARTUPINFO); + // NOTE: theoretically setting NULL to std handles should not be a problem + // https://docs.microsoft.com/en-us/windows/console/getstdhandle?redirectedfrom=MSDN#attachdetach-behavior + siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE); + // TODO(#32): check for errors in GetStdHandle + siStartInfo.hStdOutput = fdout ? *fdout : GetStdHandle(STD_OUTPUT_HANDLE); + siStartInfo.hStdInput = fdin ? *fdin : GetStdHandle(STD_INPUT_HANDLE); + siStartInfo.dwFlags |= STARTF_USESTDHANDLES; + + PROCESS_INFORMATION piProcInfo; + ZeroMemory(&piProcInfo, sizeof(PROCESS_INFORMATION)); + + BOOL bSuccess = + CreateProcess( + NULL, + // TODO(#33): cmd_run_async on Windows does not render command line properly + // It may require wrapping some arguments with double-quotes if they contains spaces, etc. + cstr_array_join(" ", cmd.line), + NULL, + NULL, + TRUE, + 0, + NULL, + NULL, + &siStartInfo, + &piProcInfo + ); + + if (!bSuccess) { + PANIC("Could not create child process %s: %s\n", + cmd_show(cmd), GetLastErrorAsString()); + } + + CloseHandle(piProcInfo.hThread); + + return piProcInfo.hProcess; +#else + pid_t cpid = fork(); + if (cpid < 0) { + PANIC("Could not fork child process: %s: %s", + cmd_show(cmd), strerror(errno)); + } + + if (cpid == 0) { + Cstr_Array args = cstr_array_append(cmd.line, NULL); + + if (fdin) { + if (dup2(*fdin, STDIN_FILENO) < 0) { + PANIC("Could not setup stdin for child process: %s", strerror(errno)); + } + } + + if (fdout) { + if (dup2(*fdout, STDOUT_FILENO) < 0) { + PANIC("Could not setup stdout for child process: %s", strerror(errno)); + } + } + + if (execvp(args.elems[0], (char * const*) args.elems) < 0) { + PANIC("Could not exec child process: %s: %s", + cmd_show(cmd), strerror(errno)); + } + } + + return cpid; +#endif // _WIN32 +} + +void cmd_run_sync(Cmd cmd) +{ + pid_wait(cmd_run_async(cmd, NULL, NULL)); +} + +static void chain_set_input_output_files_or_count_cmds(Chain *chain, Chain_Token token) +{ + switch (token.type) { + case CHAIN_TOKEN_CMD: { + chain->cmds.count += 1; + } + break; + + case CHAIN_TOKEN_IN: { + if (chain->input_filepath) { + PANIC("Input file path was already set"); + } + + chain->input_filepath = token.args.elems[0]; + } + break; + + case CHAIN_TOKEN_OUT: { + if (chain->output_filepath) { + PANIC("Output file path was already set"); + } + + chain->output_filepath = token.args.elems[0]; + } + break; + + case CHAIN_TOKEN_END: + default: { + assert(0 && "unreachable"); + exit(1); + } + } +} + +static void chain_push_cmd(Chain *chain, Chain_Token token) +{ + if (token.type == CHAIN_TOKEN_CMD) { + chain->cmds.elems[chain->cmds.count++] = (Cmd) { + .line = token.args + }; + } +} + +Chain chain_build_from_tokens(Chain_Token first, ...) +{ + Chain result = {0}; + + chain_set_input_output_files_or_count_cmds(&result, first); + va_list args; + va_start(args, first); + Chain_Token next = va_arg(args, Chain_Token); + while (next.type != CHAIN_TOKEN_END) { + chain_set_input_output_files_or_count_cmds(&result, next); + next = va_arg(args, Chain_Token); + } + va_end(args); + + result.cmds.elems = malloc(sizeof(result.cmds.elems[0]) * result.cmds.count); + if (result.cmds.elems == NULL) { + PANIC("could not allocate memory: %s", strerror(errno)); + } + result.cmds.count = 0; + + chain_push_cmd(&result, first); + + va_start(args, first); + next = va_arg(args, Chain_Token); + while (next.type != CHAIN_TOKEN_END) { + chain_push_cmd(&result, next); + next = va_arg(args, Chain_Token); + } + va_end(args); + + return result; +} + +void chain_run_sync(Chain chain) +{ + if (chain.cmds.count == 0) { + return; + } + + Pid *cpids = malloc(sizeof(Pid) * chain.cmds.count); + + Pipe pip = {0}; + Fd fdin = 0; + Fd *fdprev = NULL; + + if (chain.input_filepath) { + fdin = fd_open_for_read(chain.input_filepath); + if (fdin < 0) { + PANIC("could not open file %s: %s", chain.input_filepath, strerror(errno)); + } + fdprev = &fdin; + } + + for (size_t i = 0; i < chain.cmds.count - 1; ++i) { + pip = pipe_make(); + + cpids[i] = cmd_run_async( + chain.cmds.elems[i], + fdprev, + &pip.write); + + if (fdprev) fd_close(*fdprev); + fd_close(pip.write); + fdprev = &fdin; + fdin = pip.read; + } + + { + Fd fdout = 0; + Fd *fdnext = NULL; + + if (chain.output_filepath) { + fdout = fd_open_for_write(chain.output_filepath); + if (fdout < 0) { + PANIC("could not open file %s: %s", + chain.output_filepath, + strerror(errno)); + } + fdnext = &fdout; + } + + const size_t last = chain.cmds.count - 1; + cpids[last] = + cmd_run_async( + chain.cmds.elems[last], + fdprev, + fdnext); + + if (fdprev) fd_close(*fdprev); + if (fdnext) fd_close(*fdnext); + } + + for (size_t i = 0; i < chain.cmds.count; ++i) { + pid_wait(cpids[i]); + } +} + +void chain_echo(Chain chain) +{ + printf("[INFO] CHAIN:"); + if (chain.input_filepath) { + printf(" %s", chain.input_filepath); + } + + FOREACH_ARRAY(Cmd, cmd, chain.cmds, { + printf(" |> %s", cmd_show(*cmd)); + }); + + if (chain.output_filepath) { + printf(" |> %s", chain.output_filepath); + } + + printf("\n"); +} + +int path_exists(Cstr path) +{ +#ifdef _WIN32 + DWORD dwAttrib = GetFileAttributes(path); + return (dwAttrib != INVALID_FILE_ATTRIBUTES); +#else + struct stat statbuf = {0}; + if (stat(path, &statbuf) < 0) { + if (errno == ENOENT) { + errno = 0; + return 0; + } + + PANIC("could not retrieve information about file %s: %s", + path, strerror(errno)); + } + + return 1; +#endif +} + +int path_is_dir(Cstr path) +{ +#ifdef _WIN32 + DWORD dwAttrib = GetFileAttributes(path); + + return (dwAttrib != INVALID_FILE_ATTRIBUTES && + (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)); +#else + struct stat statbuf = {0}; + if (stat(path, &statbuf) < 0) { + if (errno == ENOENT) { + errno = 0; + return 0; + } + + PANIC("could not retrieve information about file %s: %s", + path, strerror(errno)); + } + + return S_ISDIR(statbuf.st_mode); +#endif // _WIN32 +} + +void path_rename(const char *old_path, const char *new_path) +{ +#ifdef _WIN32 + if (!MoveFileEx(old_path, new_path, MOVEFILE_REPLACE_EXISTING)) { + PANIC("could not rename %s to %s: %s", old_path, new_path, + GetLastErrorAsString()); + } +#else + if (rename(old_path, new_path) < 0) { + PANIC("could not rename %s to %s: %s", old_path, new_path, + strerror(errno)); + } +#endif // _WIN32 +} + +void path_mkdirs(Cstr_Array path) +{ + if (path.count == 0) { + return; + } + + size_t len = 0; + for (size_t i = 0; i < path.count; ++i) { + len += strlen(path.elems[i]); + } + + size_t seps_count = path.count - 1; + const size_t sep_len = strlen(PATH_SEP); + + char *result = malloc(len + seps_count * sep_len + 1); + + len = 0; + for (size_t i = 0; i < path.count; ++i) { + size_t n = strlen(path.elems[i]); + memcpy(result + len, path.elems[i], n); + len += n; + + if (seps_count > 0) { + memcpy(result + len, PATH_SEP, sep_len); + len += sep_len; + seps_count -= 1; + } + + result[len] = '\0'; + + if (mkdir(result, 0755) < 0) { + if (errno == EEXIST) { + errno = 0; + WARN("directory %s already exists", result); + } else { + PANIC("could not create directory %s: %s", result, strerror(errno)); + } + } + } +} + +void path_rm(Cstr path) +{ + if (IS_DIR(path)) { + FOREACH_FILE_IN_DIR(file, path, { + if (strcmp(file, ".") != 0 && strcmp(file, "..") != 0) + { + path_rm(PATH(path, file)); + } + }); + + if (rmdir(path) < 0) { + if (errno == ENOENT) { + errno = 0; + WARN("directory %s does not exist", path); + } else { + PANIC("could not remove directory %s: %s", path, strerror(errno)); + } + } + } else { + if (unlink(path) < 0) { + if (errno == ENOENT) { + errno = 0; + WARN("file %s does not exist", path); + } else { + PANIC("could not remove file %s: %s", path, strerror(errno)); + } + } + } +} + +int is_path1_modified_after_path2(const char *path1, const char *path2) +{ +#ifdef _WIN32 + FILETIME path1_time, path2_time; + + Fd path1_fd = fd_open_for_read(path1); + if (!GetFileTime(path1_fd, NULL, NULL, &path1_time)) { + PANIC("could not get time of %s: %s", path1, GetLastErrorAsString()); + } + fd_close(path1_fd); + + Fd path2_fd = fd_open_for_read(path2); + if (!GetFileTime(path2_fd, NULL, NULL, &path2_time)) { + PANIC("could not get time of %s: %s", path2, GetLastErrorAsString()); + } + fd_close(path2_fd); + + return CompareFileTime(&path1_time, &path2_time) == 1; +#else + struct stat statbuf = {0}; + + if (stat(path1, &statbuf) < 0) { + PANIC("could not stat %s: %s\n", path1, strerror(errno)); + } + int path1_time = statbuf.st_mtime; + + if (stat(path2, &statbuf) < 0) { + PANIC("could not stat %s: %s\n", path2, strerror(errno)); + } + int path2_time = statbuf.st_mtime; + + return path1_time > path2_time; +#endif +} + +void VLOG(FILE *stream, Cstr tag, Cstr fmt, va_list args) +{ + fprintf(stream, "[%s] ", tag); + vfprintf(stream, fmt, args); + fprintf(stream, "\n"); +} + +void INFO(Cstr fmt, ...) +{ + va_list args; + va_start(args, fmt); + VLOG(stderr, "INFO", fmt, args); + va_end(args); +} + +void WARN(Cstr fmt, ...) +{ + va_list args; + va_start(args, fmt); + VLOG(stderr, "WARN", fmt, args); + va_end(args); +} + +void ERRO(Cstr fmt, ...) +{ + va_list args; + va_start(args, fmt); + VLOG(stderr, "ERRO", fmt, args); + va_end(args); +} + +void PANIC(Cstr fmt, ...) +{ + va_list args; + va_start(args, fmt); + VLOG(stderr, "ERRO", fmt, args); + va_end(args); + exit(1); +} + +char *shift_args(int *argc, char ***argv) +{ + assert(*argc > 0); + char *result = **argv; + *argc -= 1; + *argv += 1; + return result; +} + +#endif // NOBUILD_IMPLEMENTATION diff --git a/implementations/C/remedyVM.c b/implementations/C/remedyVM.c @@ -0,0 +1,6 @@ +#include <stdio.h> + +int main() { + printf("Hello, world"); + return 0; +} diff --git a/outline.md b/outline.md @@ -0,0 +1,158 @@ +# RemedyVM, the register-memory dynamic virtual machine + +## Overview + +RemedyVM will be a (memory↔stack↔heap)-based VM target, with an AOT +compiler, a JIT compiler, and a simple interpreter. + +The memory/stack/heap approach allows for register allocation +optimisations at run/compile-time, whilst still offering compilers +targeting RemedyVM the option to put memory into a stack or the heap. + +A memory machine (MM) is a machine with \"infinite\" registers, each +corresponding with a temporary value. Much like a register machine, +temporaries can easily occupy real registers in JIT compilation, but +much like a stack machine, the registers are in a growing list. + +There are currently no plans for garbage collection, but if it is to be +implemented in the future, it will likely be done similarly to Bell +Labs\' `dis`{.verbatim} VM, taking a hybrid reference count/mark and +sweep approach. According to Winterbottom and Pike, the memory machine +architecture makes this easier to accomplish. + +The bytecode should be compilable straight to machine code ahead of time +(AOT), removing *any* runtime in the process. The JIT compiler will need +a psuedo-runtime, all functions are *thunks* until first run, then they +are \"evaluated\" (JIT compiled) to a function that runs with no +overhead. + +Some functions/labels may force themselves to be JIT compiled or +interpreted so as to adopt a structure that could only come about from +being lazily evaluated on the fly. Potentially a full runtime could be +set-up to allow for AOT compiled lazy-thunks. + +The runtime JIT process may need to have multiple steps, akin to the V8 +JavaScript runtime. Register allocation may be heavy on resources as it +is a graph-colouring problem, so an initial round may not allocate +machine registers optimally, or may even run interpreted. + +Whilst the standard implementation will only really be a psuedo-runtime, +the initial implementation will simply be an interpreted VM. + +To be clear, this project is defining a VM target, a VM interpreter, and +a VM compiler with JIT/AOT. + +Given the stack-register hybrid approach, temporaries will be stored in +one of 2 places to be determined at runtime: 1. A dynamic array of +registers that is determined at VM startup/compile-time to match the +host architecture. This is the \"memory\" in memory machine. 2. The +regular stack. 3. The heap. + +Given the simplicity, beauty, and popularity of RISC machines, the +instruction set of RemedyVM will be very simple and should be similar to +ARM, MIPS, and other RISC instruction sets. + +RemedyVM should be as platform agnostic as possible, though it\'s hard +to get a \"one implementation fits all\", the underlying platform +assumptions should try to be as minimal as possible. + +Given that RemedyVM is inherently not a very stable target, compilers do +not have much control over what goes into registers, and what goes onto +the stack. This is the hardest problem that this project aims to solve. +This issue is discussed in more depth in the [unstable target +section](#unstable-target-solutions), but the solution picked by +RemedyVM is to have a *memory machine* RISC architecture. + +## Unstable compiler target solutions {#unstable-target-solutions} + +Given the virtual machine decides the number of registers to allocate at +run/comptime, compilers have an unstable target! + +How can a compiler be sure that an architecture will be able to put 20 +variables into registers? It can\'t! + +Some solutions: 1. Treat all temporaries as being in an ordered dynamic +list, the runtime register allocation takes the $K$ lowest temporaries +and puts them into registers. (Where $K$ is the number of registers for +a target) - This is the method used in the `dis`{.verbatim} VM by Bell +Labs, they use a very similar Memory Machine architecture. 2. Leave it +up to the compiler, any excess \"registers\" get treated as +temporaries. - This would be bad for targets that have too few or many +registers. 3. Stabilise the VM target and just pick a number of +registers. - The JVM takes this to the extreme and has only machine +registers, PC registers, and the stack. 4. Restrict compilers to just +use a stack, the JIT will decide on registers. - This would make +RemedyVM a simple stack-based VM + +It seems from these that the best solution is solution 1, to go with an +infinite-register hybrid VM, or rather a Memory machine. Unlike +`dis`{.verbatim}, RemedyVM will be a RISC-like architecture for ease of +instruction selection. RemedyVM is thus a *memory virtual machine* with +stack & heap components for records that might not fit well into the +architecture. + +## Goals + +- [ ] Lay-out the basic instruction set + - [ ] Move + - [ ] Stack + - [ ] Heap + - [ ] Arithmetic + - [ ] Logical + - [ ] Jumps + - [ ] Conditional instructions + - [ ] All instructions should be conditional +- [ ] Implement an interpreter +- [ ] Compilation + - [ ] Assembly generation + - [ ] Linking + - [ ] Static linking + - [ ] Dynamic linking (?) + - *Note: Linking will probably just be done with the regular + gcc tooling, at least initially.* + - [ ] Optimise instruction selection for RISC platforms + - [ ] Optimise instruction selection for x86 platforms + - [ ] Implement AOT compiler + - [ ] Link to libc etc + - [ ] Implement JIT compiler + - [ ] Runtime implementation (?) + - [ ] Lazily compiled thunk implementation +- [ ] Implement a basic compiler targeting RemedyVM + - [ ] Basic lisp-like LL(1) language + - [ ] Simpler Lisp-Haskell hybrid + - [ ] Mojo +- [ ] Tests + - [ ] Unit VM tests + - [ ] Fibonacci + - [ ] Factorial + - [ ] Mandelbrot + - [ ] Game of life + - [ ] Memory tests + - [ ] Arithmetic tests +- [ ] Cross-Platform + - [ ] Linux + - [ ] Plan9(front) + - [ ] OpenBSD + - [ ] MacOS + - [ ] Windows +- [ ] Documentation (ugh) + - [ ] Design doc + - [ ] Target spec +- [ ] Implementation languages + - [ ] C + - The most cross-platform, will support every target + - [ ] Haskell + - Will support most targets + - [ ] Rust (?) + - [ ] Go (?) + - [ ] Zig (?) + - [ ] Java (?) + - Icky, but also have already written *some* of a compiler in + Java. + +### Note + +- This is a personal project, and is not going to be a serious virtual + machine. +- It is just experience, my first virtual machine! +- The more advanced features may not ever be implemented. diff --git a/outline.org b/outline.org @@ -0,0 +1,155 @@ +* RemedyVM, the register-memory dynamic virtual machine +** Overview +RemedyVM will be a (memory↔stack↔heap)-based VM target, with an AOT +compiler, a JIT compiler, and a simple interpreter. + +The memory/stack/heap approach allows for register allocation +optimisations at run/compile-time, whilst still offering compilers +targeting RemedyVM the option to put memory into a stack or the heap. + +A memory machine (MM) is a machine with "infinite" registers, each +corresponding with a temporary value. Much like a register machine, +temporaries can easily occupy real registers in JIT compilation, but +much like a stack machine, the registers are in a growing list. + +There are currently no plans for garbage collection, but if it is to be +implemented in the future, it will likely be done similarly to Bell +Labs' =dis= VM, taking a hybrid reference count/mark and sweep approach. +According to Winterbottom and Pike, the memory machine architecture +makes this easier to accomplish. + +The bytecode should be compilable straight to machine code ahead of time +(AOT), removing /any/ runtime in the process. The JIT compiler will need +a psuedo-runtime, all functions are /thunks/ until first run, then they +are "evaluated" (JIT compiled) to a function that runs with no overhead. + +Some functions/labels may force themselves to be JIT compiled or +interpreted so as to adopt a structure that could only come about from +being lazily evaluated on the fly. Potentially a full runtime could be +set-up to allow for AOT compiled lazy-thunks. + +The runtime JIT process may need to have multiple steps, akin to the V8 +JavaScript runtime. Register allocation may be heavy on resources as it +is a graph-colouring problem, so an initial round may not allocate +machine registers optimally, or may even run interpreted. + +Whilst the standard implementation will only really be a psuedo-runtime, +the initial implementation will simply be an interpreted VM. + +To be clear, this project is defining a VM target, a VM interpreter, and +a VM compiler with JIT/AOT. + +Given the stack-register hybrid approach, temporaries will be stored in +one of 2 places to be determined at runtime: 1. A dynamic array of +registers that is determined at VM startup/compile-time to match the +host architecture. This is the "memory" in memory machine. 2. The +regular stack. 3. The heap. + +Given the simplicity, beauty, and popularity of RISC machines, the +instruction set of RemedyVM will be very simple and should be similar +to ARM, MIPS, and other RISC instruction sets. + +RemedyVM should be as platform agnostic as possible, though it's hard +to get a "one implementation fits all", the underlying platform +assumptions should try to be as minimal as possible. + +Given that RemedyVM is inherently not a very stable target, compilers +do not have much control over what goes into registers, and what goes +onto the stack. This is the hardest problem that this project aims to +solve. This issue is discussed in more depth in the +[[#unstable-target-solutions][unstable target section]], but +the solution picked by RemedyVM is to have a /memory machine/ RISC +architecture. + +** Unstable compiler target solutions +:PROPERTIES: +:CUSTOM_ID: unstable-target-solutions +:END: +Given the virtual machine decides the number of registers to allocate at +run/comptime, compilers have an unstable target! + +How can a compiler be sure that an architecture will be able to put 20 +variables into registers? It can't! + +Some solutions: 1. Treat all temporaries as being in an ordered dynamic +list, the runtime register allocation takes the \(K\) lowest temporaries +and puts them into registers. (Where \(K\) is the number of registers +for a target) - This is the method used in the =dis= VM by Bell Labs, +they use a very similar Memory Machine architecture. 2. Leave it up to +the compiler, any excess "registers" get treated as temporaries. - This +would be bad for targets that have too few or many registers. 3. +Stabilise the VM target and just pick a number of registers. - The JVM +takes this to the extreme and has only machine registers, PC registers, +and the stack. 4. Restrict compilers to just use a stack, the JIT will +decide on registers. - This would make RemedyVM a simple stack-based +VM + +It seems from these that the best solution is solution 1, to go with an +infinite-register hybrid VM, or rather a Memory machine. Unlike =dis=, +RemedyVM will be a RISC-like architecture for ease of instruction +selection. RemedyVM is thus a /memory virtual machine/ with stack & +heap components for records that might not fit well into the +architecture. + +** Goals +- [ ] Lay-out the basic instruction set + - [ ] Move + - [ ] Stack + - [ ] Heap + - [ ] Arithmetic + - [ ] Logical + - [ ] Jumps + - [ ] Conditional instructions + - [ ] All instructions should be conditional +- [ ] Implement an interpreter +- [ ] Compilation + - [ ] Assembly generation + - [ ] Linking + - [ ] Static linking + - [ ] Dynamic linking (?) + - /Note: Linking will probably just be done with the regular gcc + tooling, at least initially./ + - [ ] Optimise instruction selection for RISC platforms + - [ ] Optimise instruction selection for x86 platforms + - [ ] Implement AOT compiler + - [ ] Link to libc etc + - [ ] Implement JIT compiler + - [ ] Runtime implementation (?) + - [ ] Lazily compiled thunk implementation +- [ ] Implement a basic compiler targeting RemedyVM + - [ ] Basic lisp-like LL(1) language + - [ ] Simpler Lisp-Haskell hybrid + - [ ] Mojo +- [ ] Tests + - [ ] Unit VM tests + - [ ] Fibonacci + - [ ] Factorial + - [ ] Mandelbrot + - [ ] Game of life + - [ ] Memory tests + - [ ] Arithmetic tests +- [ ] Cross-Platform + - [ ] Linux + - [ ] Plan9(front) + - [ ] OpenBSD + - [ ] MacOS + - [ ] Windows +- [ ] Documentation (ugh) + - [ ] Design doc + - [ ] Target spec +- [ ] Implementation languages + - [ ] C + - The most cross-platform, will support every target + - [ ] Haskell + - Will support most targets + - [ ] Rust (?) + - [ ] Go (?) + - [ ] Zig (?) + - [ ] Java (?) + - Icky, but also have already written /some/ of a compiler in Java. + +*** Note +- This is a personal project, and is not going to be a serious virtual + machine. +- It is just experience, my first virtual machine! +- The more advanced features may not ever be implemented.