├── .gitignore ├── LICENSE ├── Makefile ├── README.org ├── assembler.c ├── bif.rbt ├── blogstyle.css ├── deref.rbt ├── disassembler.c ├── rabbit.c ├── rabbit.html ├── rabbit.org ├── rabbit.pdf ├── rabbit_bif.h ├── rabbit_codewords.c ├── rabbit_codewords.h ├── rabbit_io.c ├── rabbit_io.h ├── rabbit_types.h ├── rabbitvm.html ├── rabbitvm.org ├── rabbitvm.pdf ├── style.css └── test.rbt /.gitignore: -------------------------------------------------------------------------------- 1 | *.bc 2 | *.ll 3 | \#* 4 | *.tex 5 | *.out 6 | *~ 7 | rabbit 8 | rabbit-asm 9 | rabbit-dis 10 | *.rbto 11 | *.dSYM 12 | *.o -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Maxwell Bernstein 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS=-Wall -Wextra -Wpedantic 3 | LFLAGS= 4 | RABBIT_OBJS=rabbit.o rabbit_io.o rabbit_codewords.o 5 | ASSEMBLER_OBJS=assembler.o rabbit_io.o 6 | DISASSEMBLER_OBJS=disassembler.o rabbit_io.o rabbit_codewords.o 7 | 8 | all: rabbit rabbit-asm rabbit-dis 9 | 10 | clean: 11 | rm -f rabbit rabbit-asm rabbit-dis 12 | rm -f $(RABBIT_OBJS) $(ASSEMBLER_OBJS) $(DISASSEMBLER_OBJS) 13 | 14 | rabbit: $(RABBIT_OBJS) 15 | $(CC) $(CFLAGS) $(LFLAGS) $(RABBIT_OBJS) -o rabbit 16 | 17 | rabbit-asm: $(ASSEMBLER_OBJS) 18 | $(CC) $(CFLAGS) $(LFLAGS) $(ASSEMBLER_OBJS) -o rabbit-asm 19 | 20 | rabbit-dis: $(DISASSEMBLER_OBJS) 21 | $(CC) $(CFLAGS) $(LFLAGS) $(DISASSEMBLER_OBJS) -o rabbit-dis 22 | 23 | %.o: %.c 24 | $(CC) $(CFLAGS) -o $@ -c $< 25 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | rabbit.org -------------------------------------------------------------------------------- /assembler.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "rabbit_bif.h" 9 | #include "rabbit_codewords.h" 10 | 11 | /* 12 | Instruction format is as follows: 13 | 14 | add r0, r1, r2 15 | out r1 16 | 17 | cmov r1, r2, r3 18 | 19 | MAKE SURE YOU HAVE A BLANK LINE AT THE END!!! 
20 | */ 21 | 22 | typedef uint32_t word; 23 | 24 | void error(const char *msg, const char *optional_arg); 25 | void check_usage(int argc); 26 | FILE *open_file(const char *fn, const char *mode); 27 | word file_lines(FILE *fp); 28 | void assemble_file(FILE *input, FILE *output); 29 | 30 | int main(int argc, char **argv) { 31 | check_usage(argc); 32 | 33 | const char *inputfn = argv[1]; 34 | size_t inputfnlen = strlen(inputfn); 35 | char *outputfn = malloc(inputfnlen + 2 + 1); 36 | if (outputfn == NULL) { 37 | error("Could not allocate space for filename.", NULL); 38 | return 1; 39 | } 40 | 41 | strncpy(outputfn, inputfn, inputfnlen); 42 | outputfn[inputfnlen] = '.'; 43 | outputfn[inputfnlen + 1] = 'o'; 44 | outputfn[inputfnlen + 2] = 0; 45 | 46 | FILE *input = open_file(inputfn, "r"); 47 | FILE *output = open_file(outputfn, "wb+"); 48 | assemble_file(input, output); 49 | fclose(input); 50 | fclose(output); 51 | } 52 | 53 | void error(const char *msg, const char *optional_arg) { 54 | fprintf(stderr, "Error: "); 55 | 56 | if (optional_arg) { 57 | fprintf(stderr, msg, optional_arg); 58 | putchar('\n'); 59 | } else { 60 | fprintf(stderr, "%s\n", msg); 61 | } 62 | 63 | exit(EXIT_FAILURE); 64 | } 65 | 66 | void check_usage(int argc) { 67 | if (argc != 2) { 68 | error("Invalid usage. 
Please run like: " 69 | "./asm ", 70 | NULL); 71 | } 72 | } 73 | 74 | FILE *open_file(const char *fn, const char *mode) { 75 | FILE *fp = fopen(fn, mode); 76 | 77 | if (fp == NULL) { 78 | error("Could not open file `%s'.", fn); 79 | } 80 | 81 | return fp; 82 | } 83 | 84 | word file_lines(FILE *fp) { 85 | word lines = 0; 86 | 87 | /* Copied straight from http://stackoverflow.com/a/12733630/569183 */ 88 | while (!feof(fp)) { 89 | if (fgetc(fp) == '\n') { 90 | lines++; 91 | } 92 | } 93 | 94 | rewind(fp); 95 | return lines; 96 | } 97 | 98 | static int instr_lookup(char *instr) { 99 | for (int i = 0; i < kNumInstrs; i++) { 100 | if (strcmp(instr, instr_nargs[i].name) == 0) { 101 | return i; 102 | } 103 | } 104 | return kInvalidInstr; 105 | } 106 | 107 | #define OP_WIDTH 4 108 | static const char OP_LSB = 32 - OP_WIDTH; 109 | 110 | static const unsigned char OP_MASK = 0xF; 111 | static const unsigned char REG_MASK = 0xF; 112 | 113 | enum { REG, IMMINT, IMMSTR }; 114 | 115 | struct instrarg { 116 | int argtype; 117 | int is_deref; 118 | word i; 119 | }; 120 | 121 | struct instrarg emptyarg() { 122 | return (struct instrarg){.argtype = REG, .is_deref = 0, .i = 0}; 123 | } 124 | 125 | typedef struct { 126 | int is_imm; 127 | word instr; 128 | word imm; 129 | } instr_t; 130 | 131 | static instr_t three_register(unsigned char op, struct instrarg a, 132 | struct instrarg b, struct instrarg c) { 133 | static const unsigned char RA_LSB = 0; 134 | static const unsigned char RB_LSB = 4; 135 | static const unsigned char RC_LSB = 8; 136 | static const unsigned char MODES_LSB = 24; 137 | static const unsigned char RB_ADDRA = 1U << 0, RB_ADDRB = 1U << 1, 138 | RB_ADDRC = 1U << 2, RB_IMMED = 1U << 3; 139 | 140 | uint8_t modes = 0; 141 | if (a.is_deref == 1) { 142 | modes |= RB_ADDRA; 143 | } 144 | if (b.is_deref == 1) { 145 | modes |= RB_ADDRB; 146 | } 147 | if (c.is_deref == 1) { 148 | modes |= RB_ADDRC; 149 | } 150 | if (c.argtype == IMMINT) { 151 | modes |= RB_IMMED; 152 | } 153 | 
154 | word w = 0; 155 | w |= (op & OP_MASK) << OP_LSB; 156 | w |= (a.i & REG_MASK) << RA_LSB; 157 | w |= (b.i & REG_MASK) << RB_LSB; 158 | w |= (c.i & REG_MASK) << RC_LSB; 159 | w |= (modes & 0xF) << MODES_LSB; 160 | 161 | instr_t instr = {.is_imm = 0, .instr = w, .imm = 0}; 162 | if (c.argtype == IMMINT) { 163 | instr.is_imm = 1; 164 | instr.imm = c.i; 165 | } 166 | 167 | return instr; 168 | } 169 | 170 | int read_int(FILE *input) { 171 | int val; 172 | if (fscanf(input, "%d", &val) != 1) { 173 | error("Could not parse instruction argument. " 174 | "Was expecting integer.", 175 | NULL); 176 | } 177 | return val; 178 | } 179 | 180 | int read_whitespace(FILE *input) { 181 | int c; 182 | do { 183 | c = fgetc(input); 184 | } while (c != EOF && isspace(c)); 185 | return c; 186 | } 187 | 188 | int bif_lookup(char *bif_name) { 189 | for (int i = 0; i < kNumBifs; i++) { 190 | if (strcmp(bif_name, bif_names[i]) == 0) { 191 | return i; 192 | } 193 | } 194 | return kInvalidBif; 195 | } 196 | 197 | int read_bif(FILE *input) { 198 | char bif_name[12] = {0}; 199 | if (fscanf(input, "%11s", bif_name) != 1) { 200 | error("Could not parse bif argument. 
" 201 | "Was expecting bif name.", 202 | NULL); 203 | } 204 | int bif_id = bif_lookup(bif_name); 205 | if (bif_id == kInvalidBif) { 206 | error("Invalid bif with name `%s'.", bif_name); 207 | } 208 | return bif_id; 209 | } 210 | 211 | static struct instrarg read_arg(FILE *input) { 212 | int c = read_whitespace(input); 213 | if (c == EOF) { 214 | error("Reached end of file while reading an instruction.", NULL); 215 | } 216 | 217 | struct instrarg arg = emptyarg(); 218 | if (c == '(') { 219 | arg.is_deref = 1; 220 | c = fgetc(input); 221 | } 222 | 223 | switch (c) { 224 | case '$': 225 | arg.i = read_int(input); 226 | arg.argtype = IMMINT; 227 | break; 228 | case 'r': 229 | arg.i = read_int(input); 230 | arg.argtype = REG; 231 | break; 232 | case '@': 233 | arg.i = read_bif(input); 234 | arg.argtype = IMMINT; 235 | break; 236 | default: 237 | error("Could not parse instruction argument. Expecting: $x, rx.", NULL); 238 | break; 239 | } 240 | 241 | if (arg.is_deref == 1 && fgetc(input) != ')') { 242 | error("Could not parse instruction argument. 
Expecting: ).", NULL); 243 | } 244 | 245 | return arg; 246 | } 247 | 248 | static instr_t read_instr(FILE *input, char *op_str) { 249 | unsigned instr = instr_lookup(op_str); 250 | if (instr == kInvalidInstr) { 251 | error("Do not know instruction: `%s'.", op_str); 252 | } 253 | unsigned nargs = instr_nargs[instr].nargs; 254 | if (nargs == 0) { 255 | return three_register(instr, emptyarg(), emptyarg(), emptyarg()); 256 | } 257 | if (nargs == 1) { 258 | struct instrarg c = read_arg(input); 259 | return three_register(instr, emptyarg(), emptyarg(), c); 260 | } 261 | if (nargs == 2) { 262 | struct instrarg b = read_arg(input); 263 | struct instrarg c = read_arg(input); 264 | return three_register(instr, emptyarg(), b, c); 265 | } 266 | if (nargs == 3) { 267 | struct instrarg a = read_arg(input); 268 | struct instrarg b = read_arg(input); 269 | struct instrarg c = read_arg(input); 270 | return three_register(instr, a, b, c); 271 | } 272 | error("Unreachable (op `%s').", op_str); 273 | assert(0); 274 | } 275 | 276 | void write_word(FILE *fp, word *w) { 277 | unsigned char *wordp = (unsigned char *)w; 278 | for (int i = 3; i >= 0; i--) { 279 | fwrite(&wordp[i], sizeof wordp[i], 1, fp); 280 | } 281 | } 282 | 283 | void assemble_file(FILE *input, FILE *output) { 284 | word lines = file_lines(input); 285 | 286 | for (word i = 0; i < lines; i++) { 287 | char instr_string[7]; 288 | fscanf(input, "%s", instr_string); 289 | instr_t instr = read_instr(input, instr_string); 290 | write_word(output, &instr.instr); 291 | if (instr.is_imm == 1) { 292 | write_word(output, &instr.imm); 293 | } 294 | } 295 | } 296 | -------------------------------------------------------------------------------- /bif.rbt: -------------------------------------------------------------------------------- 1 | bif @hello 2 | halt 3 | -------------------------------------------------------------------------------- /blogstyle.css: 
-------------------------------------------------------------------------------- 1 | body, 2 | #content p { 3 | margin: 0 auto; 4 | max-width: 700px; 5 | } 6 | 7 | h1, h2, h3, h4, h5, h6, 8 | #content ol, 9 | .footdef { 10 | margin-left: 20px; 11 | } 12 | 13 | #content ol { 14 | max-width: 480px; 15 | text-align: justify; 16 | padding-left: 15px; 17 | } 18 | 19 | #content p, 20 | #content h2, 21 | .footdef { 22 | position: relative; 23 | max-width: 500px; 24 | text-align: justify; 25 | } 26 | 27 | /* paragraphs preceded by paragraphs have space above */ 28 | #content p + p { 29 | margin-top: 20px; 30 | } 31 | 32 | #content li + li { 33 | margin-top: 10px; 34 | } 35 | 36 | #postamble { 37 | margin: 0 auto; 38 | max-width: 500px; 39 | margin-top: 20px; 40 | } -------------------------------------------------------------------------------- /deref.rbt: -------------------------------------------------------------------------------- 1 | move r1 $10 2 | move (r1) $97 3 | out (r1) 4 | out $10 5 | halt 6 | -------------------------------------------------------------------------------- /disassembler.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "rabbit_bif.h" 7 | #include "rabbit_codewords.h" 8 | #include "rabbit_io.h" 9 | /* Disassemble the object file named by argv[1], one instruction per output line. */ 10 | int main(int argc, char **argv) { 11 | if (argc != 2) { 12 | fprintf(stderr, "Need to pass file to execute.\n"); 13 | return 1; 14 | } 15 | /* The object file is binary data, so open it in binary mode. */ 16 | char *fn = argv[1]; 17 | FILE *fp = fopen(fn, "rb"); 18 | if (fp == NULL) { 19 | fprintf(stderr, "Can't open `%s'.\n", fn); 20 | return 1; 21 | } 22 | 23 | while (1) { 24 | uint32_t word = 0; 25 | int nread = read_word(fp, &word); 26 | if (nread == 0) { 27 | fclose(fp); 28 | return 0; 29 | } 30 | printf("%.8x:\t", word); 31 | 32 | struct unpacked_s i = decode(word); 33 | assert(i.opcode < kNumInstrs); 34 | const char *name = instr_nargs[i.opcode].name; 35 | unsigned nargs = instr_nargs[i.opcode].nargs; 
36 | 37 | uint32_t immediate = 0; 38 | printf("%s", name); 39 | if (i.opcode == Bif) { 40 | assert(i.rega < kNumBifs); 41 | printf(" %s\n", bif_names[i.rega]); 42 | continue; 43 | } 44 | 45 | if (nargs == 1 || nargs == 2 || nargs == 3) { 46 | putchar(' '); 47 | if (nargs == 2 || nargs == 3) { 48 | if (nargs == 3) { 49 | if (i.modes.rega_deref == 1) { 50 | printf("(r%d), ", i.rega); 51 | } else { 52 | printf("r%d, ", i.rega); 53 | } 54 | } 55 | 56 | if (i.modes.regb_deref == 1) { 57 | printf("(r%d), ", i.regb); 58 | } else { 59 | printf("r%d, ", i.regb); 60 | } 61 | } 62 | 63 | if (i.modes.regc_deref == 1) { 64 | putchar('('); 65 | } 66 | 67 | if (i.modes.immediate == 1) { 68 | nread = read_word(fp, &immediate); 69 | if (nread == 0) { 70 | return 1; 71 | } 72 | 73 | printf("$%d", immediate); 74 | } else { 75 | printf("r%d", i.regc); 76 | } 77 | 78 | if (i.modes.regc_deref) { 79 | putchar(')'); 80 | } 81 | } 82 | putchar('\n'); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /rabbit.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "rabbit_bif.h" 9 | #include "rabbit_codewords.h" 10 | #include "rabbit_io.h" 11 | #include "rabbit_types.h" 12 | 13 | /* Exit/error codes and statuses. */ 14 | typedef enum rabbits { 15 | RB_SUCCESS = 0, 16 | RB_FAIL, 17 | RB_OVERFLOW, 18 | RB_ILLEGAL, 19 | } rabbits; 20 | 21 | /* Registers available for use. */ 22 | typedef enum rabbitr { 23 | RB_ZERO = 0, 24 | RB_R1, 25 | RB_R2, 26 | RB_R3, 27 | RB_R4, 28 | RB_R5, 29 | RB_R6, 30 | RB_R7, 31 | RB_R8, 32 | RB_R9, 33 | RB_IP, 34 | RB_SP, 35 | RB_RET, 36 | RB_TMP, 37 | RB_FLAGS, 38 | RB_NUMREGS, 39 | } rabbitr; 40 | 41 | const unsigned kZeroFlag = 0x2U; 42 | 43 | #define fetch_immediate() mem[regs[RB_IP]++] 44 | 45 | /* Most instructions have a destination address and two operands. This struct 46 | * holds those. 
*/ 47 | struct abc_s { 48 | rabbitw *dst, b, c; 49 | }; 50 | 51 | /* Using the options/modes given by the user, fetch the destination address and 52 | * operands. An immediate operand is read from mem at regs[RB_IP], which is then advanced. */ 53 | static struct abc_s getabc(rabbitw *regs, rabbitw *mem, struct unpacked_s i) { 54 | rabbitw *dst = i.modes.rega_deref ? &mem[regs[i.rega]] : &regs[i.rega]; 55 | rabbitw bval = i.modes.regb_deref ? mem[regs[i.regb]] : regs[i.regb]; 56 | rabbitw cval = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 57 | cval = i.modes.regc_deref ? mem[cval] : cval; 58 | return (struct abc_s){.dst = dst, .b = bval, .c = cval}; 59 | } 60 | /* Built-in function: prints "hello" and a newline to stdout; always returns 0. */ 61 | rabbitw hello(rabbitw *regs, rabbitw *mem) { 62 | (void)regs; 63 | (void)mem; 64 | fprintf(stdout, "hello\n"); 65 | return 0; 66 | } 67 | 68 | typedef rabbitw (*bif)(rabbitw *regs, rabbitw *mem); 69 | 70 | #define FN_ENTRY(id, fn_name) fn_name, 71 | 72 | static const bif bif_functions[] = {BIF_TABLE(FN_ENTRY)}; 73 | 74 | #undef FN_ENTRY 75 | 76 | int main(int argc, char **argv) { 77 | if (argc != 2) { 78 | fprintf(stderr, "Need to pass file to execute.\n"); 79 | return RB_FAIL; 80 | } 81 | 82 | char *fn = argv[1]; 83 | FILE *fp = fopen(fn, "rb"); 84 | if (fp == NULL) { 85 | fprintf(stderr, "Can't open `%s'.\n", fn); 86 | return RB_FAIL; 87 | } 88 | 89 | /* Get the size of the file so that we can allocate memory for it. */ 90 | struct stat st; 91 | if (fstat(fileno(fp), &st) != 0) { 92 | fprintf(stderr, "Can't stat `%s'.\n", fn); 93 | fclose(fp); 94 | return RB_FAIL; 95 | } 96 | 97 | /* Allocate memory with program first, stack second. */ 98 | size_t stacksize = 1000; 99 | off_t size = st.st_size; 100 | rabbitw *mem = malloc(stacksize + size * sizeof *mem); 101 | if (mem == NULL) { 102 | fclose(fp); 103 | fprintf(stderr, "Not enough memory. Could not allocate stack of size " 104 | "%zu + program of size %ld.\n", 105 | stacksize, (long)size); 106 | return RB_FAIL; 107 | } 108 | 109 | /* Read the file into memory. 
*/ 110 | size_t i = 0; 111 | rabbitw word = 0; 112 | /* We cannot use fread because of endian-ness issues. */ 113 | while (read_word(fp, &word) != 0) { 114 | mem[i++] = word; 115 | } 116 | fclose(fp); 117 | 118 | struct abc_s abc; 119 | rabbitw regs[RB_NUMREGS] = {0}; 120 | regs[RB_SP] = i; 121 | 122 | /* Main fetch-decode-execute loop. */ 123 | while (1) { 124 | /* Fetch the current instruction word. */ 125 | rabbitw word = mem[regs[RB_IP]++]; 126 | 127 | /* Decode it. */ 128 | struct unpacked_s i = decode(word); 129 | 130 | /* Execute it. */ 131 | switch (i.opcode) { 132 | case Halt: 133 | free(mem); 134 | return RB_SUCCESS; 135 | break; 136 | case Move: { 137 | /* Move is special because it has one source instead of two 138 | * operands. */ 139 | rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 140 | rabbitw *dst = i.modes.regb_deref ? &mem[regs[i.regb]] : &regs[i.regb]; 141 | *dst = i.modes.regc_deref ? mem[src] : src; 142 | break; 143 | } 144 | case Add: 145 | abc = getabc(regs, mem, i); 146 | *abc.dst = abc.b + abc.c; 147 | break; 148 | case Sub: 149 | abc = getabc(regs, mem, i); 150 | rabbitw res = abc.b - abc.c; 151 | *abc.dst = res; 152 | /* ZF tracks the latest result: set when zero, cleared otherwise. */ 153 | regs[RB_FLAGS] = res == 0 ? (regs[RB_FLAGS] | kZeroFlag) 154 | : (regs[RB_FLAGS] & ~kZeroFlag); 155 | break; 156 | case Mul: 157 | abc = getabc(regs, mem, i); 158 | *abc.dst = abc.b * abc.c; 159 | break; 160 | case Div: 161 | abc = getabc(regs, mem, i); 162 | if (abc.c == 0) { 163 | free(mem); 164 | return RB_ILLEGAL; 165 | } 166 | 167 | *abc.dst = abc.b / abc.c; 168 | break; 169 | case Shr: 170 | abc = getabc(regs, mem, i); 171 | *abc.dst = abc.b >> abc.c; 172 | break; 173 | case Shl: 174 | abc = getabc(regs, mem, i); 175 | *abc.dst = abc.b << abc.c; 176 | break; 177 | case Nand: 178 | abc = getabc(regs, mem, i); 179 | *abc.dst = ~(abc.b & abc.c); 180 | break; 181 | case Xor: 182 | abc = getabc(regs, mem, i); 183 | *abc.dst = abc.b ^ abc.c; 184 | break; 185 | case Br: { 186 | /* Branch is special because it only has one argument. 
*/ 187 | rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 188 | regs[RB_IP] = i.modes.regc_deref ? mem[src] : src; 189 | break; 190 | } 191 | case Brz: { 192 | /* Branch if ZF is set. The operand is fetched unconditionally so that an 193 | * immediate word is consumed even when the branch is not taken. */ 194 | rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 195 | if ((regs[RB_FLAGS] & kZeroFlag) != 0) { 196 | regs[RB_IP] = i.modes.regc_deref ? mem[src] : src; 197 | } 198 | break; 199 | } 200 | case Brnz: { 201 | /* Branch if ZF is not set. As with brz, the operand is always consumed. */ 202 | rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 203 | if ((regs[RB_FLAGS] & kZeroFlag) == 0) { 204 | regs[RB_IP] = i.modes.regc_deref ? mem[src] : src; 205 | } 206 | break; 207 | } 208 | case In: { 209 | /* Input is special because it does not have an argument. */ 210 | rabbitw *dstp = i.modes.regc_deref ? &mem[regs[i.regc]] : &regs[i.regc]; 211 | *dstp = getchar(); 212 | break; 213 | } 214 | case Out: { 215 | /* Output is special because it has one argument and no 216 | * destination. */ 217 | rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 218 | src = i.modes.regc_deref ? mem[src] : src; 219 | putchar(src); 220 | break; 221 | } 222 | case Bif: { 223 | rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 224 | src = i.modes.regc_deref ? mem[src] : src; 225 | /* Valid ids index bif_functions, i.e. 0 .. kNumBifs - 1. */ 226 | if (src >= kNumBifs) { 227 | fprintf(stderr, "Invalid bif: `%u'.\n", src); 228 | free(mem); 229 | return RB_FAIL; 230 | } 231 | bif f = bif_functions[src]; 232 | f(regs, mem); 233 | break; 234 | } 235 | /* case RB_CFF: 236 | break; 237 | */ 238 | default: 239 | free(mem); 240 | return RB_ILLEGAL; 241 | break; 242 | } 243 | } 244 | 245 | return 0; 246 | } 247 | 248 | #undef fetch_immediate 249 | -------------------------------------------------------------------------------- /rabbit.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | Rabbit VM RFC 7 | 8 | 9 | 10 | 11 | 91 | 92 | 138 | 139 | 140 |
141 |

Rabbit VM RFC

142 |
143 |

Table of Contents

144 |
145 | 183 |
184 |
185 | 186 |
187 |

1 Why?

188 |
189 |

190 | I would like to make a RISC architecture that is capable of comfortably sitting 191 | on top of nearly any other architecture. If it's possible to compile to Rabbit, 192 | then a program can run on more or less any hardware or virtualized architecture. 193 |

194 | 195 |

196 | Rabbit can be optimized per-architecture, while maintaining the same 197 | interface. It may, for example, take advantage of Intel's SIMD behind the 198 | scenes. 199 |

200 |
201 |
202 | 203 |
204 |

2 What?

205 |
206 |
207 |

2.1 Definitions

208 |
209 |

210 | space: A register or memory location. 211 |

212 |
213 |
214 | 215 |
216 |

2.2 Registers

217 |
218 |

219 | Registers are 32 bits wide. 220 |

221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 |
ValueRegisterUse
0x0zeroContains 0. MIPS style.
0x1 .. 0x9r1 .. r9General purpose.
0xAipInstruction pointer.
0xBspStack pointer.
0xCretReturned value.
0xDtmpTemporary register.
0xEflagsFlags used for comparison.
283 | 284 |

285 | 286 |

287 |
288 |
289 |

2.2.1 Flags

290 |
291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 |
BitFlagMeaning
0x0SFSign flag. On if sign bit of result is on.
0x1ZFZero flag. On if result is zero or numbers were the same.
0x2 .. 0x20 Reserved.
328 |
329 |
330 |
331 | 332 |
333 |

2.3 Instruction set

334 |
335 |
336 |

2.3.1 Real instructions

337 |
338 |

339 | When it makes sense, the destination register is the first argument to an 340 | instruction. The last argument to the following instructions may also be an 341 | immediate value, denoted with a prefix of $: move, add, sub, mul, div, 342 | shr, shl, nand, xor, br, brz, brnz. 343 |

344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 |
ValueInstructionUsageExplanationDescription
0x0halthalt Stop the execution of the machine immediately.
0x1movemove %rB, %rCr[B] := r[C]Move one space into another.
0x2addadd %rA, %rB, %rCr[A] := r[B] + r[C]Add two spaces into a third.
0x3subsub %rA, %rB, %rCr[A] := r[B] - r[C]Subtract two spaces into a third. Sets flags.
0x4mulmul %rA, %rB, %rCr[A] := r[B] * r[C]Multiply two spaces into a third.
0x5divdiv %rA, %rB, %rCr[A] := r[B] / r[C]Divide two spaces into a third.
0x6shrshr %rA, %rB, %rCr[A] := r[B] >> r[C]Shift right one space a number of times.
0x7shlshl %rA, %rB, %rCr[A] := r[B] << r[C]Shift left one space a number of times.
0x8nandnand %rA, %rB, %rCr[A] := not(r[B] & r[C])NAND two spaces.
0x9xorxor %rA, %rB, %rCr[A] := r[B] ^ r[C]XOR two spaces.
0xAbrbr %rCgoto r[C]Branch.
0xBbrzbrz %rCif (ZF set) goto r[C]Branch if ZF is set.
0xCbrnzbrnz %rCif (!(ZF set)) goto r[C]Branch if ZF is not set.
0xDinin %rCr[C] := getchar()Read one character from stdin into a space.
0xEoutout %rCputchar(r[C])Print one character from a space to stdout.
490 |
491 |
492 | 493 |
494 |

2.3.2 Assembler macros

495 |
496 |

497 | The last argument to the following macros may also be an immediate value, 498 | denoted with a prefix of $: cmp, not, push, call. 499 |

500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 |
MacroUsageExpansion
cmpcmp A, Bsub %tmp, A, B
notnot A, Bnand A, B, B
oror A, B, C(B nand B) nand (C nand C)
andand A, B, Cnand A, B, C // not A, A
pushpush Amove (%sp), A // sub %sp, %sp, $1
poppop Aadd %sp, %sp, $1 // move A, (%sp)
callcall Apush %ip // br A
retretpop %ip
568 |
569 |
570 |
571 | 572 |
573 |

2.4 Addressing modes

574 |
575 |

576 | There are two addressing modes: %reg and (%reg). The former uses the value 577 | in the register, and the latter uses the word at the address in the register. 578 |

579 |
580 |
581 |
582 | 583 |
584 |

3 How?

585 |
586 |
587 |

3.1 Instruction formats

588 |
589 |
590 | instr %rA, %rB, %rC
591 | instr %rB, %rC
592 | instr %rC
593 | 
594 | 595 |
596 |      +-----Immediate bit
597 |      |+----Addressing mode bit C
598 |      ||+---Addressing mode bit B
599 |      |||+--Addressing mode bit A
600 |      |||| +Dead space+     regB
601 |      vvvv vvvvvvvvvvvv     vvvv
602 | IIII MDDD 000000000000 CCCCBBBBAAAA
603 | ^^^^                   ^^^^    ^^^^
604 | Opcode                 regC    regA
605 | 
606 | VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV
607 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
608 | Immediate value
609 | 
610 | 611 |

612 | Every bit in "Dead space" must be turned off. If one is turned on, the result 613 | is undefined. 614 |

615 | 616 |

617 | If the immediate bit is on, then the instruction disregards rC and instead 618 | looks for its third argument in the 32 bits after the first instruction. For example: 619 |

620 | 621 |
622 | 1: 0001 1 000 000000000000 0000 0001 0000
623 | 2: 0000 0 000 000000000000 0000 0000 0111
624 | 
625 | 626 |

627 | represents a move instruction with the immediate bit set. It will therefore 628 | look for an immediate value in the following word (in this case, the value is 629 | 7), and then store it in r1. 630 |

631 | 632 |

633 | Addition works in a similar fashion: 634 |

635 | 636 |
637 | 1: 0010 1 000 000000000000 0000 0001 0001
638 | 2: 0000 0 000 000000000000 0000 0000 0001
639 | 
640 | 641 |

642 | represents an add instruction with the immediate bit set. It looks for an 643 | immediate value in the following word (in this case, 1), adds it to the value in 644 | r1, then stores the result in r1. So this instruction would be an increment 645 | instruction. 646 |

647 | 648 |

649 | The addressing mode bits are simple; if a register's addressing mode bit is on, 650 | then the address in the register is dereferenced when the instruction is being 651 | executed, and that data is used instead. For example: 652 |

653 | 654 |
655 | 1: 0010 0 001 000000000000 0000 0001 0111
656 | 
657 | 658 |

659 | Performs an addition operation that adds the contents of zero with r1 and 660 | stores the result in memory at the address in r7. 661 |

662 |
663 |
664 | 665 |
666 |

3.2 Stages of compilation

667 |
668 |
669 |

3.2.1 TODO Preprocessing

670 |
671 |

672 | The preprocessor will be responsible for macro expansion and label to address 673 | translation. Macros exist in the form of instruction expansions, done behind the 674 | scenes. 675 |

676 |
677 |
678 | 679 |
680 |

3.2.2 TODO Peephole optimization

681 |
682 | 683 |
684 |

3.2.3 TODO Assembling

685 |
686 |
687 |
688 |
689 | 690 |
691 |

3.3 TODO

692 |
693 |
694 |

3.3.1 Floating point

695 |
696 |

697 | Floating point computation is left to the client (an exercise for the reader, if 698 | you will). 699 |

700 |
701 |
702 | 703 |
704 |

3.3.2 Memory layout

705 |
706 |

707 | The memory layout is completely flat right now. 708 |

709 |
710 |
711 |
712 |
713 |
714 |
715 |

Date: March 29, 2016

716 |

Author: Maxwell Bernstein

717 |

Created: 2016-03-30 Wed 17:15

718 |

Emacs 24.5.1 (Org mode 8.2.10)

719 |

Validate

720 |
721 | 722 | 723 | -------------------------------------------------------------------------------- /rabbit.org: -------------------------------------------------------------------------------- 1 | #+title: Rabbit VM RFC 2 | #+author: Maxwell Bernstein 3 | #+date: March 29, 2016 4 | # #+options: toc:nil 5 | 6 | #+html_head: 7 | 8 | * Why? 9 | 10 | I would like to make a RISC architecture that is capable of comfortably sitting 11 | on top of nearly any other architecture. If it's possible to compile to Rabbit, 12 | then a program can run on more or less any hardware or virtualized architecture. 13 | 14 | Rabbit can be optimized per-architecture, while maintaining the same 15 | interface. It may, for example, take advantage of Intel's SIMD behind the 16 | scenes. 17 | 18 | * What? 19 | 20 | ** Definitions 21 | 22 | *space:* A register or memory location. 23 | 24 | ** Registers 25 | 26 | Registers are 32 bits wide. 27 | 28 | |----------------+--------------+----------------------------| 29 | | /Value/ | /Register/ | /Use/ | 30 | |----------------+--------------+----------------------------| 31 | | ~0x0~ | ~zero~ | Contains 0. MIPS style. | 32 | | ~0x1~ .. ~0x9~ | ~r1~ .. ~r9~ | General purpose. | 33 | | ~0xA~ | ~ip~ | Instruction pointer. | 34 | | ~0xB~ | ~sp~ | Stack pointer. | 35 | | ~0xC~ | ~ret~ | Returned value. | 36 | | ~0xD~ | ~tmp~ | Temporary register. | 37 | | ~0xE~ | ~flags~ | Flags used for comparison. | 38 | |----------------+--------------+----------------------------| 39 | 40 | <> 41 | *** Flags 42 | 43 | |-----------------+--------+-----------------------------------------------------------| 44 | | /Bit/ | /Flag/ | /Meaning/ | 45 | |-----------------+--------+-----------------------------------------------------------| 46 | | ~0x0~ | ~SF~ | Sign flag. On if sign bit of result is on. | 47 | | ~0x1~ | ~ZF~ | Zero flag. On if result is zero or numbers were the same. | 48 | | ~0x2~ .. ~0x20~ | | Reserved. 
| 49 | |-----------------+--------+-----------------------------------------------------------| 50 | 51 | ** Instruction set 52 | 53 | *** Real instructions 54 | 55 | When it makes sense, the destination register is the first argument to an 56 | instruction. The last argument to the following instructions may also be an 57 | immediate value, denoted with a prefix of ~$~: ~move~, ~add~, ~sub~, ~mul~, ~div~, 58 | ~shr~, ~shl~, ~nand~, ~xor~, ~br~, ~brz~, ~brnz~. 59 | 60 | |---------+---------------+----------------------+----------------------------+------------------------------------------------| 61 | | /Value/ | /Instruction/ | /Usage/ | /Explanation/ | /Description/ | 62 | |---------+---------------+----------------------+----------------------------+------------------------------------------------| 63 | | ~0x0~ | ~halt~ | ~halt~ | | Stop the execution of the machine immediately. | 64 | | ~0x1~ | ~move~ | ~move %rB, %rC~ | ~r[B] := r[C]~ | Move one space into another. | 65 | | ~0x2~ | ~add~ | ~add %rA, %rB, %rC~ | ~r[A] := r[B] + r[C]~ | Add two spaces into a third. | 66 | | ~0x3~ | ~sub~ | ~sub %rA, %rB, %rC~ | ~r[A] := r[B] - r[C]~ | Subtract two spaces into a third. Sets [[flags_section][flags]]. | 67 | | ~0x4~ | ~mul~ | ~mul %rA, %rB, %rC~ | ~r[A] := r[B] * r[C]~ | Multiply two spaces into a third. | 68 | | ~0x5~ | ~div~ | ~div %rA, %rB, %rC~ | ~r[A] := r[B] / r[C]~ | Divide two spaces into a third. | 69 | | ~0x6~ | ~shr~ | ~shr %rA, %rB, %rC~ | ~r[A] := r[B] >> r[C]~ | Shift right one space a number of times. | 70 | | ~0x7~ | ~shl~ | ~shl %rA, %rB, %rC~ | ~r[A] := r[B] << r[C]~ | Shift left one space a number of times. | 71 | | ~0x8~ | ~nand~ | ~nand %rA, %rB, %rC~ | ~r[A] := not(r[B] & r[C])~ | NAND two spaces. | 72 | | ~0x9~ | ~xor~ | ~xor %rA, %rB, %rC~ | ~r[A] := r[B] ^ r[C]~ | XOR two spaces. | 73 | | ~0xA~ | ~br~ | ~br %rC~ | ~goto r[C]~ | Branch. | 74 | | ~0xB~ | ~brz~ | ~brz %rC~ | ~if (ZF set) goto r[C]~ | Branch if ~ZF~ is set. 
| 75 | | ~0xC~ | ~brnz~ | ~brnz %rC~ | ~if (!(ZF set)) goto r[C]~ | Branch if ~ZF~ is not set. | 76 | | ~0xD~ | ~in~ | ~in %rC~ | ~r[C] := getchar()~ | Read one character from ~stdin~ into a space. | 77 | | ~0xE~ | ~out~ | ~out %rC~ | ~putchar(r[C])~ | Print one character from a space to ~stdout~. | 78 | |---------+---------------+----------------------+----------------------------+------------------------------------------------| 79 | 80 | *** Assembler macros 81 | 82 | The last argument to the following macros may also be an immediate value, 83 | denoted with a prefix of ~$~: ~cmp~, ~not~, ~push~, ~call~. 84 | 85 | |---------+---------------+---------------------------------------| 86 | | /Macro/ | /Usage/ | /Expansion/ | 87 | |---------+---------------+---------------------------------------| 88 | | ~cmp~ | ~cmp A, B~ | ~sub %tmp, A, B~ | 89 | | ~not~ | ~not A, B~ | ~nand A, B, B~ | 90 | | ~or~ | ~or A, B, C~ | ~(A nand A) nand (B nand B)~ | 91 | | ~and~ | ~and A, B, C~ | ~nand A, B, C~ // ~not A, A~ | 92 | | ~push~ | ~push A~ | ~move (%sp), A~ // ~sub %sp, %sp, $1~ | 93 | | ~pop~ | ~pop A~ | ~add %sp, %sp, $1~ // ~move A, (%sp)~ | 94 | | ~call~ | ~call A~ | ~push %ip~ // ~br A~ | 95 | | ~ret~ | ~ret~ | ~pop %ip~ | 96 | |---------+---------------+---------------------------------------| 97 | 98 | ** Addressing modes 99 | 100 | There are two addressing modes: ~%reg~ and ~(%reg)~. The former uses the value 101 | in the register, and the latter uses the word at the address in the register. 102 | 103 | * How? 
104 | 105 | ** Instruction formats 106 | 107 | #+begin_src 108 | instr %rA, %rB, %rC 109 | instr %rA, %rB 110 | instr %rA 111 | #+end_src 112 | 113 | #+begin_src 114 | +-----Immediate bit 115 | |+----Addressing mode bit C 116 | ||+---Addressing mode bit B 117 | |||+--Addressing mode bit A 118 | |||| +Dead space+ regB 119 | vvvv vvvvvvvvvvvv vvvv 120 | IIII MDDD 000000000000 CCCCBBBBAAAA 121 | ^^^^ ^^^^ ^^^^ 122 | Opcode regC regA 123 | 124 | VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV 125 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 126 | Immediate value 127 | #+end_src 128 | 129 | Every bit in "Dead space" must be turned off. If one is turned on, the result 130 | is undefined. 131 | 132 | If the immediate bit is on, then the instruction disregards ~rC~ and instead 133 | looks for its third argument in the 32 bits after the first instruction. For example: 134 | 135 | : 1: 0001 1 000 000000000000 0001 0000 0000 136 | : 2: 0000 0 000 000000000000 0000 0000 0111 137 | 138 | represents a ~move~ instruction with the immediate bit set. It will therefore 139 | look for an immediate value in the following word (in this case, the value is 140 | 7), and then store it in ~r1~. 141 | 142 | Addition works in a similar fashion: 143 | 144 | : 1: 0010 1 000 000000000000 0001 0001 0000 145 | : 2: 0000 0 000 000000000000 0000 0000 0001 146 | 147 | represents an ~add~ instruction with the immediate bit set. It looks for an 148 | immediate value in the following word (in this case, 1), adds it to the value in 149 | ~r1~, then stores the result in ~r1~. So this instruction would be an increment 150 | instruction. 151 | 152 | The addressing mode bits are simple; if a register's addressing mode bit is on, 153 | then the address in the register is dereferenced when the instruction is being 154 | executed, and that data is used instead. 
For example: 155 | 156 | : 1: 0010 0 100 000000000000 0111 0001 0000 157 | 158 | Performs an addition operation that adds the contents of ~zero~ with ~r1~ and 159 | stores the result in memory at the address in ~r7~. 160 | 161 | ** Stages of compilation 162 | 163 | *** TODO Preprocessing 164 | 165 | The preprocessor will be responsible for macro expansion and label to address 166 | translation. Macros exist in the form of instruction expansions, done behind the 167 | scenes. 168 | 169 | *** TODO Peephole optimization 170 | 171 | *** TODO Assembling 172 | 173 | # TODO: add C interface 174 | # ** C interface 175 | 176 | # #+INCLUDE: "rabbit.h" src c 177 | 178 | ** TODO 179 | 180 | *** Floating point 181 | 182 | Floating point computation is left to the client (an exercise for the reader, if 183 | you will). 184 | 185 | *** Memory layout 186 | 187 | The memory layout is completely flat right now. 188 | -------------------------------------------------------------------------------- /rabbit.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RabbitVM/rabbit/bede639aab6179867d564b412dead7e06344ed3e/rabbit.pdf -------------------------------------------------------------------------------- /rabbit_bif.h: -------------------------------------------------------------------------------- 1 | #ifndef RABBIT_BIF_H 2 | #define RABBIT_BIF_H 3 | 4 | #define BIF_TABLE(V) \ 5 | V(Hello, hello) 6 | 7 | #define BIF_ID(id, name) id, 8 | 9 | // clang-format off 10 | enum BifId { 11 | BIF_TABLE(BIF_ID) 12 | kNumBifs, 13 | kInvalidBif, 14 | }; 15 | // clang-format on 16 | 17 | #undef BIF_ID 18 | 19 | #define BIF_NAME(id, name) #name, 20 | 21 | const char *bif_names[] = {BIF_TABLE(BIF_NAME)}; 22 | 23 | #undef BIF_NAME 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /rabbit_codewords.c: -------------------------------------------------------------------------------- 1 
| 2 | #include "rabbit_codewords.h" 3 | 4 | /* Transform the packed representation of an instruction into the unpacked 5 | * representation. */ 6 | struct unpacked_s decode(rabbitw instr) { 7 | /* Fetch the space modes from the instruction. */ 8 | uint8_t modes = (instr >> 24) & 0xF; 9 | 10 | /* Offsets of the space modes in the mode nibble. */ 11 | static const uint8_t RB_ADDRA_LSB = 0, RB_ADDRB_LSB = 1, RB_ADDRC_LSB = 2, 12 | RB_IMMED_LSB = 3; 13 | 14 | return (struct unpacked_s){ 15 | .modes = 16 | { 17 | .immediate = (modes >> RB_IMMED_LSB) & 0x1, 18 | .regc_deref = (modes >> RB_ADDRC_LSB) & 0x1, 19 | .regb_deref = (modes >> RB_ADDRB_LSB) & 0x1, 20 | .rega_deref = (modes >> RB_ADDRA_LSB) & 0x1, 21 | }, 22 | .opcode = instr >> 28, 23 | .regc = (instr >> 8) & 0xF, 24 | .regb = (instr >> 4) & 0xF, 25 | .rega = instr & 0xF, 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /rabbit_codewords.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RABBIT_CODEWORDS_H 3 | #define RABBIT_CODEWORDS_H 4 | 5 | #include "rabbit_types.h" 6 | 7 | #define INSTR_TABLE(V) \ 8 | V(Halt, halt, 0) \ 9 | V(Move, move, 2) \ 10 | V(Add, add, 3) \ 11 | V(Sub, sub, 3) \ 12 | V(Mul, mul, 3) \ 13 | V(Div, div, 3) \ 14 | V(Shr, shr, 3) \ 15 | V(Shl, shl, 3) \ 16 | V(Nand, nand, 3) \ 17 | V(Xor, xor, 3) \ 18 | V(Br, br, 1) \ 19 | V(Brz, brz, 1) \ 20 | V(Brnz, brnz, 1) \ 21 | V(In, in, 1) \ 22 | V(Out, out, 1) \ 23 | V(Bif, bif, 1) 24 | 25 | #define INSTR_ID(id, name, nargs) id, 26 | 27 | // clang-format off 28 | enum InstrId { 29 | INSTR_TABLE(INSTR_ID) 30 | kNumInstrs, 31 | kInvalidInstr, 32 | }; 33 | // clang-format on 34 | 35 | #undef INSTR_ID 36 | 37 | struct instr_s { 38 | const char *name; 39 | unsigned nargs; 40 | }; 41 | 42 | #define INSTR_NARGS(id, name, nargs) {#name, nargs}, 43 | 44 | static const struct instr_s instr_nargs[] = {INSTR_TABLE(INSTR_NARGS)}; 45 | 46 | #undef INSTR_NARGS 47 
| 48 | /* Options for operand use. Is space C an immediate value or a register? Should 49 | * any of spaces A, B, and C be dereferenced? */ 50 | struct modes_s { 51 | uint8_t immediate; 52 | uint8_t regc_deref; 53 | uint8_t regb_deref; 54 | uint8_t rega_deref; 55 | }; 56 | 57 | /* Holds an unpacked representation of an instruction (sans immediate value, if 58 | * it has one). */ 59 | struct unpacked_s { 60 | struct modes_s modes; 61 | uint8_t opcode; 62 | uint8_t regc; 63 | uint8_t regb; 64 | uint8_t rega; 65 | }; 66 | 67 | struct unpacked_s decode(rabbitw instr); 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /rabbit_io.c: -------------------------------------------------------------------------------- 1 | 2 | #include "rabbit_io.h" 3 | 4 | int read_word(FILE *fp, rabbitw *w) { 5 | int nread = 0; 6 | unsigned char *wordp = (unsigned char *)w; 7 | for (int i = 3; i >= 0; i--) { 8 | int tempread = fread(&wordp[i], sizeof wordp[i], 1, fp); 9 | if (tempread == 0) { 10 | return 0; 11 | } 12 | 13 | nread += tempread; 14 | } 15 | 16 | return nread; 17 | } 18 | -------------------------------------------------------------------------------- /rabbit_io.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RABBIT_IO_H 3 | #define RABBIT_IO_H 4 | 5 | #include 6 | 7 | #include "rabbit_types.h" 8 | 9 | int read_word(FILE *fp, rabbitw *w); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /rabbit_types.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef RABBIT_TYPES_H 3 | #define RABBIT_TYPES_H 4 | 5 | #include 6 | 7 | typedef uint32_t rabbitw; 8 | 9 | #endif 10 | -------------------------------------------------------------------------------- /rabbitvm.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | Rabbit VM 
Implementation 7 | 8 | 9 | 10 | 11 | 91 | 92 | 138 | 139 | 140 |
141 |

Rabbit VM Implementation

142 |

143 | Our emulator can exit or abort with a couple of status codes. They are as 144 | follows: 145 |

146 | 147 |
148 | 149 |
/* Status codes the emulator can exit or abort with. */
typedef enum rabbits {
    RB_SUCCESS = 0, /* No error. */
    RB_FAIL,        /* Generic failure. */
    RB_OVERFLOW,    /* Stack overflow. */
    RB_ILLEGAL,     /* Illegal instruction. */
} rabbits;
153 | 
154 |
155 | 156 |

157 | RB_SUCCESS indicates no error. RB_FAIL indicates a generic 158 | failure. RB_OVERFLOW indicates a stack overflow. RB_ILLEGAL indicates an 159 | illegal instruction. 160 |

161 | 162 |
163 |

Emulator structure

164 |
165 |
166 | 167 |
#include <stdio.h>
168 | #include <inttypes.h>
169 | #include <stdlib.h>
170 | #include <string.h>
171 | #include <sys/stat.h>
172 | 
173 | #include "rabbit_types.h"
174 | #include "rabbit_io.h"
175 | #include "rabbit_codewords.h"
176 | 
177 | {{{declarations}}}
178 | 
179 | {{{builtin_functions}}}
180 | 
181 | int main(int argc, char **argv) {
182 |     {{{check_cli_arguments}}}
183 | 
184 |     {{{open_code_file}}}
185 | 
186 |     {{{stat_code_file}}}
187 | 
188 |     {{{allocate_memory}}}
189 | 
190 |     {{{read_code_into_memory}}}
191 | 
192 |     {{{fetch_decode_execute_loop}}}
193 | 
194 |     return 0;
195 | }
196 | 
197 | {{{macro_cleanup}}}
198 | 
199 |
200 |
201 |
202 | 203 |
204 |

Registers

205 |
206 |
207 | 208 |
/* Register numbers, exactly as they are encoded in an instruction's 4-bit
 * register fields. */
typedef enum rabbitr {
    RB_ZERO    = 0x0, /* Contains 0. */
    RB_R1      = 0x1, /* r1 .. r9: general purpose. */
    RB_R2      = 0x2,
    RB_R3      = 0x3,
    RB_R4      = 0x4,
    RB_R5      = 0x5,
    RB_R6      = 0x6,
    RB_R7      = 0x7,
    RB_R8      = 0x8,
    RB_R9      = 0x9,
    RB_IP      = 0xA, /* Instruction pointer. */
    RB_SP      = 0xB, /* Stack pointer. */
    RB_RET     = 0xC, /* Returned value. */
    RB_TMP     = 0xD, /* Temporary register. */
    RB_FLAGS   = 0xE, /* Flags used for comparison. */
    RB_NUMREGS = 0xF, /* Count of named registers; not a register itself. */
} rabbitr;
213 | 
214 |
215 |
216 |
217 | 218 |
219 |

Instructions

220 |
221 |

222 | There are four types of instructions: 223 |

224 | 225 |
226 | instr %rA, %rB, %rC
227 | instr %rA, %rB
228 | instr %rA
229 | instr
230 | 
231 | 232 |

233 | There is only one instruction in the last category: halt. 234 |

235 | 236 |

237 | All instructions are represented as 32-bit words: 238 |

239 | 240 |
241 | 242 |
#ifndef RABBIT_TYPES_H
243 | #define RABBIT_TYPES_H
244 | 
245 | #include <inttypes.h>
246 | 
247 | typedef uint32_t rabbitw;
248 | 
249 | #endif
250 | 
251 |
252 | 253 |

254 | These 32 bits are laid out as follows: 255 |

256 | 257 |
258 |      +-----Immediate bit
259 |      |+----Addressing mode bit C
260 |      ||+---Addressing mode bit B
261 |      |||+--Addressing mode bit A
262 |      |||| +Dead space+     regB
263 |      vvvv vvvvvvvvvvvv     vvvv
264 | IIII MDDD 000000000000 CCCCBBBBAAAA
265 | ^^^^                   ^^^^    ^^^^
266 | Opcode                 regC    regA
267 | 
268 | VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV
269 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
270 | Immediate value
271 | 
272 | 273 |

274 | Note that every bit in “Dead space” must be turned off. If one is turned on, 275 | the result of executing that instruction is undefined. 276 |

277 | 278 |

279 | Internally, the emulator uses structs with 280 | bit fields to 281 | represent instructions. It uses bit fields only to save space; they can be 282 | removed with no consequences. 283 |

284 | 285 |
286 | 287 |
/* Operand options decoded from an instruction's mode nibble: whether space C
 * is an immediate value rather than a register, and which of spaces A, B and
 * C should be dereferenced.  Every option is a one-bit field. */
struct modes_s {
    uint8_t immediate  : 1; /* Space C is an immediate value. */
    uint8_t regc_deref : 1; /* Dereference space C. */
    uint8_t regb_deref : 1; /* Dereference space B. */
    uint8_t rega_deref : 1; /* Dereference space A. */
};
295 | 
296 |
297 | 298 |

299 | The full unpacked representation of the instruction includes this unpacked 300 | modes struct along with the opcode and three operands. 301 |

302 | 303 |
304 | 305 |
/* Holds an unpacked representation of an instruction (sans immediate value, if
306 |  * it has one). */
307 | struct unpacked_s {
308 |     struct modes_s modes;
309 |     uint8_t opcode : 4;
310 |     uint8_t regc   : 4;
311 |     uint8_t regb   : 4;
312 |     uint8_t rega   : 4;
313 | };
314 | 
315 |
316 | 317 |

318 | The immediate value follows the instruction in memory; the emulator fetches it 319 | upon execution. 320 |

321 |
322 | 323 |
324 |

Decoding

325 |
326 |

327 | With the knowledge of the instruction format and its in-emulator representation, 328 | it is not difficult to write a decode function that takes an instruction and 329 | returns the unpacked representation. 330 |

331 | 332 |

333 | First, decode fetches the mode nibble from the instruction. Its least significant bit 334 | is bit 24 of the word, and it is four bits wide. 335 |

336 | 337 |
338 | 339 |
/* Fetch the space modes from the instruction. */
340 | uint8_t modes = (instr >> 24) & 0xF;
341 | 
342 |
343 | 344 |

345 | As the modes are stored in a bit vector, it’s useful to name the bit offset of each of 346 | the addressing modes and the immediate mode within that nibble. 347 |

348 | 349 |
350 | 351 |
/* Offsets of the space modes in the mode nibble. */
352 | static const uint8_t
353 |     RB_ADDRA_LSB = 0,
354 |     RB_ADDRB_LSB = 1,
355 |     RB_ADDRC_LSB = 2,
356 |     RB_IMMED_LSB = 3;
357 | 
358 |
359 | 360 |

361 | From here it’s reasonably trivial to fill the unpacked_s struct. instr needs 362 | to be shifted and masked a couple of times to fetch the opcode and registers, 363 | but nothing major. 364 |

365 | 366 |
367 | 368 |
#include "rabbit_codewords.h"
369 | 
370 | /* Transform the packed representation of an instruction into the unpacked
371 |  * representation. */
372 | struct unpacked_s decode(rabbitw instr) {
373 |     {{{fetch_modes}}}
374 | 
375 |     {{{mode_offsets}}}
376 | 
377 |     return (struct unpacked_s) {
378 | 	.modes = {
379 | 	    .immediate  = (modes >> RB_IMMED_LSB) & 0x1,
380 | 	    .regc_deref = (modes >> RB_ADDRC_LSB) & 0x1,
381 | 	    .regb_deref = (modes >> RB_ADDRB_LSB) & 0x1,
382 | 	    .rega_deref = (modes >> RB_ADDRA_LSB) & 0x1,
383 | 	},
384 | 	.opcode = instr >> 28,
385 | 	.regc =  (instr >> 8) & 0xF,
386 | 	.regb =  (instr >> 4) & 0xF,
387 | 	.rega = instr & 0xF,
388 |     };
389 | }
390 | 
391 |
392 |
393 |
394 | 395 |
396 |

Modes

397 |
398 | 399 |
400 |

Following mode rules

401 |
402 |
403 | 404 |
405 |

VM setup

406 |
407 |
408 |

Check CLI arguments

409 |
410 |
411 | 412 |
if (argc != 2) {
413 |     fprintf(stderr, "Need to pass file to execute.\n");
414 |     return RB_FAIL;
415 | }
416 | 
417 |
418 |
419 |
420 | 421 |
422 |

Open code file

423 |
424 |
425 | 426 |
char *fn = argv[1];
427 | FILE *fp = fopen(fn, "rb");
428 | if (fp == NULL) {
429 |     fprintf(stderr, "Can't open `%s'.\n", fn);
430 |     return RB_FAIL;
431 | }
432 | 
433 |
434 |
435 |
436 | 437 |
438 |

Get size of code file

439 |
440 |
441 | 442 |
/* Get the size of the file so that we can allocate memory for it. */
443 | struct stat st;
444 | if (fstat(fileno(fp), &st) != 0) {
445 |     fprintf(stderr, "Can't stat `%s'.\n", fn);
446 |     fclose(fp);
447 |     return RB_FAIL;
448 | }
449 | 
450 |
451 |
452 |
453 | 454 |
455 |

Allocate memory

456 |
457 |
458 | 459 |
/* Allocate memory with program first, stack second. */
460 | size_t stacksize = 1000;
461 | off_t size = st.st_size;
462 | rabbitw *mem = malloc(stacksize + size * sizeof *mem);
463 | if (mem == NULL) {
464 |     fprintf(stderr, "Not enough memory. Could not allocate stack of size"
465 | 		    "%zu + program of size %lld.\n", stacksize, size);
466 |     return RB_FAIL;
467 | }
468 | 
469 |
470 |
471 |
472 | 473 |
474 |

Read code into memory

475 |
476 |
477 | 478 |
/* Read the file into memory. */
479 | size_t i = 0;
480 | rabbitw word = 0;
481 | /* We cannot use fread because of endian-ness issues. */
482 | while (read_word(fp, &word) != 0) {
483 |     mem[i++] = word;
484 | }
485 | fclose(fp);
486 | 
487 |
488 | 489 |
490 | 491 |
/* Opcodes, exactly as they are encoded in the top nibble of an instruction
 * word. */
typedef enum rabbiti {
    RB_HALT      = 0x0,
    RB_MOVE      = 0x1,
    RB_ADD       = 0x2,
    RB_SUB       = 0x3,
    RB_MUL       = 0x4,
    RB_DIV       = 0x5,
    RB_SHR       = 0x6,
    RB_SHL       = 0x7,
    RB_NAND      = 0x8,
    RB_XOR       = 0x9,
    RB_BR        = 0xA,
    RB_BRZ       = 0xB,
    RB_BRNZ      = 0xC,
    RB_IN        = 0xD,
    RB_OUT       = 0xE,
    RB_BIF       = 0xF,
    RB_NUMINSTRS = 0x10, /* Count of opcodes; not an opcode itself. */
} rabbiti;
497 | 
498 |
499 |
500 |
501 | 502 |
503 |

Fetch-decode-execute loop

504 |
505 |
506 | 507 |
struct abc_s abc;
508 | rabbitw regs[RB_NUMINSTRS] = { 0 };
509 | regs[RB_SP] = i;
510 | 
511 | /* Main fetch-decode-execute loop. */
512 | while (1) {
513 |     /* Fetch the current instruction word. */
514 |     rabbitw word = mem[regs[RB_IP]++];
515 | 
516 |     /* Decode it. */
517 |     struct unpacked_s i = decode(word);
518 | 
519 |     /* Execute it. */
520 |     switch (i.opcode) {
521 |     case RB_HALT:
522 | 	free(mem);
523 | 	return RB_SUCCESS;
524 | 	break;
525 |     case RB_MOVE: {
526 | 	/* Move is special because it has one source instead of two
527 | 	 * operands. */
528 | 	rabbitw src = i.modes.immediate   ? fetch_immediate()  : regs[i.regc];
529 | 	rabbitw *dst = i.modes.regb_deref ? &mem[regs[i.regb]] : &regs[i.regb];
530 | 	*dst = i.modes.regc_deref ? mem[src] : src;
531 | 	break;
532 |     }
533 |     case RB_ADD:
534 | 	abc = getabc(regs, mem, i);
535 | 	*abc.dst = abc.b + abc.c;
536 | 	break;
537 |     case RB_SUB:
538 | 	abc = getabc(regs, mem, i);
539 | 	rabbitw res = abc.b - abc.c;
540 | 	*abc.dst = res;
541 | 	if (res == 0) {
542 | 	    /* Set zero flag. */
543 | 	    regs[RB_FLAGS] |= 0x2U;
544 | 	}
545 | 	break;
546 |     case RB_MUL:
547 | 	abc = getabc(regs, mem, i);
548 | 	*abc.dst = abc.b * abc.c;
549 | 	break;
550 |     case RB_DIV:
551 | 	abc = getabc(regs, mem, i);
552 | 	if (abc.c == 0) {
553 | 	    free(mem);
554 | 	    return RB_ILLEGAL;
555 | 	}
556 | 
557 | 	*abc.dst = abc.b / abc.c;
558 | 	break;
559 |     case RB_SHR:
560 | 	abc = getabc(regs, mem, i);
561 | 	*abc.dst = abc.b >> abc.c;
562 | 	break;
563 |     case RB_SHL:
564 | 	abc = getabc(regs, mem, i);
565 | 	*abc.dst = abc.b << abc.c;
566 | 	break;
567 |     case RB_NAND:
568 | 	abc = getabc(regs, mem, i);
569 | 	*abc.dst = ~(abc.b & abc.c);
570 | 	break;
571 |     case RB_XOR:
572 | 	abc = getabc(regs, mem, i);
573 | 	*abc.dst = abc.b ^ abc.c;
574 | 	break;
575 |     case RB_BR: {
576 | 	/* Branch is special because it only has one argument. */
577 | 	rabbitw src = i.modes.immediate  ? fetch_immediate() : regs[i.regc];
578 | 	regs[RB_IP] = i.modes.regc_deref ? mem[src] : src;
579 | 	break;
580 |     }
581 |     case RB_BRZ:
582 | 	/* Branch if zero is special because it only has one argument. */
583 | 	if ((regs[RB_FLAGS] & 0x2U) == 0) {
584 | 	    rabbitw src = i.modes.immediate  ? fetch_immediate() : regs[i.regc];
585 | 	    regs[RB_IP] = i.modes.regc_deref ? mem[src] : src;
586 | 	}
587 | 	break;
588 |     case RB_BRNZ:
589 | 	/* Branch not zero is special because it only has one argument. */
590 | 	if ((regs[RB_FLAGS] & 0x2U) != 0) {
591 | 	    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
592 | 	    regs[RB_IP] = i.modes.regc_deref ? mem[src] : src;
593 | 	}
594 | 	break;
595 |     case RB_IN: {
596 | 	/* Input is special because it does not have an argument. */
597 | 	rabbitw dst = i.modes.immediate ? fetch_immediate() : regs[i.regc];
598 | 	rabbitw *dstp = i.modes.regc_deref ? &mem[regs[i.regc]] : &regs[i.regc];
599 | 	*dstp = getchar();
600 | 	break;
601 |     }
602 |     case RB_OUT: {
603 | 	/* Output is special because it has one argument and no
604 | 	 * destination. */
605 | 	rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
606 | 	src = i.modes.regc_deref ? mem[src] : src;
607 | 	putchar(src);
608 | 	break;
609 |     }
610 |     case RB_BIF: {
611 | 	rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
612 | 	src = i.modes.regc_deref ? mem[src] : src;
613 | 	if (src > NUM_BIFS) {
614 | 	    fprintf(stderr, "Invalid bif: `%u'.\n", src);
615 | 	    return RB_FAIL;
616 | 	}
617 | 	bif f = biftable[src].f;
618 | 	f(regs, mem);
619 | 	break;
620 |     }
621 | /*    case RB_CFF:
622 | 	break;
623 | */
624 |     default:
625 | 	free(mem);
626 | 	return RB_ILLEGAL;
627 | 	break;
628 |     }
629 | }
630 | 
631 |
632 |
633 |
634 |
635 | 636 |
637 |

Building the VM

638 |
639 |
640 | 641 |
CC=gcc
CFLAGS=-Wall -Wextra -Wpedantic
LFLAGS=
RABBIT_OBJS=rabbit.o rabbit_io.o rabbit_codewords.o
ASSEMBLER_OBJS=assembler.o rabbit_io.o
DISASSEMBLER_OBJS=disassembler.o rabbit_io.o rabbit_codewords.o

# These names never correspond to a file on disk.
.PHONY: all clean assembler disassembler

all: rabbit rabbit-asm rabbit-dis

# Old target names, kept as aliases for the binaries they build.
assembler: rabbit-asm
disassembler: rabbit-dis

clean:
	rm -f rabbit rabbit-asm rabbit-dis
	rm -f $(RABBIT_OBJS) $(ASSEMBLER_OBJS) $(DISASSEMBLER_OBJS)

rabbit: $(RABBIT_OBJS)
	$(CC) $(CFLAGS) $(LFLAGS) $(RABBIT_OBJS) -o rabbit

# Each target is named after the file its recipe writes, so make can tell
# when the binary is already up to date instead of relinking on every run.
rabbit-asm: $(ASSEMBLER_OBJS)
	$(CC) $(CFLAGS) $(LFLAGS) $(ASSEMBLER_OBJS) -o rabbit-asm

rabbit-dis: $(DISASSEMBLER_OBJS)
	$(CC) $(CFLAGS) $(LFLAGS) $(DISASSEMBLER_OBJS) -o rabbit-dis

%.o: %.c
	$(CC) $(CFLAGS) -o $@ -c $<
665 | 
666 |
667 |
668 |
669 |
670 |
671 |

Author: Maxwell Bernstein

672 |

Created: 2016-05-25 Wed 14:18

673 |

Emacs 24.5.1 (Org mode 8.2.10)

674 |

Validate

675 |
676 | 677 | 678 | -------------------------------------------------------------------------------- /rabbitvm.org: -------------------------------------------------------------------------------- 1 | #+title: Rabbit VM Implementation 2 | #+author: Maxwell Bernstein 3 | #+OPTIONS: ':t toc:nil num:0 4 | 5 | #+html_head: 6 | 7 | Our emulator can exit or abort with a couple of status codes. They are as 8 | follows: 9 | 10 | #+name: status_enum 11 | #+begin_src c :exports code 12 | /* Exit/error codes and statuses. */ 13 | typedef enum rabbits { 14 | RB_SUCCESS = 0, RB_FAIL, RB_OVERFLOW, RB_ILLEGAL, 15 | } rabbits; 16 | #+end_src 17 | 18 | ~RB_SUCCESS~ indicates no error. ~RB_FAIL~ indicates a generic 19 | failure. ~RB_OVERFLOW~ indicates a stack overflow. ~RB_ILLEGAL~ indicates an 20 | illegal instruction. 21 | 22 | * Emulator structure 23 | 24 | #+name: getabc 25 | #+begin_src c :exports none 26 | /* Most instructions have a destination address and two operands. This struct 27 | * holds those. */ 28 | struct abc_s { 29 | rabbitw *dst, b, c; 30 | }; 31 | 32 | /* Using the options/modes given by the user, fetch the destination address and 33 | * operands. */ 34 | static struct abc_s getabc(rabbitw *regs, rabbitw *mem, struct unpacked_s i) { 35 | rabbitw *dst = i.modes.rega_deref ? &mem[regs[i.rega]] : ®s[i.rega]; 36 | rabbitw bval = i.modes.regb_deref ? mem[regs[i.regb]] : regs[i.regb]; 37 | rabbitw cval = i.modes.immediate ? fetch_immediate() : regs[i.regc]; 38 | cval = i.modes.regc_deref ? 
mem[cval] : cval; 39 | return (struct abc_s) { .dst = dst, .b = bval, .c = cval }; 40 | } 41 | #+end_src 42 | 43 | #+name: fetch_immediate 44 | #+begin_src c :exports none 45 | #define fetch_immediate() mem[regs[RB_IP]++] 46 | #+end_src 47 | 48 | #+name: macro_cleanup 49 | #+begin_src c :exports none 50 | #undef fetch_immediate 51 | #+end_src 52 | 53 | #+name: declarations 54 | #+begin_src c :exports none 55 | {{{status_enum}}} 56 | 57 | {{{register_enum}}} 58 | 59 | {{{instruction_enum}}} 60 | 61 | {{{fetch_immediate}}} 62 | 63 | {{{getabc}}} 64 | #+end_src 65 | 66 | #+name: builtin_functions 67 | #+begin_src c :exports none 68 | rabbitw hello(rabbitw *regs, rabbitw *mem) { 69 | (void)regs; 70 | (void)mem; 71 | fprintf(stdout, "hello\n"); 72 | return 0; 73 | } 74 | 75 | typedef rabbitw (*bif)(rabbitw *regs, rabbitw *mem); 76 | 77 | const static struct { 78 | const char *name; 79 | bif f; 80 | } biftable[] = { 81 | { "hello", hello }, 82 | { NULL, NULL }, 83 | }; 84 | 85 | const unsigned int NUM_BIFS = 1; 86 | 87 | /* 88 | static bif biflookup(const char *name) { 89 | for (int i = 0; biftable[i].name; i++) { 90 | if (strcmp(biftable[i].name, name) == 0) { 91 | return biftable[i].f; 92 | } 93 | } 94 | 95 | return NULL; 96 | } 97 | */ 98 | #+end_src 99 | 100 | #+name: rabbit_io_h 101 | #+begin_src c :tangle rabbit_io.h :exports none :noweb tangle 102 | #ifndef RABBIT_IO_H 103 | #define RABBIT_IO_H 104 | 105 | #include 106 | 107 | #include "rabbit_types.h" 108 | 109 | int read_word(FILE *fp, rabbitw *w); 110 | 111 | #endif 112 | #+end_src 113 | 114 | #+name: rabbit_io_c 115 | #+begin_src c :tangle rabbit_io.c :exports none :noweb tangle 116 | #include "rabbit_io.h" 117 | 118 | int read_word(FILE *fp, rabbitw *w) { 119 | int nread = 0; 120 | unsigned char *wordp = (unsigned char *)w; 121 | for (int i = 3; i >= 0; i--) { 122 | int tempread = fread(&wordp[i], sizeof wordp[i], 1, fp); 123 | if (tempread == 0) { 124 | return 0; 125 | } 126 | 127 | nread += tempread; 128 | 
} 129 | 130 | return nread; 131 | } 132 | #+end_src 133 | 134 | #+begin_src c :tangle rabbit.c :exports code :noweb tangle 135 | #include 136 | #include 137 | #include 138 | #include 139 | #include 140 | 141 | #include "rabbit_types.h" 142 | #include "rabbit_io.h" 143 | #include "rabbit_codewords.h" 144 | 145 | {{{declarations}}} 146 | 147 | {{{builtin_functions}}} 148 | 149 | int main(int argc, char **argv) { 150 | {{{check_cli_arguments}}} 151 | 152 | {{{open_code_file}}} 153 | 154 | {{{stat_code_file}}} 155 | 156 | {{{allocate_memory}}} 157 | 158 | {{{read_code_into_memory}}} 159 | 160 | {{{fetch_decode_execute_loop}}} 161 | 162 | return 0; 163 | } 164 | 165 | {{{macro_cleanup}}} 166 | #+end_src 167 | 168 | * Registers 169 | 170 | #+name: register_enum 171 | #+begin_src c :exports code 172 | /* Registers available for use. */ 173 | typedef enum rabbitr { 174 | RB_ZERO = 0, RB_R1, RB_R2, RB_R3, RB_R4, RB_R5, RB_R6, RB_R7, RB_R8, RB_R9, 175 | RB_IP, RB_SP, RB_RET, RB_TMP, RB_FLAGS, RB_NUMREGS, 176 | } rabbitr; 177 | #+end_src 178 | 179 | * Instructions 180 | 181 | There are four types of instructions: 182 | 183 | #+begin_src 184 | instr %rA, %rB, %rC 185 | instr %rA, %rB 186 | instr %rA 187 | instr 188 | #+end_src 189 | 190 | There is only one instruction in the last category: ~halt~. 
191 | 192 | All instructions are represented as 32-bit words: 193 | 194 | #+name: rabbitw_typedef 195 | #+begin_src c :tangle rabbit_types.h :exports code 196 | #ifndef RABBIT_TYPES_H 197 | #define RABBIT_TYPES_H 198 | 199 | #include 200 | 201 | typedef uint32_t rabbitw; 202 | 203 | #endif 204 | #+end_src 205 | 206 | These 32 bits are laid out as follows: 207 | 208 | #+begin_src 209 | +-----Immediate bit 210 | |+----Addressing mode bit C 211 | ||+---Addressing mode bit B 212 | |||+--Addressing mode bit A 213 | |||| +Dead space+ regB 214 | vvvv vvvvvvvvvvvv vvvv 215 | IIII MDDD 000000000000 CCCCBBBBAAAA 216 | ^^^^ ^^^^ ^^^^ 217 | Opcode regC regA 218 | 219 | VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV 220 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 221 | Immediate value 222 | #+end_src 223 | 224 | *Note that every bit in "Dead space" must be turned off.* If one is turned on, 225 | the result of executing that instruction is undefined. 226 | 227 | Internally, the emulator uses structs with 228 | [[http://en.cppreference.com/w/cpp/language/bit_field][bit fields]] to 229 | represent instructions. It uses bit fields only to save space; they can be 230 | removed with no consequences. 231 | 232 | #+name: mode_representation 233 | #+begin_src c :exports code 234 | /* Options for operand use. Is space C an immediate value or a register? Should 235 | * any of spaces A, B, and C be dereferenced? */ 236 | struct modes_s { 237 | uint8_t immediate; 238 | uint8_t regc_deref; 239 | uint8_t regb_deref; 240 | uint8_t rega_deref; 241 | }; 242 | #+end_src 243 | 244 | The full unpacked representation of the instruction includes this unpacked 245 | modes struct along with the opcode and three operands. 246 | 247 | #+name: instruction_representation 248 | #+begin_src c :exports code 249 | /* Holds an unpacked representation of an instruction (sans immediate value, if 250 | * it has one). 
*/ 251 | struct unpacked_s { 252 | struct modes_s modes; 253 | uint8_t opcode; 254 | uint8_t regc; 255 | uint8_t regb; 256 | uint8_t rega; 257 | }; 258 | #+end_src 259 | 260 | The immediate value follows the instruction in memory; the emulator fetches it 261 | upon execution. 262 | 263 | #+name: unpacked_representation 264 | #+begin_src c :exports none 265 | {{{mode_representation}}} 266 | 267 | {{{instruction_representation}}} 268 | #+end_src 269 | 270 | ** Decoding 271 | 272 | With the knowledge of the instruction format and its in-emulator representation, 273 | it is not difficult to write a ~decode~ function that takes an instruction and 274 | returns the unpacked representation. 275 | 276 | First, ~decode~ fetches the mode from the instruction. Its least significant bit 277 | is 24 and it contains at most four bits. 278 | 279 | #+name: fetch_modes 280 | #+begin_src c :exports code 281 | /* Fetch the space modes from the instruction. */ 282 | uint8_t modes = (instr >> 24) & 0xF; 283 | #+end_src 284 | 285 | As the modes are stored in a bit vector, it's useful to have masks for each of 286 | the addressing modes and the immediate mode. 287 | 288 | #+name: mode_offsets 289 | #+begin_src c :exports code 290 | /* Offsets of the space modes in the mode nibble. */ 291 | static const uint8_t 292 | RB_ADDRA_LSB = 0, 293 | RB_ADDRB_LSB = 1, 294 | RB_ADDRC_LSB = 2, 295 | RB_IMMED_LSB = 3; 296 | #+end_src 297 | 298 | From here it's reasonably trivial to fill the ~unpacked_s~ struct. ~instr~ needs 299 | to be shifted and masked a couple of times to fetch the opcode and registers, 300 | but nothing major. 
301 | 302 | #+name: rabbit_codewords_h 303 | #+begin_src c :tangle rabbit_codewords.h :exports none :noweb tangle 304 | #ifndef RABBIT_CODEWORDS_H 305 | #define RABBIT_CODEWORDS_H 306 | 307 | #include "rabbit_types.h" 308 | 309 | {{{unpacked_representation}}} 310 | 311 | struct unpacked_s decode(rabbitw instr); 312 | 313 | #endif 314 | #+end_src 315 | 316 | #+name: rabbit_codewords_c 317 | #+begin_src c :tangle rabbit_codewords.c :exports code :noweb tangle 318 | #include "rabbit_codewords.h" 319 | 320 | /* Transform the packed representation of an instruction into the unpacked 321 | * representation. */ 322 | struct unpacked_s decode(rabbitw instr) { 323 | {{{fetch_modes}}} 324 | 325 | {{{mode_offsets}}} 326 | 327 | return (struct unpacked_s) { 328 | .modes = { 329 | .immediate = (modes >> RB_IMMED_LSB) & 0x1, 330 | .regc_deref = (modes >> RB_ADDRC_LSB) & 0x1, 331 | .regb_deref = (modes >> RB_ADDRB_LSB) & 0x1, 332 | .rega_deref = (modes >> RB_ADDRA_LSB) & 0x1, 333 | }, 334 | .opcode = instr >> 28, 335 | .regc = (instr >> 8) & 0xF, 336 | .regb = (instr >> 4) & 0xF, 337 | .rega = instr & 0xF, 338 | }; 339 | } 340 | #+end_src 341 | 342 | ** Modes 343 | 344 | ** Following mode rules 345 | 346 | * VM setup 347 | 348 | ** Check CLI arguments 349 | 350 | #+name: check_cli_arguments 351 | #+begin_src c :exports code 352 | if (argc != 2) { 353 | fprintf(stderr, "Need to pass file to execute.\n"); 354 | return RB_FAIL; 355 | } 356 | #+end_src 357 | 358 | ** Open code file 359 | 360 | #+name: open_code_file 361 | #+begin_src c :exports code 362 | char *fn = argv[1]; 363 | FILE *fp = fopen(fn, "rb"); 364 | if (fp == NULL) { 365 | fprintf(stderr, "Can't open `%s'.\n", fn); 366 | return RB_FAIL; 367 | } 368 | #+end_src 369 | 370 | ** Get size of code file 371 | 372 | #+name: stat_code_file 373 | #+begin_src c :exports code 374 | /* Get the size of the file so that we can allocate memory for it. 
*/
struct stat st;
if (fstat(fileno(fp), &st) != 0) {
  fprintf(stderr, "Can't stat `%s'.\n", fn);
  fclose(fp);
  return RB_FAIL;
}
#+end_src

** Allocate memory

#+name: allocate_memory
#+begin_src c :exports code
/* Allocate memory with program first, stack second.
 *
 * Memory is addressed in words, so the stack size and the program size are
 * added together before being scaled by the word size. The file's byte count
 * serves as an upper bound on its word count. calloc hands back zeroed
 * memory, so a word that was never written decodes as opcode 0 (RB_HALT). */
size_t stacksize = 1000;
off_t size = st.st_size;
size_t memwords = stacksize + (size_t)size;
rabbitw *mem = calloc(memwords, sizeof *mem);
if (mem == NULL) {
  fprintf(stderr, "Not enough memory. Could not allocate stack of size "
          "%zu + program of size %lld.\n", stacksize, (long long)size);
  /* The code file is still open at this point; do not leak it. */
  fclose(fp);
  return RB_FAIL;
}
#+end_src

** Read code into memory

#+name: read_code_into_memory
#+begin_src c :exports code
/* Read the file into memory, one word at a time. The loop is capped at the
 * number of words allocated so that the load can never write past the end of
 * mem, even if the file turns out to be larger than fstat reported. */
size_t i = 0;
rabbitw word = 0;
/* We cannot use fread because of endian-ness issues. */
while (i < memwords && read_word(fp, &word) != 0) {
  mem[i++] = word;
}
fclose(fp);
#+end_src

#+name: instruction_enum
#+begin_src c :exports code
/* Instructions available for use. The enumerator values are the opcodes that
 * decode extracts from an instruction word; RB_NUMINSTRS is the count. */
typedef enum rabbiti {
  RB_HALT = 0, RB_MOVE, RB_ADD, RB_SUB, RB_MUL, RB_DIV, RB_SHR, RB_SHL,
  RB_NAND, RB_XOR, RB_BR, RB_BRZ, RB_BRNZ, RB_IN, RB_OUT, RB_BIF,
  RB_NUMINSTRS,
} rabbiti;
#+end_src

** Fetch-decode-execute loop

#+name: fetch_decode_execute_loop
#+begin_src c :exports code
struct abc_s abc;
/* NOTE(review): the register file is sized by RB_NUMINSTRS. Register operands
 * are decoded from 4-bit fields and there are 16 opcodes, so the two sizes
 * coincide; a dedicated register-count constant would state the intent. */
rabbitw regs[RB_NUMINSTRS] = { 0 };
/* The stack starts at the first word after the loaded program. Here `i` is
 * still the loader's word count; inside the loop it is shadowed by the
 * decoded instruction. */
regs[RB_SP] = i;
/* Width of a VM word in bits, used to keep shift counts in range. */
const size_t wordbits = sizeof(rabbitw) * 8;

/* Main fetch-decode-execute loop. Returns RB_SUCCESS on RB_HALT, RB_ILLEGAL
 * on an unknown opcode, a division by zero or an instruction fetch outside
 * memory, and RB_FAIL on an invalid built-in number. mem is freed on every
 * exit path. */
while (1) {
  /* Refuse to fetch an instruction from outside the allocated memory. */
  if (regs[RB_IP] >= memwords) {
    free(mem);
    return RB_ILLEGAL;
  }

  /* Fetch the current instruction word. */
  rabbitw word = mem[regs[RB_IP]++];

  /* Decode it. */
  struct unpacked_s i = decode(word);

  /* Execute it. */
  switch (i.opcode) {
  case RB_HALT:
    free(mem);
    return RB_SUCCESS;
  case RB_MOVE: {
    /* Move is special because it has one source instead of two
     * operands. */
    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
    rabbitw *dst = i.modes.regb_deref ? &mem[regs[i.regb]] : &regs[i.regb];
    *dst = i.modes.regc_deref ? mem[src] : src;
    break;
  }
  case RB_ADD:
    abc = getabc(regs, mem, i);
    *abc.dst = abc.b + abc.c;
    break;
  case RB_SUB: {
    abc = getabc(regs, mem, i);
    rabbitw res = abc.b - abc.c;
    *abc.dst = res;
    /* Keep the zero flag in step with the latest result: set it on zero and
     * clear it otherwise, so that a stale flag cannot steer a later
     * branch. */
    if (res == 0) {
      regs[RB_FLAGS] |= 0x2U;
    } else {
      regs[RB_FLAGS] &= ~0x2U;
    }
    break;
  }
  case RB_MUL:
    abc = getabc(regs, mem, i);
    *abc.dst = abc.b * abc.c;
    break;
  case RB_DIV:
    abc = getabc(regs, mem, i);
    if (abc.c == 0) {
      free(mem);
      return RB_ILLEGAL;
    }

    *abc.dst = abc.b / abc.c;
    break;
  case RB_SHR:
    abc = getabc(regs, mem, i);
    /* Shifting by the word width or more is undefined in C, so such a shift
     * is defined here to produce 0. */
    *abc.dst = abc.c < wordbits ? abc.b >> abc.c : 0;
    break;
  case RB_SHL:
    abc = getabc(regs, mem, i);
    /* Same range guard as RB_SHR. */
    *abc.dst = abc.c < wordbits ? abc.b << abc.c : 0;
    break;
  case RB_NAND:
    abc = getabc(regs, mem, i);
    *abc.dst = ~(abc.b & abc.c);
    break;
  case RB_XOR:
    abc = getabc(regs, mem, i);
    *abc.dst = abc.b ^ abc.c;
    break;
  case RB_BR: {
    /* Branch is special because it only has one argument. */
    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
    regs[RB_IP] = i.modes.regc_deref ? mem[src] : src;
    break;
  }
  case RB_BRZ: {
    /* Branch if zero is special because it only has one argument. The
     * operand is fetched whether or not the branch is taken, so that an
     * immediate word is always consumed and is never executed as the next
     * instruction. */
    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
    /* Taken when the zero flag is set. */
    if ((regs[RB_FLAGS] & 0x2U) != 0) {
      regs[RB_IP] = i.modes.regc_deref ? mem[src] : src;
    }
    break;
  }
  case RB_BRNZ: {
    /* Branch not zero is special because it only has one argument. As with
     * RB_BRZ, the operand is fetched unconditionally. */
    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
    /* Taken when the zero flag is clear. */
    if ((regs[RB_FLAGS] & 0x2U) == 0) {
      regs[RB_IP] = i.modes.regc_deref ? mem[src] : src;
    }
    break;
  }
  case RB_IN: {
    /* Input is special because it does not have an argument. */
    rabbitw *dstp = i.modes.regc_deref ? &mem[regs[i.regc]] : &regs[i.regc];
    *dstp = getchar();
    break;
  }
  case RB_OUT: {
    /* Output is special because it has one argument and no
     * destination. */
    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
    src = i.modes.regc_deref ? mem[src] : src;
    putchar(src);
    break;
  }
  case RB_BIF: {
    rabbitw src = i.modes.immediate ? fetch_immediate() : regs[i.regc];
    src = i.modes.regc_deref ? mem[src] : src;
    /* NOTE(review): presumably biftable holds NUM_BIFS entries, which makes
     * index NUM_BIFS itself out of range -- confirm against rabbit_bif.h. */
    if (src >= NUM_BIFS) {
      fprintf(stderr, "Invalid bif: `%u'.\n", (unsigned)src);
      free(mem);
      return RB_FAIL;
    }
    bif f = biftable[src].f;
    f(regs, mem);
    break;
  }
  /* case RB_CFF:
     break;
  */
  default:
    free(mem);
    return RB_ILLEGAL;
  }
}
#+end_src

* Building the VM

#+begin_src make :tangle Makefile :exports code
CC=gcc
CFLAGS=-Wall -Wextra -Wpedantic
LFLAGS=
RABBIT_OBJS=rabbit.o rabbit_io.o rabbit_codewords.o
ASSEMBLER_OBJS=assembler.o rabbit_io.o
DISASSEMBLER_OBJS=disassembler.o rabbit_io.o rabbit_codewords.o

all: rabbit assembler disassembler

clean:
	rm -f rabbit rabbit-asm rabbit-dis
	rm -f $(RABBIT_OBJS) $(ASSEMBLER_OBJS) $(DISASSEMBLER_OBJS)

rabbit: $(RABBIT_OBJS)
	$(CC) $(CFLAGS) $(LFLAGS) $(RABBIT_OBJS) -o rabbit

assembler: $(ASSEMBLER_OBJS)
	$(CC) $(CFLAGS) $(LFLAGS) $(ASSEMBLER_OBJS) -o rabbit-asm

disassembler:
$(DISASSEMBLER_OBJS) 573 | $(CC) $(CFLAGS) $(LFLAGS) $(DISASSEMBLER_OBJS) -o rabbit-dis 574 | 575 | %.o: %.c 576 | $(CC) $(CFLAGS) -o $@ -c $< 577 | #+end_src 578 | 579 | # Local Variables: 580 | # org-src-preserve-indentation: t 581 | # org-babel-noweb-wrap-start: "{{{" 582 | # org-babel-noweb-wrap-end: "}}}" 583 | # End: 584 | -------------------------------------------------------------------------------- /rabbitvm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RabbitVM/rabbit/bede639aab6179867d564b412dead7e06344ed3e/rabbitvm.pdf -------------------------------------------------------------------------------- /style.css: -------------------------------------------------------------------------------- 1 | #content, #postamble { 2 | width: 60%; 3 | position: relative; 4 | margin: 0 auto; 5 | } 6 | 7 | #postamble:before { 8 | content: ""; 9 | display: block; 10 | height: 1px; 11 | width: 100%; 12 | margin-top: 10px; 13 | margin-bottom: 20px; 14 | background: black; 15 | } 16 | -------------------------------------------------------------------------------- /test.rbt: -------------------------------------------------------------------------------- 1 | out $97 2 | out $10 3 | halt 4 | --------------------------------------------------------------------------------