├── .gitignore ├── README ├── bytecode.h ├── compile ├── funcalloc.h ├── jit.c ├── opcode.h ├── runtime.h ├── samples ├── answer.rb ├── answer_with_else.rb └── simple.rb └── vm.c /.gitignore: -------------------------------------------------------------------------------- 1 | a.* -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ** Virtual machine built during the http://proglangmasterclass.com/ ** 2 | 3 | This package contains a virtual machine and a JIT compiler for a subset of the language at https://github.com/proglangclass/interpreter. 4 | 5 | (c) Marc-Andre Cournoyer 6 | -------------------------------------------------------------------------------- /bytecode.h: -------------------------------------------------------------------------------- 1 | /* 2 | This file was compiled by the `compile` script. 3 | It contains the bytecode for the following code: 4 | 5 | print("the answer is:") 6 | a = 30 + 2 7 | if false 8 | print(a) 9 | else 10 | print("no") 11 | end 12 | */ 13 | 14 | #define LITERALS { \ 15 | (void *) "the answer is:", \ 16 | (void *) "print", \ 17 | (void *) 30, \ 18 | (void *) 2, \ 19 | (void *) "no", \ 20 | } 21 | 22 | #define INSTRUCTIONS { 3, 2, 0, 0, 1, 1, 1, 2, 1, 3, 10, 7, 0, 5, 0, 8, 8, 3, 6, 0, 0, 1, 1, 9, 6, 3, 2, 4, 0, 1, 1, 11 } 23 | -------------------------------------------------------------------------------- /compile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby -s 2 | # Script to compile a NotRuby file to Bytecode. 3 | 4 | interpreter_path = File.expand_path("../../interpreter", __FILE__) 5 | 6 | unless File.directory?(interpreter_path) 7 | abort "`interpreter` directory not found.\nMove or clone the interpreter repo at: #{interpreter_path}" 8 | end 9 | 10 | $:.unshift File.expand_path("../../interpreter", __FILE__) 11 | 12 | if $h # -h option 13 | abort < 42 | */ 43 | 44 | #define LITERALS { \<% literals.each do |literal| %> 45 | (void *) <%= literal.inspect %>, \<% end %> 46 | } 47 | 48 | #define INSTRUCTIONS { <%= instructions.join(', ') %> } 49 | ERB 50 | 51 | File.write("bytecode.h", c) -------------------------------------------------------------------------------- /funcalloc.h: -------------------------------------------------------------------------------- 1 | // Utilities to manage function memory. Taken from potion. 2 | 3 | #ifdef __MINGW32__ 4 | #include 5 | #include 6 | 7 | void *funcalloc(size_t length) { 8 | void *mem = VirtualAlloc(NULL, length, MEM_COMMIT, PAGE_EXECUTE_READWRITE); 9 | return mem; 10 | } 11 | 12 | int funcfree(void *mem, size_t len) { 13 | return VirtualFree(mem, len, MEM_DECOMMIT) != 0 ? 0 : -1; 14 | } 15 | 16 | #else 17 | #include 18 | 19 | void *funcalloc(size_t length) { 20 | void *mem = mmap(NULL, length, PROT_EXEC|PROT_READ|PROT_WRITE, (MAP_PRIVATE|MAP_ANON), -1, 0); 21 | if (mem == MAP_FAILED) return NULL; 22 | return mem; 23 | } 24 | 25 | int funcfree(void *mem, size_t len) { 26 | return munmap(mem, len); 27 | } 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /jit.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "opcode.h" 5 | #include "funcalloc.h" 6 | 7 | typedef int (JitFunc)(); 8 | 9 | // Registers 10 | #define EAX 0 11 | #define ECX 1 12 | #define EDX 2 13 | #define EBX 3 14 | 15 | // Helper macros to assemble code 16 | #define EMIT_T(i,T) *ptr = (T)(i); ptr += sizeof(T) 17 | #define EMIT(i) EMIT_T(i, byte) 18 | #define EMIT_INT(i) EMIT_T(i, int) 19 | #define EMIT_REG2REG(r1,r2) EMIT(0xC0 | r1 << 3 | r2); 20 | 21 | // Emit x86-64 machine code dynamicaly 22 | void precompile() { 23 | // Allocate memory to write the instructions to 24 | byte *ptr = funcalloc(4096); 25 | JitFunc *func = (JitFunc *)ptr; 26 | 27 | // Emit x86-64 machine code the equivalent of: 28 | // int *func() { 29 | // return 30 + 2; 30 | // } 31 | 32 | // Setup stack frame (C calling convention) 33 | EMIT(0x55); // push %ebp 34 | EMIT(0x48); EMIT(0x89); EMIT(0xe5); // movq %rsp,%rbp 35 | 36 | EMIT(0xB8 + EAX); EMIT_INT(30); // mov [int],%eax 37 | EMIT(0xB8 + ECX); EMIT_INT(2); // mov [int],%ecx 38 | EMIT(0x01); EMIT_REG2REG(ECX, EAX); // add %ecx,%eax 39 | 40 | EMIT(0xC9); // leave 41 | EMIT(0xC3); // ret 42 | 43 | // Execute the compiled function 44 | int value = func(); 45 | printf("> %d\n", value); 46 | 47 | // Release the function 48 | funcfree(func, 4096); 49 | } 50 | 51 | 52 | /////// Here we compile our VM bytecode to x86-64 machine code /////// 53 | 54 | // Set a register as being in use 55 | #define REG_PUSH() registers[ri++] 56 | // Get and release a register. 57 | #define REG_POP() registers[--ri] 58 | 59 | // Compile instructions to x86-64 machine code into a pointer to a function. 60 | void compile(long literals[], byte instructions[], JitFunc *func) { 61 | byte *ip = instructions; 62 | 63 | byte *ptr = (byte *)func; 64 | byte registers[] = { EAX, ECX, EDX, EBX }; 65 | int ri = 0; // register index 66 | 67 | // Setup stack frame (C calling convention) 68 | EMIT(0x55); // push %ebp 69 | EMIT(0x48); EMIT(0x89); EMIT(0xe5); // movq %rsp,%rbp 70 | 71 | while (1) { 72 | switch (*ip) { 73 | case PUSH_NUMBER: { 74 | ++ip; // advance to operand (literal) 75 | int number = (int)literals[*ip]; 76 | EMIT(0xB8 + REG_PUSH()); EMIT_INT(number); // mov [int],%eax 77 | 78 | break; 79 | } 80 | case ADD: { 81 | byte reg1 = REG_POP(); 82 | byte reg2 = REG_POP(); 83 | EMIT(0x01); EMIT_REG2REG(reg1, reg2); // add %ebx,%eax 84 | break; 85 | } 86 | case RETURN: 87 | EMIT(0xC9); // leave 88 | EMIT(0xC3); // ret 89 | return; 90 | } 91 | ip++; 92 | } 93 | } 94 | 95 | int main(int argc, char const *argv[]) { 96 | long literals[] = { 97 | /* [0] */ (long) 30, 98 | /* [1] */ (long) 2 99 | }; 100 | 101 | byte instructions[] = { 102 | PUSH_NUMBER, 0, // 30 103 | PUSH_NUMBER, 1, // 2 104 | ADD, 105 | RETURN, 106 | }; 107 | 108 | // Allocate memory to write the instructions to 109 | JitFunc *func = funcalloc(4096); 110 | 111 | // Compile to the function pointer (func) 112 | compile(literals, instructions, func); 113 | // Execute the function 114 | printf("> %d\n", func()); 115 | 116 | // Release memory used by function 117 | funcfree(func, 4096); 118 | 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /opcode.h: -------------------------------------------------------------------------------- 1 | enum { 2 | // ------- Stack ------- 3 | // Opcode Operands before after 4 | /* 00 */ CALL, // index, argc [rcv, arg...] [returned] 5 | /* 01 */ PUSH_NUMBER, // index [] [number] 6 | /* 02 */ PUSH_STRING, // index [] [string] 7 | /* 03 */ PUSH_SELF, // [] [self] 8 | /* 04 */ PUSH_NIL, // [] [nil] 9 | /* 05 */ PUSH_BOOL, // 1=t, 0=f [] [true or false] 10 | /* 06 */ GET_LOCAL, // index [] [value] 11 | /* 07 */ SET_LOCAL, // index [value] [] 12 | /* 08 */ JUMP_UNLESS, // offset [test] [] 13 | /* 09 */ JUMP, // offset [] [] 14 | /* 10 */ ADD, // [a, b] [result] 15 | /* 11 */ RETURN // [] [] 16 | }; 17 | 18 | typedef unsigned char byte; // 1 byte -------------------------------------------------------------------------------- /runtime.h: -------------------------------------------------------------------------------- 1 | /* == Mock runtime == 2 | * This runtime is only for demonstration purposes and implements the strict 3 | * minimum for the example shown in the class. 4 | */ 5 | 6 | #include 7 | #include 8 | 9 | enum { tObject, tNumber, tString }; 10 | 11 | typedef struct { 12 | char type; 13 | union { 14 | char *string; 15 | int number; 16 | } value; 17 | int refcount; 18 | } Object; 19 | 20 | static Object *TrueObject; 21 | static Object *FalseObject; 22 | static Object *NilObject; 23 | 24 | Object *Object_new() { 25 | return calloc(1, sizeof(Object)); 26 | } 27 | 28 | /////////////// GC /////////////// 29 | 30 | Object *retain(Object *self) { 31 | self->refcount++; 32 | return self; 33 | } 34 | 35 | void release(Object *self) { 36 | self->refcount--; 37 | if (self->refcount <= 0) 38 | free(self); 39 | } 40 | 41 | /////////////////////////////////// 42 | 43 | 44 | char Object_is_true(Object *self) { 45 | // false and nil are == false, 46 | // everything else is true. 47 | if (self == FalseObject || self == NilObject) return 0; 48 | return 1; 49 | } 50 | 51 | Object *Number_new(int value) { 52 | Object *object = Object_new(); 53 | object->type = tNumber; 54 | object->value.number = value; 55 | return object; 56 | } 57 | 58 | int Number_value(Object *self) { 59 | assert(self->type == tNumber); 60 | return self->value.number; 61 | } 62 | 63 | Object *String_new(char *value) { 64 | Object *object = Object_new(); 65 | object->type = tString; 66 | object->value.string = value; 67 | return object; 68 | } 69 | 70 | Object *call(Object *receiver, char *message, Object *argv[], int argc) { 71 | // HACK hardcoded methods. In a real runtime, you'd have proper classes and methods. 72 | // See runtime.rb in our interpreter for an example. 73 | if (strcmp(message, "+") == 0) { 74 | // Number#+ 75 | return Number_new(receiver->value.number + argv[0]->value.number); 76 | } else if (strcmp(message, "print") == 0) { 77 | // Object#print 78 | switch (argv[0]->type) { 79 | case tNumber: 80 | printf("%d\n", argv[0]->value.number); 81 | return argv[0]; 82 | case tString: 83 | printf("%s\n", argv[0]->value.string); 84 | return argv[0]; 85 | } 86 | } 87 | 88 | return 0; 89 | } 90 | 91 | void init_runtime() { 92 | TrueObject = retain(Object_new()); 93 | FalseObject = retain(Object_new()); 94 | NilObject = retain(Object_new()); 95 | } 96 | 97 | void destroy_runtime() { 98 | release(TrueObject); 99 | release(FalseObject); 100 | release(NilObject); 101 | } -------------------------------------------------------------------------------- /samples/answer.rb: -------------------------------------------------------------------------------- 1 | print("the answer is:") 2 | a = 30 + 2 3 | if true 4 | print(a) 5 | end -------------------------------------------------------------------------------- /samples/answer_with_else.rb: -------------------------------------------------------------------------------- 1 | print("the answer is:") 2 | a = 30 + 2 3 | if false 4 | print(a) 5 | else 6 | print("no") 7 | end -------------------------------------------------------------------------------- /samples/simple.rb: -------------------------------------------------------------------------------- 1 | print("hi") -------------------------------------------------------------------------------- /vm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "runtime.h" 4 | #include "opcode.h" 5 | #include "bytecode.h" 6 | 7 | // Helpers to play with the stack 8 | #define LOCALS_MAX 10 9 | #define STACK_MAX 10 10 | #define STACK_PUSH(I) do { \ 11 | assert(sp-stack < STACK_MAX); \ 12 | *(++sp) = (I); \ 13 | retain(*sp); \ 14 | } while(0) 15 | #define STACK_POP() (*sp--) 16 | 17 | 18 | void run(void *literals[], byte instructions[]) { 19 | byte *ip = instructions; // instruction pointer 20 | 21 | Object *stack[STACK_MAX]; // THE famous stack 22 | Object **sp = stack; // stack pointer, keeps track of current position 23 | 24 | Object *locals[LOCALS_MAX] = {}; // where we store our local variables 25 | 26 | // Setup the runtime 27 | Object *self = Object_new(); 28 | retain(self); 29 | 30 | // Start processing instructions 31 | while (1) { 32 | switch (*ip) { 33 | case PUSH_NUMBER: { 34 | ip++; // advance to the operand (literal index) 35 | STACK_PUSH(Number_new((long)literals[*ip])); 36 | break; 37 | } 38 | case PUSH_STRING: { 39 | ip++; // advance to the operand (literal index) 40 | STACK_PUSH(String_new((char *)literals[*ip])); 41 | break; 42 | } 43 | case PUSH_SELF: { 44 | STACK_PUSH(self); 45 | break; 46 | } 47 | case PUSH_NIL: { 48 | STACK_PUSH(NilObject); 49 | break; 50 | } 51 | case PUSH_BOOL: { 52 | ip++; // advance to operand (0 = false, 1 = true) 53 | if (*ip == 0) { 54 | STACK_PUSH(FalseObject); 55 | } else { 56 | STACK_PUSH(TrueObject); 57 | } 58 | break; 59 | } 60 | case CALL: { 61 | ip++; // advance to operand (method name index in literals) 62 | char *method = literals[*ip]; 63 | ip++; // advance to operand (# of args) 64 | int argc = *ip; 65 | Object *argv[10]; 66 | 67 | int i; 68 | for (i = argc - 1; i >= 0; i--) argv[i] = STACK_POP(); 69 | Object *receiver = STACK_POP(); 70 | 71 | Object *result = call(receiver, method, argv, argc); 72 | 73 | STACK_PUSH(result); 74 | 75 | // Release all the objects 76 | for (i = argc - 1; i >= 0; i--) release(argv[i]); 77 | release(receiver); 78 | 79 | break; 80 | } 81 | case RETURN: { 82 | goto cleanup; 83 | break; 84 | } 85 | case GET_LOCAL: { 86 | ip++; // advance operand (local index) 87 | STACK_PUSH(locals[*ip]); 88 | break; 89 | } 90 | case SET_LOCAL: { 91 | ip++; // local index 92 | locals[*ip] = STACK_POP(); 93 | break; 94 | } 95 | case ADD: { 96 | Object *a = STACK_POP(); 97 | Object *b = STACK_POP(); 98 | 99 | STACK_PUSH(Number_new(Number_value(a) + Number_value(b))); 100 | 101 | release(a); 102 | release(b); 103 | 104 | break; 105 | } 106 | case JUMP_UNLESS: { 107 | ip++; // operand (offset, # of bytes to jump forward) 108 | byte offset = *ip; 109 | Object *condition = STACK_POP(); 110 | 111 | if (!Object_is_true(condition)) ip += offset; 112 | 113 | release(condition); 114 | 115 | break; 116 | } 117 | case JUMP: { 118 | ip++; // operand (offset, # of bytes to jump forward) 119 | byte offset = *ip; 120 | 121 | ip += offset; 122 | 123 | break; 124 | } 125 | } 126 | ip++; 127 | } 128 | 129 | cleanup: 130 | release(self); 131 | int i; 132 | // Release all the local variables 133 | for (i = 0; i < LOCALS_MAX; i++) if (locals[i]) release(locals[i]); 134 | // Release everything left on the stack 135 | while (sp > stack) release(STACK_POP()); 136 | } 137 | 138 | int main (int argc, char const *argv[]) { 139 | // Get compiled literals table and instructions from bytecode.h 140 | void *literals[] = LITERALS; 141 | byte instructions[] = INSTRUCTIONS; 142 | 143 | init_runtime(); 144 | 145 | // Run that thing! 146 | run(literals, instructions); 147 | 148 | destroy_runtime(); 149 | 150 | return 0; 151 | } --------------------------------------------------------------------------------