├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── abi.txt ├── backend.c ├── backend.h ├── bench.lc ├── frontend.c ├── frontend.h ├── haskell ├── .gitignore ├── Makefile ├── Strong.hs └── Weak.hs ├── main.c └── runtime ├── builtins.c ├── builtins.h ├── data_layout.h ├── gc.c ├── gc.h ├── normalize.c ├── normalize.h └── runtime.h /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | lc 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Mark Barbone 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -O2 -foptimize-sibling-calls -g 2 | 3 | RT_OBJS = build/gc.o build/builtins.o build/normalize.o 4 | OBJS = build/frontend.o build/backend.o build/main.o 5 | 6 | lc: $(RT_OBJS) $(OBJS) 7 | gcc -o $@ $^ 8 | 9 | $(RT_OBJS): build/%.o: runtime/%.c build runtime/*.h 10 | gcc $(CFLAGS) -c $< -o $@ 11 | $(OBJS): build/%.o: %.c build *.h runtime/*.h 12 | gcc $(CFLAGS) -c $< -o $@ 13 | 14 | build: 15 | mkdir -p build 16 | 17 | .PHONY: bench 18 | bench: lc 19 | hyperfine './lc "$$(cat bench.lc)"' 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A λ-calculus interpreter 2 | 3 | A strongly-normalizing interpreter for the untyped lambda calculus. 4 | 5 | ```shell 6 | $ make 7 | $ ./lc "(λ x y. x) (λ x. x)" 8 | Input: (λ x y. x) (λ x. x) 9 | Compiling... Compiled! Normalizing... 10 | Normal form: λ a b. b 11 | ``` 12 | 13 | ## Features 14 | 15 | - Generational copying GC, with a dynamically sized old space 16 | - Custom strongly normalizing lazy evaluation runtime 17 | - Compiles lambda terms to x86\_64 machine code 18 | 19 | Only tested on Linux, and it only supports x86\_64. 20 | 21 | ## What? 22 | 23 | Normalizing a term in the λ-calculus means applying the β-reduction rule until 24 | it can't be simplified any more. While it's usually described using syntactic 25 | substitution, the fastest algorithm in practice (normalization by evaluation) 26 | involves running it like a regular functional programming language, with a 27 | catch: we need to run programs with free variables in them, requiring some 28 | special runtime support. 
29 | 30 | There are, of course, many ways to run a regular functional programming 31 | language: eager vs lazy evaluation, interpreted vs bytecode interpreted vs 32 | compiled to machine code, etc. This implementation uses lazy evaluation and 33 | compiles to machine code. 34 | 35 | ## How? 36 | 37 | Short description of the lazy evaluation implementation strategy: 38 | - Push/enter, like Clean and old GHC 39 | - Closures instead of supercombinators, like GHC 40 | - Separate data stack + call stack, like Clean 41 | - Every heap object's a closure, like GHC 42 | - Eager blackholing 43 | - Thunk entry code pushes its own update frame, like GHC 44 | - Collapsing adjacent update frames is handled by the thunk entry code 45 | 46 | It has a two-pass compiler and a simple generational copying GC. 47 | 48 | 49 | ## Simple benchmarks 50 | 51 | The file `bench.lc` contains a lambda calculus program that does some Church 52 | numeral math, with normal form `λ s z. z`. The same lambda term is implemented 53 | in Haskell in different ways in `haskell/{Strong,Weak}.hs`. 54 | 55 | Here are the results, on my computer: 56 | 57 | - **This interpreter**: 3.5 seconds; allocates 5,854,199,808 bytes 58 | - **Haskell, `Strong.hs`:** 2.5 seconds; allocates 8,609,423,616 bytes 59 | 60 | This would perform comparably to a Haskell-based implementation of the 61 | "Tagged normalization" presented in the paper about Coq's `native_compute`. 62 | 63 | - **Haskell, `Weak.hs`:** 2.3 seconds; allocates 6,887,761,200 bytes 64 | 65 | This would perform comparably to a Haskell-based implementation of the 66 | `native_compute` paper's "Untagged normalization". 67 | 68 | Despite allocating a lot, this lambda term uses very little space: GHC reports 69 | ~50K max residency in both cases (and it's similar for my interpreter, but I 70 | haven't measured). 
71 | 72 | The extra 1.7GB that the tagged implementation allocates compared to the 73 | untagged implementation come from two sources: both the overhead of including 74 | tags, and the overhead of currying not being optimized. I'm surprised the 75 | overhead is so little. 76 | 77 | This interpreter and the untagged implementation should make pretty much the 78 | same allocations, so I think the extra gigabyte that `Weak.hs` allocates comes 79 | from two cases where GHC's heap objects are larger than mine: 80 | - GHC's thunks all contain an extra word, for lock-free concurrency reasons 81 | - GHC's partial application objects use a 3-word header, while mine use only 2 82 | words 83 | 84 | IMO the main takeaway from this benchmark is that for purely functional 85 | languages, **the garbage collector is key**. How currying is done, how 86 | arguments are passed, etc., all matter much less than having a high-quality 87 | garbage collector. 88 | 89 | The one area where this interpreter beats GHC is compile time: it compiles the 90 | term practically instantly. 
91 | 92 | 93 | ## Future things 94 | 95 | Really ought to have: 96 | - [x] Big enough tests to actually test the GC 97 | - [ ] Standardize a global term size limit 98 | - [ ] mmap the data stack with a guard page below it 99 | - [x] Make sure allocations are inlined in the runtime 100 | - [x] Benchmarks 101 | - [ ] Fix the `-Wstrict-aliasing` warnings 102 | - [ ] Document the runtime better 103 | 104 | Want to have: 105 | - Add support for `fix`, let/letrec 106 | - Nice CLI and REPL 107 | 108 | Probably won't have but would be cool: 109 | - Use it in some algorithm that requires strong normalization, like a dependent 110 | typechecker or possibly supercompilation by evaluation 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /abi.txt: -------------------------------------------------------------------------------- 1 | ## Runtime 2 | 3 | ```c 4 | typedef size_t word; 5 | 6 | typedef struct obj { 7 | void (*entrypoint)(void); 8 | word contents[]; 9 | } obj; 10 | 11 | enum gc_tag { FORWARD, REF, FUN, PAP, RIGID, THUNK, BLACKHOLE }; 12 | 13 | struct gc_data { 14 | /** Size of the whole object in words. 15 | * If 0, the first word of contents is a `struct info_word` that contains 16 | * the true size 17 | */ 18 | uint32_t size; 19 | /* enum gc_tag */ uint32_t tag; 20 | }; 21 | 22 | struct info_word { 23 | /** Size of the whole object in words */ 24 | uint32_t size; 25 | /** only in heap objects representing rigid terms */ 26 | uint32_t var; 27 | }; 28 | ``` 29 | 30 | Heap object layout: 31 | 32 | ``` 33 | .dword GC_DATA_SIZE 34 | .dword GC_DATA_TAG 35 | entrypoint: 36 | x86 code... 
37 | ``` 38 | entrypoint contains executable code for this object 39 | 40 | (struct gc_data *) (entrypoint - sizeof(struct gc_data)) contains the GC info 41 | table for this object 42 | 43 | --- 44 | 45 | Calling convention/ABI: 46 | 47 | rsp is call stack (obviously) 48 | rbx is self, the closure being evaluated 49 | r12 is data stack 50 | r13 is heap pointer 51 | r14 is heap limit 52 | r15 is argc 53 | rdi, rsi are temporary registers 54 | 55 | All of these (except the temp registers) are callee-saved under the SysV ABI, so 56 | we're OK to call foreign library functions. 57 | 58 | When entering a closure: 59 | - self points to the closure being evaluated 60 | - The data stack contains the (blackholed) thunk to be updated, then argc many 61 | arguments 62 | Operationally, entering `f` pops argc many arguments, evaluates `f args...` to a 63 | value, then returns that value in `self` 64 | → Notably, it's up to the caller to do the thunk update 65 | 66 | 67 | Exposed runtime functions: 68 | rt_too_few_args: package up self and the args into a PAP 69 | rt_update_thunk: update the top of the data stack to `REF self` 70 | rt_avoid_adjacent_update_frames: 71 | like rt_update_thunk but also push self to the data stack 72 | rt_minor_gc: do GC! 73 | Built-in entry codes: 74 | Partial application, rt_pap_entry 75 | Rigid term, rt_rigid_entry 76 | (?) 77 | 78 | 79 | 80 | Compiled entry code actions: 81 | 0 (Closures only). Argc check 82 | If there are too few arguments, then jump to rt_too_few_args 83 | 0 (thunks only). Update frame 84 | If argc == 0: 85 | call rt_avoid_adjacent_update_frames 86 | Else: 87 | push self to data stack 88 | push argc to call stack 89 | set argc := 0 90 | call rest_of_code 91 | pop argc 92 | call rt_update_thunk 93 | jump to self->entrypoint 94 | rest_of_code: 95 | 1. 
Heap check (when TOTAL_ALLOC_SIZE != 0) 96 | Heap -= TOTAL_ALLOC_SIZE 97 | if Heap < Heap limit: 98 | call rt_minor_gc 99 | Heap -= TOTAL_ALLOC_SIZE 100 | (TOTAL_ALLOC_SIZE is less than the nursery, so no need to check again) 101 | 2. Perform allocations 102 | temp = Heap 103 | Repeat zero or more times: 104 | *temp = entry code 105 | (repeated) *(temp+8N) = Nth env item for this allocation 106 | push temp to the data stack 107 | temp += size of that allocation 108 | 3. Shuffle the data stack and self 109 | Increase data stack size if necessary 110 | Perform parallel move using the temp registers 111 | → before writing to self, blackhole it if it's a thunk 112 | Decrease data stack size if necessary 113 | Set new value of argc 114 | 4. Execute the call! Jump to *self 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /backend.c: -------------------------------------------------------------------------------- 1 | #include "backend.h" 2 | #include "runtime/data_layout.h" 3 | #include "runtime/builtins.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | /************** General utils *************/ 18 | 19 | #define failwith(...) do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) 20 | 21 | static void init_code_buf(void); 22 | 23 | static uint8_t *code_buf_start = NULL; 24 | static uint8_t *code_buf = NULL; 25 | static uint8_t *code_buf_end = NULL; 26 | 27 | static void write_header(uint32_t size, uint32_t tag); 28 | static void write_code(size_t len, const uint8_t code[len]); 29 | 30 | #define CODE(...) 
do { \ 31 | const uint8_t code[] = { __VA_ARGS__ }; \ 32 | write_code(sizeof(code), code); \ 33 | } while (0) 34 | #define U32(x) \ 35 | (uint8_t) (x) , (uint8_t) ((x) >> 8), \ 36 | (uint8_t) ((x) >> 16), (uint8_t) ((x) >> 24) 37 | #define U64(x) \ 38 | (uint8_t) (x) , (uint8_t) ((x) >> 8), \ 39 | (uint8_t) ((x) >> 16), (uint8_t) ((x) >> 24), \ 40 | (uint8_t) ((x) >> 32), (uint8_t) ((x) >> 40), \ 41 | (uint8_t) ((x) >> 48), (uint8_t) ((x) >> 56) 42 | 43 | enum reg { 44 | RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, 45 | R8, R9, R10, R11, R12, R13, R14, R15 46 | }; 47 | #define SELF RBX 48 | #define DATA_STACK R12 49 | #define HEAP_PTR R13 50 | #define HEAP_LIMIT R14 51 | #define ARGC R15 52 | 53 | static void mem64(uint8_t opcode, enum reg reg, enum reg ptr, int32_t offset); 54 | static void reg64(uint8_t opcode, enum reg reg, enum reg other_reg); 55 | 56 | #define OP_LOAD 0x8b 57 | #define OP_STORE 0x89 58 | 59 | #define LOAD(reg, ptr, offset) \ 60 | mem64(OP_LOAD, reg, ptr, offset) 61 | #define STORE(reg, ptr, offset) \ 62 | mem64(OP_STORE, reg, ptr, offset) 63 | #define MOV_RR(dest, src) reg64(OP_STORE, src, dest) 64 | 65 | // Add constant value 'imm' to register 'reg' 66 | static void add_imm(enum reg reg, int32_t imm); 67 | 68 | // idx 0 is the first env item. env is usually SELF 69 | static void load_env_item(enum reg reg, enum reg env, size_t idx); 70 | // idx 0 is the top of the stack 71 | static void load_arg(enum reg reg, size_t idx); 72 | static void store_arg(size_t idx, enum reg reg); 73 | 74 | static void blackhole_self(void); 75 | 76 | 77 | struct var_info { 78 | bool is_used; 79 | // Only when used 80 | size_t env_idx; 81 | }; 82 | 83 | struct env { 84 | // do we need? 
85 | struct env *up; 86 | var args_start; 87 | var lets_start; 88 | size_t envc; 89 | // args_start elements, envc of which are used 90 | struct var_info upvals[]; 91 | }; 92 | 93 | struct compile_result { 94 | void *code; 95 | struct env *env; 96 | }; 97 | 98 | static size_t var_to_stack_index(size_t lvl, struct env *env, var v); 99 | static void make_sure_can_access_var(struct env *env, var v); 100 | 101 | 102 | static void init_code_buf(void) { 103 | if (code_buf) 104 | return; 105 | 106 | // Need to mmap it so that I can mprotect it later. 107 | const size_t len = 8 * 1024 * 1024; 108 | code_buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 109 | if (!code_buf) 110 | failwith("Couldn't allocate buffer for code\n"); 111 | code_buf_start = code_buf; 112 | code_buf_end = code_buf_start + len; 113 | } 114 | 115 | void compile_finalize(void) { 116 | // mprotect it 117 | size_t len = code_buf_end - code_buf_start; 118 | if (mprotect(code_buf_start, len, PROT_READ | PROT_EXEC)) 119 | failwith("Couldn't map as executable: %s\n", strerror(errno)); 120 | } 121 | 122 | static void write_code(size_t len, const uint8_t code[len]) { 123 | uint8_t *end = code_buf + len; 124 | if (end > code_buf_end) failwith("Too much code"); 125 | memcpy(code_buf, code, len); 126 | code_buf = end; 127 | } 128 | 129 | #define REXW(R,X,B) \ 130 | (0x48 | ((R >> 1) & 4) | ((X >> 2) & 3) | (B >> 3)) 131 | #define MODRM(Mod, Reg, RM) \ 132 | (((Mod) << 6) | ((Reg & 7) << 3) | (RM & 7)) 133 | 134 | static void reg64(uint8_t opcode, enum reg reg, enum reg other_reg) { 135 | // Mod == 11: r/m 136 | CODE( 137 | REXW(reg, 0, other_reg), 138 | opcode, 139 | MODRM(3, reg, other_reg) 140 | ); 141 | } 142 | 143 | static void mem64(uint8_t opcode, enum reg reg, enum reg ptr, int32_t offset) { 144 | CODE(REXW(reg, 0, ptr), opcode); 145 | 146 | if ((ptr & 7) == RSP) { 147 | // r/m == rsp: [SIB] 148 | 149 | if (offset == 0) 150 | // Mod == 00 && index == rsp: [base] 151 | 
CODE(MODRM(0, reg, RSP), 0x24); 152 | else if (-128 <= offset && offset < 128) 153 | // Mod == 01 && index == rsp: [base + disp8] 154 | CODE(MODRM(1, reg, RSP), 0x24, (uint8_t) offset); 155 | else 156 | // Mod == 10 && index == rsp: [base + disp32] 157 | CODE(MODRM(2, reg, RSP), 0x24, (uint32_t) offset); 158 | 159 | return; 160 | } 161 | 162 | if (offset == 0 && (ptr & 7) != RBP) 163 | // Mod == 00: [r/m] 164 | CODE(MODRM(0, reg, ptr)); 165 | else if (-128 <= offset && offset < 128) 166 | // Mod == 01: [r/m + disp8] 167 | CODE(MODRM(1, reg, ptr), (uint8_t) offset); 168 | else 169 | // Mod == 10: [r/m + disp32] 170 | CODE(MODRM(2, reg, ptr), U32((uint32_t) offset)); 171 | } 172 | 173 | static void add_imm(enum reg reg, int32_t imm) { 174 | if (imm == 0) return; 175 | if (-128 <= imm && imm < 128) 176 | CODE(REXW(0, 0, reg), 0x83, MODRM(3, 0, reg), (uint8_t) imm); 177 | else 178 | CODE(REXW(0, 0, reg), 0x81, MODRM(3, 0, reg), U32((uint32_t) imm)); 179 | } 180 | 181 | static size_t var_to_stack_index(size_t lvl, struct env *env, var v) { 182 | assert(env->args_start <= v && v < lvl); 183 | if (v >= env->lets_start) 184 | return lvl - v - 1; 185 | else 186 | return v - env->args_start + lvl - env->lets_start; 187 | } 188 | static void make_sure_can_access_var(struct env *env, var v) { 189 | while (v < env->args_start && !env->upvals[v].is_used) { 190 | env->upvals[v].is_used = true; 191 | env->upvals[v].env_idx = env->envc++; 192 | env = env->up; 193 | } 194 | } 195 | 196 | static void load_env_item(enum reg reg, enum reg env, size_t idx) { 197 | assert(idx < INT_MAX / 8 - 8); 198 | LOAD(reg, env, 8 * idx + 8); 199 | } 200 | static void load_arg(enum reg reg, size_t idx) { 201 | assert(idx < INT_MAX / 8); 202 | LOAD(reg, DATA_STACK, 8 * idx); 203 | } 204 | static void store_arg(size_t idx, enum reg reg) { 205 | assert(idx < INT_MAX / 8); 206 | STORE(reg, DATA_STACK, 8 * idx); 207 | } 208 | 209 | static void blackhole_self(void) { 210 | // Use rax as a temporary 
register since it's possible that both rsi and rdi 211 | // are in use 212 | // movabs rax, rt_blackhole_entry 213 | CODE(0x48, 0xb8, U64((uint64_t) rt_blackhole_entry)); 214 | STORE(RAX, SELF, 0); 215 | CODE(0xb8, U32(2)); // mov eax, 2 216 | STORE(RAX, SELF, 8); 217 | } 218 | 219 | 220 | /******************* Prologue *****************/ 221 | 222 | static void *start_closure(size_t argc, size_t envc); 223 | static void *start_thunk(size_t envc); 224 | 225 | 226 | static void write_header(uint32_t size, uint32_t tag) { 227 | // Align up to nearest word 228 | code_buf = (uint8_t *) (((size_t) code_buf + 7) & ~7); 229 | 230 | if (code_buf + 8 > code_buf_end) failwith("Too much code"); 231 | 232 | memcpy(code_buf, &size, sizeof(uint32_t)); 233 | code_buf += sizeof(uint32_t); 234 | memcpy(code_buf, &tag, sizeof(uint32_t)); 235 | code_buf += sizeof(uint32_t); 236 | } 237 | 238 | static void *start_closure(size_t argc, size_t envc) { 239 | /* assert(argc < INT_MAX); */ 240 | assert(argc < 127); // TODO: allow more args (? maybe) 241 | assert(envc < INT_MAX); 242 | 243 | write_header(envc == 0 ? 0 : envc + 1, FUN); 244 | void *code_start = code_buf; 245 | 246 | CODE( 247 | // cmp r15, argc 248 | 0x49, 0x83, 0xff, (uint8_t) argc, 249 | // jge rest_of_code (+12) 250 | 0x7d, 12, 251 | // movabs rdi, rt_too_few_args 252 | 0x48, 0xbf, U64((size_t) rt_too_few_args), 253 | // jmp *%rdi 254 | 0xff, 0xe7 255 | // rest_of_code: 256 | ); 257 | 258 | return code_start; 259 | } 260 | 261 | static void *start_thunk(size_t envc) { 262 | assert(envc < INT_MAX); 263 | 264 | write_header(envc == 0 ? 
0 : envc + 1, THUNK); 265 | void *code_start = code_buf; 266 | 267 | CODE( 268 | // test argc,argc (argc is %r15) 269 | 0x4d, 0x85, 0xff, 270 | // jz adjacent_updates (+34) 271 | 0x74, 34, 272 | // sub data_stack, $8 (data_stack is %r12) 273 | 0x49, 0x83, 0xec, 0x08, 274 | // mov [data_stack], self (self is rbx) 275 | 0x49, 0x89, 0x1c, 0x24, 276 | // push argc (argc is r15) 277 | 0x41, 0x57, 278 | // xor argc, argc 279 | 0x4d, 0x31, 0xff, 280 | // call rest_of_code (+28) 281 | 0xe8, U32(28), 282 | // movabs rdi, rt_update_thunk 283 | 0x48, 0xbf, U64((size_t) rt_update_thunk), 284 | // call rdi 285 | 0xff, 0xd7, 286 | // pop argc (argc is %r15) 287 | 0x41, 0x5f, 288 | // jmp [qword ptr [self]] (self is rbx) 289 | 0xff, 0x23, 290 | // adjacent_updates: 291 | // FIXME: calls this function with a misaligned stack 292 | // movabs rdi, rt_adjacent_update_frames 293 | 0x48, 0xbf, U64((size_t) rt_adjacent_update_frames), 294 | // call rdi 295 | 0xff, 0xd7 296 | // rest_of_code: 297 | ); 298 | 299 | return code_start; 300 | } 301 | 302 | 303 | /***************** Allocations ****************/ 304 | 305 | static void do_allocations(struct env *this_env, size_t n, struct compile_result locals[n]); 306 | 307 | 308 | static void heap_check(size_t bytes_allocated) { 309 | // TODO: better maximum allocation size control 310 | assert(0 < bytes_allocated && bytes_allocated < 131072); 311 | 312 | add_imm(HEAP_PTR, - (int32_t) bytes_allocated); 313 | CODE( 314 | // cmp heap, heap limit (r13,r14) 315 | 0x4d, 0x39, 0xf5, 316 | // jae alloc_was_good (offset depends on imm8 vs imm32) 317 | // TODO: add assertions that this is correct 318 | 0x73, (bytes_allocated <= 128 ? 
24 : 27), 319 | // sub rsp, 8 (align the stack for the call) 320 | 0x48, 0x83, 0xec, 8, 321 | // movabs rdi, rt_gc 322 | 0x48, 0xbf, U64((size_t) rt_gc), 323 | // call rdi 324 | 0xff, 0xd7, 325 | // add rsp, 8 326 | 0x48, 0x83, 0xc4, 8 327 | ); 328 | add_imm(HEAP_PTR, - (int32_t) bytes_allocated); 329 | // alloc_was_good: 330 | } 331 | 332 | static void load_var(size_t lvl, struct env *this_env, enum reg dest, var v) { 333 | assert(v < lvl); 334 | if (v >= this_env->args_start) { 335 | size_t idx = var_to_stack_index(lvl, this_env, v); 336 | load_arg(dest, idx); 337 | } else { 338 | assert(this_env->upvals[v].is_used); 339 | load_env_item(dest, SELF, this_env->upvals[v].env_idx); 340 | } 341 | } 342 | 343 | static void do_allocations(struct env *this_env, size_t n, struct compile_result locals[n]) { 344 | size_t lvl = this_env->lets_start; 345 | if (n == 0) 346 | return; 347 | 348 | size_t words_allocated = 0; 349 | for (size_t i = 0; i < n; i++) { 350 | if (locals[i].env->envc == 0) 351 | words_allocated += 2; 352 | else 353 | words_allocated += locals[i].env->envc + 1; 354 | } 355 | 356 | heap_check(8 * words_allocated); 357 | 358 | MOV_RR(RDI, HEAP_PTR); 359 | for (size_t i = 0; i < n; i++) { 360 | lvl++; 361 | add_imm(DATA_STACK, -8); 362 | STORE(RDI, DATA_STACK, 0); 363 | 364 | // Store the entrypoint 365 | // movabs rsi, entrypoint 366 | CODE(0x48, 0xbe, U64((uint64_t) locals[i].code)); 367 | STORE(RSI, RDI, 0); 368 | 369 | // Store the contents 370 | struct env *env = locals[i].env; 371 | assert(env->up == this_env); 372 | size_t count = 0; 373 | for (var v = 0; v < env->args_start; v++) { 374 | if (!env->upvals[v].is_used) 375 | continue; 376 | count++; 377 | load_var(lvl, this_env, RSI, v); 378 | size_t offset = 8 + 8*env->upvals[v].env_idx; 379 | STORE(RSI, RDI, offset); 380 | } 381 | assert(count == env->envc); 382 | 383 | if (env->envc == 0) { 384 | // Store the info_word 385 | CODE(0xbe, U32(2)); // mov esi, 2 386 | STORE(RSI, RDI, 8); 387 | } 388 | 
389 | // Bump rdi, used as a temporary heap pointer 390 | if (i != n-1) 391 | add_imm(RDI, env->envc == 0 ? 16 : 8 + 8*env->envc); 392 | } 393 | } 394 | 395 | 396 | /***************** Shuffle arguments ****************/ 397 | 398 | static void do_the_moves(size_t lvl, ir term, struct env *env); 399 | 400 | 401 | enum mov_status { NOT_STARTED, IN_PROGRESS, DONE }; 402 | 403 | struct dest_info_item { 404 | enum { FROM_ARGS, FROM_ENV } src_type; 405 | int src_idx; 406 | int next_with_same_src; // -1 if none 407 | enum mov_status status; 408 | }; 409 | 410 | typedef struct { 411 | size_t n; 412 | struct dest_info_item *dest_info; // n + 1 of them 413 | int *src_to_dest; // n + 1 of them 414 | int in_rdi; 415 | bool for_a_thunk; 416 | } mov_state; 417 | 418 | // Store src to all its destinations, so that it can be overwritten afterwards. 419 | static void vacate_one(mov_state *s, int src) { 420 | switch (s->dest_info[src].status) { 421 | case DONE: 422 | break; 423 | 424 | case IN_PROGRESS: 425 | // A cycle! Use rdi as a temporary register to break the cycle 426 | assert(s->in_rdi == -1); 427 | s->in_rdi = src; 428 | if (src == s->n) 429 | MOV_RR(RDI, SELF); 430 | else 431 | load_arg(RDI, src); 432 | break; 433 | 434 | case NOT_STARTED: 435 | if (s->src_to_dest[src] == -1) { 436 | // Don't do anything if it has no destinations 437 | s->dest_info[src].status = DONE; 438 | break; 439 | } 440 | 441 | # define FOREACH_DEST(dest) \ 442 | for (int dest = s->src_to_dest[src]; dest != -1; dest = s->dest_info[dest].next_with_same_src) 443 | 444 | s->dest_info[src].status = IN_PROGRESS; 445 | if (src == s->n) { 446 | // Clear out 'self' by storing all the things from the env 447 | FOREACH_DEST(dest) { 448 | assert(s->dest_info[dest].src_type == FROM_ENV); 449 | 450 | vacate_one(s, dest); 451 | 452 | enum reg self = s->in_rdi == s->n ? 
RDI : SELF; 453 | if (dest == s->n) { 454 | if (s->for_a_thunk) { 455 | load_env_item(RSI, self, s->dest_info[dest].src_idx); 456 | blackhole_self(); 457 | MOV_RR(SELF, RSI); 458 | } else { 459 | load_env_item(SELF, self, s->dest_info[dest].src_idx); 460 | } 461 | } else { 462 | load_env_item(RSI, self, s->dest_info[dest].src_idx); 463 | store_arg(dest, RSI); 464 | } 465 | } 466 | } else { 467 | // Clear out data_stack[src] 468 | FOREACH_DEST(dest) { 469 | assert(s->dest_info[dest].src_type == FROM_ARGS); 470 | assert(s->dest_info[dest].src_idx == src); 471 | vacate_one(s, dest); 472 | } 473 | enum reg src_reg; 474 | if (s->in_rdi == src) { 475 | src_reg = RDI; 476 | } else { 477 | load_arg(RSI, src); 478 | src_reg = RSI; 479 | } 480 | FOREACH_DEST(dest) { 481 | if (dest == s->n) { 482 | if (s->for_a_thunk) 483 | blackhole_self(); 484 | MOV_RR(SELF, src_reg); 485 | } else { 486 | store_arg(dest, src_reg); 487 | } 488 | } 489 | } 490 | if (s->in_rdi == src) 491 | s->in_rdi = -1; 492 | s->dest_info[src].status = DONE; 493 | # undef FOREACH_DEST 494 | break; 495 | } 496 | } 497 | 498 | static void add_dest_to_mov_state(size_t lvl, struct env *env, mov_state *s, int dest, var v) { 499 | assert(v < lvl); 500 | if (v >= env->args_start) { 501 | // It's from the data stack 502 | int src = var_to_stack_index(lvl, env, v); 503 | s->dest_info[dest] = (struct dest_info_item) { 504 | .src_type = FROM_ARGS, 505 | .src_idx = src, 506 | .next_with_same_src = s->src_to_dest[src], 507 | .status = NOT_STARTED 508 | }; 509 | if (dest != src) 510 | s->src_to_dest[src] = dest; 511 | } else { 512 | // It's from the env 513 | assert(env->upvals[v].is_used); 514 | s->dest_info[dest] = (struct dest_info_item) { 515 | .src_type = FROM_ENV, 516 | .src_idx = env->upvals[v].env_idx, 517 | .next_with_same_src = s->src_to_dest[s->n], 518 | .status = NOT_STARTED 519 | }; 520 | s->src_to_dest[s->n] = dest; 521 | } 522 | } 523 | 524 | static void do_the_moves(size_t lvl, ir term, struct env *env) { 
525 | assert(lvl == term->lvl + term->arity + term->lets_len); 526 | assert(term->lvl == env->args_start); 527 | 528 | size_t incoming_argc = term->arity + term->lets_len; 529 | size_t outgoing_argc = 0; 530 | for (arglist arg = term->args; arg; arg = arg->prev) 531 | ++outgoing_argc; 532 | 533 | // Resize the data stack 534 | size_t n; 535 | if (outgoing_argc > incoming_argc) { 536 | n = outgoing_argc; 537 | size_t diff = outgoing_argc - incoming_argc; 538 | assert(diff < INT_MAX / 8); 539 | lvl += diff; 540 | add_imm(DATA_STACK, -8 * (int) diff); 541 | } else { 542 | n = incoming_argc; 543 | } 544 | 545 | // Generate the data structures and stuff 546 | mov_state s = (mov_state) { 547 | .n = n, 548 | .dest_info = malloc(sizeof(struct dest_info_item[n+1])), 549 | .src_to_dest = malloc(sizeof(int[n+1])), 550 | .in_rdi = -1, 551 | .for_a_thunk = term->arity == 0, 552 | }; 553 | 554 | for (int i = 0; i < n + 1; i++) 555 | s.src_to_dest[i] = -1; 556 | 557 | int dest_start = 558 | outgoing_argc < incoming_argc ? incoming_argc - outgoing_argc : 0; 559 | for (int dest = 0; dest < dest_start; dest++) 560 | s.dest_info[dest].status = NOT_STARTED; 561 | int dest = n - 1; 562 | for (arglist arg = term->args; arg; dest--, arg = arg->prev) 563 | add_dest_to_mov_state(lvl, env, &s, dest, arg->arg); 564 | assert(dest == dest_start - 1); 565 | add_dest_to_mov_state(lvl, env, &s, n, term->head); 566 | 567 | // Do all the moving 568 | for (int i = 0; i < n + 1; i++) { 569 | vacate_one(&s, i); 570 | assert(s.in_rdi == -1); 571 | } 572 | 573 | // Resize the data stack and set argc 574 | if (outgoing_argc < incoming_argc) { 575 | size_t diff = incoming_argc - outgoing_argc; 576 | assert(diff < INT_MAX / 8); 577 | add_imm(DATA_STACK, 8 * (int) diff); 578 | } 579 | add_imm(ARGC, outgoing_argc - term->arity); 580 | } 581 | 582 | 583 | /***************** Perform the call! 
****************/ 584 | 585 | static void call_self(void) { 586 | // jmp [qword ptr [self]] (self is rbx) 587 | CODE(0xff, 0x23); 588 | } 589 | 590 | 591 | /***************** Tying it all together ****************/ 592 | 593 | void *compile_toplevel(ir term); 594 | 595 | 596 | static struct compile_result compile(struct env *up, ir term) { 597 | size_t lvl = term->lvl; 598 | 599 | // Allocate an environment 600 | struct env *env = malloc(sizeof(struct env) + sizeof(struct var_info[lvl])); 601 | env->up = up; 602 | env->args_start = lvl; 603 | env->lets_start = lvl + term->arity; 604 | env->envc = 0; 605 | for (int i = 0; i < lvl; i++) 606 | env->upvals[i].is_used = false; 607 | 608 | // Populate the env 609 | make_sure_can_access_var(env, term->head); 610 | for (arglist arg = term->args; arg; arg = arg->prev) 611 | make_sure_can_access_var(env, arg->arg); 612 | 613 | // Compile all the lets 614 | struct compile_result *locals = 615 | malloc(sizeof(struct compile_result[term->lets_len])); 616 | int i = 0; 617 | for (letlist let = term->lets; let; let = let->next, i++) 618 | locals[i] = compile(env, let->val); 619 | assert(i == term->lets_len); 620 | 621 | // Prologue 622 | void *code_start; 623 | if (term->arity == 0) 624 | code_start = start_thunk(env->envc); 625 | else 626 | code_start = start_closure(term->arity, env->envc); 627 | 628 | lvl += term->arity; 629 | 630 | // Allocations 631 | do_allocations(env, term->lets_len, locals); 632 | for (int i = 0; i < term->lets_len; i++) 633 | free(locals[i].env); 634 | free(locals); 635 | 636 | lvl += term->lets_len; 637 | 638 | // Set up for call 639 | do_the_moves(lvl, term, env); 640 | 641 | // Execute the call! 
642 | call_self(); 643 | 644 | return (struct compile_result) { 645 | .code = code_start, 646 | .env = env, 647 | }; 648 | } 649 | 650 | void *compile_toplevel(ir term) { 651 | init_code_buf(); 652 | assert(term->lvl == 0); 653 | struct compile_result res = compile(NULL, term); 654 | assert(res.env->envc == 0); 655 | free(res.env); 656 | return res.code; 657 | } 658 | 659 | -------------------------------------------------------------------------------- /backend.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "frontend.h" 3 | 4 | /** Compile a top-level (closed, at level 0) term to machine code. 5 | * 6 | * It returns a void *. This is not executable until codegen_finalize is run. 7 | */ 8 | void *compile_toplevel(ir term); 9 | 10 | /** Remap the codegen'd area from RW to RX. 11 | * 12 | * You may now cast the void *'s from codegen_toplevel to void(*)(void) 13 | */ 14 | void compile_finalize(void); 15 | -------------------------------------------------------------------------------- /bench.lc: -------------------------------------------------------------------------------- 1 | /- Computes 3^(2^4) - 3^(2^4) = 0 = λ s z. z 2 | - Takes about 3.5 seconds to run on my computer 3 | -/ 4 | (λ big_number minus. minus big_number big_number) 5 | /- The big number, 3^(2^4) = 43'046'721 -/ 6 | ((\s z. s(s(s(s z)))) (\s z. s(s z)) (\s z. s(s(s z)))) 7 | /- O(n) church numeral subtraction algorithm -/ 8 | (λ n m s z. 9 | n (λ y k. k (s (y (λ a b. a))) y) 10 | (λ k. k z (λ _. z)) 11 | (m (λ k a b. b k) (λ a b. a))) 12 | 13 | 14 | -------------------------------------------------------------------------------- /frontend.c: -------------------------------------------------------------------------------- 1 | #include "frontend.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define failwith(...) 
do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) 13 | 14 | /**************** Lowering to IR ****************/ 15 | 16 | static void arena_init(void); 17 | 18 | static bool is_var(ir e); 19 | static bool is_lambda(ir e); 20 | 21 | static ir mkvar(size_t lvl, var v); 22 | static ir mkapp(size_t lvl, ir func, ir arg); 23 | static ir mkabs(size_t lvl, ir body); 24 | 25 | static size_t *ir_arena_start = NULL; 26 | static size_t *ir_arena = NULL; 27 | static size_t *ir_arena_end = NULL; 28 | #define IR_ARENA_SIZE (32 * 1024 * 1024) 29 | 30 | static void arena_init(void) { 31 | if (!ir_arena) { 32 | ir_arena_start = ir_arena = malloc(IR_ARENA_SIZE); 33 | ir_arena_end = ir_arena + IR_ARENA_SIZE / sizeof(*ir_arena); 34 | } 35 | } 36 | 37 | void free_ir(void) { 38 | free(ir_arena_start); 39 | ir_arena = ir_arena_end = NULL; 40 | } 41 | 42 | #define ARENA_ALLOC(ty, ...) \ 43 | ty *node = (ty *) ir_arena; \ 44 | ir_arena += sizeof(ty) / sizeof(*ir_arena); \ 45 | if (ir_arena > ir_arena_end) failwith("Too much code"); \ 46 | *node = (ty) { __VA_ARGS__ }; \ 47 | return node; 48 | 49 | static arglist snoc_arg(arglist prev, var arg) { 50 | ARENA_ALLOC(struct an_arg, prev, arg) 51 | } 52 | static letlist cons_let(ir val, letlist next) { 53 | ARENA_ALLOC(struct a_let, val, next) 54 | } 55 | 56 | static ir mkvar(size_t lvl, var v) { 57 | ARENA_ALLOC(struct exp, lvl, 0, NULL, NULL, 0, v, NULL); 58 | } 59 | #undef ARENA_ALLOC 60 | 61 | static bool is_var(ir e) { 62 | return e->arity == 0 && e->lets == NULL && e->args == NULL; 63 | } 64 | static bool is_lambda(ir e) { 65 | return e->arity > 0; 66 | } 67 | 68 | static ir mkabs(size_t lvl, ir body) { 69 | assert(body->lvl == lvl + 1); 70 | body->lvl = lvl; 71 | body->arity++; 72 | return body; 73 | } 74 | 75 | static ir mkapp(size_t lvl, ir func, ir arg) { 76 | if (is_lambda(func)) { 77 | if (is_var(arg)) { 78 | // Applying a lambda to a var: let f = func in f x 79 | // f becomes lvl 80 | ir res = mkvar(lvl, lvl); 81 | res->lets = 
cons_let(func, NULL); 82 | res->lets_end = &res->lets->next; 83 | res->lets_len = 1; 84 | res->args = snoc_arg(NULL, arg->head); 85 | return res; 86 | } else { 87 | // Applying a lambda to a complex thing: let f = func ; x = arg in f x 88 | // f becomes lvl, x becomes (lvl+1) 89 | ir res = mkvar(lvl, lvl); 90 | res->lets = cons_let(arg, NULL); 91 | res->lets_end = &res->lets->next; 92 | res->lets = cons_let(func, res->lets); 93 | res->lets_len = 2; 94 | res->args = snoc_arg(NULL, lvl+1); 95 | return res; 96 | } 97 | } else if (is_var(arg)) { 98 | // Applying a thunk to a var: 99 | // (let ... in f args) x ⇒ let ... in f args x 100 | var v = arg->head; 101 | func->args = snoc_arg(func->args, v); 102 | return func; 103 | } else { 104 | // Appying a thunk to something complex: 105 | // (let ... in f args) arg ⇒ let ... ; x = arg in f args x 106 | var new_var = lvl + func->lets_len; 107 | 108 | // add the let 109 | letlist new_let = cons_let(arg, NULL); 110 | if (func->lets_end) 111 | *func->lets_end = new_let; 112 | else 113 | func->lets = new_let; 114 | func->lets_end = &new_let->next; 115 | func->lets_len++; 116 | 117 | // add the arg 118 | func->args = snoc_arg(func->args, new_var); 119 | 120 | return func; 121 | } 122 | } 123 | 124 | /*************** Parser ***************/ 125 | 126 | typedef struct a_scope_item { 127 | const char *name; 128 | size_t name_len; 129 | struct a_scope_item *next; 130 | } *scope; 131 | 132 | static bool skip_whitespace(const char **cursor); 133 | #define SKIP_WHITESPACE(cursor) \ 134 | if (!skip_whitespace(cursor)) return 0 135 | 136 | static size_t parse_ident(const char **cursor); 137 | static ir parse_var(const char **cursor, size_t lvl, scope s); 138 | static ir parse_exp(const char **cursor, size_t lvl, scope s); 139 | static ir parse_atomic_exp(const char **cursor, size_t lvl, scope s); 140 | static ir parse_rest_of_lambda(const char **cursor, size_t lvl, scope s); 141 | 142 | static const char *err_msg = NULL; 143 | static 
const char *err_loc = NULL; 144 | 145 | 146 | ir parse(const char *text) { 147 | arena_init(); 148 | const char *cursor = text; 149 | ir result = NULL; 150 | 151 | if (!skip_whitespace(&cursor)) 152 | goto end; 153 | 154 | result = parse_exp(&cursor, 0, NULL); 155 | if (result && *cursor != '\0') { 156 | result = NULL; 157 | err_loc = text; 158 | err_msg = "expected eof"; 159 | } 160 | 161 | end: 162 | if (!result) 163 | // TODO: better error message printing 164 | printf("parse error at byte %zu :/\n%s\n", err_loc - text, err_msg); 165 | 166 | return result; 167 | } 168 | 169 | static bool skip_whitespace(const char **cursor) { 170 | const char *text = *cursor; 171 | // comments use /- -/ 172 | for (;;) switch (text[0]) { 173 | case ' ': 174 | case '\n': 175 | case '\t': 176 | text++; 177 | continue; 178 | case '/': 179 | if (text[1] == '-') { 180 | // comment start! 181 | text += 2; 182 | while (text[0]) { 183 | if (text[0] == '-' && text[1] == '/') 184 | break; 185 | text++; 186 | } 187 | if (!text[0]) { 188 | err_loc = *cursor = text; 189 | err_msg = "reached EOF during comment"; 190 | return false; 191 | } 192 | text += 2; 193 | continue; 194 | } else { /* fallthrough */ } 195 | // Fall through 196 | default: 197 | *cursor = text; 198 | return true; 199 | } 200 | } 201 | 202 | // returns the length of the ident starting at *cursor 203 | static size_t parse_ident(const char **cursor) { 204 | // parse [a-zA-Z_]+ 205 | const char *start = *cursor; 206 | const char *end = start; 207 | #define IDENT_CHAR(c) (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_') 208 | while (IDENT_CHAR(*end)) 209 | end++; 210 | *cursor = end; 211 | if (start == end) { 212 | err_loc = start; 213 | err_msg = "expected variable"; 214 | return 0; 215 | } 216 | SKIP_WHITESPACE(cursor); 217 | return end - start; 218 | } 219 | 220 | static ir parse_var(const char **cursor, size_t lvl, scope s) { 221 | const char *start = *cursor; 222 | size_t len = parse_ident(cursor); 223 | if 
(!len) return NULL; 224 | 225 | // resolve the name 226 | var v = lvl; 227 | scope node = s; 228 | while (node) { 229 | v--; 230 | if (len == node->name_len && strncmp(start, node->name, len) == 0) 231 | return mkvar(lvl, v); 232 | else 233 | node = node->next; 234 | } 235 | err_loc = start; 236 | err_msg = "variable not in scope"; 237 | return NULL; 238 | } 239 | 240 | // atomic_exp ::= var | '(' exp ')' 241 | static ir parse_atomic_exp(const char **cursor, size_t lvl, scope s) { 242 | if (**cursor == '(') { 243 | ++*cursor; 244 | SKIP_WHITESPACE(cursor); 245 | ir result = parse_exp(cursor, lvl, s); 246 | if (**cursor == ')') { 247 | ++*cursor; 248 | SKIP_WHITESPACE(cursor); 249 | return result; 250 | } else { 251 | err_loc = *cursor; 252 | err_msg = "expected ')'"; 253 | return NULL; 254 | } 255 | } 256 | return parse_var(cursor, lvl, s); 257 | } 258 | 259 | // rest_of_lambda ::= var* '.' exp 260 | static ir parse_rest_of_lambda(const char **cursor, size_t lvl, scope s) { 261 | if (!**cursor) { 262 | err_loc = *cursor; 263 | err_msg = "expected '.', got end of file"; 264 | return NULL; 265 | } 266 | if (**cursor == '.') { 267 | ++*cursor; 268 | SKIP_WHITESPACE(cursor); 269 | return parse_exp(cursor, lvl, s); 270 | } else { 271 | const char *name = *cursor; 272 | size_t name_len = parse_ident(cursor); 273 | if (!name_len) return NULL; 274 | struct a_scope_item extended = { name, name_len, s }; 275 | ir body = parse_rest_of_lambda(cursor, lvl+1, &extended); 276 | if (!body) return NULL; 277 | return mkabs(lvl, body); 278 | } 279 | } 280 | 281 | // exp ::= '\' rest_of_lambda | 'λ' rest_of_lambda | atomic_exp atomic_exp* 282 | static ir parse_exp(const char **cursor, size_t lvl, scope s) { 283 | if (**cursor == '\\') { 284 | ++*cursor; 285 | SKIP_WHITESPACE(cursor); 286 | return parse_rest_of_lambda(cursor, lvl, s); 287 | } else if (strncmp(*cursor, "λ", sizeof("λ") - 1) == 0) { 288 | *cursor += sizeof("λ") - 1; 289 | SKIP_WHITESPACE(cursor); 290 | return 
parse_rest_of_lambda(cursor, lvl, s); 291 | } else { 292 | ir func = parse_atomic_exp(cursor, lvl, s); 293 | if (!func) return NULL; 294 | // Then parse some args 295 | while (**cursor && **cursor != ')') { 296 | ir arg = parse_atomic_exp(cursor, lvl, s); 297 | if (!arg) return NULL; 298 | func = mkapp(lvl, func, arg); 299 | } 300 | return func; 301 | } 302 | } 303 | 304 | /*************** Pretty-printer **************/ 305 | 306 | static void print_var(var v); 307 | static void print_lets(size_t lvl, letlist lets); 308 | static void print_args(arglist args); 309 | static void print_term(ir term); 310 | 311 | static void print_var(var v) { 312 | printf("x_%zu", v); 313 | } 314 | static void print_lets(size_t lvl, letlist lets) { 315 | for (; lets; lets = lets->next, lvl++) { 316 | printf("let "); 317 | print_var(lvl); 318 | printf(" = "); 319 | print_term(lets->val); 320 | printf(" in "); 321 | } 322 | } 323 | static void print_args(arglist args) { 324 | if (args) { 325 | print_args(args->prev); 326 | printf(" "); 327 | print_var(args->arg); 328 | } 329 | } 330 | static void print_term(ir term) { 331 | if (!term->arity && !term->lets && !term->args) { 332 | // Just a var -- no parens necessary 333 | print_var(term->head); 334 | return; 335 | } 336 | printf("("); 337 | if (term->arity) { 338 | printf("λ"); 339 | for (var v = term->lvl; v < term->lvl + term->arity; v++) { 340 | printf(" "); 341 | print_var(v); 342 | } 343 | printf(". 
"); 344 | } 345 | print_lets(term->lvl + term->arity, term->lets); 346 | print_var(term->head); 347 | print_args(term->args); 348 | printf(")"); 349 | } 350 | 351 | void print_ir(ir term) { 352 | print_term(term); 353 | printf("\n"); 354 | } 355 | 356 | 357 | -------------------------------------------------------------------------------- /frontend.h: -------------------------------------------------------------------------------- 1 | #ifndef FRONTEND_H 2 | #define FRONTEND_H 1 3 | 4 | #include 5 | 6 | typedef size_t var; 7 | 8 | typedef struct a_let { 9 | struct exp *val; 10 | struct a_let *next; 11 | } *letlist; 12 | 13 | typedef struct an_arg { 14 | struct an_arg *prev; 15 | var arg; 16 | } *arglist; 17 | 18 | typedef struct exp { 19 | size_t lvl; 20 | size_t arity; 21 | letlist lets; 22 | letlist *lets_end; 23 | size_t lets_len; 24 | var head; 25 | arglist args; 26 | } *ir; 27 | 28 | /** Parse the given text, reporting errors to the user. 29 | * 30 | * If there's an error, it returns null 31 | * 32 | * Does not free the text 33 | */ 34 | ir parse(const char *text); 35 | 36 | /** All IR is allocated from an arena. 
37 | * 38 | * This function frees the arena 39 | */ 40 | void free_ir(void); 41 | 42 | /** For debugging purposes 43 | */ 44 | void print_ir(ir term); 45 | 46 | #endif // FRONTEND_H 47 | -------------------------------------------------------------------------------- /haskell/.gitignore: -------------------------------------------------------------------------------- 1 | Strong 2 | Weak 3 | *.hi 4 | *.o 5 | -------------------------------------------------------------------------------- /haskell/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: Weak Strong 3 | 4 | bench: Weak Strong 5 | hyperfine ./Weak ./Strong 6 | 7 | GHCOPTS = -threaded -rtsopts -O2 -fllvm -fno-cse -fno-full-laziness 8 | 9 | Weak: Weak.hs 10 | ghc $(GHCOPTS) --make Weak.hs -o Weak 11 | Strong: Strong.hs 12 | ghc $(GHCOPTS) --make Strong.hs -o Strong 13 | 14 | -------------------------------------------------------------------------------- /haskell/Strong.hs: -------------------------------------------------------------------------------- 1 | -- Strong normalization for the untyped lambda calculus, using a HOAS deep 2 | -- embedding 3 | -- 4 | -- The purpose of this benchmark is to compare my interpreter with something 5 | -- implemented by compiling to Haskell. 
6 | -- 7 | -- Computes the same thing as bench.lc 8 | 9 | {-# LANGUAGE BangPatterns, BlockArguments #-} 10 | 11 | type Var = Int 12 | 13 | data Val = Fun !(Val -> Val) | VVar !Var | VApp !Val Val 14 | data NF = Lam NF | NE Var [NF] deriving (Show, Eq) 15 | 16 | infixl 1 $$ 17 | ($$) :: Val -> Val -> Val 18 | Fun f $$ x = f x 19 | v $$ x = VApp v x 20 | {-# INLINE ($$) #-} 21 | 22 | λ = Fun 23 | 24 | n2 = λ\s -> λ\z -> s $$ (s $$ z) 25 | n3 = λ\s -> λ\z -> s $$ (s $$ (s $$ z)) 26 | n4 = λ\s -> λ\z -> s $$ (s $$ (s $$ (s $$ z))) 27 | 28 | theTerm = minus $$ bigNumber $$ bigNumber 29 | bigNumber = n4 $$ n2 $$ n3 30 | minus = λ\n -> λ\m -> λ\s -> λ\z -> 31 | n $$ (λ\y -> λ\k -> k $$ (s $$ (y $$ λ\a -> λ\b -> a)) $$ y) 32 | $$ (λ\k -> k $$ z $$ (λ\_ -> z)) 33 | $$ (m $$ (λ\k -> λ\a -> λ\b -> b $$ k) $$ (λ\a -> λ\b -> a)) 34 | 35 | readback :: Var -> Val -> NF 36 | readback !lvl (Fun f) = Lam $ readback (lvl+1) $ f (VVar lvl) 37 | readback !lvl (VVar v) = NE v [] 38 | readback !lvl (VApp f x) = 39 | let NE v sp = readback lvl f in NE v (readback lvl x:sp) 40 | 41 | main = do 42 | putStrLn "Normalizing the term..." 43 | print $ readback 0 theTerm 44 | 45 | -------------------------------------------------------------------------------- /haskell/Weak.hs: -------------------------------------------------------------------------------- 1 | -- Weak normalization for the untyped lambda calculus. 2 | -- 3 | -- The purpose of this benchmark is to compare my interpreter with GHC, the 4 | -- highest-performance lazy evaluation runtime out there. 5 | -- 6 | -- If one reimplemented all of GHC to be strongly-normalizing, this gives an 7 | -- indication of how fast it could theoretically be. 
8 | 9 | {-# LANGUAGE BangPatterns, BlockArguments #-} 10 | 11 | import Data.Coerce 12 | import Unsafe.Coerce 13 | 14 | newtype L = L (L -> L) 15 | 16 | λ = coerce :: (L -> L) -> L 17 | λ2 = coerce :: (L -> L -> L) -> L 18 | λ3 = coerce :: (L -> L -> L -> L) -> L 19 | λ4 = coerce :: (L -> L -> L -> L -> L) -> L 20 | 21 | ap = coerce :: L -> L -> L 22 | ap2 = coerce :: L -> L -> L -> L 23 | ap3 = coerce :: L -> L -> L -> L -> L 24 | 25 | n2 = λ2\ s z -> ap s (ap s z) 26 | n3 = λ2\ s z -> ap s (ap s (ap s z)) 27 | n4 = λ2\ s z -> ap s (ap s (ap s (ap s z))) 28 | 29 | {-# NOINLINE theTerm #-} 30 | theTerm :: L 31 | theTerm = ap2 minus bigNumber bigNumber 32 | 33 | bigNumber = ap2 n4 n2 n3 34 | 35 | minus = λ4\ n m s z -> 36 | ap3 n 37 | (λ2\ y k -> ap2 k (ap s (ap y (λ2\ a b -> a))) y) 38 | (λ\ k -> ap2 k z (λ\ _ -> z)) 39 | (ap2 m (λ3\ k a b -> ap b k) (λ2\ a b -> a)) 40 | 41 | 42 | -- Really unsafe part =| 43 | -- Converts Church numerals to ints, and other terms to UB 44 | churchToInt :: L -> Int 45 | churchToInt l = (unsafeCoerce l :: (Int -> Int) -> Int -> Int) succ 0 46 | 47 | main = putStrLn $ "The church numerals represents " ++ show (churchToInt theTerm) 48 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "frontend.h" 5 | #include "backend.h" 6 | #include "runtime/normalize.h" 7 | 8 | int main(int argc, const char **argv) { 9 | const char *source = argc >= 2 ? argv[1] : "λ x. x"; 10 | 11 | printf("Input: %s\n", source); 12 | printf("Compiling... "); 13 | fflush(stdout); 14 | 15 | ir term = parse(source); 16 | void *code = compile_toplevel(term); 17 | compile_finalize(); 18 | free_ir(); 19 | 20 | printf("Compiled! 
Normalizing...\n"); 21 | unsigned int *nf = normalize(code); 22 | 23 | printf("Normal form: "); 24 | print_normal_form(nf); 25 | 26 | free(nf); 27 | } 28 | 29 | -------------------------------------------------------------------------------- /runtime/builtins.c: -------------------------------------------------------------------------------- 1 | #include "gc.h" 2 | #include "builtins.h" 3 | 4 | void rt_gc(void) { 5 | minor_gc(); 6 | } 7 | 8 | void rt_too_few_args(void) { 9 | if (argc == 0) 10 | return; 11 | size_t size = argc + 3; 12 | obj *pap = alloc(rt_pap_entry, size); 13 | *INFO_WORD(pap) = (struct info_word) { .size = size, .var = 0 }; 14 | pap->contents[1] = (word) self; 15 | memcpy(&pap->contents[2], data_stack, sizeof(word[argc])); 16 | data_stack += argc; 17 | argc = 0; 18 | self = pap; 19 | } 20 | 21 | void rt_update_thunk(void) { 22 | assert(argc == 0); 23 | obj *thunk = *data_stack++; 24 | thunk->entrypoint = rt_ref_entry; 25 | thunk->contents[0] = (word) self; 26 | if (!IS_YOUNG(thunk) && IS_YOUNG(self)) 27 | write_barrier(thunk); 28 | } 29 | 30 | void rt_adjacent_update_frames(void) { 31 | obj *thunk = *data_stack; 32 | thunk->entrypoint = rt_ref_entry; 33 | thunk->contents[0] = (word) self; 34 | if (!IS_YOUNG(thunk) && IS_YOUNG(self)) 35 | write_barrier(thunk); 36 | *data_stack = self; 37 | } 38 | 39 | /************** Built-in heap objects *************/ 40 | 41 | #define STRINGIFY(x) #x 42 | #define ENTRY(name, size, tag) \ 43 | asm (\ 44 | " .text\n" \ 45 | " .globl " name "\n" \ 46 | " .int " STRINGIFY(size) "\n" \ 47 | " .int " STRINGIFY(tag) "\n" \ 48 | name ":\n" \ 49 | " jmp " name "_impl\n" \ 50 | ) 51 | 52 | ENTRY("rt_ref_entry", 2, REF); 53 | void rt_ref_entry_impl(void) { 54 | self = (obj *) self->contents[0]; 55 | // Hope for a tail call :/ 56 | return self->entrypoint(); 57 | } 58 | 59 | ENTRY("rt_forward_entry", 2, FORWARD); 60 | void rt_forward_entry_impl(void) { 61 | failwith("unreachable: forward objects only exist during GC\n"); 62 
| } 63 | 64 | ENTRY("rt_pap_entry", 0, PAP); 65 | void rt_pap_entry_impl(void) { 66 | // Partial application: push the arguments onto the stack and tail call the 67 | // contained function 68 | obj *fun = (obj *) self->contents[1]; 69 | size_t extra_args = INFO_WORD(self)->size - 3; 70 | argc += extra_args; 71 | data_stack -= extra_args; 72 | memcpy(data_stack, &self->contents[2], sizeof(word[extra_args])); 73 | self = fun; 74 | // Hope for a tail call :/ 75 | return self->entrypoint(); 76 | } 77 | 78 | ENTRY("rt_rigid_entry", 0, RIGID); 79 | void rt_rigid_entry_impl(void) { 80 | // Rigid term: allocate a new rigid term with the new arguments 81 | if (argc == 0) 82 | return; 83 | 84 | // TODO: make overflow checks more legit (should have a max term size) 85 | uint32_t new_size = INFO_WORD(self)->size + argc; 86 | if (new_size < argc) failwith("overflow"); 87 | 88 | obj *new = alloc(rt_rigid_entry, new_size); 89 | *INFO_WORD(new) = (struct info_word) { 90 | .size = new_size, 91 | .var = INFO_WORD(self)->var, 92 | }; 93 | 94 | size_t self_argc = INFO_WORD(self)->size - 2; 95 | memcpy(&new->contents[1], &self->contents[1], sizeof(word[self_argc])); 96 | memcpy(&new->contents[1 + self_argc], data_stack, sizeof(word[argc])); 97 | data_stack += argc; 98 | argc = 0; 99 | 100 | self = new; 101 | } 102 | 103 | ENTRY("rt_blackhole_entry", 0, BLACKHOLE); 104 | void rt_blackhole_entry_impl(void) { 105 | failwith("Black hole (infinite loop?)\n"); 106 | } 107 | -------------------------------------------------------------------------------- /runtime/builtins.h: -------------------------------------------------------------------------------- 1 | void rt_gc(void); 2 | void rt_too_few_args(void); 3 | void rt_update_thunk(void); 4 | void rt_adjacent_update_frames(void); 5 | 6 | void rt_ref_entry(void); 7 | void rt_forward_entry(void); 8 | void rt_pap_entry(void); 9 | void rt_rigid_entry(void); 10 | void rt_blackhole_entry(void); 11 | 12 | 
-------------------------------------------------------------------------------- /runtime/data_layout.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_LAYOUT_H 2 | #define DATA_LAYOUT_H 1 3 | 4 | #include 5 | 6 | /************* Object layout ***********/ 7 | 8 | typedef size_t word; 9 | typedef uint32_t halfword; 10 | 11 | typedef struct obj { 12 | void (*entrypoint)(void); 13 | word contents[]; 14 | } obj; 15 | 16 | // Can't be an enum since I need to include them in inline assembly 17 | #define FORWARD 0 18 | #define REF 1 19 | #define FUN 2 20 | #define PAP 3 21 | #define RIGID 4 22 | #define THUNK 5 23 | #define BLACKHOLE 6 24 | 25 | struct gc_data { 26 | /** Size of the whole object in words. 27 | * If 0, the first word of contents is a `struct info_word` that contains 28 | * the true size 29 | */ 30 | uint32_t size; 31 | uint32_t tag; 32 | }; 33 | #define GC_DATA(o) \ 34 | ((struct gc_data *) ((size_t) (o->entrypoint) - sizeof(struct gc_data))) 35 | 36 | struct info_word { 37 | /** Size of the whole object in words */ 38 | uint32_t size; 39 | /** only in heap objects representing rigid terms */ 40 | uint32_t var; 41 | }; 42 | #define INFO_WORD(o) ((struct info_word *) &o->contents[0]) 43 | 44 | #endif // DATA_LAYOUT_H 45 | -------------------------------------------------------------------------------- /runtime/gc.c: -------------------------------------------------------------------------------- 1 | #include "gc.h" 2 | #include "builtins.h" 3 | 4 | enum gc_type { MAJOR, MINOR }; 5 | static void major_gc(void); 6 | static obj *copy_to_old_space(obj *o, enum gc_type type); 7 | static void process_copy_stack(enum gc_type type); 8 | static void collect_roots(enum gc_type type); 9 | 10 | static word *old_start; 11 | static word *old_top; 12 | static word *other_old_start; 13 | static size_t old_space_size; 14 | static size_t other_old_space_size; 15 | 16 | // Remembered set: a growable (malloc'd) vector of old 
objects 'REF ptr' that 17 | // point to the nursery 18 | static obj **remembered_set; 19 | static size_t remembered_set_size; 20 | static size_t remembered_set_cap; 21 | 22 | // Copy stack: during GC, a worklist of new to-space objects whose fields still 23 | // point to the from-space 24 | static obj **copy_stack; 25 | static size_t copy_stack_size; 26 | static size_t copy_stack_cap; 27 | 28 | static obj **data_stack_end; 29 | 30 | void gc_init(void) { 31 | copy_stack = (obj **) malloc(4096); 32 | copy_stack_size = 0; 33 | copy_stack_cap = 4096 / sizeof(obj *); 34 | 35 | remembered_set = (obj **) malloc(4096); 36 | remembered_set_size = 0; 37 | remembered_set_cap = 4096 / sizeof(obj *); 38 | 39 | nursery_start = (word *) malloc(NURSERY_BYTES); 40 | nursery_top = nursery_start + NURSERY_BYTES / sizeof(word); 41 | 42 | old_start = (word *) malloc(2 * NURSERY_BYTES); 43 | old_top = old_start + 2 * NURSERY_BYTES / sizeof(word); 44 | other_old_start = NULL; 45 | old_space_size = other_old_space_size = 2 * NURSERY_BYTES; 46 | 47 | obj **data_stack_start = malloc(DATA_STACK_BYTES); 48 | data_stack = data_stack_end = data_stack_start + DATA_STACK_BYTES / sizeof(obj *); 49 | } 50 | 51 | void minor_gc(void) { 52 | // conservative heap check 53 | if ((size_t) old_top - (size_t) old_start < NURSERY_BYTES) 54 | return major_gc(); 55 | 56 | DEBUG("Minor GC\n"); 57 | 58 | collect_roots(MINOR); 59 | 60 | // Collect the remembered set 61 | obj **remembered_set_end = remembered_set + remembered_set_size; 62 | for (obj **o = remembered_set; o < remembered_set_end; o++) { 63 | // old_obj is 'REF ptr' where ptr points to the nursery 64 | obj *old_obj = *o; 65 | assert(old_obj->entrypoint == rt_ref_entry); 66 | old_obj->contents[0] = 67 | (word) copy_to_old_space((obj *) old_obj->contents[0], MINOR); 68 | } 69 | remembered_set_size = 0; 70 | 71 | process_copy_stack(MINOR); 72 | 73 | nursery_top = nursery_start + NURSERY_BYTES / sizeof(word); 74 | } 75 | 76 | void major_gc(void) { 77 | 
DEBUG("Major GC: "); 78 | 79 | if (!other_old_start) 80 | other_old_start = (word *) malloc(other_old_space_size); 81 | word *from_space = old_start; 82 | size_t from_space_size = old_space_size; 83 | old_start = other_old_start; 84 | old_space_size = other_old_space_size; 85 | old_top = old_start + old_space_size / sizeof(word); 86 | other_old_start = NULL; 87 | 88 | collect_roots(MAJOR); 89 | process_copy_stack(MAJOR); 90 | 91 | // set a new size for the old space if needed 92 | size_t used_space = (size_t) old_start + old_space_size - (size_t) old_top; 93 | if (used_space + NURSERY_BYTES > old_space_size) 94 | other_old_space_size *= 2; 95 | 96 | if (from_space_size < other_old_space_size) 97 | free(from_space); 98 | else 99 | other_old_start = from_space; 100 | 101 | // reset the nursery and ignore the remembered set 102 | remembered_set_size = 0; 103 | nursery_top = nursery_start + NURSERY_BYTES / sizeof(word); 104 | 105 | DEBUG("copied %zu bytes\n", used_space); 106 | } 107 | 108 | static void collect_roots(enum gc_type type) { 109 | // Collect self 110 | self = copy_to_old_space(self, type); 111 | 112 | // Collect data stack 113 | for (obj **root = data_stack; root < data_stack_end; root++) 114 | *root = copy_to_old_space(*root, type); 115 | } 116 | 117 | static obj *copy_to_old_space(obj *o, enum gc_type type) { 118 | if (type == MINOR && !IS_YOUNG(o)) 119 | return o; 120 | 121 | if (o->entrypoint == rt_forward_entry) { 122 | return (obj *) o->contents[0]; 123 | } else if (o->entrypoint == rt_ref_entry) { 124 | // Compress REF indirections 125 | obj *new = copy_to_old_space((obj *) o->contents[0], type); 126 | o->contents[0] = (word) new; 127 | return new; 128 | } else { 129 | size_t size = GC_DATA(o)->size; 130 | if (!size) size = INFO_WORD(o)->size; 131 | obj *new = (obj *) (old_top -= size); 132 | memcpy(new, o, sizeof(word[size])); 133 | 134 | // set up forwarding 135 | o->entrypoint = rt_forward_entry; 136 | o->contents[0] = (word) new; 137 | 138 | // 
add to the copy stack 139 | if (copy_stack_size == copy_stack_cap) { 140 | size_t new_cap = 2 * copy_stack_cap + 1; 141 | copy_stack = reallocarray(copy_stack, new_cap, sizeof(obj *)); 142 | copy_stack_cap = new_cap; 143 | } 144 | copy_stack[copy_stack_size++] = new; 145 | 146 | return new; 147 | } 148 | } 149 | 150 | static void process_copy_stack(enum gc_type type) { 151 | while (copy_stack_size > 0) { 152 | obj *o = copy_stack[--copy_stack_size]; 153 | word *start; 154 | size_t size = GC_DATA(o)->size; 155 | if (size) { 156 | // Contains size - 1 many GC pointers 157 | start = &o->contents[0]; 158 | } else { 159 | // Contains size - 2 many GC pointers 160 | size = INFO_WORD(o)->size; 161 | start = &o->contents[1]; 162 | } 163 | word *end = &o->contents[size - 1]; 164 | for (word *ptr = start; ptr < end; ptr++) 165 | *ptr = (word) copy_to_old_space((obj *) *ptr, type); 166 | } 167 | } 168 | 169 | // Write barrier: push thunk to the remembered set 170 | void write_barrier(obj *thunk) { 171 | if (remembered_set_size == remembered_set_cap) { 172 | size_t new_cap = remembered_set_size * 2; 173 | remembered_set = reallocarray(remembered_set, new_cap, sizeof(obj *)); 174 | remembered_set_cap = new_cap; 175 | } 176 | remembered_set[remembered_set_size++] = thunk; 177 | } 178 | 179 | -------------------------------------------------------------------------------- /runtime/gc.h: -------------------------------------------------------------------------------- 1 | #include "runtime.h" 2 | 3 | void gc_init(void); 4 | 5 | void minor_gc(void); 6 | void write_barrier(obj *thunk); 7 | 8 | // This is only called from C code; generated code has this inlined 9 | static inline obj *alloc(void (*entrypoint)(void), size_t size) { 10 | // TODO: have a max term size somewhere 11 | assert(sizeof(word[size]) < 10240); 12 | word *ptr = nursery_top - size; 13 | if (ptr < nursery_start) { 14 | minor_gc(); 15 | ptr = nursery_top - size; 16 | } 17 | nursery_top = ptr; 18 | obj *o = (obj *) 
ptr; 19 | o->entrypoint = entrypoint; 20 | return o; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /runtime/normalize.c: -------------------------------------------------------------------------------- 1 | #include "gc.h" 2 | #include "builtins.h" 3 | #include "normalize.h" 4 | 5 | static unsigned int *buf; 6 | static size_t buf_len; 7 | static size_t buf_cap; 8 | 9 | static void push_buf(unsigned int x) { 10 | if (buf_len == buf_cap) { 11 | buf_cap *= 2; 12 | buf = reallocarray(buf, buf_cap, sizeof(unsigned int)); 13 | } 14 | buf[buf_len++] = x; 15 | } 16 | 17 | // Pop an object from the data stack and write its normal form to the buffer 18 | static void quote(void); 19 | 20 | // Apply 'self' to an argument, returning the value in 'self' 21 | static void apply(obj *arg); 22 | // Evaluate 'self', returning the value in 'self' 23 | static void eval(void); 24 | 25 | struct saved_regs { 26 | obj *self; 27 | obj **data_stack; 28 | word *nursery_top; 29 | word *nursery_start; 30 | size_t argc; 31 | }; 32 | 33 | static struct saved_regs save_regs(void); 34 | static void restore_regs(struct saved_regs); 35 | 36 | // Caller is normal code, calls the runtime code 37 | // Need to save/restore the callee-saved registers and setup the runtime 38 | unsigned int *normalize(void (*entrypoint)(void)) { 39 | struct saved_regs regs = save_regs(); 40 | gc_init(); 41 | buf_len = 0; 42 | buf_cap = 16; 43 | buf = malloc(sizeof(unsigned int[buf_cap])); 44 | 45 | obj *main = alloc(entrypoint, 2); 46 | *INFO_WORD(main) = (struct info_word) { .size = 2, .var = 0 }; 47 | self = main; 48 | 49 | quote(); 50 | 51 | restore_regs(regs); 52 | return buf; 53 | } 54 | 55 | static struct saved_regs save_regs(void) { 56 | return (struct saved_regs) { 57 | .self = self, 58 | .data_stack = data_stack, 59 | .nursery_top = nursery_top, 60 | .nursery_start = nursery_start, 61 | .argc = argc, 62 | }; 63 | } 64 | static void restore_regs(struct saved_regs 
regs) { 65 | self = regs.self; 66 | data_stack = regs.data_stack; 67 | nursery_top = regs.nursery_top; 68 | nursery_start = regs.nursery_start; 69 | argc = regs.argc; 70 | } 71 | 72 | // Apply 'self' to an argument, returning the value in 'self' 73 | static void apply(obj *arg) { 74 | obj *blackhole_to_update = alloc(rt_blackhole_entry, 2); 75 | *INFO_WORD(blackhole_to_update) = (struct info_word) { .size = 2, .var = 0 }; 76 | *--data_stack = blackhole_to_update; 77 | *--data_stack = arg; 78 | argc = 1; 79 | self->entrypoint(); 80 | rt_update_thunk(); 81 | } 82 | // Evaluate 'self', returning the value in 'self' 83 | static void eval(void) { 84 | switch (GC_DATA(self)->tag) { 85 | case PAP: 86 | case RIGID: 87 | case FUN: 88 | return; 89 | case REF: 90 | self = (obj *) self->contents[0]; 91 | return eval(); 92 | case THUNK: 93 | obj *blackhole_to_update = alloc(rt_blackhole_entry, 2); 94 | *INFO_WORD(blackhole_to_update) = (struct info_word) { .size = 2, .var = 0 }; 95 | *--data_stack = blackhole_to_update; 96 | argc = 0; 97 | self->entrypoint(); 98 | rt_update_thunk(); 99 | return; 100 | default: 101 | failwith("unreachable"); 102 | } 103 | } 104 | 105 | // Write the normal form of 'self' to the buffer 106 | static void quote(void) { 107 | // the next variable id to use for a lambda 108 | unsigned int next_var = 0; 109 | 110 | eval(); 111 | 112 | // Use the data stack as a worklist 113 | obj **data_stack_end = data_stack; 114 | for (;;) { 115 | switch (GC_DATA(self)->tag) { 116 | case FUN: 117 | case PAP: 118 | { 119 | // Function f: λ x. 
quote (apply f x) 120 | unsigned int var_id = next_var++; 121 | push_buf(LAM); 122 | push_buf(var_id); 123 | obj *x = alloc(rt_rigid_entry, 2); 124 | *INFO_WORD(x) = (struct info_word) { .size = 2, .var = var_id }; 125 | apply(x); 126 | continue; 127 | } 128 | case RIGID: 129 | { 130 | // Rigid term head args: head (map quote args) 131 | unsigned int argc = INFO_WORD(self)->size - 2; 132 | unsigned int var_id = INFO_WORD(self)->var; 133 | push_buf(NE); 134 | push_buf(argc); 135 | push_buf(var_id); 136 | data_stack -= argc; 137 | memcpy(data_stack, &self->contents[1], argc * sizeof(obj *)); 138 | 139 | // Pop and evaluate the next item off the stack 140 | if (data_stack == data_stack_end) 141 | return; 142 | self = *data_stack++; 143 | eval(); 144 | continue; 145 | } 146 | default: 147 | failwith("unreachable"); 148 | } 149 | } 150 | } 151 | 152 | 153 | /***************** Printing ****************/ 154 | 155 | static unsigned int *print(unsigned int *nf, bool parens); 156 | static unsigned int *print_rest_of_lam(unsigned int *nf); 157 | 158 | void print_normal_form(unsigned int *nf) { 159 | print(nf, false); 160 | printf("\n"); 161 | } 162 | 163 | static void print_var(unsigned int var) { 164 | // TODO: there's probably a nicer way to print variables 165 | if (var < 26) 166 | printf("%c", 'a' + var); 167 | else 168 | printf("v%u", var); 169 | } 170 | 171 | static unsigned int *print(unsigned int *nf, bool parens) { 172 | switch (*nf) { 173 | case LAM: 174 | if (parens) printf("("); 175 | printf("λ"); 176 | nf = print_rest_of_lam(nf); 177 | if (parens) printf(")"); 178 | return nf; 179 | case NE: 180 | nf++; 181 | unsigned int argc = *nf++; 182 | unsigned int var = *nf++; 183 | if (parens && argc) printf("("); 184 | print_var(var); 185 | for (unsigned i = 0; i < argc; i++) { 186 | printf(" "); 187 | nf = print(nf, true); 188 | } 189 | if (parens && argc) printf(")"); 190 | return nf; 191 | default: 192 | failwith("unreachable"); 193 | } 194 | } 195 | 196 | static 
unsigned int *print_rest_of_lam(unsigned int *nf) { 197 | switch (*nf) { 198 | case LAM: 199 | nf++; 200 | printf(" "); 201 | print_var(*nf++); 202 | return print_rest_of_lam(nf); 203 | case NE: 204 | printf(". "); 205 | return print(nf, false); 206 | default: 207 | failwith("unreachable"); 208 | } 209 | } 210 | 211 | 212 | /************** Converting from church numerals ************/ 213 | 214 | size_t parse_church_numeral(unsigned int *nf) { 215 | # define CONSUME(x) if (*nf++ != x) failwith("Not a church numeral") 216 | CONSUME(LAM); 217 | unsigned s = *nf++; 218 | CONSUME(LAM); 219 | unsigned z = *nf++; 220 | size_t n = 0; 221 | for(;;) { 222 | CONSUME(NE); 223 | unsigned argc = *nf++; 224 | if (argc == 0) { 225 | CONSUME(z); 226 | return n; 227 | } else if (argc == 1) { 228 | CONSUME(s); 229 | n++; 230 | continue; 231 | } else { 232 | failwith("Not a church numeral"); 233 | } 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /runtime/normalize.h: -------------------------------------------------------------------------------- 1 | /** β-normalization of lambda terms 2 | * 3 | * It pre-order serializes the normal form as a malloc'd vector of unsigned 4 | * ints, with this layout: 5 | * nf ::= LAM var nf | NE argc var (argc nf's) 6 | * argc ::= an integer number of arguments 7 | * var ::= an integer variable id 8 | * 9 | */ 10 | 11 | enum nf_tag { LAM, NE }; 12 | 13 | // entrypoint is the entry code for a thunk with no environment representing the 14 | // lambda term 15 | unsigned int *normalize(void (*entrypoint)(void)); 16 | 17 | void print_normal_form(unsigned int *nf); 18 | 19 | size_t parse_church_numeral(unsigned int *nf); 20 | 21 | -------------------------------------------------------------------------------- /runtime/runtime.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 
"data_layout.h" 9 | 10 | /************* Random utils ***********/ 11 | 12 | #define failwith(...) do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) 13 | 14 | // #define DEBUG(...) fprintf(stderr, __VA_ARGS__) 15 | #define DEBUG(...) ((void) 0) 16 | 17 | /************** Registers **************/ 18 | 19 | // The function/thunk currently being evaluated 20 | register obj *self asm ("rbx"); 21 | 22 | // Data stack, grows downwards. I assume it never overflows 23 | // It contains GC roots 24 | #define DATA_STACK_BYTES (8*1024*1024) 25 | register obj **data_stack asm ("r12"); 26 | 27 | // Simple generational semispace GC 28 | // Allocations go downards 29 | #define NURSERY_BYTES (3*1024*1024) // 3M nursery 30 | register word *nursery_top asm ("r13"); 31 | register word *nursery_start asm ("r14"); 32 | #define IS_YOUNG(o) ((size_t) (o) - (size_t) nursery_start < NURSERY_BYTES) 33 | 34 | register size_t argc asm ("r15"); 35 | 36 | --------------------------------------------------------------------------------