├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── abi.txt ├── backend.c ├── backend.h ├── bench.lc ├── frontend.c ├── frontend.h ├── haskell ├── .gitignore ├── Makefile ├── Strong.hs └── Weak.hs ├── main.c └── runtime ├── builtins.c ├── builtins.h ├── data_layout.h ├── gc.c ├── gc.h ├── normalize.c ├── normalize.h └── runtime.h /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | lc 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2022 Mark Barbone 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -Wall -O2 -foptimize-sibling-calls -g 2 | 3 | RT_OBJS = build/gc.o build/builtins.o build/normalize.o 4 | OBJS = build/frontend.o build/backend.o build/main.o 5 | 6 | lc: $(RT_OBJS) $(OBJS) 7 | gcc -o $@ $^ 8 | 9 | $(RT_OBJS): build/%.o: runtime/%.c build runtime/*.h 10 | gcc $(CFLAGS) -c $< -o $@ 11 | $(OBJS): build/%.o: %.c build *.h runtime/*.h 12 | gcc $(CFLAGS) -c $< -o $@ 13 | 14 | build: 15 | mkdir -p build 16 | 17 | .PHONY: bench 18 | bench: lc 19 | hyperfine './lc "$$(cat bench.lc)"' 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A λ-calculus interpreter 2 | 3 | A strongly-normalizing interpreter for the untyped lambda calculus. 4 | 5 | ```shell 6 | $ make 7 | $ ./lc "(λ x y. x) (λ x. x)" 8 | Input: (λ x y. x) (λ x. x) 9 | Compiling... Compiled! Normalizing... 10 | Normal form: λ a b. b 11 | ``` 12 | 13 | ## Features 14 | 15 | - Generational copying GC, with a dynamically sized old space 16 | - Custom strongly normalizing lazy evaluation runtime 17 | - Compiles lambda terms to x86\_64 machine code 18 | 19 | Only tested on Linux, and it only supports x86\_64. 20 | 21 | ## What? 22 | 23 | Normalizing a term in the λ-calculus means applying the β-reduction rule until 24 | it can't be simplified any more. While it's usually described using syntactic 25 | substitution, the fastest algorithm in practice (normalization by evaluation) 26 | involves running it like a regular functional programming language, with a 27 | catch: we need to run programs with free variables in them, requiring some 28 | special runtime support. 
29 | 30 | There are, of course, many ways to run a regular functional programming 31 | language: eager vs lazy evaluation, interpreted vs bytecode interpreted vs 32 | compiled to machine code, etc. This implementation uses lazy evaluation and 33 | compiles to machine code. 34 | 35 | ## How? 36 | 37 | Short description of the lazy evaluation implementation strategy: 38 | - Push/enter, like Clean and old GHC 39 | - Closures instead of supercombinators, like GHC 40 | - Separate data stack + call stack, like Clean 41 | - Every heap object's a closure, like GHC 42 | - Eager blackholing 43 | - Thunk entry code pushes its own update frame, like GHC 44 | - Collapsing adjacent update frames is handled by the thunk entry code 45 | 46 | It has a two-pass compiler and a simple generational copying GC. 47 | 48 | 49 | ## Simple benchmarks 50 | 51 | The file `bench.lc` contains a lambda calculus program that does some Church 52 | numeral math, with normal form `λ s z. z`. The same lambda term is implemented 53 | in Haskell in different ways in `haskell/{Strong,Weak}.hs`. 54 | 55 | Here are the results, on my computer: 56 | 57 | - **This interpreter**: 3.5 seconds; allocates 5,854,199,808 bytes 58 | - **Haskell, `Strong.hs`:** 2.5 seconds; allocates 8,609,423,616 bytes 59 | 60 | This would perform comparably to a Haskell-based implementation of the 61 | "Tagged normalization" presented in the paper about Coq's `native_compute`. 62 | 63 | - **Haskell, `Weak.hs`:** 2.3 seconds; allocates 6,887,761,200 bytes 64 | 65 | This would perform comparably to a Haskell-based implementation of the 66 | `native_compute` paper's "Untagged normalization". 67 | 68 | Despite allocating a lot, this lambda term uses very little space: GHC reports 69 | ~50K max residency in both cases (and it's similar for my interpreter, but I 70 | haven't measured). 
71 | 72 | The extra 1.7GB that the tagged implementation allocates compared to the 73 | untagged implementation come from two sources: both the overhead of including 74 | tags, and the overhead of currying not being optimized. I'm surprised the 75 | overhead is so little. 76 | 77 | This interpreter and the untagged implementation should make pretty much the 78 | same allocations, so I think the extra gigabyte that `Weak.hs` allocates comes 79 | from two cases where GHC's heap objects are larger than mine: 80 | - GHC's thunks all contain an extra word, for lock-free concurrency reasons 81 | - GHC's partial application objects use a 3-word header, while mine use only 2 82 | words 83 | 84 | IMO the main takeaway from this benchmark is that for purely functional 85 | languages, **the garbage collector is key**. How currying is done, how 86 | arguments are passed, etc., all matter much less than having a high-quality 87 | garbage collector. 88 | 89 | The one area where this interpreter beats GHC is compile time: it compiles the 90 | term practically instantly. 
91 | 92 | 93 | ## Future things 94 | 95 | Really ought to have: 96 | - [x] Big enough tests to actually test the GC 97 | - [ ] Standardize a global term size limit 98 | - [ ] mmap the data stack with a guard page below it 99 | - [x] Make sure allocations are inlined in the runtime 100 | - [x] Benchmarks 101 | - [ ] Fix the `-Wstrict-aliasing` warnings 102 | - [ ] Document the runtime better 103 | 104 | Want to have: 105 | - Add support for `fix`, let/letrec 106 | - Nice CLI and REPL 107 | 108 | Probably won't have but would be cool: 109 | - Use it in some algorithm that requires strong normalization, like a dependent 110 | typechecker or possibly supercompilation by evaluation 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /abi.txt: -------------------------------------------------------------------------------- 1 | ## Runtime 2 | 3 | ```c 4 | typedef size_t word; 5 | 6 | typedef struct obj { 7 | void (*entrypoint)(void); 8 | word contents[]; 9 | } obj; 10 | 11 | enum gc_tag { FORWARD, REF, FUN, PAP, RIGID, THUNK, BLACKHOLE }; 12 | 13 | struct gc_data { 14 | /** Size of the whole object in words. 15 | * If 0, the first word of contents is a `struct info_word` that contains 16 | * the true size 17 | */ 18 | uint32_t size; 19 | /* enum gc_tag */ uint32_t tag; 20 | }; 21 | 22 | struct info_word { 23 | /** Size of the whole object in words */ 24 | uint32_t size; 25 | /** only in heap objects representing rigid terms */ 26 | uint32_t var; 27 | }; 28 | ``` 29 | 30 | Heap object layout: 31 | 32 | ``` 33 | .dword GC_DATA_SIZE 34 | .dword GC_DATA_TAG 35 | entrypoint: 36 | x86 code... 
37 | ``` 38 | entrypoint contains executable code for this object 39 | 40 | (struct gc_data *) (entrypoint - sizeof(struct gc_data)) contains the GC info 41 | table for this object 42 | 43 | --- 44 | 45 | Calling convention/ABI: 46 | 47 | rsp is call stack (obviously) 48 | rbx is self, the closure being evaluated 49 | r12 is data stack 50 | r13 is heap pointer 51 | r14 is heap limit 52 | r15 is argc 53 | rdi, rsi are temporary registers 54 | 55 | All of these (except the temp registers) are callee-saved under the SysV ABI, so 56 | we're OK to call foreign library functions. 57 | 58 | When entering a closure: 59 | - self points to the closure being evaluated 60 | - The data stack contains the (blackholed) thunk to be updated, then argc many 61 | arguments 62 | Operationally, entering `f` pops argc many arguments, evaluates `f args...` to a 63 | value, then returns that value in `self` 64 | → Notably, it's up to the caller to do the thunk update 65 | 66 | 67 | Exposed runtime functions: 68 | rt_too_few_args: package up self and the args into a PAP 69 | rt_update_thunk: update the top of the data stack to `REF self` 70 | rt_avoid_adjacent_update_frames: 71 | like rt_update_thunk but also push self to the data stack 72 | rt_minor_gc: do GC! 73 | Built-in entry codes: 74 | Partial application, rt_pap_entry 75 | Rigid term, rt_rigid_entry 76 | (?) 77 | 78 | 79 | 80 | Compiled entry code actions: 81 | 0 (Closures only). Argc check 82 | If there are too few arguments, then jump to rt_too_few_args 83 | 0 (thunks only). Update frame 84 | If argc == 0: 85 | call rt_avoid_adjacent_update_frames 86 | Else: 87 | push self to data stack 88 | push argc to call stack 89 | set argc := 0 90 | call rest_of_code 91 | pop argc 92 | call rt_update_thunk 93 | jump to self->entrypoint 94 | rest_of_code: 95 | 1. 
Heap check (when TOTAL_ALLOC_SIZE != 0) 96 | Heap -= TOTAL_ALLOC_SIZE 97 | if Heap < Heap limit: 98 | call rt_minor_gc 99 | Heap -= TOTAL_ALLOC_SIZE 100 | (TOTAL_ALLOC_SIZE is less than the nursery, so no need to check again) 101 | 2. Perform allocations 102 | temp = Heap 103 | Repeat zero or more times: 104 | *temp = entry code 105 | (repeated) *(temp+8N) = Nth env item for this allocation 106 | push temp to the data stack 107 | temp += size of that allocation 108 | 3. Shuffle the data stack and self 109 | Increase data stack size if necessary 110 | Perform parallel move using the temp registers 111 | → before writing to self, blackhole it if it's a thunk 112 | Decrease data stack size if necessary 113 | Set new value of argc 114 | 4. Execute the call! Jump to *self 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /backend.c: -------------------------------------------------------------------------------- 1 | #include "backend.h" 2 | #include "runtime/data_layout.h" 3 | #include "runtime/builtins.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | /************** General utils *************/ 18 | 19 | #define failwith(...) do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) 20 | 21 | static void init_code_buf(void); 22 | 23 | static uint8_t *code_buf_start = NULL; 24 | static uint8_t *code_buf = NULL; 25 | static uint8_t *code_buf_end = NULL; 26 | 27 | static void write_header(uint32_t size, uint32_t tag); 28 | static void write_code(size_t len, const uint8_t code[len]); 29 | 30 | #define CODE(...) 
do { \ 31 | const uint8_t code[] = { __VA_ARGS__ }; \ 32 | write_code(sizeof(code), code); \ 33 | } while (0) 34 | #define U32(x) \ 35 | (uint8_t) (x) , (uint8_t) ((x) >> 8), \ 36 | (uint8_t) ((x) >> 16), (uint8_t) ((x) >> 24) 37 | #define U64(x) \ 38 | (uint8_t) (x) , (uint8_t) ((x) >> 8), \ 39 | (uint8_t) ((x) >> 16), (uint8_t) ((x) >> 24), \ 40 | (uint8_t) ((x) >> 32), (uint8_t) ((x) >> 40), \ 41 | (uint8_t) ((x) >> 48), (uint8_t) ((x) >> 56) 42 | 43 | enum reg { 44 | RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, 45 | R8, R9, R10, R11, R12, R13, R14, R15 46 | }; 47 | #define SELF RBX 48 | #define DATA_STACK R12 49 | #define HEAP_PTR R13 50 | #define HEAP_LIMIT R14 51 | #define ARGC R15 52 | 53 | static void mem64(uint8_t opcode, enum reg reg, enum reg ptr, int32_t offset); 54 | static void reg64(uint8_t opcode, enum reg reg, enum reg other_reg); 55 | 56 | #define OP_LOAD 0x8b 57 | #define OP_STORE 0x89 58 | 59 | #define LOAD(reg, ptr, offset) \ 60 | mem64(OP_LOAD, reg, ptr, offset) 61 | #define STORE(reg, ptr, offset) \ 62 | mem64(OP_STORE, reg, ptr, offset) 63 | #define MOV_RR(dest, src) reg64(OP_STORE, src, dest) 64 | 65 | // Add constant value 'imm' to register 'reg' 66 | static void add_imm(enum reg reg, int32_t imm); 67 | 68 | // idx 0 is the first env item. env is usually SELF 69 | static void load_env_item(enum reg reg, enum reg env, size_t idx); 70 | // idx 0 is the top of the stack 71 | static void load_arg(enum reg reg, size_t idx); 72 | static void store_arg(size_t idx, enum reg reg); 73 | 74 | static void blackhole_self(void); 75 | 76 | 77 | struct var_info { 78 | bool is_used; 79 | // Only when used 80 | size_t env_idx; 81 | }; 82 | 83 | struct env { 84 | // do we need? 
85 | struct env *up; 86 | var args_start; 87 | var lets_start; 88 | size_t envc; 89 | // args_start elements, envc of which are used 90 | struct var_info upvals[]; 91 | }; 92 | 93 | struct compile_result { 94 | void *code; 95 | struct env *env; 96 | }; 97 | 98 | static size_t var_to_stack_index(size_t lvl, struct env *env, var v); 99 | static void make_sure_can_access_var(struct env *env, var v); 100 | 101 | 102 | static void init_code_buf(void) { 103 | if (code_buf) 104 | return; 105 | 106 | // Need to mmap it so that I can mprotect it later. 107 | const size_t len = 8 * 1024 * 1024; 108 | code_buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 109 | if (!code_buf) 110 | failwith("Couldn't allocate buffer for code\n"); 111 | code_buf_start = code_buf; 112 | code_buf_end = code_buf_start + len; 113 | } 114 | 115 | void compile_finalize(void) { 116 | // mprotect it 117 | size_t len = code_buf_end - code_buf_start; 118 | if (mprotect(code_buf_start, len, PROT_READ | PROT_EXEC)) 119 | failwith("Couldn't map as executable: %s\n", strerror(errno)); 120 | } 121 | 122 | static void write_code(size_t len, const uint8_t code[len]) { 123 | uint8_t *end = code_buf + len; 124 | if (end > code_buf_end) failwith("Too much code"); 125 | memcpy(code_buf, code, len); 126 | code_buf = end; 127 | } 128 | 129 | #define REXW(R,X,B) \ 130 | (0x48 | ((R >> 1) & 4) | ((X >> 2) & 3) | (B >> 3)) 131 | #define MODRM(Mod, Reg, RM) \ 132 | (((Mod) << 6) | ((Reg & 7) << 3) | (RM & 7)) 133 | 134 | static void reg64(uint8_t opcode, enum reg reg, enum reg other_reg) { 135 | // Mod == 11: r/m 136 | CODE( 137 | REXW(reg, 0, other_reg), 138 | opcode, 139 | MODRM(3, reg, other_reg) 140 | ); 141 | } 142 | 143 | static void mem64(uint8_t opcode, enum reg reg, enum reg ptr, int32_t offset) { 144 | CODE(REXW(reg, 0, ptr), opcode); 145 | 146 | if ((ptr & 7) == RSP) { 147 | // r/m == rsp: [SIB] 148 | 149 | if (offset == 0) 150 | // Mod == 00 && index == rsp: [base] 151 | 
CODE(MODRM(0, reg, RSP), 0x24); 152 | else if (-128 <= offset && offset < 128) 153 | // Mod == 01 && index == rsp: [base + disp8] 154 | CODE(MODRM(1, reg, RSP), 0x24, (uint8_t) offset); 155 | else 156 | // Mod == 10 && index == rsp: [base + disp32] 157 | CODE(MODRM(2, reg, RSP), 0x24, (uint32_t) offset); 158 | 159 | return; 160 | } 161 | 162 | if (offset == 0 && (ptr & 7) != RBP) 163 | // Mod == 00: [r/m] 164 | CODE(MODRM(0, reg, ptr)); 165 | else if (-128 <= offset && offset < 128) 166 | // Mod == 01: [r/m + disp8] 167 | CODE(MODRM(1, reg, ptr), (uint8_t) offset); 168 | else 169 | // Mod == 10: [r/m + disp32] 170 | CODE(MODRM(2, reg, ptr), U32((uint32_t) offset)); 171 | } 172 | 173 | static void add_imm(enum reg reg, int32_t imm) { 174 | if (imm == 0) return; 175 | if (-128 <= imm && imm < 128) 176 | CODE(REXW(0, 0, reg), 0x83, MODRM(3, 0, reg), (uint8_t) imm); 177 | else 178 | CODE(REXW(0, 0, reg), 0x81, MODRM(3, 0, reg), U32((uint32_t) imm)); 179 | } 180 | 181 | static size_t var_to_stack_index(size_t lvl, struct env *env, var v) { 182 | assert(env->args_start <= v && v < lvl); 183 | if (v >= env->lets_start) 184 | return lvl - v - 1; 185 | else 186 | return v - env->args_start + lvl - env->lets_start; 187 | } 188 | static void make_sure_can_access_var(struct env *env, var v) { 189 | while (v < env->args_start && !env->upvals[v].is_used) { 190 | env->upvals[v].is_used = true; 191 | env->upvals[v].env_idx = env->envc++; 192 | env = env->up; 193 | } 194 | } 195 | 196 | static void load_env_item(enum reg reg, enum reg env, size_t idx) { 197 | assert(idx < INT_MAX / 8 - 8); 198 | LOAD(reg, env, 8 * idx + 8); 199 | } 200 | static void load_arg(enum reg reg, size_t idx) { 201 | assert(idx < INT_MAX / 8); 202 | LOAD(reg, DATA_STACK, 8 * idx); 203 | } 204 | static void store_arg(size_t idx, enum reg reg) { 205 | assert(idx < INT_MAX / 8); 206 | STORE(reg, DATA_STACK, 8 * idx); 207 | } 208 | 209 | static void blackhole_self(void) { 210 | // Use rax as a temporary 
register since it's possible that both rsi and rdi 211 | // are in use 212 | // movabs rax, rt_blackhole_entry 213 | CODE(0x48, 0xb8, U64((uint64_t) rt_blackhole_entry)); 214 | STORE(RAX, SELF, 0); 215 | CODE(0xb8, U32(2)); // mov eax, 2 216 | STORE(RAX, SELF, 8); 217 | } 218 | 219 | 220 | /******************* Prologue *****************/ 221 | 222 | static void *start_closure(size_t argc, size_t envc); 223 | static void *start_thunk(size_t envc); 224 | 225 | 226 | static void write_header(uint32_t size, uint32_t tag) { 227 | // Align up to nearest word 228 | code_buf = (uint8_t *) (((size_t) code_buf + 7) & ~7); 229 | 230 | if (code_buf + 8 > code_buf_end) failwith("Too much code"); 231 | 232 | memcpy(code_buf, &size, sizeof(uint32_t)); 233 | code_buf += sizeof(uint32_t); 234 | memcpy(code_buf, &tag, sizeof(uint32_t)); 235 | code_buf += sizeof(uint32_t); 236 | } 237 | 238 | static void *start_closure(size_t argc, size_t envc) { 239 | /* assert(argc < INT_MAX); */ 240 | assert(argc < 127); // TODO: allow more args (? maybe) 241 | assert(envc < INT_MAX); 242 | 243 | write_header(envc == 0 ? 0 : envc + 1, FUN); 244 | void *code_start = code_buf; 245 | 246 | CODE( 247 | // cmp r15, argc 248 | 0x49, 0x83, 0xff, (uint8_t) argc, 249 | // jge rest_of_code (+12) 250 | 0x7d, 12, 251 | // movabs rdi, rt_too_few_args 252 | 0x48, 0xbf, U64((size_t) rt_too_few_args), 253 | // jmp *%rdi 254 | 0xff, 0xe7 255 | // rest_of_code: 256 | ); 257 | 258 | return code_start; 259 | } 260 | 261 | static void *start_thunk(size_t envc) { 262 | assert(envc < INT_MAX); 263 | 264 | write_header(envc == 0 ? 
0 : envc + 1, THUNK); 265 | void *code_start = code_buf; 266 | 267 | CODE( 268 | // test argc,argc (argc is %r15) 269 | 0x4d, 0x85, 0xff, 270 | // jz adjacent_updates (+34) 271 | 0x74, 34, 272 | // sub data_stack, $8 (data_stack is %r12) 273 | 0x49, 0x83, 0xec, 0x08, 274 | // mov [data_stack], self (self is rbx) 275 | 0x49, 0x89, 0x1c, 0x24, 276 | // push argc (argc is r15) 277 | 0x41, 0x57, 278 | // xor argc, argc 279 | 0x4d, 0x31, 0xff, 280 | // call rest_of_code (+28) 281 | 0xe8, U32(28), 282 | // movabs rdi, rt_update_thunk 283 | 0x48, 0xbf, U64((size_t) rt_update_thunk), 284 | // call rdi 285 | 0xff, 0xd7, 286 | // pop argc (argc is %r15) 287 | 0x41, 0x5f, 288 | // jmp [qword ptr [self]] (self is rbx) 289 | 0xff, 0x23, 290 | // adjacent_updates: 291 | // FIXME: calls this function with a misaligned stack 292 | // movabs rdi, rt_adjacent_update_frames 293 | 0x48, 0xbf, U64((size_t) rt_adjacent_update_frames), 294 | // call rdi 295 | 0xff, 0xd7 296 | // rest_of_code: 297 | ); 298 | 299 | return code_start; 300 | } 301 | 302 | 303 | /***************** Allocations ****************/ 304 | 305 | static void do_allocations(struct env *this_env, size_t n, struct compile_result locals[n]); 306 | 307 | 308 | static void heap_check(size_t bytes_allocated) { 309 | // TODO: better maximum allocation size control 310 | assert(0 < bytes_allocated && bytes_allocated < 131072); 311 | 312 | add_imm(HEAP_PTR, - (int32_t) bytes_allocated); 313 | CODE( 314 | // cmp heap, heap limit (r13,r14) 315 | 0x4d, 0x39, 0xf5, 316 | // jae alloc_was_good (offset depends on imm8 vs imm32) 317 | // TODO: add assertions that this is correct 318 | 0x73, (bytes_allocated <= 128 ? 
24 : 27), 319 | // sub rsp, 8 (align the stack for the call) 320 | 0x48, 0x83, 0xec, 8, 321 | // movabs rdi, rt_gc 322 | 0x48, 0xbf, U64((size_t) rt_gc), 323 | // call rdi 324 | 0xff, 0xd7, 325 | // add rsp, 8 326 | 0x48, 0x83, 0xc4, 8 327 | ); 328 | add_imm(HEAP_PTR, - (int32_t) bytes_allocated); 329 | // alloc_was_good: 330 | } 331 | 332 | static void load_var(size_t lvl, struct env *this_env, enum reg dest, var v) { 333 | assert(v < lvl); 334 | if (v >= this_env->args_start) { 335 | size_t idx = var_to_stack_index(lvl, this_env, v); 336 | load_arg(dest, idx); 337 | } else { 338 | assert(this_env->upvals[v].is_used); 339 | load_env_item(dest, SELF, this_env->upvals[v].env_idx); 340 | } 341 | } 342 | 343 | static void do_allocations(struct env *this_env, size_t n, struct compile_result locals[n]) { 344 | size_t lvl = this_env->lets_start; 345 | if (n == 0) 346 | return; 347 | 348 | size_t words_allocated = 0; 349 | for (size_t i = 0; i < n; i++) { 350 | if (locals[i].env->envc == 0) 351 | words_allocated += 2; 352 | else 353 | words_allocated += locals[i].env->envc + 1; 354 | } 355 | 356 | heap_check(8 * words_allocated); 357 | 358 | MOV_RR(RDI, HEAP_PTR); 359 | for (size_t i = 0; i < n; i++) { 360 | lvl++; 361 | add_imm(DATA_STACK, -8); 362 | STORE(RDI, DATA_STACK, 0); 363 | 364 | // Store the entrypoint 365 | // movabs rsi, entrypoint 366 | CODE(0x48, 0xbe, U64((uint64_t) locals[i].code)); 367 | STORE(RSI, RDI, 0); 368 | 369 | // Store the contents 370 | struct env *env = locals[i].env; 371 | assert(env->up == this_env); 372 | size_t count = 0; 373 | for (var v = 0; v < env->args_start; v++) { 374 | if (!env->upvals[v].is_used) 375 | continue; 376 | count++; 377 | load_var(lvl, this_env, RSI, v); 378 | size_t offset = 8 + 8*env->upvals[v].env_idx; 379 | STORE(RSI, RDI, offset); 380 | } 381 | assert(count == env->envc); 382 | 383 | if (env->envc == 0) { 384 | // Store the info_word 385 | CODE(0xbe, U32(2)); // mov esi, 2 386 | STORE(RSI, RDI, 8); 387 | } 388 | 
389 | // Bump rdi, used as a temporary heap pointer 390 | if (i != n-1) 391 | add_imm(RDI, env->envc == 0 ? 16 : 8 + 8*env->envc); 392 | } 393 | } 394 | 395 | 396 | /***************** Shuffle arguments ****************/ 397 | 398 | static void do_the_moves(size_t lvl, ir term, struct env *env); 399 | 400 | 401 | enum mov_status { NOT_STARTED, IN_PROGRESS, DONE }; 402 | 403 | struct dest_info_item { 404 | enum { FROM_ARGS, FROM_ENV } src_type; 405 | int src_idx; 406 | int next_with_same_src; // -1 if none 407 | enum mov_status status; 408 | }; 409 | 410 | typedef struct { 411 | size_t n; 412 | struct dest_info_item *dest_info; // n + 1 of them 413 | int *src_to_dest; // n + 1 of them 414 | int in_rdi; 415 | bool for_a_thunk; 416 | } mov_state; 417 | 418 | // Store src to all its destinations, so that it can be overwritten afterwards. 419 | static void vacate_one(mov_state *s, int src) { 420 | switch (s->dest_info[src].status) { 421 | case DONE: 422 | break; 423 | 424 | case IN_PROGRESS: 425 | // A cycle! Use rdi as a temporary register to break the cycle 426 | assert(s->in_rdi == -1); 427 | s->in_rdi = src; 428 | if (src == s->n) 429 | MOV_RR(RDI, SELF); 430 | else 431 | load_arg(RDI, src); 432 | break; 433 | 434 | case NOT_STARTED: 435 | if (s->src_to_dest[src] == -1) { 436 | // Don't do anything if it has no destinations 437 | s->dest_info[src].status = DONE; 438 | break; 439 | } 440 | 441 | # define FOREACH_DEST(dest) \ 442 | for (int dest = s->src_to_dest[src]; dest != -1; dest = s->dest_info[dest].next_with_same_src) 443 | 444 | s->dest_info[src].status = IN_PROGRESS; 445 | if (src == s->n) { 446 | // Clear out 'self' by storing all the things from the env 447 | FOREACH_DEST(dest) { 448 | assert(s->dest_info[dest].src_type == FROM_ENV); 449 | 450 | vacate_one(s, dest); 451 | 452 | enum reg self = s->in_rdi == s->n ? 
RDI : SELF; 453 | if (dest == s->n) { 454 | if (s->for_a_thunk) { 455 | load_env_item(RSI, self, s->dest_info[dest].src_idx); 456 | blackhole_self(); 457 | MOV_RR(SELF, RSI); 458 | } else { 459 | load_env_item(SELF, self, s->dest_info[dest].src_idx); 460 | } 461 | } else { 462 | load_env_item(RSI, self, s->dest_info[dest].src_idx); 463 | store_arg(dest, RSI); 464 | } 465 | } 466 | } else { 467 | // Clear out data_stack[src] 468 | FOREACH_DEST(dest) { 469 | assert(s->dest_info[dest].src_type == FROM_ARGS); 470 | assert(s->dest_info[dest].src_idx == src); 471 | vacate_one(s, dest); 472 | } 473 | enum reg src_reg; 474 | if (s->in_rdi == src) { 475 | src_reg = RDI; 476 | } else { 477 | load_arg(RSI, src); 478 | src_reg = RSI; 479 | } 480 | FOREACH_DEST(dest) { 481 | if (dest == s->n) { 482 | if (s->for_a_thunk) 483 | blackhole_self(); 484 | MOV_RR(SELF, src_reg); 485 | } else { 486 | store_arg(dest, src_reg); 487 | } 488 | } 489 | } 490 | if (s->in_rdi == src) 491 | s->in_rdi = -1; 492 | s->dest_info[src].status = DONE; 493 | # undef FOREACH_DEST 494 | break; 495 | } 496 | } 497 | 498 | static void add_dest_to_mov_state(size_t lvl, struct env *env, mov_state *s, int dest, var v) { 499 | assert(v < lvl); 500 | if (v >= env->args_start) { 501 | // It's from the data stack 502 | int src = var_to_stack_index(lvl, env, v); 503 | s->dest_info[dest] = (struct dest_info_item) { 504 | .src_type = FROM_ARGS, 505 | .src_idx = src, 506 | .next_with_same_src = s->src_to_dest[src], 507 | .status = NOT_STARTED 508 | }; 509 | if (dest != src) 510 | s->src_to_dest[src] = dest; 511 | } else { 512 | // It's from the env 513 | assert(env->upvals[v].is_used); 514 | s->dest_info[dest] = (struct dest_info_item) { 515 | .src_type = FROM_ENV, 516 | .src_idx = env->upvals[v].env_idx, 517 | .next_with_same_src = s->src_to_dest[s->n], 518 | .status = NOT_STARTED 519 | }; 520 | s->src_to_dest[s->n] = dest; 521 | } 522 | } 523 | 524 | static void do_the_moves(size_t lvl, ir term, struct env *env) { 
525 | assert(lvl == term->lvl + term->arity + term->lets_len); 526 | assert(term->lvl == env->args_start); 527 | 528 | size_t incoming_argc = term->arity + term->lets_len; 529 | size_t outgoing_argc = 0; 530 | for (arglist arg = term->args; arg; arg = arg->prev) 531 | ++outgoing_argc; 532 | 533 | // Resize the data stack 534 | size_t n; 535 | if (outgoing_argc > incoming_argc) { 536 | n = outgoing_argc; 537 | size_t diff = outgoing_argc - incoming_argc; 538 | assert(diff < INT_MAX / 8); 539 | lvl += diff; 540 | add_imm(DATA_STACK, -8 * (int) diff); 541 | } else { 542 | n = incoming_argc; 543 | } 544 | 545 | // Generate the data structures and stuff 546 | mov_state s = (mov_state) { 547 | .n = n, 548 | .dest_info = malloc(sizeof(struct dest_info_item[n+1])), 549 | .src_to_dest = malloc(sizeof(int[n+1])), 550 | .in_rdi = -1, 551 | .for_a_thunk = term->arity == 0, 552 | }; 553 | 554 | for (int i = 0; i < n + 1; i++) 555 | s.src_to_dest[i] = -1; 556 | 557 | int dest_start = 558 | outgoing_argc < incoming_argc ? incoming_argc - outgoing_argc : 0; 559 | for (int dest = 0; dest < dest_start; dest++) 560 | s.dest_info[dest].status = NOT_STARTED; 561 | int dest = n - 1; 562 | for (arglist arg = term->args; arg; dest--, arg = arg->prev) 563 | add_dest_to_mov_state(lvl, env, &s, dest, arg->arg); 564 | assert(dest == dest_start - 1); 565 | add_dest_to_mov_state(lvl, env, &s, n, term->head); 566 | 567 | // Do all the moving 568 | for (int i = 0; i < n + 1; i++) { 569 | vacate_one(&s, i); 570 | assert(s.in_rdi == -1); 571 | } 572 | 573 | // Resize the data stack and set argc 574 | if (outgoing_argc < incoming_argc) { 575 | size_t diff = incoming_argc - outgoing_argc; 576 | assert(diff < INT_MAX / 8); 577 | add_imm(DATA_STACK, 8 * (int) diff); 578 | } 579 | add_imm(ARGC, outgoing_argc - term->arity); 580 | } 581 | 582 | 583 | /***************** Perform the call! 
****************/ 584 | 585 | static void call_self(void) { 586 | // jmp [qword ptr [self]] (self is rbx) 587 | CODE(0xff, 0x23); 588 | } 589 | 590 | 591 | /***************** Tying it all together ****************/ 592 | 593 | void *compile_toplevel(ir term); 594 | 595 | 596 | static struct compile_result compile(struct env *up, ir term) { 597 | size_t lvl = term->lvl; 598 | 599 | // Allocate an environment 600 | struct env *env = malloc(sizeof(struct env) + sizeof(struct var_info[lvl])); 601 | env->up = up; 602 | env->args_start = lvl; 603 | env->lets_start = lvl + term->arity; 604 | env->envc = 0; 605 | for (int i = 0; i < lvl; i++) 606 | env->upvals[i].is_used = false; 607 | 608 | // Populate the env 609 | make_sure_can_access_var(env, term->head); 610 | for (arglist arg = term->args; arg; arg = arg->prev) 611 | make_sure_can_access_var(env, arg->arg); 612 | 613 | // Compile all the lets 614 | struct compile_result *locals = 615 | malloc(sizeof(struct compile_result[term->lets_len])); 616 | int i = 0; 617 | for (letlist let = term->lets; let; let = let->next, i++) 618 | locals[i] = compile(env, let->val); 619 | assert(i == term->lets_len); 620 | 621 | // Prologue 622 | void *code_start; 623 | if (term->arity == 0) 624 | code_start = start_thunk(env->envc); 625 | else 626 | code_start = start_closure(term->arity, env->envc); 627 | 628 | lvl += term->arity; 629 | 630 | // Allocations 631 | do_allocations(env, term->lets_len, locals); 632 | for (int i = 0; i < term->lets_len; i++) 633 | free(locals[i].env); 634 | free(locals); 635 | 636 | lvl += term->lets_len; 637 | 638 | // Set up for call 639 | do_the_moves(lvl, term, env); 640 | 641 | // Execute the call! 
642 | call_self(); 643 | 644 | return (struct compile_result) { 645 | .code = code_start, 646 | .env = env, 647 | }; 648 | } 649 | 650 | void *compile_toplevel(ir term) { 651 | init_code_buf(); 652 | assert(term->lvl == 0); 653 | struct compile_result res = compile(NULL, term); 654 | assert(res.env->envc == 0); 655 | free(res.env); 656 | return res.code; 657 | } 658 | 659 | -------------------------------------------------------------------------------- /backend.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include "frontend.h" 3 | 4 | /** Compile a top-level (closed, at level 0) term to machine code. 5 | * 6 | * It returns a void *. This is not executable until codegen_finalize is run. 7 | */ 8 | void *compile_toplevel(ir term); 9 | 10 | /** Remap the codegen'd area from RW to RX. 11 | * 12 | * You may now cast the void *'s from codegen_toplevel to void(*)(void) 13 | */ 14 | void compile_finalize(void); 15 | -------------------------------------------------------------------------------- /bench.lc: -------------------------------------------------------------------------------- 1 | /- Computes 3^(2^4) - 3^(2^4) = 0 = λ s z. z 2 | - Takes about 3.5 seconds to run on my computer 3 | -/ 4 | (λ big_number minus. minus big_number big_number) 5 | /- The big number, 3^(2^4) = 43'046'721 -/ 6 | ((\s z. s(s(s(s z)))) (\s z. s(s z)) (\s z. s(s(s z)))) 7 | /- O(n) church numeral subtraction algorithm -/ 8 | (λ n m s z. 9 | n (λ y k. k (s (y (λ a b. a))) y) 10 | (λ k. k z (λ _. z)) 11 | (m (λ k a b. b k) (λ a b. a))) 12 | 13 | 14 | -------------------------------------------------------------------------------- /frontend.c: -------------------------------------------------------------------------------- 1 | #include "frontend.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define failwith(...) 
do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) 13 | 14 | /**************** Lowering to IR ****************/ 15 | 16 | static void arena_init(void); 17 | 18 | static bool is_var(ir e); 19 | static bool is_lambda(ir e); 20 | 21 | static ir mkvar(size_t lvl, var v); 22 | static ir mkapp(size_t lvl, ir func, ir arg); 23 | static ir mkabs(size_t lvl, ir body); 24 | 25 | static size_t *ir_arena_start = NULL; 26 | static size_t *ir_arena = NULL; 27 | static size_t *ir_arena_end = NULL; 28 | #define IR_ARENA_SIZE (32 * 1024 * 1024) 29 | 30 | static void arena_init(void) { 31 | if (!ir_arena) { 32 | ir_arena_start = ir_arena = malloc(IR_ARENA_SIZE); 33 | ir_arena_end = ir_arena + IR_ARENA_SIZE / sizeof(*ir_arena); 34 | } 35 | } 36 | 37 | void free_ir(void) { 38 | free(ir_arena_start); 39 | ir_arena = ir_arena_end = NULL; 40 | } 41 | 42 | #define ARENA_ALLOC(ty, ...) \ 43 | ty *node = (ty *) ir_arena; \ 44 | ir_arena += sizeof(ty) / sizeof(*ir_arena); \ 45 | if (ir_arena > ir_arena_end) failwith("Too much code"); \ 46 | *node = (ty) { __VA_ARGS__ }; \ 47 | return node; 48 | 49 | static arglist snoc_arg(arglist prev, var arg) { 50 | ARENA_ALLOC(struct an_arg, prev, arg) 51 | } 52 | static letlist cons_let(ir val, letlist next) { 53 | ARENA_ALLOC(struct a_let, val, next) 54 | } 55 | 56 | static ir mkvar(size_t lvl, var v) { 57 | ARENA_ALLOC(struct exp, lvl, 0, NULL, NULL, 0, v, NULL); 58 | } 59 | #undef ARENA_ALLOC 60 | 61 | static bool is_var(ir e) { 62 | return e->arity == 0 && e->lets == NULL && e->args == NULL; 63 | } 64 | static bool is_lambda(ir e) { 65 | return e->arity > 0; 66 | } 67 | 68 | static ir mkabs(size_t lvl, ir body) { 69 | assert(body->lvl == lvl + 1); 70 | body->lvl = lvl; 71 | body->arity++; 72 | return body; 73 | } 74 | 75 | static ir mkapp(size_t lvl, ir func, ir arg) { 76 | if (is_lambda(func)) { 77 | if (is_var(arg)) { 78 | // Applying a lambda to a var: let f = func in f x 79 | // f becomes lvl 80 | ir res = mkvar(lvl, lvl); 81 | res->lets = 
cons_let(func, NULL); 82 | res->lets_end = &res->lets->next; 83 | res->lets_len = 1; 84 | res->args = snoc_arg(NULL, arg->head); 85 | return res; 86 | } else { 87 | // Applying a lambda to a complex thing: let f = func ; x = arg in f x 88 | // f becomes lvl, x becomes (lvl+1) 89 | ir res = mkvar(lvl, lvl); 90 | res->lets = cons_let(arg, NULL); 91 | res->lets_end = &res->lets->next; 92 | res->lets = cons_let(func, res->lets); 93 | res->lets_len = 2; 94 | res->args = snoc_arg(NULL, lvl+1); 95 | return res; 96 | } 97 | } else if (is_var(arg)) { 98 | // Applying a thunk to a var: 99 | // (let ... in f args) x ⇒ let ... in f args x 100 | var v = arg->head; 101 | func->args = snoc_arg(func->args, v); 102 | return func; 103 | } else { 104 | // Appying a thunk to something complex: 105 | // (let ... in f args) arg ⇒ let ... ; x = arg in f args x 106 | var new_var = lvl + func->lets_len; 107 | 108 | // add the let 109 | letlist new_let = cons_let(arg, NULL); 110 | if (func->lets_end) 111 | *func->lets_end = new_let; 112 | else 113 | func->lets = new_let; 114 | func->lets_end = &new_let->next; 115 | func->lets_len++; 116 | 117 | // add the arg 118 | func->args = snoc_arg(func->args, new_var); 119 | 120 | return func; 121 | } 122 | } 123 | 124 | /*************** Parser ***************/ 125 | 126 | typedef struct a_scope_item { 127 | const char *name; 128 | size_t name_len; 129 | struct a_scope_item *next; 130 | } *scope; 131 | 132 | static bool skip_whitespace(const char **cursor); 133 | #define SKIP_WHITESPACE(cursor) \ 134 | if (!skip_whitespace(cursor)) return 0 135 | 136 | static size_t parse_ident(const char **cursor); 137 | static ir parse_var(const char **cursor, size_t lvl, scope s); 138 | static ir parse_exp(const char **cursor, size_t lvl, scope s); 139 | static ir parse_atomic_exp(const char **cursor, size_t lvl, scope s); 140 | static ir parse_rest_of_lambda(const char **cursor, size_t lvl, scope s); 141 | 142 | static const char *err_msg = NULL; 143 | static 
const char *err_loc = NULL; 144 | 145 | 146 | ir parse(const char *text) { 147 | arena_init(); 148 | const char *cursor = text; 149 | ir result = NULL; 150 | 151 | if (!skip_whitespace(&cursor)) 152 | goto end; 153 | 154 | result = parse_exp(&cursor, 0, NULL); 155 | if (result && *cursor != '\0') { 156 | result = NULL; 157 | err_loc = text; 158 | err_msg = "expected eof"; 159 | } 160 | 161 | end: 162 | if (!result) 163 | // TODO: better error message printing 164 | printf("parse error at byte %zu :/\n%s\n", err_loc - text, err_msg); 165 | 166 | return result; 167 | } 168 | 169 | static bool skip_whitespace(const char **cursor) { 170 | const char *text = *cursor; 171 | // comments use /- -/ 172 | for (;;) switch (text[0]) { 173 | case ' ': 174 | case '\n': 175 | case '\t': 176 | text++; 177 | continue; 178 | case '/': 179 | if (text[1] == '-') { 180 | // comment start! 181 | text += 2; 182 | while (text[0]) { 183 | if (text[0] == '-' && text[1] == '/') 184 | break; 185 | text++; 186 | } 187 | if (!text[0]) { 188 | err_loc = *cursor = text; 189 | err_msg = "reached EOF during comment"; 190 | return false; 191 | } 192 | text += 2; 193 | continue; 194 | } else { /* fallthrough */ } 195 | // Fall through 196 | default: 197 | *cursor = text; 198 | return true; 199 | } 200 | } 201 | 202 | // returns the length of the ident starting at *cursor 203 | static size_t parse_ident(const char **cursor) { 204 | // parse [a-zA-Z_]+ 205 | const char *start = *cursor; 206 | const char *end = start; 207 | #define IDENT_CHAR(c) (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_') 208 | while (IDENT_CHAR(*end)) 209 | end++; 210 | *cursor = end; 211 | if (start == end) { 212 | err_loc = start; 213 | err_msg = "expected variable"; 214 | return 0; 215 | } 216 | SKIP_WHITESPACE(cursor); 217 | return end - start; 218 | } 219 | 220 | static ir parse_var(const char **cursor, size_t lvl, scope s) { 221 | const char *start = *cursor; 222 | size_t len = parse_ident(cursor); 223 | if 
(!len) return NULL; 224 | 225 | // resolve the name 226 | var v = lvl; 227 | scope node = s; 228 | while (node) { 229 | v--; 230 | if (len == node->name_len && strncmp(start, node->name, len) == 0) 231 | return mkvar(lvl, v); 232 | else 233 | node = node->next; 234 | } 235 | err_loc = start; 236 | err_msg = "variable not in scope"; 237 | return NULL; 238 | } 239 | 240 | // atomic_exp ::= var | '(' exp ')' 241 | static ir parse_atomic_exp(const char **cursor, size_t lvl, scope s) { 242 | if (**cursor == '(') { 243 | ++*cursor; 244 | SKIP_WHITESPACE(cursor); 245 | ir result = parse_exp(cursor, lvl, s); 246 | if (**cursor == ')') { 247 | ++*cursor; 248 | SKIP_WHITESPACE(cursor); 249 | return result; 250 | } else { 251 | err_loc = *cursor; 252 | err_msg = "expected ')'"; 253 | return NULL; 254 | } 255 | } 256 | return parse_var(cursor, lvl, s); 257 | } 258 | 259 | // rest_of_lambda ::= var* '.' exp 260 | static ir parse_rest_of_lambda(const char **cursor, size_t lvl, scope s) { 261 | if (!**cursor) { 262 | err_loc = *cursor; 263 | err_msg = "expected '.', got end of file"; 264 | return NULL; 265 | } 266 | if (**cursor == '.') { 267 | ++*cursor; 268 | SKIP_WHITESPACE(cursor); 269 | return parse_exp(cursor, lvl, s); 270 | } else { 271 | const char *name = *cursor; 272 | size_t name_len = parse_ident(cursor); 273 | if (!name_len) return NULL; 274 | struct a_scope_item extended = { name, name_len, s }; 275 | ir body = parse_rest_of_lambda(cursor, lvl+1, &extended); 276 | if (!body) return NULL; 277 | return mkabs(lvl, body); 278 | } 279 | } 280 | 281 | // exp ::= '\' rest_of_lambda | 'λ' rest_of_lambda | atomic_exp atomic_exp* 282 | static ir parse_exp(const char **cursor, size_t lvl, scope s) { 283 | if (**cursor == '\\') { 284 | ++*cursor; 285 | SKIP_WHITESPACE(cursor); 286 | return parse_rest_of_lambda(cursor, lvl, s); 287 | } else if (strncmp(*cursor, "λ", sizeof("λ") - 1) == 0) { 288 | *cursor += sizeof("λ") - 1; 289 | SKIP_WHITESPACE(cursor); 290 | return 
parse_rest_of_lambda(cursor, lvl, s); 291 | } else { 292 | ir func = parse_atomic_exp(cursor, lvl, s); 293 | if (!func) return NULL; 294 | // Then parse some args 295 | while (**cursor && **cursor != ')') { 296 | ir arg = parse_atomic_exp(cursor, lvl, s); 297 | if (!arg) return NULL; 298 | func = mkapp(lvl, func, arg); 299 | } 300 | return func; 301 | } 302 | } 303 | 304 | /*************** Pretty-printer **************/ 305 | 306 | static void print_var(var v); 307 | static void print_lets(size_t lvl, letlist lets); 308 | static void print_args(arglist args); 309 | static void print_term(ir term); 310 | 311 | static void print_var(var v) { 312 | printf("x_%zu", v); 313 | } 314 | static void print_lets(size_t lvl, letlist lets) { 315 | for (; lets; lets = lets->next, lvl++) { 316 | printf("let "); 317 | print_var(lvl); 318 | printf(" = "); 319 | print_term(lets->val); 320 | printf(" in "); 321 | } 322 | } 323 | static void print_args(arglist args) { 324 | if (args) { 325 | print_args(args->prev); 326 | printf(" "); 327 | print_var(args->arg); 328 | } 329 | } 330 | static void print_term(ir term) { 331 | if (!term->arity && !term->lets && !term->args) { 332 | // Just a var -- no parens necessary 333 | print_var(term->head); 334 | return; 335 | } 336 | printf("("); 337 | if (term->arity) { 338 | printf("λ"); 339 | for (var v = term->lvl; v < term->lvl + term->arity; v++) { 340 | printf(" "); 341 | print_var(v); 342 | } 343 | printf(". 
"); 344 | } 345 | print_lets(term->lvl + term->arity, term->lets); 346 | print_var(term->head); 347 | print_args(term->args); 348 | printf(")"); 349 | } 350 | 351 | void print_ir(ir term) { 352 | print_term(term); 353 | printf("\n"); 354 | } 355 | 356 | 357 | -------------------------------------------------------------------------------- /frontend.h: -------------------------------------------------------------------------------- 1 | #ifndef FRONTEND_H 2 | #define FRONTEND_H 1 3 | 4 | #include 5 | 6 | typedef size_t var; 7 | 8 | typedef struct a_let { 9 | struct exp *val; 10 | struct a_let *next; 11 | } *letlist; 12 | 13 | typedef struct an_arg { 14 | struct an_arg *prev; 15 | var arg; 16 | } *arglist; 17 | 18 | typedef struct exp { 19 | size_t lvl; 20 | size_t arity; 21 | letlist lets; 22 | letlist *lets_end; 23 | size_t lets_len; 24 | var head; 25 | arglist args; 26 | } *ir; 27 | 28 | /** Parse the given text, reporting errors to the user. 29 | * 30 | * If there's an error, it returns null 31 | * 32 | * Does not free the text 33 | */ 34 | ir parse(const char *text); 35 | 36 | /** All IR is allocated from an arena. 
37 | * 38 | * This function frees the arena 39 | */ 40 | void free_ir(void); 41 | 42 | /** For debugging purposes 43 | */ 44 | void print_ir(ir term); 45 | 46 | #endif // FRONTEND_H 47 | -------------------------------------------------------------------------------- /haskell/.gitignore: -------------------------------------------------------------------------------- 1 | Strong 2 | Weak 3 | *.hi 4 | *.o 5 | -------------------------------------------------------------------------------- /haskell/Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: Weak Strong 3 | 4 | bench: Weak Strong 5 | hyperfine ./Weak ./Strong 6 | 7 | GHCOPTS = -threaded -rtsopts -O2 -fllvm -fno-cse -fno-full-laziness 8 | 9 | Weak: Weak.hs 10 | ghc $(GHCOPTS) --make Weak.hs -o Weak 11 | Strong: Strong.hs 12 | ghc $(GHCOPTS) --make Strong.hs -o Strong 13 | 14 | -------------------------------------------------------------------------------- /haskell/Strong.hs: -------------------------------------------------------------------------------- 1 | -- Strong normalization for the untyped lambda calculus, using a HOAS deep 2 | -- embedding 3 | -- 4 | -- The purpose of this benchmark is to compare my interpreter with something 5 | -- implemented by compiling to Haskell. 
6 | -- 7 | -- Computes the same thing as bench.lc 8 | 9 | {-# LANGUAGE BangPatterns, BlockArguments #-} 10 | 11 | type Var = Int 12 | 13 | data Val = Fun !(Val -> Val) | VVar !Var | VApp !Val Val 14 | data NF = Lam NF | NE Var [NF] deriving (Show, Eq) 15 | 16 | infixl 1 $$ 17 | ($$) :: Val -> Val -> Val 18 | Fun f $$ x = f x 19 | v $$ x = VApp v x 20 | {-# INLINE ($$) #-} 21 | 22 | λ = Fun 23 | 24 | n2 = λ\s -> λ\z -> s $$ (s $$ z) 25 | n3 = λ\s -> λ\z -> s $$ (s $$ (s $$ z)) 26 | n4 = λ\s -> λ\z -> s $$ (s $$ (s $$ (s $$ z))) 27 | 28 | theTerm = minus $$ bigNumber $$ bigNumber 29 | bigNumber = n4 $$ n2 $$ n3 30 | minus = λ\n -> λ\m -> λ\s -> λ\z -> 31 | n $$ (λ\y -> λ\k -> k $$ (s $$ (y $$ λ\a -> λ\b -> a)) $$ y) 32 | $$ (λ\k -> k $$ z $$ (λ\_ -> z)) 33 | $$ (m $$ (λ\k -> λ\a -> λ\b -> b $$ k) $$ (λ\a -> λ\b -> a)) 34 | 35 | readback :: Var -> Val -> NF 36 | readback !lvl (Fun f) = Lam $ readback (lvl+1) $ f (VVar lvl) 37 | readback !lvl (VVar v) = NE v [] 38 | readback !lvl (VApp f x) = 39 | let NE v sp = readback lvl f in NE v (readback lvl x:sp) 40 | 41 | main = do 42 | putStrLn "Normalizing the term..." 43 | print $ readback 0 theTerm 44 | 45 | -------------------------------------------------------------------------------- /haskell/Weak.hs: -------------------------------------------------------------------------------- 1 | -- Weak normalization for the untyped lambda calculus. 2 | -- 3 | -- The purpose of this benchmark is to compare my interpreter with GHC, the 4 | -- highest-performance lazy evaluation runtime out there. 5 | -- 6 | -- If one reimplemented all of GHC to be strongly-normalizing, this gives an 7 | -- indication of how fast it could theoretically be. 
8 | 9 | {-# LANGUAGE BangPatterns, BlockArguments #-} 10 | 11 | import Data.Coerce 12 | import Unsafe.Coerce 13 | 14 | newtype L = L (L -> L) 15 | 16 | λ = coerce :: (L -> L) -> L 17 | λ2 = coerce :: (L -> L -> L) -> L 18 | λ3 = coerce :: (L -> L -> L -> L) -> L 19 | λ4 = coerce :: (L -> L -> L -> L -> L) -> L 20 | 21 | ap = coerce :: L -> L -> L 22 | ap2 = coerce :: L -> L -> L -> L 23 | ap3 = coerce :: L -> L -> L -> L -> L 24 | 25 | n2 = λ2\ s z -> ap s (ap s z) 26 | n3 = λ2\ s z -> ap s (ap s (ap s z)) 27 | n4 = λ2\ s z -> ap s (ap s (ap s (ap s z))) 28 | 29 | {-# NOINLINE theTerm #-} 30 | theTerm :: L 31 | theTerm = ap2 minus bigNumber bigNumber 32 | 33 | bigNumber = ap2 n4 n2 n3 34 | 35 | minus = λ4\ n m s z -> 36 | ap3 n 37 | (λ2\ y k -> ap2 k (ap s (ap y (λ2\ a b -> a))) y) 38 | (λ\ k -> ap2 k z (λ\ _ -> z)) 39 | (ap2 m (λ3\ k a b -> ap b k) (λ2\ a b -> a)) 40 | 41 | 42 | -- Really unsafe part =| 43 | -- Converts Church numerals to ints, and other terms to UB 44 | churchToInt :: L -> Int 45 | churchToInt l = (unsafeCoerce l :: (Int -> Int) -> Int -> Int) succ 0 46 | 47 | main = putStrLn $ "The church numerals represents " ++ show (churchToInt theTerm) 48 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "frontend.h" 5 | #include "backend.h" 6 | #include "runtime/normalize.h" 7 | 8 | int main(int argc, const char **argv) { 9 | const char *source = argc >= 2 ? argv[1] : "λ x. x"; 10 | 11 | printf("Input: %s\n", source); 12 | printf("Compiling... "); 13 | fflush(stdout); 14 | 15 | ir term = parse(source); 16 | void *code = compile_toplevel(term); 17 | compile_finalize(); 18 | free_ir(); 19 | 20 | printf("Compiled! 
Normalizing...\n"); 21 | unsigned int *nf = normalize(code); 22 | 23 | printf("Normal form: "); 24 | print_normal_form(nf); 25 | 26 | free(nf); 27 | } 28 | 29 | -------------------------------------------------------------------------------- /runtime/builtins.c: -------------------------------------------------------------------------------- 1 | #include "gc.h" 2 | #include "builtins.h" 3 | 4 | void rt_gc(void) { 5 | minor_gc(); 6 | } 7 | 8 | void rt_too_few_args(void) { 9 | if (argc == 0) 10 | return; 11 | size_t size = argc + 3; 12 | obj *pap = alloc(rt_pap_entry, size); 13 | *INFO_WORD(pap) = (struct info_word) { .size = size, .var = 0 }; 14 | pap->contents[1] = (word) self; 15 | memcpy(&pap->contents[2], data_stack, sizeof(word[argc])); 16 | data_stack += argc; 17 | argc = 0; 18 | self = pap; 19 | } 20 | 21 | void rt_update_thunk(void) { 22 | assert(argc == 0); 23 | obj *thunk = *data_stack++; 24 | thunk->entrypoint = rt_ref_entry; 25 | thunk->contents[0] = (word) self; 26 | if (!IS_YOUNG(thunk) && IS_YOUNG(self)) 27 | write_barrier(thunk); 28 | } 29 | 30 | void rt_adjacent_update_frames(void) { 31 | obj *thunk = *data_stack; 32 | thunk->entrypoint = rt_ref_entry; 33 | thunk->contents[0] = (word) self; 34 | if (!IS_YOUNG(thunk) && IS_YOUNG(self)) 35 | write_barrier(thunk); 36 | *data_stack = self; 37 | } 38 | 39 | /************** Built-in heap objects *************/ 40 | 41 | #define STRINGIFY(x) #x 42 | #define ENTRY(name, size, tag) \ 43 | asm (\ 44 | " .text\n" \ 45 | " .globl " name "\n" \ 46 | " .int " STRINGIFY(size) "\n" \ 47 | " .int " STRINGIFY(tag) "\n" \ 48 | name ":\n" \ 49 | " jmp " name "_impl\n" \ 50 | ) 51 | 52 | ENTRY("rt_ref_entry", 2, REF); 53 | void rt_ref_entry_impl(void) { 54 | self = (obj *) self->contents[0]; 55 | // Hope for a tail call :/ 56 | return self->entrypoint(); 57 | } 58 | 59 | ENTRY("rt_forward_entry", 2, FORWARD); 60 | void rt_forward_entry_impl(void) { 61 | failwith("unreachable: forward objects only exist during GC\n"); 62 
| } 63 | 64 | ENTRY("rt_pap_entry", 0, PAP); 65 | void rt_pap_entry_impl(void) { 66 | // Partial application: push the arguments onto the stack and tail call the 67 | // contained function 68 | obj *fun = (obj *) self->contents[1]; 69 | size_t extra_args = INFO_WORD(self)->size - 3; 70 | argc += extra_args; 71 | data_stack -= extra_args; 72 | memcpy(data_stack, &self->contents[2], sizeof(word[extra_args])); 73 | self = fun; 74 | // Hope for a tail call :/ 75 | return self->entrypoint(); 76 | } 77 | 78 | ENTRY("rt_rigid_entry", 0, RIGID); 79 | void rt_rigid_entry_impl(void) { 80 | // Rigid term: allocate a new rigid term with the new arguments 81 | if (argc == 0) 82 | return; 83 | 84 | // TODO: make overflow checks more legit (should have a max term size) 85 | uint32_t new_size = INFO_WORD(self)->size + argc; 86 | if (new_size < argc) failwith("overflow"); 87 | 88 | obj *new = alloc(rt_rigid_entry, new_size); 89 | *INFO_WORD(new) = (struct info_word) { 90 | .size = new_size, 91 | .var = INFO_WORD(self)->var, 92 | }; 93 | 94 | size_t self_argc = INFO_WORD(self)->size - 2; 95 | memcpy(&new->contents[1], &self->contents[1], sizeof(word[self_argc])); 96 | memcpy(&new->contents[1 + self_argc], data_stack, sizeof(word[argc])); 97 | data_stack += argc; 98 | argc = 0; 99 | 100 | self = new; 101 | } 102 | 103 | ENTRY("rt_blackhole_entry", 0, BLACKHOLE); 104 | void rt_blackhole_entry_impl(void) { 105 | failwith("Black hole (infinite loop?)\n"); 106 | } 107 | -------------------------------------------------------------------------------- /runtime/builtins.h: -------------------------------------------------------------------------------- 1 | void rt_gc(void); 2 | void rt_too_few_args(void); 3 | void rt_update_thunk(void); 4 | void rt_adjacent_update_frames(void); 5 | 6 | void rt_ref_entry(void); 7 | void rt_forward_entry(void); 8 | void rt_pap_entry(void); 9 | void rt_rigid_entry(void); 10 | void rt_blackhole_entry(void); 11 | 12 | 
-------------------------------------------------------------------------------- /runtime/data_layout.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_LAYOUT_H 2 | #define DATA_LAYOUT_H 1 3 | 4 | #include 5 | 6 | /************* Object layout ***********/ 7 | 8 | typedef size_t word; 9 | typedef uint32_t halfword; 10 | 11 | typedef struct obj { 12 | void (*entrypoint)(void); 13 | word contents[]; 14 | } obj; 15 | 16 | // Can't be an enum since I need to include them in inline assembly 17 | #define FORWARD 0 18 | #define REF 1 19 | #define FUN 2 20 | #define PAP 3 21 | #define RIGID 4 22 | #define THUNK 5 23 | #define BLACKHOLE 6 24 | 25 | struct gc_data { 26 | /** Size of the whole object in words. 27 | * If 0, the first word of contents is a `struct info_word` that contains 28 | * the true size 29 | */ 30 | uint32_t size; 31 | uint32_t tag; 32 | }; 33 | #define GC_DATA(o) \ 34 | ((struct gc_data *) ((size_t) (o->entrypoint) - sizeof(struct gc_data))) 35 | 36 | struct info_word { 37 | /** Size of the whole object in words */ 38 | uint32_t size; 39 | /** only in heap objects representing rigid terms */ 40 | uint32_t var; 41 | }; 42 | #define INFO_WORD(o) ((struct info_word *) &o->contents[0]) 43 | 44 | #endif // DATA_LAYOUT_H 45 | -------------------------------------------------------------------------------- /runtime/gc.c: -------------------------------------------------------------------------------- 1 | #include "gc.h" 2 | #include "builtins.h" 3 | 4 | enum gc_type { MAJOR, MINOR }; 5 | static void major_gc(void); 6 | static obj *copy_to_old_space(obj *o, enum gc_type type); 7 | static void process_copy_stack(enum gc_type type); 8 | static void collect_roots(enum gc_type type); 9 | 10 | static word *old_start; 11 | static word *old_top; 12 | static word *other_old_start; 13 | static size_t old_space_size; 14 | static size_t other_old_space_size; 15 | 16 | // Remembered set: a growable (malloc'd) vector of old 
objects 'REF ptr' that 17 | // point to the nursery 18 | static obj **remembered_set; 19 | static size_t remembered_set_size; 20 | static size_t remembered_set_cap; 21 | 22 | // Copy stack: during GC, a worklist of new to-space objects whose fields still 23 | // point to the from-space 24 | static obj **copy_stack; 25 | static size_t copy_stack_size; 26 | static size_t copy_stack_cap; 27 | 28 | static obj **data_stack_end; 29 | 30 | void gc_init(void) { 31 | copy_stack = (obj **) malloc(4096); 32 | copy_stack_size = 0; 33 | copy_stack_cap = 4096 / sizeof(obj *); 34 | 35 | remembered_set = (obj **) malloc(4096); 36 | remembered_set_size = 0; 37 | remembered_set_cap = 4096 / sizeof(obj *); 38 | 39 | nursery_start = (word *) malloc(NURSERY_BYTES); 40 | nursery_top = nursery_start + NURSERY_BYTES / sizeof(word); 41 | 42 | old_start = (word *) malloc(2 * NURSERY_BYTES); 43 | old_top = old_start + 2 * NURSERY_BYTES / sizeof(word); 44 | other_old_start = NULL; 45 | old_space_size = other_old_space_size = 2 * NURSERY_BYTES; 46 | 47 | obj **data_stack_start = malloc(DATA_STACK_BYTES); 48 | data_stack = data_stack_end = data_stack_start + DATA_STACK_BYTES / sizeof(obj *); 49 | } 50 | 51 | void minor_gc(void) { 52 | // conservative heap check 53 | if ((size_t) old_top - (size_t) old_start < NURSERY_BYTES) 54 | return major_gc(); 55 | 56 | DEBUG("Minor GC\n"); 57 | 58 | collect_roots(MINOR); 59 | 60 | // Collect the remembered set 61 | obj **remembered_set_end = remembered_set + remembered_set_size; 62 | for (obj **o = remembered_set; o < remembered_set_end; o++) { 63 | // old_obj is 'REF ptr' where ptr points to the nursery 64 | obj *old_obj = *o; 65 | assert(old_obj->entrypoint == rt_ref_entry); 66 | old_obj->contents[0] = 67 | (word) copy_to_old_space((obj *) old_obj->contents[0], MINOR); 68 | } 69 | remembered_set_size = 0; 70 | 71 | process_copy_stack(MINOR); 72 | 73 | nursery_top = nursery_start + NURSERY_BYTES / sizeof(word); 74 | } 75 | 76 | void major_gc(void) { 77 | 
DEBUG("Major GC: "); 78 | 79 | if (!other_old_start) 80 | other_old_start = (word *) malloc(other_old_space_size); 81 | word *from_space = old_start; 82 | size_t from_space_size = old_space_size; 83 | old_start = other_old_start; 84 | old_space_size = other_old_space_size; 85 | old_top = old_start + old_space_size / sizeof(word); 86 | other_old_start = NULL; 87 | 88 | collect_roots(MAJOR); 89 | process_copy_stack(MAJOR); 90 | 91 | // set a new size for the old space if needed 92 | size_t used_space = (size_t) old_start + old_space_size - (size_t) old_top; 93 | if (used_space + NURSERY_BYTES > old_space_size) 94 | other_old_space_size *= 2; 95 | 96 | if (from_space_size < other_old_space_size) 97 | free(from_space); 98 | else 99 | other_old_start = from_space; 100 | 101 | // reset the nursery and ignore the remembered set 102 | remembered_set_size = 0; 103 | nursery_top = nursery_start + NURSERY_BYTES / sizeof(word); 104 | 105 | DEBUG("copied %zu bytes\n", used_space); 106 | } 107 | 108 | static void collect_roots(enum gc_type type) { 109 | // Collect self 110 | self = copy_to_old_space(self, type); 111 | 112 | // Collect data stack 113 | for (obj **root = data_stack; root < data_stack_end; root++) 114 | *root = copy_to_old_space(*root, type); 115 | } 116 | 117 | static obj *copy_to_old_space(obj *o, enum gc_type type) { 118 | if (type == MINOR && !IS_YOUNG(o)) 119 | return o; 120 | 121 | if (o->entrypoint == rt_forward_entry) { 122 | return (obj *) o->contents[0]; 123 | } else if (o->entrypoint == rt_ref_entry) { 124 | // Compress REF indirections 125 | obj *new = copy_to_old_space((obj *) o->contents[0], type); 126 | o->contents[0] = (word) new; 127 | return new; 128 | } else { 129 | size_t size = GC_DATA(o)->size; 130 | if (!size) size = INFO_WORD(o)->size; 131 | obj *new = (obj *) (old_top -= size); 132 | memcpy(new, o, sizeof(word[size])); 133 | 134 | // set up forwarding 135 | o->entrypoint = rt_forward_entry; 136 | o->contents[0] = (word) new; 137 | 138 | // 
add to the copy stack 139 | if (copy_stack_size == copy_stack_cap) { 140 | size_t new_cap = 2 * copy_stack_cap + 1; 141 | copy_stack = reallocarray(copy_stack, new_cap, sizeof(obj *)); 142 | copy_stack_cap = new_cap; 143 | } 144 | copy_stack[copy_stack_size++] = new; 145 | 146 | return new; 147 | } 148 | } 149 | 150 | static void process_copy_stack(enum gc_type type) { 151 | while (copy_stack_size > 0) { 152 | obj *o = copy_stack[--copy_stack_size]; 153 | word *start; 154 | size_t size = GC_DATA(o)->size; 155 | if (size) { 156 | // Contains size - 1 many GC pointers 157 | start = &o->contents[0]; 158 | } else { 159 | // Contains size - 2 many GC pointers 160 | size = INFO_WORD(o)->size; 161 | start = &o->contents[1]; 162 | } 163 | word *end = &o->contents[size - 1]; 164 | for (word *ptr = start; ptr < end; ptr++) 165 | *ptr = (word) copy_to_old_space((obj *) *ptr, type); 166 | } 167 | } 168 | 169 | // Write barrier: push thunk to the remembered set 170 | void write_barrier(obj *thunk) { 171 | if (remembered_set_size == remembered_set_cap) { 172 | size_t new_cap = remembered_set_size * 2; 173 | remembered_set = reallocarray(remembered_set, new_cap, sizeof(obj *)); 174 | remembered_set_cap = new_cap; 175 | } 176 | remembered_set[remembered_set_size++] = thunk; 177 | } 178 | 179 | -------------------------------------------------------------------------------- /runtime/gc.h: -------------------------------------------------------------------------------- 1 | #include "runtime.h" 2 | 3 | void gc_init(void); 4 | 5 | void minor_gc(void); 6 | void write_barrier(obj *thunk); 7 | 8 | // This is only called from C code; generated code has this inlined 9 | static inline obj *alloc(void (*entrypoint)(void), size_t size) { 10 | // TODO: have a max term size somewhere 11 | assert(sizeof(word[size]) < 10240); 12 | word *ptr = nursery_top - size; 13 | if (ptr < nursery_start) { 14 | minor_gc(); 15 | ptr = nursery_top - size; 16 | } 17 | nursery_top = ptr; 18 | obj *o = (obj *) 
ptr; 19 | o->entrypoint = entrypoint; 20 | return o; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /runtime/normalize.c: -------------------------------------------------------------------------------- 1 | #include "gc.h" 2 | #include "builtins.h" 3 | #include "normalize.h" 4 | 5 | static unsigned int *buf; 6 | static size_t buf_len; 7 | static size_t buf_cap; 8 | 9 | static void push_buf(unsigned int x) { 10 | if (buf_len == buf_cap) { 11 | buf_cap *= 2; 12 | buf = reallocarray(buf, buf_cap, sizeof(unsigned int)); 13 | } 14 | buf[buf_len++] = x; 15 | } 16 | 17 | // Pop an object from the data stack and write its normal form to the buffer 18 | static void quote(void); 19 | 20 | // Apply 'self' to an argument, returning the value in 'self' 21 | static void apply(obj *arg); 22 | // Evaluate 'self', returning the value in 'self' 23 | static void eval(void); 24 | 25 | struct saved_regs { 26 | obj *self; 27 | obj **data_stack; 28 | word *nursery_top; 29 | word *nursery_start; 30 | size_t argc; 31 | }; 32 | 33 | static struct saved_regs save_regs(void); 34 | static void restore_regs(struct saved_regs); 35 | 36 | // Caller is normal code, calls the runtime code 37 | // Need to save/restore the callee-saved registers and setup the runtime 38 | unsigned int *normalize(void (*entrypoint)(void)) { 39 | struct saved_regs regs = save_regs(); 40 | gc_init(); 41 | buf_len = 0; 42 | buf_cap = 16; 43 | buf = malloc(sizeof(unsigned int[buf_cap])); 44 | 45 | obj *main = alloc(entrypoint, 2); 46 | *INFO_WORD(main) = (struct info_word) { .size = 2, .var = 0 }; 47 | self = main; 48 | 49 | quote(); 50 | 51 | restore_regs(regs); 52 | return buf; 53 | } 54 | 55 | static struct saved_regs save_regs(void) { 56 | return (struct saved_regs) { 57 | .self = self, 58 | .data_stack = data_stack, 59 | .nursery_top = nursery_top, 60 | .nursery_start = nursery_start, 61 | .argc = argc, 62 | }; 63 | } 64 | static void restore_regs(struct saved_regs 
regs) { 65 | self = regs.self; 66 | data_stack = regs.data_stack; 67 | nursery_top = regs.nursery_top; 68 | nursery_start = regs.nursery_start; 69 | argc = regs.argc; 70 | } 71 | 72 | // Apply 'self' to an argument, returning the value in 'self' 73 | static void apply(obj *arg) { 74 | obj *blackhole_to_update = alloc(rt_blackhole_entry, 2); 75 | *INFO_WORD(blackhole_to_update) = (struct info_word) { .size = 2, .var = 0 }; 76 | *--data_stack = blackhole_to_update; 77 | *--data_stack = arg; 78 | argc = 1; 79 | self->entrypoint(); 80 | rt_update_thunk(); 81 | } 82 | // Evaluate 'self', returning the value in 'self' 83 | static void eval(void) { 84 | switch (GC_DATA(self)->tag) { 85 | case PAP: 86 | case RIGID: 87 | case FUN: 88 | return; 89 | case REF: 90 | self = (obj *) self->contents[0]; 91 | return eval(); 92 | case THUNK: 93 | obj *blackhole_to_update = alloc(rt_blackhole_entry, 2); 94 | *INFO_WORD(blackhole_to_update) = (struct info_word) { .size = 2, .var = 0 }; 95 | *--data_stack = blackhole_to_update; 96 | argc = 0; 97 | self->entrypoint(); 98 | rt_update_thunk(); 99 | return; 100 | default: 101 | failwith("unreachable"); 102 | } 103 | } 104 | 105 | // Write the normal form of 'self' to the buffer 106 | static void quote(void) { 107 | // the next variable id to use for a lambda 108 | unsigned int next_var = 0; 109 | 110 | eval(); 111 | 112 | // Use the data stack as a worklist 113 | obj **data_stack_end = data_stack; 114 | for (;;) { 115 | switch (GC_DATA(self)->tag) { 116 | case FUN: 117 | case PAP: 118 | { 119 | // Function f: λ x. 
quote (apply f x) 120 | unsigned int var_id = next_var++; 121 | push_buf(LAM); 122 | push_buf(var_id); 123 | obj *x = alloc(rt_rigid_entry, 2); 124 | *INFO_WORD(x) = (struct info_word) { .size = 2, .var = var_id }; 125 | apply(x); 126 | continue; 127 | } 128 | case RIGID: 129 | { 130 | // Rigid term head args: head (map quote args) 131 | unsigned int argc = INFO_WORD(self)->size - 2; 132 | unsigned int var_id = INFO_WORD(self)->var; 133 | push_buf(NE); 134 | push_buf(argc); 135 | push_buf(var_id); 136 | data_stack -= argc; 137 | memcpy(data_stack, &self->contents[1], argc * sizeof(obj *)); 138 | 139 | // Pop and evaluate the next item off the stack 140 | if (data_stack == data_stack_end) 141 | return; 142 | self = *data_stack++; 143 | eval(); 144 | continue; 145 | } 146 | default: 147 | failwith("unreachable"); 148 | } 149 | } 150 | } 151 | 152 | 153 | /***************** Printing ****************/ 154 | 155 | static unsigned int *print(unsigned int *nf, bool parens); 156 | static unsigned int *print_rest_of_lam(unsigned int *nf); 157 | 158 | void print_normal_form(unsigned int *nf) { 159 | print(nf, false); 160 | printf("\n"); 161 | } 162 | 163 | static void print_var(unsigned int var) { 164 | // TODO: there's probably a nicer way to print variables 165 | if (var < 26) 166 | printf("%c", 'a' + var); 167 | else 168 | printf("v%u", var); 169 | } 170 | 171 | static unsigned int *print(unsigned int *nf, bool parens) { 172 | switch (*nf) { 173 | case LAM: 174 | if (parens) printf("("); 175 | printf("λ"); 176 | nf = print_rest_of_lam(nf); 177 | if (parens) printf(")"); 178 | return nf; 179 | case NE: 180 | nf++; 181 | unsigned int argc = *nf++; 182 | unsigned int var = *nf++; 183 | if (parens && argc) printf("("); 184 | print_var(var); 185 | for (unsigned i = 0; i < argc; i++) { 186 | printf(" "); 187 | nf = print(nf, true); 188 | } 189 | if (parens && argc) printf(")"); 190 | return nf; 191 | default: 192 | failwith("unreachable"); 193 | } 194 | } 195 | 196 | static 
unsigned int *print_rest_of_lam(unsigned int *nf) { 197 | switch (*nf) { 198 | case LAM: 199 | nf++; 200 | printf(" "); 201 | print_var(*nf++); 202 | return print_rest_of_lam(nf); 203 | case NE: 204 | printf(". "); 205 | return print(nf, false); 206 | default: 207 | failwith("unreachable"); 208 | } 209 | } 210 | 211 | 212 | /************** Converting from church numerals ************/ 213 | 214 | size_t parse_church_numeral(unsigned int *nf) { 215 | # define CONSUME(x) if (*nf++ != x) failwith("Not a church numeral") 216 | CONSUME(LAM); 217 | unsigned s = *nf++; 218 | CONSUME(LAM); 219 | unsigned z = *nf++; 220 | size_t n = 0; 221 | for(;;) { 222 | CONSUME(NE); 223 | unsigned argc = *nf++; 224 | if (argc == 0) { 225 | CONSUME(z); 226 | return n; 227 | } else if (argc == 1) { 228 | CONSUME(s); 229 | n++; 230 | continue; 231 | } else { 232 | failwith("Not a church numeral"); 233 | } 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /runtime/normalize.h: -------------------------------------------------------------------------------- 1 | /** β-normalization of lambda terms 2 | * 3 | * It pre-order serializes the normal form as a malloc'd vector of unsigned 4 | * ints, with this layout: 5 | * nf ::= LAM var nf | NE argc var (argc nf's) 6 | * argc ::= an integer number of arguments 7 | * var ::= an integer variable id 8 | * 9 | */ 10 | 11 | enum nf_tag { LAM, NE }; 12 | 13 | // entrypoint is the entry code for a thunk with no environment representing the 14 | // lambda term 15 | unsigned int *normalize(void (*entrypoint)(void)); 16 | 17 | void print_normal_form(unsigned int *nf); 18 | 19 | size_t parse_church_numeral(unsigned int *nf); 20 | 21 | -------------------------------------------------------------------------------- /runtime/runtime.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 
"data_layout.h" 9 | 10 | /************* Random utils ***********/ 11 | 12 | #define failwith(...) do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) 13 | 14 | // #define DEBUG(...) fprintf(stderr, __VA_ARGS__) 15 | #define DEBUG(...) ((void) 0) 16 | 17 | /************** Registers **************/ 18 | 19 | // The function/thunk currently being evaluated 20 | register obj *self asm ("rbx"); 21 | 22 | // Data stack, grows downwards. I assume it never overflows 23 | // It contains GC roots 24 | #define DATA_STACK_BYTES (8*1024*1024) 25 | register obj **data_stack asm ("r12"); 26 | 27 | // Simple generational semispace GC 28 | // Allocations go downards 29 | #define NURSERY_BYTES (3*1024*1024) // 3M nursery 30 | register word *nursery_top asm ("r13"); 31 | register word *nursery_start asm ("r14"); 32 | #define IS_YOUNG(o) ((size_t) (o) - (size_t) nursery_start < NURSERY_BYTES) 33 | 34 | register size_t argc asm ("r15"); 35 | 36 | --------------------------------------------------------------------------------