├── .gitignore └── milestones ├── m6 ├── Makefile ├── README.md ├── test.c └── test.js ├── m5 ├── Makefile ├── README.md ├── test.js ├── test.c ├── externref-in-llvm.md └── walloc.c ├── m0 ├── Makefile ├── test.c ├── test.js ├── README.md └── walloc.c ├── m1 ├── Makefile ├── test.c ├── test.js ├── README.md └── walloc.c ├── m4 ├── Makefile ├── test.wat ├── test.ll ├── test.S ├── test.js └── README.md ├── m2 ├── Makefile ├── test.wat ├── test.S ├── README.md ├── test.js └── walloc.c ├── m3 ├── Makefile ├── test.js ├── README.md ├── test.wat ├── test.ll ├── test.S └── walloc.c ├── lib.js ├── getting-started.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | test.o 2 | test.wasm 3 | walloc.o 4 | -------------------------------------------------------------------------------- /milestones/m6/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f test.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m5/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -mreference-types -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o walloc.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f *.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m0/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o walloc.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f test.o walloc.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m1/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js 
../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o walloc.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f test.o walloc.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m6/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 6: Collectable cycle: C allocates GC-managed memory 2 | 3 | [ previous: [m5](../m5/); next: [m7](../m7/) ] 4 | 5 | ## Overview 6 | 7 | This milestone represents a minimal solution to the cycle problem, but 8 | from C instead of from raw WebAssembly. Instead of using classes as 9 | data abstractions, the C here will use the raw allocation and accessor 10 | imports defined in [milestone 4](../m4). 11 | 12 | ## Interesting files 13 | 14 | The difference from [m4](../m4) is that instead of 15 | [`test.wat`](../m4/test.wat), we have [`test.c`](./test.c). 16 | 17 | ## Results 18 | 19 | No results yet; this milestone is in progress. 20 | -------------------------------------------------------------------------------- /milestones/m4/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LLC?=llc 3 | LD?=wasm-ld 4 | V8?=d8 5 | JSC?=jsc 6 | SPIDERMONKEY?=js 7 | 8 | all: v8.test jsc.test spidermonkey.test 9 | 10 | v8.test: JS?=$(V8) --expose-gc 11 | jsc.test: JS?=$(JSC) 12 | spidermonkey.test: JS?=$(SPIDERMONKEY) 13 | 14 | .PHONY: %.test 15 | %.test: test.js ../lib.js test.wasm 16 | $(JS) $< 17 | 18 | test.wasm: test.o 19 | $(LD) --no-entry --allow-undefined --import-memory -o $@ $^ 20 | 21 | test.o: test.ll 22 | $(LLC) -O2 -mattr=+reference-types -filetype=obj -o $@ $< 23 | 24 | # Alternately: 25 | #test.o: test.wat 26 | # $(WAT2WASM) --enable-all --relocatable -o $@ $< 27 | # or even 28 | #test.o: test.S 29 | # $(CC) -Oz -mreference-types --target=wasm32 -nostdlib -c -o $@ $< 30 | 31 | .PHONY: clean 32 | clean: 33 | rm -f test.o test.wasm 34 | -------------------------------------------------------------------------------- /milestones/m2/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | test.wasm: test.o walloc.o 18 | $(LD) --no-entry --allow-undefined --import-memory -o $@ $^ 19 | 20 | walloc.o: walloc.c 21 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 22 | 23 | test.o: test.S 24 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 25 | 26 | # Alternately: 27 | # test.o: test.wat 28 | # $(WAT2WASM) --enable-all --relocatable -o $@ $< 29 | 30 | .PHONY: clean 31 | clean: 32 | rm -f test.o walloc.o test.wasm 33 | -------------------------------------------------------------------------------- /milestones/m3/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LLC?=llc 3 | LD?=wasm-ld 4 | V8?=d8 5 | JSC?=jsc 6 | SPIDERMONKEY?=js 7 | WAT2WASM?=wat2wasm 8 | 9 | all: v8.test jsc.test spidermonkey.test 10 | 11 | v8.test: JS?=$(V8) --expose-gc 12 | jsc.test: JS?=$(JSC) 13 | spidermonkey.test: JS?=$(SPIDERMONKEY) 14 | 15 | .PHONY: %.test 16 
| %.test: test.js ../lib.js test.wasm 17 | $(JS) $< 18 | 19 | test.wasm: test.o walloc.o 20 | $(LD) --no-entry --import-memory -o $@ $^ 21 | 22 | walloc.o: walloc.c 23 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 24 | 25 | test.o: test.ll 26 | $(LLC) -O2 --cost-kind=code-size -mattr=+reference-types -filetype=obj -o $@ $< 27 | 28 | # Alternately: 29 | #test.o: test.wat 30 | # $(WAT2WASM) --enable-all --relocatable -o $@ $< 31 | # or even 32 | #test.o: test.S 33 | # $(CC) -Oz -mreference-types --target=wasm32 -nostdlib -c -o $@ $< 34 | 35 | .PHONY: clean 36 | clean: 37 | rm -f test.o walloc.o test.wasm 38 | -------------------------------------------------------------------------------- /milestones/m4/test.wat: -------------------------------------------------------------------------------- 1 | (func $gc_alloc (import "rt" "gc_alloc") (param i32 i32) (result externref)) 2 | (func $gc_ref_obj (import "rt" "gc_ref_obj") (param externref i32) (result externref)) 3 | (func $gc_set_obj (import "rt" "gc_set_obj") (param externref i32 externref)) 4 | 5 | (func $invoke (import "rt" "invoke") (param externref)) 6 | 7 | (func $make_obj (export "make_obj") (result externref) 8 | (local $obj externref) 9 | (local.set $obj (call $gc_alloc (i32.const 1) (i32.const 0))) 10 | (call $gc_set_obj (local.get $obj) (i32.const 0) (ref.null extern)) 11 | (local.get $obj)) 12 | 13 | (func $attach_callback (export "attach_callback") 14 | (param $obj externref) (param $callback externref) 15 | (call $gc_set_obj (local.get $obj) (i32.const 0) (local.get $callback))) 16 | 17 | (func $invoke_callback (export "invoke_callback") 18 | (param $obj externref) 19 | (call $invoke (call $gc_ref_obj (local.get $obj) (i32.const 0)))) 20 | -------------------------------------------------------------------------------- /milestones/m5/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 5: C manages side table of JavaScript objects 2 | 3 | [ previous: [m4](../m4/); next: [m6](../m6/) ] 4 | 5 | ## Overview 6 | 7 | Having shown where we're going and that milestone 4 solves the cycle 8 | problem, we need to return to the C++-to-WebAssembly problem: we are 9 | lacking a way to represent `externref` values in C++, and support in the 10 | LLVM compiler. As a first step in this direction, we're going to 11 | translate milestone 3's test program back to an extended version of C 12 | that has rudimentary support for reference types. 13 | 14 | See the [Externref in LLVM](./externref-in-llvm.md) design document 15 | for the high-level overview on how we are going to extend LLVM to 16 | support `externref`. 17 | 18 | ## Details 19 | 20 | The difference from [m3](../m3) is that instead of 21 | [`test.wat`](../m3/test.wat), we have [`test.c`](./test.c). 22 | 23 | Running `make` will build `test.wasm` and run the test as in m3. 24 | 25 | ## Results 26 | 27 | No results yet; this milestone is in progress. 
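In the meantime, to give a flavor of the extension, here is a minimal sketch of the kind of C that [`test.c`](./test.c) relies on: the `__externref_t` type plus the `__builtin_wasm_table_*` and `__builtin_wasm_ref_null_extern` builtins. The `intern` and `lookup` helpers below are illustrative names only; the real `test.c` additionally maintains a freelist so that handles can be released and reused.

```c
typedef __externref_t externref;  // opaque host (JS) reference; cannot be stored in linear memory

static externref objects[0];      // declares a WebAssembly table of externref values

// Intern a host value into the table, returning an integer handle that
// *can* be stored in linear memory.
static int intern(externref obj) {
  int handle = __builtin_wasm_table_grow(objects, obj, 1);
  if (handle == -1) __builtin_trap();
  return handle;
}

static externref lookup(int handle) {
  return __builtin_wasm_table_get(objects, handle);
}
```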
28 | -------------------------------------------------------------------------------- /milestones/m6/test.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | typedef __externref_t externref; 16 | 17 | void WASM_IMPORT(rt, invoke)(externref); 18 | externref WASM_IMPORT(rt, gc_alloc)(size_t nobjs, size_t nbytes); 19 | externref WASM_IMPORT(rt, gc_ref_obj)(externref obj, size_t i); 20 | void WASM_IMPORT(rt, gc_set_obj)(externref obj, size_t i, externref val); 21 | 22 | // Useful for debugging. 23 | void WASM_IMPORT(env, wasm_log)(void*); 24 | void WASM_IMPORT(env, wasm_logi)(int); 25 | 26 | externref WASM_EXPORT(make_obj)() { 27 | externref ret = gc_alloc(1, 0); 28 | gc_set_obj(ret, 0, __builtin_wasm_ref_null(externref)); 29 | return ret; 30 | } 31 | 32 | void WASM_EXPORT(attach_callback)(externref obj, externref callback) { 33 | gc_set_obj(obj, 0, callback); 34 | } 35 | 36 | void WASM_EXPORT(invoke_callback)(externref obj) { 37 | invoke(gc_ref_obj(obj, 0)); 38 | } 39 | -------------------------------------------------------------------------------- /milestones/m0/test.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | void WASM_IMPORT(rt, release)(uintptr_t); 16 | void WASM_IMPORT(rt, invoke)(int32_t); 17 | void WASM_IMPORT(rt, out_of_memory)(void); 18 | 19 | // Useful for debugging.
20 | void WASM_IMPORT(env, wasm_log)(void*); 21 | void WASM_IMPORT(env, wasm_logi)(int); 22 | 23 | void *malloc(size_t size); 24 | void free(void *p); 25 | 26 | struct obj { 27 | uintptr_t callback_handle; 28 | }; 29 | 30 | struct obj* WASM_EXPORT(make_obj)() { 31 | struct obj* obj = malloc(sizeof(struct obj)); 32 | if (!obj) { 33 | out_of_memory(); 34 | __builtin_trap(); 35 | } 36 | obj->callback_handle = -1; 37 | return obj; 38 | } 39 | 40 | void WASM_EXPORT(free_obj)(struct obj* obj) 41 | { 42 | release(obj->callback_handle); 43 | free(obj); 44 | } 45 | 46 | void WASM_EXPORT(attach_callback)(struct obj* obj, uintptr_t callback_handle) 47 | { 48 | release(obj->callback_handle); 49 | obj->callback_handle = callback_handle; 50 | } 51 | 52 | void WASM_EXPORT(invoke_callback)(struct obj* obj) 53 | { 54 | invoke(obj->callback_handle); 55 | } 56 | -------------------------------------------------------------------------------- /milestones/m1/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | void WASM_IMPORT(rt, release)(uintptr_t); 16 | void WASM_IMPORT(rt, invoke)(int32_t); 17 | void WASM_IMPORT(rt, out_of_memory)(void); 18 | 19 | // Useful for debugging. 20 | void WASM_IMPORT(env, wasm_log)(void*); 21 | void WASM_IMPORT(env, wasm_logi)(int); 22 | 23 | void *malloc(size_t size); 24 | void free(void *p); 25 | 26 | struct obj { 27 | uintptr_t callback_handle; 28 | }; 29 | 30 | struct obj* WASM_EXPORT(make_obj)() { 31 | struct obj* obj = malloc(sizeof(struct obj)); 32 | if (!obj) { 33 | out_of_memory(); 34 | __builtin_trap(); 35 | } 36 | obj->callback_handle = -1; 37 | return obj; 38 | } 39 | 40 | void WASM_EXPORT(free_obj)(struct obj* obj) 41 | { 42 | release(obj->callback_handle); 43 | free(obj); 44 | } 45 | 46 | void WASM_EXPORT(attach_callback)(struct obj* obj, uintptr_t callback_handle) 47 | { 48 | release(obj->callback_handle); 49 | obj->callback_handle = callback_handle; 50 | } 51 | 52 | void WASM_EXPORT(invoke_callback)(struct obj* obj) 53 | { 54 | invoke(obj->callback_handle); 55 | } 56 | -------------------------------------------------------------------------------- /milestones/m2/test.wat: -------------------------------------------------------------------------------- 1 | (memory $mem (import "env" "__linear_memory") 0) 2 | (func $malloc (import "env" "malloc") (param i32) (result i32)) 3 | (func $free (import "env" "free") (param i32)) 4 | 5 | (func $release (import "rt" "release") (param i32)) 6 | (func $invoke (import "rt" "invoke") (param i32)) 7 | (func $out_of_memory (import "rt" "out_of_memory")) 8 | 9 | (func $make_obj (export "make_obj") (result i32) 10 | (local $obj i32) 11 | 12 | ;; Allocate a new object in linear memory. If that fails, signal the 13 | ;; runtime, and then abort. 14 | (if (i32.eqz (local.tee $obj (call $malloc (i32.const 4)))) 15 | (then (call $out_of_memory) (unreachable))) 16 | 17 | ;; Initialize callback handle -1 (invalid) for fresh object. 18 | (i32.store offset=0 (local.get $obj) (i32.const -1)) 19 | 20 | (local.get $obj)) 21 | 22 | (func $free_obj (export "free_obj") (param $obj i32) 23 | ;; Release the callback handle. 
24 | (call $release (i32.load offset=0 (local.get $obj))) 25 | (call $free (local.get $obj))) 26 | 27 | (func $attach_callback (export "attach_callback") 28 | (param $obj i32) (param $callback_handle i32) 29 | ;; Release old handle. 30 | (call $release (i32.load offset=0 (local.get $obj))) 31 | 32 | ;; Store handle to object. 33 | (i32.store offset=0 34 | (local.get $obj) 35 | (local.get $callback_handle))) 36 | 37 | (func $invoke_callback (export "invoke_callback") 38 | (param $obj i32) 39 | (call $invoke (i32.load offset=0 (local.get $obj)))) 40 | -------------------------------------------------------------------------------- /milestones/m4/test.ll: -------------------------------------------------------------------------------- 1 | target triple = "wasm32-unknown-unknown" 2 | 3 | ; Reference types 4 | %externref = type ptr addrspace(10) 5 | 6 | %funcptr = type void () addrspace(20)* 7 | %funcref = type i8 addrspace(20)* 8 | 9 | ; intrinsic decl 10 | declare %externref @llvm.wasm.ref.null.extern() nounwind readonly 11 | 12 | ; External functions 13 | declare %externref @gc_alloc(i32, i32) local_unnamed_addr #0 14 | declare %externref @gc_ref_obj(%externref, i32) local_unnamed_addr #1 15 | declare void @gc_set_obj(%externref, i32, %externref) local_unnamed_addr #2 16 | declare void @invoke(%externref) local_unnamed_addr #3 17 | 18 | define %externref @make_obj() #4 { 19 | %obj = call %externref @gc_alloc(i32 1, i32 0) 20 | %null = call %externref @llvm.wasm.ref.null.extern() 21 | call void @gc_set_obj(%externref %obj, i32 0, %externref %null) 22 | ret %externref %obj 23 | } 24 | 25 | define void @attach_callback(%externref %obj, %externref %callback) #5 { 26 | call void @gc_set_obj(%externref %obj, i32 0, %externref %callback) 27 | ret void 28 | } 29 | 30 | define void @invoke_callback(%externref %obj) #6 { 31 | %ref = call %externref @gc_ref_obj(%externref %obj, i32 0) 32 | call void @invoke(%externref %ref) 33 | ret void 34 | } 35 | 36 | attributes #0 = { "wasm-import-module"="rt" "wasm-import-name"="gc_alloc" } 37 | attributes #1 = { "wasm-import-module"="rt" "wasm-import-name"="gc_ref_obj" } 38 | attributes #2 = { "wasm-import-module"="rt" "wasm-import-name"="gc_set_obj" } 39 | attributes #3 = { "wasm-import-module"="rt" "wasm-import-name"="invoke" } 40 | 41 | attributes #4 = { "wasm-export-name"="make_obj" } 42 | attributes #5 = { "wasm-export-name"="attach_callback" } 43 | attributes #6 = { "wasm-export-name"="invoke_callback" } 44 | -------------------------------------------------------------------------------- /milestones/m2/test.S: -------------------------------------------------------------------------------- 1 | .functype out_of_memory () -> () 2 | .import_module out_of_memory, rt 3 | .import_name out_of_memory, out_of_memory 4 | .functype release (i32) -> () 5 | .import_module release, rt 6 | .import_name release, release 7 | .functype invoke (i32) -> () 8 | .import_module invoke, rt 9 | .import_name invoke, invoke 10 | 11 | .functype malloc (i32) -> (i32) 12 | .functype free (i32) -> () 13 | 14 | .text 15 | 16 | make_obj: 17 | .hidden make_obj 18 | .globl make_obj 19 | .export_name make_obj, make_obj 20 | .no_dead_strip make_obj 21 | .type make_obj,@function 22 | .functype make_obj () -> (i32) 23 | .local i32 24 | block 25 | i32.const 4 26 | call malloc 27 | local.tee 0 28 | br_if 0 29 | call out_of_memory 30 | unreachable 31 | unreachable 32 | end_block 33 | local.get 0 34 | i32.const -1 35 | i32.store 0 36 | local.get 0 37 | end_function 38 | 39 | 40 | free_obj: 41 | 
.hidden free_obj 42 | .globl free_obj 43 | .export_name free_obj, free_obj 44 | .no_dead_strip free_obj 45 | .type free_obj,@function 46 | .functype free_obj (i32) -> () 47 | local.get 0 48 | i32.load 0 49 | call release 50 | local.get 0 51 | call free 52 | end_function 53 | 54 | 55 | attach_callback: 56 | .hidden attach_callback 57 | .globl attach_callback 58 | .export_name attach_callback, attach_callback 59 | .no_dead_strip attach_callback 60 | .type attach_callback,@function 61 | .functype attach_callback (i32, i32) -> () 62 | local.get 0 63 | i32.load 0 64 | call release 65 | local.get 0 66 | local.get 1 67 | i32.store 0 68 | end_function 69 | 70 | invoke_callback: 71 | .hidden invoke_callback 72 | .globl invoke_callback 73 | .export_name invoke_callback, invoke_callback 74 | .no_dead_strip invoke_callback 75 | .type invoke_callback,@function 76 | .functype invoke_callback (i32) -> () 77 | local.get 0 78 | i32.load 0 79 | call invoke 80 | end_function 81 | -------------------------------------------------------------------------------- /milestones/m4/test.S: -------------------------------------------------------------------------------- 1 | .functype gc_alloc (i32, i32) -> (externref) 2 | .import_module gc_alloc, rt 3 | .import_name gc_alloc, gc_alloc 4 | 5 | .functype gc_ref_obj (externref, i32) -> (externref) 6 | .import_module gc_ref_obj, rt 7 | .import_name gc_ref_obj, gc_ref_obj 8 | 9 | .functype gc_set_obj (externref, i32, externref) -> () 10 | .import_module gc_set_obj, rt 11 | .import_name gc_set_obj, gc_set_obj 12 | 13 | .functype invoke (externref) -> () 14 | .import_module invoke, rt 15 | .import_name invoke, invoke 16 | 17 | make_obj: 18 | .hidden make_obj 19 | .globl make_obj 20 | .export_name make_obj, make_obj 21 | .no_dead_strip make_obj 22 | .type make_obj,@function 23 | .functype make_obj () -> (externref) 24 | .local externref # obj 25 | # Allocate a new object in managed memory. 26 | i32.const 1 # nobjs 27 | i32.const 0 # nbytes 28 | call gc_alloc 29 | local.set 0 30 | 31 | # Initialize callback field. 32 | local.get 0 33 | i32.const 0 34 | ref.null_extern 35 | call gc_set_obj 36 | 37 | local.get 0 38 | end_function 39 | 40 | attach_callback: 41 | .hidden attach_callback 42 | .globl attach_callback 43 | .export_name attach_callback, attach_callback 44 | .no_dead_strip attach_callback 45 | .type attach_callback,@function 46 | .functype attach_callback (externref, externref) -> () 47 | # Set callback field. 
48 | local.get 0 49 | i32.const 0 50 | local.get 1 51 | call gc_set_obj 52 | end_function 53 | 54 | invoke_callback: 55 | .hidden invoke_callback 56 | .globl invoke_callback 57 | .export_name invoke_callback, invoke_callback 58 | .no_dead_strip invoke_callback 59 | .type invoke_callback,@function 60 | .functype invoke_callback (externref) -> () 61 | local.get 0 62 | i32.const 0 63 | call gc_ref_obj 64 | call invoke 65 | end_function 66 | -------------------------------------------------------------------------------- /milestones/m3/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | function invoke(callback) { 28 | return callback() 29 | } 30 | function out_of_memory() { 31 | print('error: out of linear memory'); 32 | quit(1); 33 | } 34 | 35 | let nalloc = 0; 36 | let nfinalized = 0; 37 | let nmax = 0; 38 | class WasmObject { 39 | constructor(wasm) { 40 | this.wasm = wasm 41 | let obj = wasm.exports.make_obj(); 42 | this.obj = obj; 43 | nalloc++; 44 | nmax = Math.max(nalloc - nfinalized, nmax); 45 | let free_obj = this.wasm.exports.free_obj; 46 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 47 | } 48 | attachCallback(f) { 49 | this.wasm.exports.attach_callback(this.obj, f); 50 | } 51 | invokeCallback() { 52 | this.wasm.exports.invoke_callback(this.obj); 53 | } 54 | } 55 | 56 | let bytes = readBinaryFile("test.wasm"); 57 | let mod = new WebAssembly.Module(bytes); 58 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 59 | let rt = { invoke, out_of_memory }; 60 | let imports = { env: memory.env(), rt } 61 | let instance = new WebAssembly.Instance(mod, imports); 62 | 63 | function test(n) { 64 | for (let i = 0; i < n; i++) { 65 | let obj = new WasmObject(instance); 66 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 67 | if (i == 0) obj.invokeCallback(); 68 | } 69 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 70 | } 71 | 72 | async function main() { 73 | await runTestLoop(1e2, test, 1e3); 74 | print(`Success; max ${nmax} objects live.`); 75 | } 76 | main() 77 | -------------------------------------------------------------------------------- /milestones/m5/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 
15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | function invoke(callback) { 28 | return callback() 29 | } 30 | function out_of_memory() { 31 | print('error: out of linear memory'); 32 | quit(1); 33 | } 34 | 35 | let nalloc = 0; 36 | let nfinalized = 0; 37 | let nmax = 0; 38 | class WasmObject { 39 | constructor(wasm) { 40 | this.wasm = wasm 41 | let obj = wasm.exports.make_obj(); 42 | this.obj = obj; 43 | nalloc++; 44 | nmax = Math.max(nalloc - nfinalized, nmax); 45 | let free_obj = this.wasm.exports.free_obj; 46 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 47 | } 48 | attachCallback(f) { 49 | this.wasm.exports.attach_callback(this.obj, f); 50 | } 51 | invokeCallback() { 52 | this.wasm.exports.invoke_callback(this.obj); 53 | } 54 | } 55 | 56 | let bytes = readBinaryFile("test.wasm"); 57 | let mod = new WebAssembly.Module(bytes); 58 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 59 | let rt = { invoke, out_of_memory }; 60 | let imports = { env: memory.env(), rt } 61 | let instance = new WebAssembly.Instance(mod, imports); 62 | 63 | function test(n) { 64 | for (let i = 0; i < n; i++) { 65 | let obj = new WasmObject(instance); 66 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 67 | if (i == 0) obj.invokeCallback(); 68 | } 69 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 70 | } 71 | 72 | async function main() { 73 | await runTestLoop(1e2, test, 1e3); 74 | print(`Success; max ${nmax} objects live.`); 75 | } 76 | main() 77 | -------------------------------------------------------------------------------- /milestones/m6/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let nalloc = 0; 26 | class WasmObject { 27 | constructor(wasm) { 28 | this.wasm = wasm 29 | let obj = wasm.exports.make_obj(); 30 | nalloc++; 31 | this.obj = obj; 32 | } 33 | attachCallback(f) { 34 | this.wasm.exports.attach_callback(this.obj, f); 35 | } 36 | invokeCallback() { 37 | this.wasm.exports.invoke_callback(this.obj); 38 | } 39 | } 40 | 41 | function gc_alloc(nobjs, nbytes) { 42 | return { 43 | objs: nobjs ? new Array(nobjs) : null, 44 | bytes: nbytes ? new ArrayBuffer(nbytes) : null 45 | }; 46 | } 47 | function gc_ref_obj(obj, i) { return obj.objs[i]; } 48 | function gc_set_obj(obj, i, val) { obj.objs[i] = val; } 49 | // Room here for gc_ref_u8, gc_set_f32, etc for obj.bytes. 
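// A sketch of what such byte accessors could look like (hypothetical: this
// module's "rt" imports don't include them yet). They view obj.bytes, the
// ArrayBuffer allocated by gc_alloc, through a typed array.
function gc_ref_u8(obj, i) { return new Uint8Array(obj.bytes)[i]; }
function gc_set_u8(obj, i, val) { new Uint8Array(obj.bytes)[i] = val; }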
50 | 51 | function invoke(cb) { return cb(); } 52 | 53 | let bytes = readBinaryFile("test.wasm"); 54 | let mod = new WebAssembly.Module(bytes); 55 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 56 | let rt = { gc_alloc, gc_ref_obj, gc_set_obj, invoke }; 57 | let imports = { env: memory.env(), rt } 58 | let instance = new WebAssembly.Instance(mod, imports); 59 | 60 | function test(n) { 61 | for (let i = 0; i < n; i++) { 62 | let obj = new WasmObject(instance); 63 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 64 | if (i == 0) obj.invokeCallback(); 65 | } 66 | print(`${nalloc} total allocated.`); 67 | } 68 | 69 | function runTestLoop(n, f, ...args) { 70 | for (let i = 0; i < n; i++) { 71 | f(...args); 72 | } 73 | } 74 | 75 | function main() { 76 | runTestLoop(1e2, test, 1e3); 77 | print(`Success.`); 78 | } 79 | main() 80 | -------------------------------------------------------------------------------- /milestones/m4/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let nalloc = 0; 26 | class WasmObject { 27 | constructor(wasm) { 28 | this.wasm = wasm 29 | let obj = wasm.exports.make_obj(); 30 | nalloc++; 31 | this.obj = obj; 32 | } 33 | attachCallback(f) { 34 | this.wasm.exports.attach_callback(this.obj, f); 35 | } 36 | invokeCallback() { 37 | this.wasm.exports.invoke_callback(this.obj); 38 | } 39 | } 40 | 41 | function gc_alloc(nobjs, nbytes) { 42 | return { 43 | objs: nobjs ? new Array(nobjs) : null, 44 | bytes: nbytes ? new ArrayBuffer(nbytes) : null 45 | }; 46 | } 47 | function gc_ref_obj(obj, i) { return obj.objs[i]; } 48 | function gc_set_obj(obj, i, val) { obj.objs[i] = val; } 49 | // Room here for gc_ref_u8, gc_set_f32, etc for obj.bytes. 
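// A sketch of float accessors over obj.bytes (hypothetical: not part of the
// "rt" imports this test module uses). The index i counts f32 slots.
function gc_ref_f32(obj, i) { return new Float32Array(obj.bytes)[i]; }
function gc_set_f32(obj, i, val) { new Float32Array(obj.bytes)[i] = val; }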
50 | 51 | function invoke(cb) { return cb(); } 52 | 53 | let bytes = readBinaryFile("test.wasm"); 54 | let mod = new WebAssembly.Module(bytes); 55 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 56 | let rt = { gc_alloc, gc_ref_obj, gc_set_obj, invoke }; 57 | let imports = { env: memory.env(), rt } 58 | let instance = new WebAssembly.Instance(mod, imports); 59 | 60 | function test(n) { 61 | for (let i = 0; i < n; i++) { 62 | let obj = new WasmObject(instance); 63 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 64 | if (i == 0) obj.invokeCallback(); 65 | } 66 | print(`${nalloc} total allocated.`); 67 | } 68 | 69 | function runTestSyncLoop(n, f, ...args) { 70 | for (let i = 0; i < n; i++) { 71 | f(...args); 72 | } 73 | } 74 | 75 | function main() { 76 | runTestSyncLoop(1e2, test, 1e3); 77 | print(`Success.`); 78 | } 79 | main() 80 | -------------------------------------------------------------------------------- /milestones/lib.js: -------------------------------------------------------------------------------- 1 | let readBinaryFile = (()=> { 2 | if (typeof read !== 'undefined') 3 | return f => read(f, 'binary'); 4 | if (typeof readFile !== 'undefined') 5 | return f => readFile(f); 6 | let fs = require('fs'); 7 | return f => fs.readFileSync(f); 8 | })(); 9 | 10 | let maybeGC = (()=> { 11 | // Engines will detect finalizable objects when they run GC. They 12 | // run GC according to a number of heuristics, principally among 13 | // them how much GC-managed allocation has happened since the last 14 | // GC. These heuristics are not informed how much off-heap 15 | // allocation has occured; see 16 | // https://github.com/tc39/proposal-weakrefs/issues/87. In 17 | // practice, all engines will fail our tests, because they don't 18 | // detect finalizable objects soon enough. Therefore to make these 19 | // tests reliable, we insert explicit GC calls. This bodes poorly 20 | // for the role of finalizers in robust systems. V8 does not expose 21 | // `gc` by default, though it will if you pass --expose-gc. 22 | if (typeof gc !== 'undefined') 23 | return gc; 24 | print('warning: no gc() exposed; no promptness guarantee for finalization'); 25 | return ()=>{}; 26 | })(); 27 | 28 | let callLater = (()=> { 29 | // Note that d8's `setTimeout` ignores the timeout argument; it just 30 | // queues `f` without any delay. 31 | if (typeof setTimeout !== 'undefined') 32 | return f=>setTimeout(f, 0); 33 | if (typeof enqueueJob !== 'undefined') 34 | return f=>enqueueJob(f); 35 | return f=>f(); 36 | })(); 37 | 38 | let maybeDrainTasks = (()=> { 39 | // Weirdly, SpiderMonkey's (nonstandard) enqueueJob has higher 40 | // priority than finalization tasks. We need to explicitly drain 41 | // tasks there to get finalizers to run. 42 | if (typeof drainJobQueue !== 'undefined') 43 | return drainJobQueue; 44 | return ()=>{}; 45 | })(); 46 | 47 | // Finalizers run in their own task. This microsleep is a simple way to 48 | // force the engine to go to the next turn. 49 | async function microsleep() { 50 | await new Promise((resolve, reject) => callLater(resolve)); 51 | } 52 | 53 | async function allowFinalizersToRun() { 54 | maybeGC(); 55 | maybeDrainTasks(); 56 | await microsleep(); 57 | } 58 | 59 | // We are testing garbage collection and finalizers, for which async 60 | // functions are a natural fit. However we avoid putting the test 61 | // itself in an async function, because of a gnarly SpiderMonkey 62 | // limitation (https://bugzilla.mozilla.org/show_bug.cgi?id=1542660). 
63 | // Basically, in SpiderMonkey, any closure in an async function captures 64 | // *all* local variables, potentially introducing cycles and preventing 65 | // finalization. 66 | async function runTestLoop(n, f, ...args) { 67 | for (let i = 0; i < n; i++) { 68 | f(...args); 69 | await allowFinalizersToRun(); 70 | } 71 | } 72 | 73 | function checkSame(expected, actual, what) { 74 | print(`checking expected ${what}: ${expected}`); 75 | if (expected !== actual) 76 | throw new Error(`unexpected ${what}: ${actual}`); 77 | } 78 | -------------------------------------------------------------------------------- /milestones/m5/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | typedef __externref_t externref; 16 | 17 | void WASM_IMPORT(rt, invoke)(externref); 18 | void WASM_IMPORT(rt, out_of_memory)(void); 19 | 20 | // Useful for debugging. 21 | void WASM_IMPORT(env, wasm_log)(void*); 22 | void WASM_IMPORT(env, wasm_logi)(int); 23 | 24 | void *malloc(size_t size); 25 | void free(void *p); 26 | 27 | typedef uint32_t Handle; 28 | 29 | struct freelist { 30 | Handle handle; 31 | struct freelist *next; 32 | }; 33 | 34 | static struct freelist *freelist = 0; 35 | 36 | static Handle freelist_pop (void) { 37 | ASSERT(freelist != 0x0); 38 | struct freelist *head = freelist; 39 | Handle ret = head->handle; 40 | freelist = head->next; 41 | free(head); 42 | return ret; 43 | } 44 | 45 | static void freelist_push (Handle h) { 46 | struct freelist *head = malloc(sizeof(struct freelist)); 47 | if (!head) { 48 | out_of_memory(); 49 | __builtin_trap(); 50 | } 51 | head->handle = h; 52 | head->next = freelist; 53 | freelist = head; 54 | } 55 | 56 | static externref objects[0]; 57 | 58 | static void expand_table (void) { 59 | size_t old_size = __builtin_wasm_table_size(objects); 60 | size_t grow = (old_size >> 1) + 1; 61 | if (__builtin_wasm_table_grow(objects, 62 | __builtin_wasm_ref_null_extern(), 63 | grow) == -1) { 64 | out_of_memory(); 65 | __builtin_trap(); 66 | } 67 | size_t end = __builtin_wasm_table_size(objects); 68 | while (end != old_size) { 69 | freelist_push (--end); 70 | } 71 | } 72 | 73 | static Handle intern(externref obj) { 74 | if (!freelist) expand_table(); 75 | Handle ret = freelist_pop(); 76 | __builtin_wasm_table_set(objects, ret, obj); 77 | return ret; 78 | } 79 | 80 | static void release(Handle h) { 81 | if (h == -1) return; 82 | __builtin_wasm_table_set(objects, h, __builtin_wasm_ref_null_extern()); 83 | freelist_push(h); 84 | } 85 | 86 | static externref handle_value(Handle h) { 87 | return h == -1 88 | ? 
__builtin_wasm_ref_null_extern() 89 | : __builtin_wasm_table_get(objects, h); 90 | } 91 | 92 | struct obj { 93 | uintptr_t callback_handle; 94 | }; 95 | 96 | struct obj* WASM_EXPORT(make_obj)() { 97 | struct obj* obj = malloc(sizeof(struct obj)); 98 | if (!obj) { 99 | out_of_memory(); 100 | __builtin_trap(); 101 | } 102 | obj->callback_handle = -1; 103 | return obj; 104 | } 105 | 106 | void WASM_EXPORT(free_obj)(struct obj* obj) { 107 | release(obj->callback_handle); 108 | free(obj); 109 | } 110 | 111 | void WASM_EXPORT(attach_callback)(struct obj* obj, externref callback) { 112 | release(obj->callback_handle); 113 | obj->callback_handle = intern(callback); 114 | } 115 | 116 | void WASM_EXPORT(invoke_callback)(struct obj* obj) { 117 | invoke(handle_value(obj->callback_handle)); 118 | } 119 | -------------------------------------------------------------------------------- /milestones/m2/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 2: Re-express test.c as raw WebAssembly test.wat 2 | 3 | [ previous: [m1](../m1/); next: [m3](../m3/) ] 4 | 5 | ## Overview 6 | 7 | In [Milestone 1](../m1/), we showed that using side tables and 8 | finalizers for WebAssembly<->JavaScript references caused memory leaks 9 | if the object graph had a cycle traversing the language boundary. We 10 | also showed that such cycles were easy to make, and hard to debug. 11 | 12 | We would like to propose some language extensions to C and C++ to fix 13 | this problem. To keep the discussion concrete, we will first show the 14 | kind of WebAssembly that we would like LLVM to produce, and test it to 15 | show that it solves the cycle problems. This test will also allow us to 16 | examine different characteristics of the proposed solution. 17 | 18 | Therefore this milestone is the same as [milestone 0](../m0/), but with 19 | the [`test.c`](../m0/test.c) translated to WebAssembly via `clang -Ox 20 | --target=wasm32 -nostdlib -S -o test.S test.c`. The resulting 21 | [`test.S`](./test.S) was then cleaned up manually and commented. 22 | 23 | There is also a commented [test.wat](./test.wat) file, for reference. 24 | The [`test.wat`](./test.wat) file and the [`test.S`](./test.S) file are 25 | equivalent. The [`Makefile`](./Makefile) has a rule for alternately 26 | producing `test.o` from `test.wat`, via wabt's `wat2wasm`. 27 | 28 | ## Details 29 | 30 | The difference from [m0](../m0) is that we translated the former 31 | [`test.c`](../m0/test.c) into [`test.wat`](./test.wat). 32 | 33 | Running `make` will build `test.wasm` and run the test as in m0. An 34 | example run: 35 | 36 | ``` 37 | $ make v8.test 38 | ~/src/llvm-project/build/bin/clang --target=wasm32 -nostdlib -c -o test.o test.S 39 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c 40 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --allow-undefined --import-memory -o test.wasm test.o walloc.o 41 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc test.js 42 | Callback after 1 allocated. 43 | 1000 total allocated, 1000 still live. 44 | Callback after 1001 allocated. 45 | 2000 total allocated, 1000 still live. 46 | ... 47 | 99000 total allocated, 1000 still live. 48 | Callback after 99001 allocated. 49 | 100000 total allocated, 1000 still live. 50 | checking expected live object count: 0 51 | Success; max 1000 objects live. 
52 | ``` 53 | 54 | ## Results 55 | 56 | ### Toolchain support for linking C and wat 57 | 58 | The object file format of WebAssembly files, as produced by clang and 59 | consumed by LLD, is just the standard binary WebAssembly format, with a 60 | few conventions, plus a couple of "custom sections". See full details 61 | over at [the tool-conventions Linking 62 | document](https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md). 63 | 64 | By default, the `wat2wasm` tool from `wabt` doesn't produce these 65 | sections. It does have a `--relocatable` option, however, that was 66 | intended for this purpose, but it had a few bugs that made it unusable. 67 | These were fixed in 68 | [#1535](https://github.com/WebAssembly/wabt/pull/1535), 69 | [#1537](https://github.com/WebAssembly/wabt/pull/1537), and 70 | [#1539](https://github.com/WebAssembly/wabt/pull/1539) from us, and 71 | [#1527](https://github.com/WebAssembly/wabt/pull/1527) from an external 72 | contributor. All of these changes are now in the main upstream branch. 73 | 74 | Note that although we initially used `wat2wasm --relocatable`, as later 75 | work focusses on LLVM, we now use the LLVM assembly syntax for the 76 | WebAssembly target. 77 | -------------------------------------------------------------------------------- /milestones/m0/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | class ObjectTable { 28 | constructor() { 29 | this.objects = []; 30 | this.freelist = []; 31 | } 32 | expand() { 33 | let len = this.objects.length; 34 | let grow = (len >> 1) + 1; 35 | let end = len + grow; 36 | this.objects.length = end; 37 | while (end != len) 38 | this.freelist.push(--end); 39 | } 40 | intern(obj) { 41 | if (!this.freelist.length) 42 | this.expand(); 43 | let handle = this.freelist.pop(); 44 | this.objects[handle] = obj; 45 | return handle; 46 | } 47 | release(handle) { 48 | if (handle === -1) return; 49 | this.objects[handle] = null; 50 | this.freelist.push(handle); 51 | } 52 | count() { 53 | return this.objects.length - this.freelist.length; 54 | } 55 | ref(handle) { 56 | return this.objects[handle]; 57 | } 58 | } 59 | 60 | let table = new ObjectTable; 61 | function invoke(handle) { 62 | if (handle === -1) return; 63 | return table.ref(handle)(); 64 | } 65 | function release(handle) { 66 | if (handle === -1) return; 67 | table.release(handle); 68 | } 69 | function out_of_memory() { 70 | print('error: out of linear memory'); 71 | quit(1); 72 | } 73 | 74 | let nalloc = 0; 75 | let nfinalized = 0; 76 | let nmax = 0; 77 | class WasmObject { 78 | constructor(wasm) { 79 | this.wasm = wasm 80 | let obj = wasm.exports.make_obj(); 81 | this.obj = obj; 82 | nalloc++; 83 | nmax = Math.max(nalloc - nfinalized, 
nmax); 84 | let free_obj = this.wasm.exports.free_obj; 85 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 86 | } 87 | attachCallback(f) { 88 | let handle = table.intern(f); 89 | this.wasm.exports.attach_callback(this.obj, handle); 90 | } 91 | invokeCallback() { 92 | this.wasm.exports.invoke_callback(this.obj); 93 | } 94 | } 95 | 96 | let bytes = readBinaryFile("test.wasm"); 97 | let mod = new WebAssembly.Module(bytes); 98 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 99 | let rt = { release, invoke, out_of_memory }; 100 | let imports = { env: memory.env(), rt } 101 | let instance = new WebAssembly.Instance(mod, imports); 102 | 103 | function test(n) { 104 | for (let i = 0; i < n; i++) { 105 | let obj = new WasmObject(instance); 106 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 107 | if (i == 0) obj.invokeCallback(); 108 | } 109 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 110 | } 111 | 112 | async function main() { 113 | await runTestLoop(1e2, test, 1e3); 114 | checkSame(nalloc - nfinalized, table.count(), "live object count"); 115 | print(`Success; max ${nmax} objects live.`); 116 | } 117 | main() 118 | -------------------------------------------------------------------------------- /milestones/m2/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | class ObjectTable { 28 | constructor() { 29 | this.objects = []; 30 | this.freelist = []; 31 | } 32 | expand() { 33 | let len = this.objects.length; 34 | let grow = (len >> 1) + 1; 35 | let end = len + grow; 36 | this.objects.length = end; 37 | while (end != len) 38 | this.freelist.push(--end); 39 | } 40 | intern(obj) { 41 | if (!this.freelist.length) 42 | this.expand(); 43 | let handle = this.freelist.pop(); 44 | this.objects[handle] = obj; 45 | return handle; 46 | } 47 | release(handle) { 48 | if (handle === -1) return; 49 | this.objects[handle] = null; 50 | this.freelist.push(handle); 51 | } 52 | count() { 53 | return this.objects.length - this.freelist.length; 54 | } 55 | ref(handle) { 56 | return this.objects[handle]; 57 | } 58 | } 59 | 60 | let table = new ObjectTable; 61 | function invoke(handle) { 62 | if (handle === -1) return; 63 | return table.ref(handle)(); 64 | } 65 | function release(handle) { 66 | if (handle === -1) return; 67 | table.release(handle); 68 | } 69 | function out_of_memory() { 70 | print('error: out of linear memory'); 71 | quit(1); 72 | } 73 | 74 | let nalloc = 0; 75 | let nfinalized = 0; 76 | let nmax = 0; 77 | class WasmObject { 78 | constructor(wasm) { 79 | this.wasm = wasm 80 | let obj = wasm.exports.make_obj(); 81 | this.obj = obj; 82 | nalloc++; 83 | nmax = Math.max(nalloc - nfinalized, nmax); 84 | 
let free_obj = this.wasm.exports.free_obj; 85 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 86 | } 87 | attachCallback(f) { 88 | let handle = table.intern(f); 89 | this.wasm.exports.attach_callback(this.obj, handle); 90 | } 91 | invokeCallback() { 92 | this.wasm.exports.invoke_callback(this.obj); 93 | } 94 | } 95 | 96 | let bytes = readBinaryFile("test.wasm"); 97 | let mod = new WebAssembly.Module(bytes); 98 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 99 | let rt = { release, invoke, out_of_memory }; 100 | let imports = { env: memory.env(), rt } 101 | let instance = new WebAssembly.Instance(mod, imports); 102 | 103 | function test(n) { 104 | for (let i = 0; i < n; i++) { 105 | let obj = new WasmObject(instance); 106 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 107 | if (i == 0) obj.invokeCallback(); 108 | } 109 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 110 | } 111 | 112 | async function main() { 113 | await runTestLoop(1e2, test, 1e3); 114 | checkSame(nalloc - nfinalized, table.count(), "live object count"); 115 | print(`Success; max ${nmax} objects live.`); 116 | } 117 | main() 118 | -------------------------------------------------------------------------------- /milestones/m1/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | class ObjectTable { 28 | constructor() { 29 | this.objects = []; 30 | this.freelist = []; 31 | } 32 | expand() { 33 | let len = this.objects.length; 34 | let grow = (len >> 1) + 1; 35 | let end = len + grow; 36 | this.objects.length = end; 37 | while (end != len) 38 | this.freelist.push(--end); 39 | } 40 | intern(obj) { 41 | if (!this.freelist.length) 42 | this.expand(); 43 | let handle = this.freelist.pop(); 44 | this.objects[handle] = obj; 45 | return handle; 46 | } 47 | release(handle) { 48 | if (handle === -1) return; 49 | this.objects[handle] = null; 50 | this.freelist.push(handle); 51 | } 52 | count() { 53 | return this.objects.length - this.freelist.length; 54 | } 55 | ref(handle) { 56 | return this.objects[handle]; 57 | } 58 | } 59 | 60 | let table = new ObjectTable; 61 | function invoke(handle) { 62 | if (handle === -1) return; 63 | return table.ref(handle)(); 64 | } 65 | function release(handle) { 66 | if (handle === -1) return; 67 | table.release(handle); 68 | } 69 | function out_of_memory() { 70 | print('error: out of linear memory'); 71 | quit(1); 72 | } 73 | 74 | let nalloc = 0; 75 | let nfinalized = 0; 76 | let nmax = 0; 77 | class WasmObject { 78 | constructor(wasm) { 79 | this.wasm = wasm 80 | let obj = wasm.exports.make_obj(); 81 | this.obj = obj; 82 | nalloc++; 83 | nmax = Math.max(nalloc - nfinalized, nmax); 84 | let free_obj 
= this.wasm.exports.free_obj; 85 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 86 | } 87 | attachCallback(f) { 88 | let handle = table.intern(f); 89 | this.wasm.exports.attach_callback(this.obj, handle); 90 | } 91 | invokeCallback() { 92 | this.wasm.exports.invoke_callback(this.obj); 93 | } 94 | } 95 | 96 | let bytes = readBinaryFile("test.wasm"); 97 | let mod = new WebAssembly.Module(bytes); 98 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 99 | let rt = { release, invoke, out_of_memory }; 100 | let imports = { env: memory.env(), rt } 101 | let instance = new WebAssembly.Instance(mod, imports); 102 | 103 | function test(n) { 104 | for (let i = 0; i < n; i++) { 105 | let obj = new WasmObject(instance); 106 | obj.attachCallback( 107 | () => print(`Callback from ${obj} after ${nalloc} allocated.`)); 108 | if (i == 0) obj.invokeCallback(); 109 | } 110 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 111 | } 112 | 113 | async function main() { 114 | await runTestLoop(1e2, test, 1e3); 115 | checkSame(nalloc - nfinalized, table.count(), "live object count"); 116 | print(`Success; max ${nmax} objects live.`); 117 | } 118 | main() 119 | -------------------------------------------------------------------------------- /milestones/m3/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 3: Move side table to WebAssembly 2 | 3 | [ previous: [m2](../m2/); next: [m4](../m4/) ] 4 | 5 | ## Overview 6 | 7 | In the previous milestones, the side table associating integer handles 8 | with GC-managed data from JavaScript was maintained in the JavaScript 9 | run-time. However with the [widely-implemented reference types 10 | proposal](https://github.com/WebAssembly/reference-types), it's now 11 | possible to manage that table on the WebAssembly side, reducing the size 12 | of the language-specific run-time. 13 | 14 | Relative to the previous milestone, this milestone moves the 15 | freelist/object table implementation into the [`test.S`](./test.S) 16 | WebAssembly file. The algorithm is the same, with the object table 17 | itself being a WebAssembly table holding `externref` values, and the 18 | freelist being a singly-linked-list of malloc'd nodes. 19 | 20 | For the rest, you can search the `test.S` file for instances of 21 | `externref` -- for example, `attach_callback` now takes an externref as 22 | an argument directly, and handles "interning" it into the object table 23 | without involving JavaScript. 24 | 25 | ## Details 26 | 27 | The difference from [m2](../m2) is that we moved the side-table 28 | implementation into [`test.S`](./test.S) and removed it from 29 | [`test.js`](./test.js). 30 | 31 | This is the first example in which WebAssembly handles externref values, 32 | so the [`Makefile`](./Makefile) arranges to enable the appropriate 33 | feature flags in compiler and in the various engines. 34 | 35 | Running `make` will build `test.wasm` and then run the test as in m2. 36 | 37 | ## Results 38 | 39 | ### LLVM support for reference-types, on assembly level 40 | 41 | As part of this work, Paulo and Andy added support for `externref`, 42 | instructions that operate on `externref` (`table.ref`, `table.set`, 43 | etc), and support for table relocations to LLVM. 44 | 45 | Landed patches: D88815, D89797, D90608, D90948, D91604, D91635, D91637, D91849, D91870, D92215, D92315, D92320, D92321, D92323, D92840, D94075, D94677, D96001, D96770, D96872, D97761, D97843, D97923. 
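As a rough illustration of what these patches make expressible, declaring an `externref` table and storing into it looks something like the following in LLVM's WebAssembly assembly syntax (a sketch only, with made-up symbol names; see [`test.S`](./test.S) for the real code and exact directive spellings):

```
        .tabletype objects, externref   # a table of externref values

set_slot:
        .type set_slot,@function
        .functype set_slot (i32, externref) -> ()
        local.get 0          # i32 index into the table
        local.get 1          # externref value to store
        table.set objects    # relocatable reference to the table symbol
        end_function
```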
68 | 69 | Complete support only exists on the assembly ("MC") layer and in the 70 | linker (`wasm-ld`); support on the IR level is ongoing, and will be 71 | followed by frontends (`clang`). 72 | 73 | ### Handle and side-table mechanism implemented in terms of externref 74 | 75 | In a future where C and C++ programs can reference externref values, 76 | there are going to be many times where you want a C data structure to 77 | reference a GC-managed value, but you can't put a GC-managed value 78 | directly into linear memory. Side tables and handles are the mechanism 79 | by which this will work: the code that has an externref and needs a 80 | handle will intern the object into a table, and store the integer handle 81 | into memory instead. 82 | 83 | This milestone shows the needed WebAssembly to do that. We just have to 84 | figure out how to get the compiler to emit it from C :) 85 | 86 | ### Side tables still lead to uncollectable cycles 87 | 88 | Moving the side table to WebAssembly is convenient in some ways but 89 | doesn't magically solve the cycle problem. Running the tests still 90 | fails, in the same way as in m2. 91 | -------------------------------------------------------------------------------- /milestones/m1/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 1: Cycles between WebAssembly and JavaScript are uncollectable 2 | 3 | [ previous: [m0](../m0/); next: [m2](../m2/) ] 4 | 5 | ## Overview 6 | 7 | This is the same as [m0](../m0/), but with the smallest of changes to 8 | [`test.js`](./test.js): 9 | 10 | ```diff 11 | --- ../m0/test.js 2020-09-14 13:27:24.924663561 +0200 12 | +++ test.js 2020-09-14 13:29:06.658300401 +0200 13 | @@ -103,7 +103,8 @@ 14 | function test(n) { 15 | for (let i = 0; i < n; i++) { 16 | let obj = new WasmObject(instance); 17 | - obj.installCallback(() => print(`Callback after ${nalloc} allocated.`)); 18 | + obj.installCallback( 19 | + () => print(`Callback from ${obj} after ${nalloc} allocated.`)); 20 | if (i == 0) obj.invokeCallback(); 21 | } 22 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 23 | ``` 24 | 25 | That is, our callback references the object. Seems like an innocent 26 | change, but it causes the test to fail because it causes a cycle. 27 | 28 | Note that JavaScript garbage collectors are perfectly capable of 29 | collecting cycles. However, while this case exhibits what is logically 30 | a cycle between the callback, the JS wrapper, and the C object, the 31 | garbage collector sees only the JavaScript side of things: the side 32 | table keeps the callback alive, which, as the callback captures the 33 | wrapper, keeps the wrapper alive. 34 | 35 | Because the allocated objects are never collected, this test will 36 | eventually crash because it can't allocate any more memory. In 37 | practice, our example crashes because it runs out of linear memory. 38 | 39 | This is a simplified representation of the use case that we are trying 40 | to fix in this effort, and milestone 1 indicates the problem. 41 | 42 | ## Details 43 | 44 | Running `make` will build `test.wasm` and run the test as in m0. Note 45 | however that it fails with an out-of-memory error, and therefore that 46 | the default `test` target stops after `v8.test` fails. You can verify 47 | that it fails with JSC via `make jsc.test`, and SpiderMonkey via `make 48 | spidermonkey.test`. 
49 | 
50 | An example run:
51 | 
52 | ```
53 | $ make v8.test
54 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o test.o test.c
55 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c
56 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --import-memory -o test.wasm test.o walloc.o
57 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc test.js
58 | Callback from [object Object] after 1 allocated.
59 | 1000 total allocated, 1000 still live.
60 | Callback from [object Object] after 1001 allocated.
61 | 2000 total allocated, 2000 still live.
62 | ...
63 | 55000 total allocated, 55000 still live.
64 | Callback from [object Object] after 55001 allocated.
65 | 56000 total allocated, 56000 still live.
66 | Callback from [object Object] after 56001 allocated.
67 | 57000 total allocated, 57000 still live.
68 | Callback from [object Object] after 57001 allocated.
69 | error: out of linear memory
70 | make: *** [Makefile:15: v8.test] Error 1
71 | ```
72 | 
73 | ## Results
74 | 
75 | ### We have a simple motivating example
76 | 
77 | Although this test program is small and synthetic, it exhibits a pattern
78 | of object relationships in real programs. There is reason to hope that
79 | if we find a solution for the test program, it will enable large
80 | systems to be made more reliable.
81 | 
82 | ### Cycles are easier to make than one might think
83 | 
84 | When constructing this test program, very experienced JS and WebAssembly
85 | engineers spent way longer than they had planned, ensuring that the
86 | leaks were present for the reasons we thought, and absent in the ways
87 | we expected. [Bug
88 | 1664463](https://bugzilla.mozilla.org/show_bug.cgi?id=1664463) is a good
89 | example, but simply allowing finalizers to run was also an issue. See
90 | m0 for a longer discussion of those incidental points.
91 | 
92 | We are now more convinced that "don't do that" is not a good answer to
93 | the cycle problem. Firstly, because it's difficult to know when one
94 | makes a cycle or not. Secondly, because sometimes cycles lead to more
95 | natural program structure. Finally, uncollectable cycles in hybrid
96 | WebAssembly/JS environments have such disastrous consequences, occurring
97 | at such unpredictable times, that from a system reliability point of
98 | view, if we can't avoid cycles in practice, we must make them
99 | benign.
100 | 
101 | ### In practice, the linear memory usually has the stricter limit
102 | 
103 | The failure mechanism of this test is a failure to grow linear memory.
104 | We expect that this will be the failure mechanism for insufficiently
105 | prompt GC and leaky systems, if a maximum is set on linear memory.
106 | 
--------------------------------------------------------------------------------
/milestones/getting-started.md:
--------------------------------------------------------------------------------
1 | # Before starting
2 | 
3 | There are a few dependencies that we use as part of our test setup:
4 | LLVM, wabt, and a JavaScript shell. You should make sure to have them
5 | installed before starting.
6 | 
7 | We assume that all of these projects are checked out in some root
8 | directory bound to the `SRC` environment variable. Like this:
9 | 
10 | ```
11 | export SRC=~/src
12 | ```
13 | 
14 | ## LLVM
15 | 
16 | We use LLVM to compile (Ref) C++ to WebAssembly. We need the latest
17 | upstream LLVM, as some features needed by this work have only landed
18 | recently.
19 | 
20 | ```
21 | cd $SRC
22 | git clone https://github.com/llvm/llvm-project
23 | cd llvm-project
24 | mkdir build && cd build
25 | cmake -G 'Unix Makefiles' \
26 | -DLLVM_TARGETS_TO_BUILD='X86;WebAssembly' \
27 | -DLLVM_ENABLE_PROJECTS="clang;lld" \
28 | -DCMAKE_BUILD_TYPE=Release \
29 | -DLLVM_ENABLE_ASSERTIONS=On \
30 | -DCMAKE_EXPORT_COMPILE_COMMANDS=On \
31 | -DLLVM_BUILD_32_BITS=Off \
32 | -DLLVM_ENABLE_BINDINGS=Off \
33 | ../llvm/
34 | 
35 | # If you need debug information in LLVM, use -DCMAKE_BUILD_TYPE=Debug.
36 | # Note however that the build will take longer and use a lot of memory
37 | # at the link phase.
38 | make -j$(nproc)
39 | ```
40 | 
41 | Once you're done, set the `LLVM` environment variable to the build dir;
42 | usually this is sufficient:
43 | 
44 | ```
45 | export LLVM=$SRC/llvm-project/build/bin
46 | export CC=$LLVM/clang
47 | export LD=$LLVM/wasm-ld
48 | ```
49 | 
50 | You can check that your LLVM is built correctly by running
51 | `$LLVM/llc --version`, which should output something like:
52 | 
53 | ```
54 | LLVM (http://llvm.org/):
55 | LLVM version 12.0.0git
56 | DEBUG build with assertions.
57 | Default target: x86_64-unknown-linux-gnu
58 | Host CPU: skylake-avx512
59 | 
60 | Registered Targets:
61 | wasm32 - WebAssembly 32-bit
62 | wasm64 - WebAssembly 64-bit
63 | x86 - 32-bit X86: Pentium-Pro and above
64 | x86-64 - 64-bit X86: EM64T and AMD64
65 | ```
66 | 
67 | We added the X86 targets assuming you're compiling on an x86 system;
68 | it's useful to compare native and wasm compilation.
69 | 
70 | ## wabt
71 | 
72 | Wabt (pronounced "wabbit") is the WebAssembly Binary Toolkit, and is
73 | used for its `wat2wasm` tool, which translates a textual representation
74 | of a WebAssembly module into a corresponding binary, suitable for
75 | loading into a JavaScript shell. Check it out and build it:
76 | 
77 | ```
78 | cd $SRC
79 | git clone --recursive https://github.com/WebAssembly/wabt
80 | cd wabt
81 | make -j40 gcc-release
82 | ```
83 | 
84 | Then set `WAT2WASM`:
85 | 
86 | ```
87 | export WAT2WASM=$SRC/wabt/out/gcc/Release/wat2wasm
88 | ```
89 | 
90 | You can check that everything is fine by running `$WAT2WASM --version`,
91 | which should print a version.
92 | 
93 | ## A JavaScript shell
94 | 
95 | Ultimately we're targeting WebAssembly as deployed in web browsers, and
96 | specifically in their JavaScript engines: V8 being the engine in Chrome,
97 | and SpiderMonkey in Firefox. However, the underlying "reference types"
98 | WebAssembly feature used by our Ref C++ prototype is not yet included in
99 | standard WebAssembly, so although it's present in both V8 and
100 | SpiderMonkey, it's not on by default. Therefore the easiest way to test
101 | our milestones is with a standalone JavaScript "shell" -- a little
102 | program that just includes the JavaScript / WebAssembly engine from a
103 | web browser.
104 | 
105 | Since we use finalizers as part of some milestones, the best thing is to
106 | build a fresh shell from upstream.
We won't include details here, but
107 | if you have a V8 build you are looking for the `d8` binary:
108 | 
109 | ```
110 | # V8, checked out in ~/src/v8, built in out/x64.release
111 | export D8=$SRC/v8/out/x64.release/d8
112 | ```
113 | 
114 | For SpiderMonkey it's just `js`:
115 | 
116 | ```
117 | # SpiderMonkey, checked out in ~/src/mozilla-unified, built in +js-release
118 | export SPIDERMONKEY=$SRC/mozilla-unified/+js-release/dist/bin/js
119 | ```
120 | 
121 | For JavaScriptCore it's `jsc`:
122 | ```
123 | # WebKit, checked out in ~/src/webkit, --jsc-only build in release mode
124 | export JSC=$SRC/webkit/WebKitBuild/Release/bin/jsc
125 | ```
126 | 
127 | JavaScript shells don't have a standardized interface, but the
128 | different implementations are close enough that our JavaScript code can
129 | paper over the differences. When you type "make" in a milestone, it will
130 | test all shells.
131 | 
132 | ## Summary
133 | 
134 | In the end, if you have built the dependencies as described above, you
135 | should be able to set the expected environment variables like this:
136 | 
137 | ```
138 | export SRC=~/src
139 | export LLVM=$SRC/llvm-project/build/bin
140 | export CC=$LLVM/clang
141 | export LD=$LLVM/wasm-ld
142 | export WAT2WASM=$SRC/wabt/out/gcc/Release/wat2wasm
143 | export V8=$SRC/v8/out/x64.release/d8
144 | export SPIDERMONKEY=$SRC/mozilla-unified/+js-release/dist/bin/js
145 | export JSC=$SRC/webkit/WebKitBuild/Release/bin/jsc
146 | ```
147 | 
--------------------------------------------------------------------------------
/milestones/m3/test.wat:
--------------------------------------------------------------------------------
1 | (memory $mem (import "env" "__linear_memory") 0)
2 | (func $malloc (import "env" "malloc") (param i32) (result i32))
3 | (func $free (import "env" "free") (param i32))
4 | 
5 | (func $invoke (import "rt" "invoke") (param externref))
6 | (func $out_of_memory (import "rt" "out_of_memory"))
7 | 
8 | ;; struct freelist { uint32_t handle; struct freelist *next; };
9 | (global $freelist (mut i32) (i32.const 0))
10 | 
11 | ;; Pop a handle off the freelist. Precondition: freelist non-empty.
12 | (func $freelist_pop (result i32)
13 | ;; Abort if the freelist is empty.
14 | (if (i32.eqz (global.get $freelist)) (then (unreachable)))
15 | 
16 | ;; Return value is the handle from the head of the freelist.
17 | (i32.load offset=0 (global.get $freelist))
18 | 
19 | ;; Push old freelist head.
20 | (global.get $freelist)
21 | 
22 | ;; Set $freelist to the old head's "next" link.
23 | (global.set $freelist (i32.load offset=4 (global.get $freelist)))
24 | 
25 | ;; Free the old head; the handle return value remains on the stack.
26 | (call $free))
27 | 
28 | ;; Push a handle onto the freelist. May call out_of_memory.
29 | (func $freelist_push (param $idx i32)
30 | (local $new_head i32)
31 | 
32 | ;; Allocate a new head.
33 | (local.tee $new_head (call $malloc (i32.const 8)))
34 | 
35 | ;; If the malloc failed, signal the runtime, then abort.
36 | (if (i32.eqz) (then (call $out_of_memory) (unreachable)))
37 | 
38 | ;; Link new freelist head to old freelist.
39 | (i32.store offset=4 (local.get $new_head) (global.get $freelist))
40 | 
41 | ;; Initialize handle for new freelist head.
42 | (i32.store offset=0 (local.get $new_head) (local.get $idx))
43 | 
44 | ;; Set new head as $freelist.
45 | (global.set $freelist (local.get $new_head)))
46 | 
47 | ;; Linear memory references externref values by putting them into the
48 | ;; object table, then referring to them via index.
We use the word 49 | ;; "handle" to refer to an externref's index into the object table. 50 | (table $objects 0 externref) 51 | 52 | ;; When we need to intern a new externref into the object table, we get 53 | ;; the handle by popping it off a freelist. Every free slot in the 54 | ;; table is on the freelist. If the freelist is empty, then first we 55 | ;; expand the table, pushing new handles for each new slot onto the free 56 | ;; list. 57 | (func $expand_table 58 | (local $old_size i32) 59 | (local $end i32) 60 | (local.set $old_size (table.size $objects)) 61 | 62 | ;; Grow the table by (old_size >> 1) + 1. 63 | (table.grow $objects 64 | (ref.null extern) 65 | (i32.add (i32.shr_u (local.get $old_size) 66 | (i32.const 1)) 67 | (i32.const 1))) 68 | 69 | ;; If growing the table failed, signal the runtime, then abort. 70 | (if (i32.eq (i32.const -1)) 71 | (then (call $out_of_memory) (unreachable))) 72 | 73 | ;; Push freelist entries for new slots. 74 | (local.set $end (table.size $objects)) 75 | (loop $loop 76 | (if (i32.eq (local.get $end) (local.get $old_size)) 77 | (then (return))) 78 | (local.set $end (i32.sub (local.get $end) (i32.const 1))) 79 | (call $freelist_push (local.get $end)) 80 | (br $loop))) 81 | 82 | ;; Put $obj into the object table, returning a fresh handle. 83 | (func $intern (param $obj externref) (result i32) 84 | (local $handle i32) 85 | 86 | ;; Expand the table if no slots are free. 87 | (if (i32.eqz (global.get $freelist)) 88 | (then (call $expand_table))) 89 | 90 | ;; Store $obj into the object table and return its handle. 91 | (local.set $handle (call $freelist_pop)) 92 | (table.set $objects (local.get $handle) (local.get $obj)) 93 | (local.get $handle)) 94 | 95 | ;; Release the slot in the object table corresponding to $handle. 96 | (func $release (param $handle i32) 97 | ;; If $handle was -1 (invalid), just ignore it. 98 | (if (i32.eq (local.get $handle) (i32.const -1)) 99 | (then (return))) 100 | (table.set $objects (local.get $handle) (ref.null extern)) 101 | (call $freelist_push (local.get $handle))) 102 | 103 | (func $handle_value (param $handle i32) (result externref) 104 | (if (result externref) 105 | (i32.eq (local.get $handle) (i32.const -1)) 106 | (then (ref.null extern)) 107 | (else (table.get $objects (local.get $handle))))) 108 | 109 | (func $make_obj (export "make_obj") (result i32) 110 | (local $obj i32) 111 | 112 | ;; Allocate a new object in linear memory. If that fails, signal the 113 | ;; runtime, and then abort. 114 | (if (i32.eqz (local.tee $obj (call $malloc (i32.const 4)))) 115 | (then (call $out_of_memory) (unreachable))) 116 | 117 | ;; Initialize callback handle -1 (invalid) for fresh object. 118 | (i32.store offset=0 (local.get $obj) (i32.const -1)) 119 | 120 | (local.get $obj)) 121 | 122 | (func $free_obj (export "free_obj") (param $obj i32) 123 | ;; Release the callback handle. 124 | (call $release (i32.load offset=0 (local.get $obj))) 125 | (call $free (local.get $obj))) 126 | 127 | (func $attach_callback (export "attach_callback") 128 | (param $obj i32) (param $callback externref) 129 | ;; Release old handle. 130 | (call $release (i32.load offset=0 (local.get $obj))) 131 | 132 | ;; Intern the new callback, and store handle to object. 
133 | (i32.store offset=0 134 | (local.get $obj) 135 | (call $intern (local.get $callback)))) 136 | 137 | (func $invoke_callback (export "invoke_callback") 138 | (param $obj i32) 139 | (call $invoke (call $handle_value (i32.load offset=0 (local.get $obj))))) 140 | -------------------------------------------------------------------------------- /milestones/m0/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 0: Finalizers clean up WebAssembly resources of JavaScript wrappers 2 | 3 | [ next: [m1](../m1/) ] 4 | 5 | ## Overview 6 | 7 | Here we define a basic C library which defines a simple `make_object` 8 | facility and which includes extensions to export that interface to its 9 | WebAssembly host. 10 | 11 | We define a JavaScript run-time that tests this facility, wrapping the 12 | raw pointers coming from the `make_object` calls in /wrapper objects/ on 13 | the JavaScript side. 14 | 15 | The exported interface also includes a facility to attach a callback to 16 | the object. We define a simple handle-and-side-table mechanism, 17 | allowing C to refer to JavaScript callbacks by index. The C code 18 | exports a `dispose_object` interface that will release associated 19 | resources from the side table, in addition to freeing linear memory 20 | associated with an object. 21 | 22 | Finally, we attach a finalizer to the JavaScript wrapper of the C 23 | object, allowing the system to automatically call `dispose_object` on C 24 | objects when their associated JavaScript wrappers become unreachable. 25 | 26 | This test illustrates the handle-and-side-table pattern for references 27 | from C to JavaScript. It also shows that the finalizer facility can 28 | clean up "external" resources associated with JavaScript objects, 29 | e.g. calling `free` on allocations from linear memory. 30 | 31 | However, this test also shows that the finalizers interface has a number 32 | of pitfalls in practice. 33 | 34 | ## Details 35 | 36 | The C test program is in [`test.c`](./test.c). The associated 37 | JavaScript run-time and test harness is in [`test.js`](./test.js). 38 | 39 | The C test program needs a malloc implementation; we include 40 | [walloc](https://github.com/wingo/walloc) for that purpose. 41 | 42 | Run the tests via a simple `make` (though check the [getting started 43 | document](../getting-started.md) to make sure you have all the 44 | prerequisite environment variables set). The default `test` target will 45 | run `v8.test`, `jsc.test`, and `spidermonkey.test`, which checks that 46 | our program runs under all engines. An example run looks like: 47 | 48 | ``` 49 | $ make v8.test 50 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o test.o test.c 51 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c 52 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --import-memory -o test.wasm test.o walloc.o 53 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc test.js 54 | Callback after 1 allocated. 55 | 1000 total allocated, 1000 still live. 56 | Callback after 1001 allocated. 57 | 2000 total allocated, 1000 still live. 58 | ... 59 | 99000 total allocated, 1000 still live. 60 | Callback after 99001 allocated. 61 | 100000 total allocated, 1000 still live. 62 | checking expected live object count: 0 63 | Success; max 1000 objects live. 
64 | ```
65 | 
66 | ## Results
67 | 
68 | ### Success on all JS engines
69 | 
70 | The test as written runs successfully on V8, JSC, and SpiderMonkey, at
71 | least as of September 2020.
72 | 
73 | ### Reasoning about objects referenced from closures is unspecified
74 | 
75 | The `FinalizationRegistry` facility specifies that if the "held" value
76 | passed to the finalizer callback references the finalizable object, the
77 | object will never be finalized. The registry holds a strong reference
78 | to "held" values.
79 | 
80 | In our case, the "held" value is a closure to call when the object
81 | becomes finalizable. However, the set of objects referenced by a
82 | closure is not specified by JavaScript. Does the callback capture the
83 | newly constructed `this` or not? `this` doesn't appear in its text, so
84 | you would think not; but while developing this test case, we ran into
85 | [cases where `this` was indeed
86 | captured](https://bugzilla.mozilla.org/show_bug.cgi?id=1664463). We
87 | believe that we have avoided these cases, but it is hard to say.
88 | 
89 | ### GC doesn't run often enough
90 | 
91 | Engines will run garbage collection according to a number of heuristics,
92 | principal among them how much GC-managed allocation has happened since
93 | the last GC. These heuristics are not informed about how much
94 | off-heap allocation has occurred; see
95 | https://github.com/tc39/proposal-weakrefs/issues/87 for a discussion.
96 | In practice, without the explicit calls to `gc()`, all engines will fail
97 | our tests, because they don't detect finalizable objects soon enough.
98 | 
99 | Therefore, to make these tests reliable, we insert explicit GC calls.
100 | This bodes poorly for the role of finalizers in robust systems.
101 | 
102 | ### Permissibly late finalization can cause out-of-memory
103 | 
104 | If no object is finalized, then this test will run out of linear memory.
105 | (Remove the finalizer and you will see.) However, we have no useful
106 | guarantee about when finalizers run; this is related to the previous
107 | point. The interesting thing to note is the failure mode: late
108 | finalization -- for whatever reason, perhaps because the program didn't
109 | yield to the next microtask -- can result in out-of-memory conditions,
110 | which we imagine would apply to real programs as well.
111 | 
112 | ### Users need to be careful to yield to allow finalization to happen
113 | 
114 | Related again to the previous point, users need to structure their
115 | programs carefully to allow finalizers to run. Our test runs the
116 | allocation benchmark in a nested loop, yielding to the scheduler within
117 | the outer loop. See
118 | https://github.com/tc39/proposal-weakrefs/issues/78#issuecomment-485838979
119 | for a related issue.
120 | 
121 | ### SpiderMonkey retains too much data for closures within async functions
122 | 
123 | Note that in SpiderMonkey, you can't use the obvious idiom of making the
124 | test an async function, because the callback closure ends up retaining
125 | all local variables. See [bug
126 | 1664463](https://bugzilla.mozilla.org/show_bug.cgi?id=1664463). We have
127 | worked around this bug in the test.
128 | 
--------------------------------------------------------------------------------
/milestones/README.md:
--------------------------------------------------------------------------------
1 | # Ref C++ Milestones
2 | 
3 | This folder holds milestone examples for different phases of the Ref C++
4 | effort.
5 | 6 | ## Before starting 7 | 8 | There are some dependencies that we use as part of our test setup. You 9 | should make sure to have them installed before starting; see [the 10 | getting-started guide](./getting-started.md) to get things set up. In 11 | the end you should be able to run these commands: 12 | 13 | ``` 14 | $LLVM/llc --version 15 | $CC --version 16 | $LD --version 17 | $WAT2WASM --version 18 | $D8 -h 19 | $JSC -h 20 | $SPIDERMONKEY -h 21 | ``` 22 | 23 | ## [Milestone 0: Finalizers clean up WebAssembly resources of JavaScript wrappers](./m0/) 24 | 25 | Here we define a basic C library which defines a simple `make_object` 26 | facility and which includes extensions to export that interface to its 27 | WebAssembly host. 28 | 29 | We define a JavaScript run-time that tests this facility, wrapping the 30 | raw pointers coming from the `make_object` calls in /wrapper objects/ on 31 | the JavaScript side. 32 | 33 | The exported interface also includes a facility to attach a callback to 34 | the object. We define a simple handle-and-side-table mechanism, 35 | allowing C to refer to JavaScript callbacks by index. The C code 36 | exports a `dispose_object` interface that will release associated 37 | resources from the side table, in addition to freeing linear memory 38 | associated with an object. 39 | 40 | Finally, we attach a finalizer to the JavaScript wrapper of the C 41 | object, allowing the system to automatically call `dispose_object` on C 42 | objects when their associated JavaScript wrappers become unreachable. 43 | 44 | This test illustrates the handle-and-side-table pattern for references 45 | from C to JavaScript. It also shows that the finalizer facility can 46 | clean up "external" resources associated with JavaScript objects, 47 | e.g. calling `free` on allocations from linear memory. 48 | 49 | However, this test also shows that the finalizers interface has a number 50 | of pitfalls in practice. 51 | 52 | ## [Milestone 1: Cycles between WebAssembly and JavaScript are uncollectable](./m1/) 53 | 54 | Here we have the same program as in milestone 0, but the callback 55 | attached to the C object captures the JavaScript wrapper, preventing it 56 | from being collected. 57 | 58 | Note that JavaScript garbage collectors are perfectly capable of 59 | collecting cycles. However, while this case exhibits what is logically 60 | a cycle between the callback, the JS wrapper, and the C object, the 61 | garbage collector sees only the JavaScript side of things: the side 62 | table keeps the callback alive, which, as the callback captures the 63 | wrapper, keeps the wrapper alive. 64 | 65 | Because the allocated objects are never collected, it will eventually 66 | crash because it can't allocate any more memory. In practice, our 67 | example crashes because it runs out of linear memory. 68 | 69 | This is a simplified representation of the use case that we are trying 70 | to fix in this effort, and milestone 1 indicates the problem. 71 | 72 | ## [Milestone 2: Re-express test.c as raw WebAssembly](./m2/) 73 | 74 | We would like to propose some language extensions to C and C++ to fix 75 | the cycle problem shown by milestone 1. To keep the discussion 76 | concrete, we will first show the kind of WebAssembly that we would like 77 | LLVM to produce, and test it to show that it solves the cycle problems. 78 | This test will also allow us to examine different characteristics of the 79 | proposed solution. 
80 | 81 | Therefore this milestone is the same as [milestone 0](./m0/), but with 82 | the C program replaced with corresponding WebAssembly, and "compiled" by 83 | wabt's `wat2wasm` instead of by LLVM's `llc`. 84 | 85 | ## [Milestone 3: Move side table to WebAssembly](./m3/) 86 | 87 | In milestones 1 and 2, the C++ or WebAssembly library referred to 88 | JavaScript objects by `i32` index. The actual objects were stored in a 89 | table managed by the JavaScript run-time. Therefore to call a callback, 90 | the WebAssembly would invoke an import from JavaScript, passing it an 91 | `i32` index indicating which object in the table to invoke. 92 | 93 | With `externref`, milestone 3 changes to manage this table on the 94 | WebAssembly side of things, avoiding the need for the JS run-time to 95 | manage the table. Otherwise this milestone still has the same 96 | uncollectable cycle characteristics as milestones 1 and 2. 97 | 98 | ## [Milestone 4: Collectable cycle: WebAssembly allocates GC-managed memory](./m4/) 99 | 100 | The side table in milestones 1 is the essence of our uncollectable cycle 101 | problem: it introduces a reference-counted edge in our object graph, but 102 | the garbage collector doesn't understand how to traverse that edge and 103 | instead sees the cycle as being referred to by the GC roots (the table 104 | itself). 105 | 106 | To fix this problem, we will have the WebAssembly module allocate its 107 | data structures that should participate in GC on the GC-managed heap. 108 | Since only JavaScript has the capability to allocate this memory, we'll 109 | implement the allocation routine in the JS run-time, providing accessor 110 | imports as well. 111 | 112 | ## [Milestone 5: Same as milestone 3, but with C++ compiled to LLVM](./m5/) 113 | 114 | Having shown where we're going and that milestone 4 solves the cycle 115 | problem, we need to return to the C++-to-WebAssembly problem: we are 116 | lacking a way to represent `externref` values in C++, and support in the 117 | LLVM compiler. Although using milestone 3 doesn't solve the cycle 118 | problem, it's a good intermediate step for getting `externref` into 119 | LLVM. 120 | 121 | ## [Milestone 6: Same as milestone 4, but with C++ compiled to LLVM](./m6/) 122 | 123 | This milestone represents a minimal solution to the cycle problem, but 124 | from C++ instead of from raw WebAssembly. Instead of using classes as 125 | data abstractions, the C++ here will use the raw allocation and accessor 126 | imports defined in milestone 4. 127 | 128 | ## [Milestone 7: Minimal Ref C++](./m7/) 129 | 130 | In milestone 6, we solved the cycle problem, but at a low level of 131 | abstraction. Here we will attempt to raise the abstraction level, 132 | defining lowerings of reference-typed classes à la Ref C++ to the same 133 | primitives. This milestone will result in idiomatic C++ that solves the 134 | cycle problem. 135 | -------------------------------------------------------------------------------- /milestones/m4/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 4: Collectable cycle: WebAssembly allocates GC-managed memory 2 | 3 | [ previous: [m3](../m3/); next: [m5](../m5/) ] 4 | 5 | ## Overview 6 | 7 | Here we make progress! In the previous milestones, we had a cycle of 8 | objects across the WebAssembly / JavaScript boundary that wasn't 9 | traceable as such by the garbage collector. 
In this milestone, we solve
10 | this problem by moving the WebAssembly objects from linear memory to
11 | GC-managed memory.
12 | 
13 | As the cycle now exists only in GC-managed memory, it is transparent to
14 | the garbage collector, so we have solved the cycle problem.
15 | Furthermore, there is no need for finalizers, removing an error-prone
16 | component from the system.
17 | 
18 | In this example, there is nothing else in WebAssembly that needs to
19 | hold onto an object once it's allocated, so this example omits the side
20 | table as well. However, we expect that in real programs, we would
21 | occasionally need a side table for references to GC-managed data from
22 | C++. Such handles can refer to any member of a cycle, as long as the
23 | holder of the handle doesn't participate in a cycle.
24 | 
25 | ## Details
26 | 
27 | The difference from [m3](../m3) is that we made [`test.S`](./test.S)
28 | call out to the new `gc_alloc` routines from [`test.js`](./test.js).
29 | The [`test.js`](./test.js) is also trimmer, as it doesn't need
30 | finalizer support.
31 | 
32 | Incidentally, as the test doesn't actually need linear memory, we don't
33 | need to build in a malloc implementation.
34 | 
35 | Running `make` will build `test.wasm` and run the test as in m3. The
36 | reporting is a bit different, though, as we don't have the live-object
37 | telemetry provided by finalizers.
38 | 
39 | ```
40 | ~/src/llvm-project/build/bin/clang -Oz -mreference-types --target=wasm32 -nostdlib -c -o test.o test.S
41 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --allow-undefined --import-memory -o test.wasm test.o
42 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc --experimental-wasm-reftypes test.js
43 | Callback after 1 allocated.
44 | 1000 total allocated.
45 | Callback after 1001 allocated.
46 | 2000 total allocated.
47 | ...
48 | Callback after 98001 allocated.
49 | 99000 total allocated.
50 | Callback after 99001 allocated.
51 | 100000 total allocated.
52 | Success.
53 | ```
54 | 
55 | ## Results
56 | 
57 | ### Heap provided by GC-capable host
58 | 
59 | In this example, the `gc_alloc` function, provided by
60 | [`test.js`](./test.js), allocates an object with slots to hold a fixed
61 | number of reference-typed objects as well as a fixed number of bytes.
62 | 
63 | The `gc_alloc` facility models a heap that can create objects containing
64 | both GC-managed and "raw" fields. To WebAssembly, such a value is
65 | opaque; we need to call out to accessors defined by the host (by
66 | JavaScript) to initialize, reference, and modify object fields.
67 | 
68 | This approach to memory management was first prototyped in the [Schism
69 | self-hosted implementation of Scheme](https://github.com/google/schism).
70 | 
71 | ### Path towards GC proposal
72 | 
73 | Having to call out to the host to allocate objects and access their
74 | fields is evidently suboptimal. Future parts of the [GC
75 | proposal](https://github.com/WebAssembly/gc/blob/master/proposals/gc/Overview.md)
76 | allow for these operations to occur within WebAssembly, which will make
77 | for more efficient systems with smaller demands on the host run-time.
78 | 
79 | ### No need to break up main loop into async function
80 | 
81 | Because finalizers can only run asynchronously, they cause programs to
82 | keep memory alive for longer than they would otherwise.
That's why we 83 | had to break up the earlier test loop into an inner and an outer loop, 84 | running garbage collection in the middle; see the [discussion in 85 | milestone 0](../m0/) for more details. Because this is no longer 86 | necessary without finalizers, this program can be simpler on the 87 | JavaScript side as well. 88 | 89 | ### Less telemetry 90 | 91 | On the other hand, not having finalizers gives us less telemetry into 92 | the total numbers of allocated objects. While this can be added back, 93 | if it is not needed, it is just overhead. 94 | 95 | ### WebKit brought up to date 96 | 97 | In our initial investigations, the `jsc.test` target was failing due to 98 | the recent change in `ref.null` encoding. Dmitry Bezhetskov landed a number of 99 | patches recently to bring WebKit up to date, and now JSC passes all of 100 | these tests. 101 | 102 | For details, see bugs 218331, 218744, 218885, 219192, 219297, 219427, 219595, 219725, 219848, 219943, 220005, 220007, 220009, 220161, 220181, 220235, 220311, 220314, 220323, 220890, 220914, 220918, 222351, 222400, and 222779. 128 | 129 | ### Lower run-time overhead than m0 130 | 131 | It's not precisely a fair comparison, given that we don't have to 132 | explicitly run GC, we don't do finalizers, and we don't need extra 133 | turns, but this test takes less time for all engines than the versions 134 | that have side tables, despite the need for indirect field access via 135 | the `gc_ref_obj` family of functions. 136 | 137 | ### TODO: Impact on memory overhead 138 | 139 | Would be nice to check the process size and compare to m0. Also, we 140 | should be able to run indefinitely without stopping. 141 | -------------------------------------------------------------------------------- /milestones/m3/test.ll: -------------------------------------------------------------------------------- 1 | target triple = "wasm32-unknown-unknown" 2 | 3 | ; Reference types 4 | %externref = type ptr addrspace(10) 5 | 6 | %funcptr = type void () addrspace(20)* 7 | %funcref = type i8 addrspace(20)* 8 | 9 | ; External functions 10 | declare void @free(i8*) local_unnamed_addr 11 | declare hidden i8* @malloc(i32) local_unnamed_addr 12 | declare void @out_of_memory() local_unnamed_addr #1 13 | declare void @invoke(%externref) local_unnamed_addr #7 14 | 15 | ; Intrinsics 16 | declare void @llvm.trap() 17 | declare i32 @llvm.wasm.table.grow.externref(i8 addrspace(1)*, %externref, i32) nounwind readonly 18 | declare %externref @llvm.wasm.ref.null.extern() nounwind readonly 19 | declare %externref @llvm.wasm.table.get.externref(i8 addrspace(1)*, i32) nounwind 20 | declare void @llvm.wasm.table.set.externref(i8 addrspace(1)*, i32, %externref) nounwind 21 | declare i32 @llvm.wasm.table.size(i8 addrspace(1)*) nounwind readonly 22 | 23 | %struct.freelist = type { i32, %struct.freelist* } 24 | @freelist = hidden local_unnamed_addr global %struct.freelist* null 25 | 26 | ; Returns the value at the top of the list and frees the head 27 | ; Reduces the length of list by one. 28 | ; Fails if list is empty. 
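; Roughly equivalent C, for reference (an illustrative sketch of the function
; defined below):
;
;   uint32_t freelist_pop(void) {
;     struct freelist *head = freelist;   // trap if the list is empty
;     uint32_t handle = head->handle;
;     freelist = head->next;
;     free(head);
;     return handle;
;   }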
29 | define i32 @freelist_pop() { 30 | ; Check if the freelist is null 31 | %head = load %struct.freelist*, %struct.freelist** @freelist, align 4 32 | %head_is_null = icmp eq %struct.freelist* %head, null 33 | br i1 %head_is_null, label %null_head, label %good 34 | 35 | ; fail if it is 36 | null_head: 37 | unreachable 38 | 39 | good: 40 | ; Load v in order to return it 41 | %tmp = getelementptr inbounds %struct.freelist, %struct.freelist* %head, i32 0, i32 0 42 | %v = load i32, i32* %tmp 43 | 44 | ; Load a ptr to the next element 45 | %nextptr = getelementptr inbounds %struct.freelist, %struct.freelist* %head, i32 0, i32 1 46 | %next = load %struct.freelist*, %struct.freelist** %nextptr 47 | 48 | ; Store next into freelist 49 | store %struct.freelist* %next, %struct.freelist** @freelist 50 | 51 | ; Free head and return the value that we loaded from head earlier on 52 | %headi8 = bitcast %struct.freelist* %head to i8* 53 | call void @free(i8* nonnull %headi8) 54 | 55 | ret i32 %v 56 | } 57 | 58 | ; Adds an element to the top of the list 59 | ; Increases the length of list by one. 60 | define void @freelist_push(i32 %v) { 61 | ; allocate new node and check that malloc did not return null 62 | %tmp = call align 16 dereferenceable_or_null(8) i8* @malloc(i32 8) 63 | %node = bitcast i8* %tmp to i32* 64 | %is_null = icmp eq i32* %node, null 65 | br i1 %is_null, label %oom, label %good 66 | 67 | oom: 68 | call void @out_of_memory() 69 | unreachable 70 | 71 | good: 72 | ; store v in the new node 73 | store i32 %v, i32* %node 74 | 75 | ; setup pointers to freelist and node->next 76 | %freelist = load %struct.freelist*, %struct.freelist** @freelist 77 | %node_next = getelementptr inbounds i32, i32* %node, i32 1 78 | %node_next_ptr = bitcast i32* %node_next to %struct.freelist** 79 | 80 | ; store freelist in node->next 81 | store %struct.freelist* %freelist, %struct.freelist** %node_next_ptr 82 | ; store node into freelist 83 | store i32* %node, i32** bitcast (%struct.freelist** @freelist to i32**) 84 | 85 | ret void 86 | } 87 | 88 | ; Linear memory references externref values by putting them into the object 89 | ; table, then referring them via index. 90 | @objects = local_unnamed_addr addrspace(1) global [0 x %externref] undef 91 | 92 | define void @expand_table() #3 { 93 | ; get current table size 94 | %sz = call i32 @llvm.wasm.table.size(i8 addrspace(1)* @objects) 95 | 96 | ; grow the table by (old_size >> 1) + 1. 
97 | %shf = lshr i32 %sz, 1 98 | %incsize = add nuw i32 %shf, 1 99 | %null = call %externref @llvm.wasm.ref.null.extern() 100 | %ret = call i32 @llvm.wasm.table.grow.externref(i8 addrspace(1)* @objects, %externref %null, i32 %incsize) 101 | 102 | ; if growing the table failed, signal the runtime, then abort 103 | %failed = icmp eq i32 %ret, -1 104 | br i1 %failed, label %oom, label %good 105 | 106 | oom: 107 | call void @out_of_memory() 108 | unreachable 109 | 110 | good: 111 | %newsize = add i32 %sz, %incsize 112 | br label %loophd 113 | 114 | loophd: 115 | %newi = phi i32 [ %newsize, %good ], [ %i, %loopbody] 116 | %done = icmp eq i32 %newi, %sz 117 | br i1 %done, label %end, label %loopbody 118 | 119 | end: 120 | ret void 121 | 122 | loopbody: 123 | %i = add i32 %newi, -1 124 | call void @freelist_push(i32 %i) 125 | br label %loophd 126 | } 127 | 128 | define i32 @intern(%externref %ref) { 129 | %head = load %struct.freelist*, %struct.freelist** @freelist, align 4 130 | %is_null = icmp eq %struct.freelist* %head, null 131 | br i1 %is_null, label %need_expand, label %do_intern 132 | 133 | need_expand: 134 | call void @expand_table() 135 | br label %do_intern 136 | 137 | do_intern: 138 | %handle = call i32 @freelist_pop() 139 | call void @llvm.wasm.table.set.externref(i8 addrspace(1)* @objects, i32 %handle, %externref %ref) 140 | ret i32 %handle 141 | } 142 | 143 | ; release the slot in the object table corresponding to the handle 144 | define void @release(i32 %handle) { 145 | ; if handle is -1 then ignore it 146 | %is_neg = icmp eq i32 %handle, -1 147 | br i1 %is_neg, label %end, label %body 148 | 149 | body: 150 | %null = call %externref @llvm.wasm.ref.null.extern() 151 | call void @llvm.wasm.table.set.externref(i8 addrspace(1)* @objects, i32 %handle, %externref %null) 152 | call void @freelist_push(i32 %handle) 153 | br label %end 154 | 155 | end: 156 | ret void 157 | } 158 | 159 | define %externref @handle_value(i32 %handle) { 160 | ; if handle is -1 then return null 161 | %is_neg = icmp eq i32 %handle, -1 162 | br i1 %is_neg, label %retnull, label %body 163 | 164 | retnull: 165 | %null = call %externref @llvm.wasm.ref.null.extern() 166 | br label %end 167 | 168 | body: 169 | %v = call %externref @llvm.wasm.table.get.externref(i8 addrspace(1)* @objects, i32 %handle) 170 | br label %end 171 | 172 | end: 173 | %retv = phi %externref [%null, %retnull], [%v, %body] 174 | ret %externref %retv 175 | } 176 | 177 | ; allocates a new object in linear memory and if 178 | ; that fails it signals the runtime and aborts 179 | define i32* @make_obj() #0 { 180 | ; allocates a new i32 representing an obj 181 | %tmp = call dereferenceable_or_null(4) i8* @malloc(i32 4) 182 | %ptr = bitcast i8* %tmp to i32* 183 | %is_null = icmp eq i32* %ptr, null 184 | br i1 %is_null, label %oom, label %body 185 | 186 | oom: 187 | call void @out_of_memory() 188 | unreachable 189 | 190 | body: 191 | ; initialize callback handle -1 (invalid) for a fresh object 192 | store i32 -1, i32* %ptr 193 | ret i32* %ptr 194 | } 195 | 196 | define void @free_obj(i32* %obj) #8 { 197 | %v = load i32, i32* %obj 198 | call void @release(i32 %v) 199 | %ptr = bitcast i32* %obj to i8* 200 | call void @free(i8* %ptr) 201 | ret void 202 | } 203 | 204 | define void @attach_callback(i32* %obj, %externref %callback) #2 { 205 | ; release old handle 206 | %oldhandle = load i32, i32* %obj 207 | call void @release(i32 %oldhandle) 208 | ; intern the new callback, and store handle to object 209 | %handle = call i32 @intern(%externref %callback) 210 
| store i32 %handle, i32* %obj 211 | ret void 212 | } 213 | 214 | define void @invoke_callback(i32* %obj) #6 { 215 | %handle = load i32, i32* %obj 216 | %v = call %externref @handle_value(i32 %handle) 217 | call void @invoke(%externref %v) 218 | ret void 219 | } 220 | 221 | attributes #0 = { "wasm-export-name"="make_obj" } 222 | attributes #8 = { "wasm-export-name"="free_obj" } 223 | attributes #1 = { noreturn "wasm-import-module"="rt" "wasm-import-name"="out_of_memory" } 224 | attributes #2 = { "wasm-export-name"="attach_callback" } 225 | attributes #3 = { "wasm-export-name"="expand_table" } 226 | attributes #6 = { "wasm-export-name"="invoke_callback" } 227 | attributes #7 = { "wasm-import-module"="rt" "wasm-import-name"="invoke" } 228 | -------------------------------------------------------------------------------- /milestones/m3/test.S: -------------------------------------------------------------------------------- 1 | .functype out_of_memory () -> () 2 | .import_module out_of_memory, rt 3 | .import_name out_of_memory, out_of_memory 4 | .functype invoke (externref) -> () 5 | .import_module invoke, rt 6 | .import_name invoke, invoke 7 | 8 | .functype malloc (i32) -> (i32) 9 | .functype free (i32) -> () 10 | 11 | .text 12 | 13 | freelist: 14 | # struct freelist { uint32_t handle; struct freelist *next; }; 15 | .globaltype freelist, i32 16 | 17 | ## Pop a handle off the freelist. Precondition: freelist non-empty. 18 | freelist_pop: 19 | .hidden freelist_pop 20 | .globl freelist_pop 21 | .type freelist_pop,@function 22 | .functype freelist_pop () -> (i32) 23 | # Abort if the freelist is empty. 24 | global.get freelist 25 | i32.eqz 26 | if 27 | unreachable 28 | end_if 29 | # Return value is the handle from the head of the freelist. 30 | global.get freelist 31 | i32.load 0 32 | # Push old freelist head. 33 | global.get freelist 34 | # Set $freelist to the old head's "next" link. 35 | global.get freelist 36 | i32.load 4 37 | global.set freelist 38 | # Free the old head; the handle return value remains on the stack. 39 | call free 40 | end_function 41 | 42 | ## Push a handle onto the freelist. May call out_of_memory. 43 | freelist_push: 44 | .hidden freelist_push 45 | .globl freelist_push 46 | .type freelist_push,@function 47 | .functype freelist_push (i32) -> () 48 | .local i32 # new_head 49 | # Allocate a new head. 50 | i32.const 8 51 | call malloc 52 | local.tee 1 53 | 54 | # If the malloc failed, signal the runtime, then abort. 55 | i32.eqz 56 | if 57 | call out_of_memory 58 | unreachable 59 | end_if 60 | 61 | # Link new freelist head to old freelist. 62 | local.get 1 63 | global.get freelist 64 | i32.store 4 65 | 66 | # Initialize handle for new freelist head. 67 | local.get 1 68 | local.get 0 69 | i32.store 0 70 | 71 | # Set new head as $freelist. 72 | local.get 1 73 | global.set freelist 74 | end_function 75 | 76 | # Linear memory references externref values by putting them into the 77 | # object table, then referring to them via index. We use the word 78 | # "handle" to refer to an externref's index into the object table. 79 | objects: 80 | .tabletype objects,externref 81 | 82 | # When we need to intern a new externref into the object table, we get 83 | # the handle by popping it off a freelist. Every free slot in the 84 | # table is on the freelist. If the freelist is empty, then first we 85 | # expand the table, pushing new handles for each new slot onto the free 86 | # list. 
87 | expand_table: 88 | .hidden expand_table 89 | .globl expand_table 90 | .type expand_table,@function 91 | .functype expand_table () -> () 92 | .local i32,i32 # old_size, end 93 | table.size objects 94 | local.set 0 95 | # Grow the table by (old_size >> 1) + 1. 96 | ref.null_extern 97 | local.get 0 98 | i32.const 1 99 | i32.shr_u 100 | i32.const 1 101 | i32.add 102 | table.grow objects 103 | # If growing the table failed, signal the runtime, then abort. 104 | i32.const -1 105 | i32.eq 106 | if 107 | call out_of_memory 108 | unreachable 109 | end_if 110 | # Push freelist entries for new slots. 111 | table.size objects 112 | local.set 1 113 | loop 114 | local.get 1 115 | local.get 0 116 | i32.eq 117 | if 118 | return 119 | end_if 120 | local.get 1 121 | i32.const 1 122 | i32.sub 123 | local.set 1 124 | local.get 1 125 | call freelist_push 126 | br 0 127 | end_loop 128 | end_function 129 | 130 | ## Put $obj into the object table, returning a fresh handle. 131 | intern: 132 | .hidden intern 133 | .globl intern 134 | .type intern,@function 135 | .functype intern (externref) -> (i32) 136 | .local i32 # handle 137 | global.get freelist 138 | i32.eqz 139 | if 140 | call expand_table 141 | end_if 142 | call freelist_pop 143 | local.set 1 144 | local.get 1 145 | local.get 0 146 | table.set objects 147 | local.get 1 148 | end_function 149 | 150 | ## Release the slot in the object table corresponding to $handle. 151 | release: 152 | .hidden release 153 | .globl release 154 | .type release,@function 155 | .functype release (i32) -> () 156 | # If $handle was -1 (invalid), just ignore it. 157 | local.get 0 158 | i32.const -1 159 | i32.eq 160 | if 161 | return 162 | end_if 163 | local.get 0 164 | ref.null_extern 165 | table.set objects 166 | local.get 0 167 | call freelist_push 168 | end_function 169 | 170 | handle_value: 171 | .hidden handle_value 172 | .globl handle_value 173 | .type handle_value,@function 174 | .functype handle_value (i32) -> (externref) 175 | local.get 0 176 | i32.const -1 177 | i32.eq 178 | if externref 179 | ref.null_extern 180 | else 181 | local.get 0 182 | table.get objects 183 | end_if 184 | end_function 185 | 186 | make_obj: 187 | .hidden make_obj 188 | .globl make_obj 189 | .export_name make_obj, make_obj 190 | .no_dead_strip make_obj 191 | .type make_obj,@function 192 | .functype make_obj () -> (i32) 193 | .local i32 # obj 194 | # Allocate a new object in linear memory. If that fails, 195 | # signal the runtime, and then abort. 196 | i32.const 4 197 | call malloc 198 | i32.eqz 199 | if 200 | call out_of_memory 201 | unreachable 202 | end_if 203 | # Initialize callback handle -1 (invalid) for fresh object. 204 | local.get 0 205 | i32.const -1 206 | i32.store 0 207 | local.get 0 208 | end_function 209 | 210 | 211 | free_obj: 212 | .hidden free_obj 213 | .globl free_obj 214 | .export_name free_obj, free_obj 215 | .no_dead_strip free_obj 216 | .type free_obj,@function 217 | .functype free_obj (i32) -> () 218 | # Release the callback handle. 219 | local.get 0 220 | i32.load 0 221 | call release 222 | local.get 0 223 | call free 224 | end_function 225 | 226 | 227 | attach_callback: 228 | .hidden attach_callback 229 | .globl attach_callback 230 | .export_name attach_callback, attach_callback 231 | .no_dead_strip attach_callback 232 | .type attach_callback,@function 233 | .functype attach_callback (i32, externref) -> () 234 | # Release old handle. 235 | local.get 0 236 | i32.load 0 237 | call release 238 | # Intern the new callback, and store handle to object. 
239 | local.get 0 240 | local.get 1 241 | call intern 242 | i32.store 0 243 | end_function 244 | 245 | invoke_callback: 246 | .hidden invoke_callback 247 | .globl invoke_callback 248 | .export_name invoke_callback, invoke_callback 249 | .no_dead_strip invoke_callback 250 | .type invoke_callback,@function 251 | .functype invoke_callback (i32) -> () 252 | local.get 0 253 | i32.load 0 254 | call handle_value 255 | call invoke 256 | end_function 257 | -------------------------------------------------------------------------------- /milestones/m3/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 
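// With GRANULE_SIZE == 8, the classes below correspond to payloads of 8, 16,
// 24, 32, 40, 48, 64, 80, 128, and 256 bytes; larger allocations are handled
// as large objects.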
73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_CHUNK = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is LARGE_OBJECT, then the pointer is a large object, 113 | // otherwise the kind indicates the size in granules of the objects in the 114 | // chunk. 115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 
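// On this first call, any whole pages already present between __heap_base
// (rounded up to a page boundary) and the current memory size are claimed
// for the walloc heap, without calling memory.grow.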
171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) 198 | { 199 | page->header.chunk_kinds[idx] = kind; 200 | return page->chunks[idx].data; 201 | } 202 | 203 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 204 | // large object with a header, aligned on a chunk boundary, whose payload size 205 | // may be larger than SIZE, and whose total size (header included) is 206 | // chunk-aligned. Either a suitable allocation is found in the large object 207 | // freelist, or we ask the OS for some more pages and treat those pages as a 208 | // large object. If the allocation fits in that large object and there's more 209 | // than an aligned chunk's worth of data free at the end, the large object is 210 | // split. 211 | // 212 | // The return value's corresponding chunk in the page as starting a large 213 | // object. 214 | static struct large_object* 215 | allocate_large_object(size_t size) { 216 | struct large_object *best = NULL, **best_prev = &large_objects; 217 | size_t best_size = -1; 218 | for (struct large_object *prev = NULL, *walk = large_objects; 219 | walk; 220 | prev = walk, walk = walk->next) { 221 | if (walk->size >= size && walk->size < best_size) { 222 | best_size = walk->size; 223 | best = walk; 224 | if (prev) best_prev = &prev->next; 225 | if (best_size + LARGE_OBJECT_HEADER_SIZE 226 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 227 | // Not going to do any better than this; just return it. 228 | break; 229 | } 230 | } 231 | 232 | if (!best) { 233 | // The large object freelist doesn't have an object big enough for this 234 | // allocation. Allocate one or more pages from the OS, and treat that new 235 | // sequence of pages as a fresh large object. It will be split if 236 | // necessary. 237 | size_t size_with_header = size + sizeof(struct large_object); 238 | size_t n_allocated = 0; 239 | struct page *page = allocate_pages(size_with_header, &n_allocated); 240 | if (!page) { 241 | return NULL; 242 | } 243 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 244 | best = (struct large_object *)ptr; 245 | size_t page_header = ptr - ((char*) page); 246 | best->next = large_objects; 247 | best->size = best_size = 248 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 249 | ASSERT(best_size >= size_with_header); 250 | } 251 | 252 | struct large_object *next = best->next; 253 | *best_prev = next; 254 | 255 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 256 | if (tail_size) { 257 | // The best-fitting object has 1 or more aligned chunks free after the 258 | // requested allocation; split the tail off into a fresh aligned object. 
259 | struct page *start_page = get_page(best); 260 | char *start = get_large_object_payload(best); 261 | char *end = start + best_size; 262 | 263 | if (start_page == get_page(end - tail_size - 1)) { 264 | // The allocation does not span a page boundary; yay. 265 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 266 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 267 | // If the allocation itself smaller than a page, split off the head, then 268 | // fall through to maybe split the tail. 269 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 270 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 271 | struct large_object *head = best; 272 | head->size = first_page_size; 273 | head->next = large_objects; 274 | large_objects = head; 275 | 276 | struct page *next_page = start_page + 1; 277 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 278 | best = (struct large_object *) ptr; 279 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE; 280 | ASSERT(best_size >= size); 281 | start = get_large_object_payload(best); 282 | tail_size = (best_size - size) & ~CHUNK_MASK; 283 | } else { 284 | // A large object that spans more than one page will consume all of its 285 | // tail pages. Therefore if the split traverses a page boundary, round up 286 | // to page size. 287 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 288 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 289 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 290 | size = first_page_size + tail_pages_size; 291 | tail_size = best_size - size; 292 | } 293 | best->size -= tail_size; 294 | 295 | unsigned tail_idx = get_chunk_index(end - tail_size); 296 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 297 | // We would be splitting in a page header; don't do that. 
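      // (That is: if the tail would begin at chunk 0 of a page -- the chunk
      // reserved for the page header -- shrink it by one chunk so that it
      // starts at the first allocatable chunk of that page instead.)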
298 | tail_size -= CHUNK_SIZE; 299 | tail_idx++; 300 | } 301 | 302 | if (tail_size) { 303 | struct page *page = get_page(end - tail_size); 304 | char *tail_ptr = allocate_chunk(page, tail_idx, LARGE_OBJECT); 305 | struct large_object *tail = (struct large_object *) tail_ptr; 306 | tail->next = large_objects; 307 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 308 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 309 | large_objects = tail; 310 | } 311 | } 312 | 313 | return best; 314 | } 315 | 316 | static struct freelist* 317 | obtain_small_objects(enum chunk_kind kind) { 318 | struct large_object *obj = allocate_large_object(0); 319 | if (!obj) { 320 | return NULL; 321 | } 322 | char *ptr = allocate_chunk(get_page(obj), get_chunk_index(obj), kind); 323 | char *end = ptr + CHUNK_SIZE; 324 | struct freelist *next = NULL; 325 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 326 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 327 | struct freelist *head = (struct freelist*) (end - i); 328 | head->next = next; 329 | next = head; 330 | } 331 | return next; 332 | } 333 | 334 | static inline size_t size_to_granules(size_t size) { 335 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 336 | } 337 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 338 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 339 | return &small_object_freelists[kind]; 340 | } 341 | 342 | static void* 343 | allocate_small(enum chunk_kind kind) { 344 | struct freelist **loc = get_small_object_freelist(kind); 345 | if (!*loc) { 346 | struct freelist *freelist = obtain_small_objects(kind); 347 | if (!freelist) { 348 | return NULL; 349 | } 350 | *loc = freelist; 351 | } 352 | struct freelist *ret = *loc; 353 | *loc = ret->next; 354 | return (void *) ret; 355 | } 356 | 357 | static void* 358 | allocate_large(size_t size) { 359 | struct large_object *obj = allocate_large_object(size); 360 | return obj ? get_large_object_payload(obj) : NULL; 361 | } 362 | 363 | void* 364 | malloc(size_t size) { 365 | size_t granules = size_to_granules(size); 366 | enum chunk_kind kind = granules_to_chunk_kind(granules); 367 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); 368 | } 369 | 370 | void 371 | free(void *ptr) { 372 | if (!ptr) return; 373 | struct page *page = get_page(ptr); 374 | unsigned chunk = get_chunk_index(ptr); 375 | uint8_t kind = page->header.chunk_kinds[chunk]; 376 | if (kind == LARGE_OBJECT) { 377 | struct large_object *obj = get_large_object(ptr); 378 | obj->next = large_objects; 379 | large_objects = obj; 380 | } else { 381 | size_t granules = kind; 382 | struct freelist **loc = get_small_object_freelist(granules); 383 | struct freelist *obj = ptr; 384 | obj->next = *loc; 385 | *loc = obj; 386 | } 387 | } 388 | -------------------------------------------------------------------------------- /milestones/m5/externref-in-llvm.md: -------------------------------------------------------------------------------- 1 | # Externref in Clang/LLVM: Design Document 2 | 3 | ## Problem statement 4 | 5 | ### Externrefs in WebAssembly 6 | 7 | Externref values are weird. 8 | 9 | From a low-level perspective, externref values are unlike any other 10 | value in LLVM. They can't be stored to linear memory at all -- not to 11 | the heap, not to data, not to the external stack. 
12 | 13 | WebAssembly has its own *internal* stack, of course, consisting of local 14 | variables and ephemeral temporary values inside function activations. 15 | There, we can have externref values, flowing into functions as 16 | parameters, out as results, and within as mutable locals and ephemeral 17 | temporaries. But when compiling C++, we often find that a function 18 | needs to allocate with automatic duration, and that goes out to the 19 | *external* stack in linear memory. Externrefs can't go there. 20 | 21 | Externrefs can't be created by a WebAssembly program; they can only be 22 | passed into it from the outside. This can happen in four ways: 23 | 24 | * Returned from a call to an imported function. 25 | ```wasm 26 | (func $foo (import "rt" "make_foo") (result externref)) 27 | (func $bar (result externref) 28 | (call $foo)) 29 | ``` 30 | * As an argument to an exported function. 31 | ```wasm 32 | (func $take_foo (export "take_foo") (param externref) (result externref) 33 | (local.get 0)) 34 | ``` 35 | * Loaded from a table via `table.get`. The table can be populated 36 | externally, or the WebAssembly program can store externref values 37 | there. 38 | ```wasm 39 | (table $objects 0 externref) 40 | (func $get_foo (result externref) 41 | (table.get $objects (i32.const 0))) 42 | ``` 43 | * Loaded from a global of type `externref`. As with tables, this 44 | variable can be populated externally or internally. 45 | ```wasm 46 | (global $the_foo (mut externref) (ref.null extern)) 47 | (func $get_the_foo (result externref) 48 | (global.get $the_foo)) 49 | ``` 50 | 51 | ### Externrefs don't exist in C 52 | 53 | Let's assume that we would like to be able to make C programs that can 54 | compile to these example WebAssembly snippets. What should the C 55 | programs look like? How should they compile? 56 | 57 | A first important thing to note is that there is no such C code "out 58 | there" right now. Unlike the original goal for emscripten, which was to 59 | allow existing C and C++ programs to run in web browsers, there is no C 60 | or C++ program out there now that deals in these externref values. The 61 | reason being, the low-level particularies of externrefs do not translate 62 | well into C. 63 | 64 | To illustrate this, assume the existence of an externref type in C. 65 | Consider: 66 | 67 | ```c 68 | struct a { int32_t b; externref c; }; 69 | 70 | struct a *a = malloc(sizeof struct a); // What is a->c ? 71 | a->b = 42; 72 | a->c = get_foo(); // Error! 73 | ``` 74 | 75 | What is the byte representation of an externref value? All C++ types 76 | can be written to memory, but we know nothing about externrefs, neither 77 | from a semantics perspective nor from a low-level WebAssembly 78 | perspective -- there are no operations that can tell you how to 79 | represent it as bytes. 80 | 81 | (We expect externref values to be implemented as GC-managed values in 82 | WebAssembly implementations, as indeed they are in browsers. But we 83 | have no way of accessing the bits of the value. We're careful not to 84 | say "managed pointer" and "pointer address" here, because the GC-managed 85 | value could be a tagged immediate without an associated address.) 86 | 87 | ## Externrefs are a problem for LLVM 88 | 89 | Let's take another look at the first example. 
If we were working with 90 | `i32` values instead, we'd have: 91 | 92 | ```c 93 | int foo(void); 94 | int bar(void) { 95 | int value = foo(); 96 | return value; 97 | } 98 | ``` 99 | 100 | If we compile with -emit-llvm, we can see what clang does to produce 101 | LLVM IR for this function: 102 | 103 | ```llvm 104 | declare i32 @foo() 105 | 106 | define i32 @bar() { 107 | %1 = alloca i32, align 4 108 | %2 = call i32 @foo() 109 | store i32 %2, i32* %1, align 4 110 | %3 = load i32, i32* %1, align 4 111 | ret i32 %3 112 | } 113 | ``` 114 | 115 | What we can see is that the clang frontend will `alloca` some bytes for 116 | each local. Initializing `value` stores a value to that location, and 117 | referencing `value` performs the corresponding memory load. Clang 118 | relies on the LLVM optimizer to turn these memory operations into SSA 119 | values that don't need memory (the 120 | [mem2reg](https://llvm.org/docs/Passes.html#mem2reg-promote-memory-to-register) 121 | pass). 122 | 123 | As a compilation strategy, this doesn't work for raw WebAssembly 124 | externref values. If we assume an `externref` type, and we have: 125 | 126 | ```c 127 | externref foo(void); 128 | externref bar(void) { 129 | externref value = foo(); 130 | return value; 131 | } 132 | ``` 133 | 134 | Then clang would lower it as 135 | 136 | ``` 137 | %externref = type opaque ; ???? 138 | 139 | define %externref @bar() { 140 | %1 = alloca %externref ; ???? 141 | %2 = call %externref @foo() ; OK 142 | store %externref %2, %externref* %1 ; ???? 143 | %3 = load %externref, %externref* %1 ; ???? 144 | ret %externref %3 ; OK 145 | } 146 | ``` 147 | 148 | How could we `alloca` memory for a value whose representation is opaque? 149 | 150 | How could we load and store values into that memory? 151 | 152 | Externrefs are a problem for clang, in the sense that its strategy of 153 | using `alloca` for locals assumes that all locals can be stored to 154 | linear memory. 155 | 156 | ## Solutions 157 | 158 | ### Idea 1: Indirection 159 | 160 | Externref values have an impedance mismatch with C and with LLVM. One 161 | way to fix this would be to introduce an indirection. 162 | 163 | When externref values enter WebAssembly, via one of the four methods 164 | described earlier, we arrange to make the compiler write them to a 165 | table. All values of type `externref` in the C program are transformed 166 | to operate on indices into this table. 167 | 168 | The table becomes a parallel store of externref values -- like 169 | linear memory, but for externrefs. Like the linear memory heap, some 170 | values have static duration (the data section), some have automatic 171 | duration (the stack), and some have dynamic duration (the heap). The 172 | compiler and the linker will need to be extended in the usual ways to 173 | manage this heap, allocating static objects, determining a 174 | `__heap_base`, a stack size, and so on. 175 | 176 | The compiler will have to emit corresponding operations to free values 177 | from the externref heap when their duration ends. When a function 178 | activation with `alloca`'d externrefs returns, we'd have to reset the 179 | "externref stack pointer", and to avoid retaining garbage, also null out 180 | the freed entries in the stack part of the externref heap. 181 | 182 | To C, an externref would then be like a pointer -- but not a pointer to 183 | linear memory. 
Dereferencing it doesn't really make sense, because 184 | we're hiding externref values from C; the whole point of the indirection 185 | is to prevent raw externref values from leaking to C. So maybe a 186 | pointer to an opaque type. But, it's a pointer that can't really be 187 | cast to any other pointer; so, perhaps a pointer in a different address 188 | space, using the `__attribute__((address_space(N)))` facility originally 189 | introduced for OpenCL and other partitioned-heap languages. While we're 190 | doing that, we could use the `N` in the address space to indicate 191 | precisely which table the externref is stored in; it might be a neat 192 | trick. 193 | 194 | If we look at our example program, it could compile as: 195 | 196 | ```llvm 197 | %externref = type opaque 198 | %indirect_externref = %externref * 199 | 200 | declare %indirect_externref @foo() 201 | 202 | define %indirect_externref @bar() { 203 | %1 = alloca %indirect_externref 204 | %2 = call %indirect_externref @foo() 205 | store %indirect_externref %2, %indirect_externref* %1 206 | %3 = load %indirect_externref, %indirect_externref* %1 207 | ret %indirect_externref %3 208 | } 209 | ``` 210 | 211 | But, perhaps this is a bit magical. Who actually produces the indirect 212 | externrefs? Who puts the externref values in a table? Who is 213 | responsible for freeing the values when their duration ends? 214 | 215 | Perhaps it's more illustrative to consider clang as performing the 216 | indirection at all function boundaries, not just imported functions. In 217 | that case, `foo` returns a raw externref value, and `bar` would have to 218 | intern it, then reload it before returning. We're putting a large 219 | burden on the optimizer here, but let's see where this takes us: 220 | 221 | ```llvm 222 | %externref = type opaque ; ??? 223 | %opaque = type opaque 224 | %indirect_externref = %opaque * 225 | 226 | ; Push one value on the externref stack, initializing to 227 | ; (ref.null extern). Return its index. 228 | declare i32 @externref_alloca() 229 | 230 | ; Set the externref corresponding to a table index. 231 | declare void @externref_store(i32 %0, %externref %1); 232 | 233 | ; Retrieve the externref corresponding to a table index. 234 | declare %externref @externref_load(i32 %0); 235 | 236 | ; Free one value from the externref stack, popping the 237 | ; externref stack pointer. 238 | declare void @externref_freea() 239 | 240 | ; @foo and @bar return raw (not indirected) externrefs. 241 | declare %externref @foo() 242 | 243 | define %externref @bar() { 244 | %1 = alloca %indirect_externref 245 | %2 = call %externref @foo() 246 | 247 | ; Intern the raw externref %2 to the externref table. 248 | %3 = call i32 @externref_alloca() 249 | call @externref_store(%3, %2) 250 | 251 | ; Treat the index into the externref table as a pointer. 252 | ; This is the visible-to-C value of the externref. 253 | %4 = bitcast i32 %3 to %indirect_externref 254 | 255 | ; Since clang put this local on the stack, write it there. 256 | store %indirect_externref %4, %indirect_externref* %1 257 | 258 | ; To reference the local, load it from the stack. 259 | %5 = load %indirect_externref, %indirect_externref* %1 260 | 261 | ; In this example, functions take and return raw externref 262 | ; values, so strip the indirection, freeing the table slot. 
263 | %6 = bitcast %indirect_externref %5 to i32 264 | %7 = call @externref_load(%6) 265 | call @externref_freea() 266 | 267 | ret %externref %7 268 | } 269 | ``` 270 | 271 | There are a few things that pop out at us in this unoptimized code 272 | listing. 273 | 274 | 1. We now have a double indirection: one, that the %indirect_externref 275 | starts life in memory, and must be lifted to SSA values; and two, 276 | that the raw externref values themselves are also indirected into 277 | their own heap. 278 | 279 | 2. Unlike `alloca`, we need to insert explicit `externref_freea` calls 280 | on exit paths, and presumably on unwinds as well. 281 | 282 | 3. There are three types here; we need the "opaque" type to indicate 283 | that %indirect_externref values can't be usefully dereferenced. 284 | 285 | 4. For heap data with dynamic duration, it's not immediately clear 286 | where the compiler should insert `externref_free` calls. 287 | 288 | 5. You can do pointer arithmetic on %indirect_externref values. This 289 | is not a good thing! 290 | 291 | 6. How would you go about defining your own externref tables, or 292 | accessing values in those tables? Would such values be 293 | doubly-indirected? 294 | 295 | 7. We can use address spaces, but I am not sure what it buys us besides 296 | ensuring that %indirect_externref values aren't interpreted by the 297 | optimizer as aliasing anything on the linear heap. 298 | 299 | ### Idea 2: Restrictions and builtins 300 | 301 | Let's back up here a bit. WebAssembly has instructions to operate on 302 | the linear heap, like `i32.load` and `i8.store`. You could imagine a 303 | compilation strategy whereby LLVM expected user programs to include 304 | builtin calls every time they accessed linear memory; a load would be a 305 | call to `__builtin_wasm_i32_load(i32 offset)`, stores via 306 | `__builtin_wasm_i8_store(i32 offset, i8 val)`, and so on. But that 307 | would be silly. Instead, LLVM treats C pointers as denoting locations 308 | on the linear heap, and the compiler arranges to emit `i32.load`, 309 | `i8.store`, and so on for pointer access. There is very little 310 | impedance mismatch here betwen C, LLVM, and WebAssembly. 311 | 312 | With reference types, WebAssembly now also has instructions to operate 313 | on tables, which are a kind of heap, but for externrefs. The "indirect" 314 | strategy above tries to treat table access as pointers, in analogy to 315 | how access to the linear heap is compiled. But does it make sense to 316 | take this approach? We can't dereference the pointers, and arithmetic 317 | makes no sense. The externref values we're pointing to don't have a 318 | linear representation, and it seems like clang's need to `alloca` local 319 | variable storage is a tail-wagging-the-dog situation: it is what is 320 | causing us problems. 321 | 322 | What if, instead, we treat externrefs as a new class of value. There is 323 | no code out there that uses them right now, so this is an option open to 324 | us. You can't take their address, you can't dereference them, in fact 325 | you can't do anything on them other than use them as function 326 | parameters, return values, and locals. They have nothing to do with 327 | tables, so forcing them to be a pointer to a table seems a needless 328 | complication. 329 | 330 | With this approach, if we wanted to store an externref value to a 331 | table, we'd call a builtin to do so. If we need the null value, we call 332 | a builtin. 
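As a rough sketch of what this programming model might look like -- with the
caveat that the `externref` type, the table declaration, and the builtin
spellings below are all placeholders rather than settled design -- user code
could read something like:

```c
// Hypothetical surface syntax: `externref` is an opaque value type, `objects`
// stands in for "an externref table" (declaration syntax TBD), and the
// builtin names are illustrative only.
extern externref_table objects;

externref get_foo(void);              // imported; returns a raw externref

void stash_foo(int idx) {
  externref v = get_foo();            // fine: externref as a local
  if (!__builtin_wasm_ref_is_null(v)) // null test goes through a builtin
    __builtin_wasm_table_set(objects, idx, v);  // table store via a builtin
  // Not allowed, and rejected by the front-end: &v, sizeof v, or storing v
  // through a pointer into linear memory.
}
```

The important property is that an externref only ever lives in a parameter, a
local, or a builtin call; nothing in this program ever needs a byte
representation for it.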
333 | 334 | If we need to store an externref to the (linear) heap -- well, you just 335 | can't do that. It's a restriction. We require the user program to 336 | build its own side table if needed. We'd need explicit support in the 337 | compiler for working with tables, so that the user could implement their 338 | side table, but that's fine. 339 | 340 | There is an open question about how we would represent raw externref 341 | values in LLVM, but we also have that issue with the indirect strategy, 342 | at the boundaries where values are stored and loaded. 343 | 344 | Finally, we return to the `alloca` problem. The reason clang takes this 345 | approach is that locals are mutable; it needs to do a pass to determine 346 | where to place phi variables. We should adapt to this. Locals of type 347 | `externref` should be allocated not with standard `alloca`, but rather 348 | something like `externref_alloca`. The front-end would insert the 349 | corresponding `externref_free` calls as well. Therefore the IR coming 350 | out of the front-end for our example would look like: 351 | 352 | ```llvm 353 | define %externref @bar() { 354 | %1 = call %externref @foo() 355 | %2 = call i32 @externref_alloca() 356 | call @externref_store(%2, %1) 357 | %3 = call @externref_load(%2) 358 | call @externref_freea() 359 | ret %externref %3 360 | } 361 | ``` 362 | 363 | We would then add a `mem2reg`-like optimizer pass that could reason 364 | about `externref_alloca` and friends, allowing minimal optimization to 365 | result in: 366 | 367 | ```llvm 368 | define %externref @bar() { 369 | %1 = call %externref @foo() 370 | ret %externref %1 371 | } 372 | ``` 373 | 374 | In summary, we would need to: 375 | 1. Add an opaque type for `externref` to LLVM 376 | 2. Add the externref alloca transformation to the C / C++ front-end 377 | 3. Add front-end restrictions on where externrefs can be stored 378 | 4. Add a mem2reg optimization pass for the externref stack 379 | 380 | ### Synthesis? 381 | 382 | These two ideas approach each other from opposite directions. If we 383 | start from the perspective that externref values should be tightly 384 | integrated with C, we arrive at the indirect approach. If we start from 385 | the code that we would like to generate, we arrive at the restrictive 386 | approach; but we still need some run-time indirection facilities for 387 | handling the `alloca` transformation. But if we do support automatic 388 | storage duration, why not add more transformations, for example to 389 | support static durations? And if we want good code, we already need a 390 | `mem2reg` pass for the externref heap; could this result in good code 391 | coming from the indirect side? 392 | 393 | We don't know the answer, but we think that the incremental way to start 394 | is to begin with the restrictive approach. If the indirect approach is 395 | possible (and a good idea), it can be reached next. 396 | -------------------------------------------------------------------------------- /milestones/m0/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 
3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 
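// (With GRANULE_SIZE of 8, these granule counts correspond to size classes of
// 8, 16, 24, 32, 40, 48, 64, 80, 128, and 256 bytes.)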
73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 
171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. 
Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page as starting a large 274 | // object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it. 290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 
333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that. 365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct 
freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /milestones/m1/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? 
b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 
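// (Concretely, as computed by get_page() and get_chunk_index() below: for a
// pointer P = 0x20140, the page header lives at P & ~PAGE_MASK = 0x20000 and
// the chunk index is (P & PAGE_MASK) / CHUNK_SIZE = 0x140 / 256 = 1.)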
115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 
205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page as starting a large 274 | // object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it. 
290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that. 
365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? 
allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /milestones/m2/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? 
b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 
115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 
205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page is marked as starting a 274 | // large object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it.
290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself is smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that.
365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? 
allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /milestones/m5/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? 
b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 
115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 
205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page is marked as starting a 274 | // large object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it.
290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself is smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that.
365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | --------------------------------------------------------------------------------
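The walloc.c copies above expose only malloc and free. As a minimal sketch of how those two entry points might be exercised from C, the following hypothetical smoke test (the file, its export name, and the chosen sizes are illustrative and not part of any milestone) assumes a wasm32 C compiler with no standard library, linked together with walloc.c:

// smoke.c: hypothetical smoke test for walloc's malloc/free (illustration only).
typedef __SIZE_TYPE__ size_t;

void *malloc(size_t size);
void free(void *ptr);

__attribute__((export_name("smoke_test")))
int smoke_test(void) {
  void *small  = malloc(16);      // 2 granules -> GRANULES_2 small-object chunk
  void *medium = malloc(200);     // 25 granules -> GRANULES_32 small-object chunk
  void *large  = malloc(100000);  // above LARGE_OBJECT_THRESHOLD; spans several 64 kB pages
  if (!small || !medium || !large)
    return 1;
  free(large);                    // pushed back on the large-object freelist
  free(medium);                   // pushed back on its small-object freelist
  free(small);
  return 0;
}

The three sizes are picked so that a small-object path, the whole-chunk GRANULES_32 path, and the multi-page large-object path are each hit at least once.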