├── .gitignore └── milestones ├── m6 ├── Makefile ├── README.md ├── test.c └── test.js ├── m5 ├── Makefile ├── README.md ├── test.js ├── test.c ├── externref-in-llvm.md └── walloc.c ├── m0 ├── Makefile ├── test.c ├── test.js ├── README.md └── walloc.c ├── m1 ├── Makefile ├── test.c ├── test.js ├── README.md └── walloc.c ├── m4 ├── Makefile ├── test.wat ├── test.ll ├── test.S ├── test.js └── README.md ├── m2 ├── Makefile ├── test.wat ├── test.S ├── README.md ├── test.js └── walloc.c ├── m3 ├── Makefile ├── test.js ├── README.md ├── test.wat ├── test.ll ├── test.S └── walloc.c ├── lib.js ├── getting-started.md └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | test.o 2 | test.wasm 3 | walloc.o 4 | -------------------------------------------------------------------------------- /milestones/m6/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f test.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m5/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -mreference-types -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o walloc.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f *.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m0/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o walloc.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f test.o walloc.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m1/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js 
../lib.js test.wasm 15 | $(JS) $< 16 | 17 | %.o: %.c 18 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 19 | 20 | test.wasm: test.o walloc.o 21 | $(LD) --no-entry --import-memory -o $@ $^ 22 | 23 | .PHONY: clean 24 | clean: 25 | rm -f test.o walloc.o test.wasm 26 | -------------------------------------------------------------------------------- /milestones/m6/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 6: Collectable cycle: C allocates GC-managed memory 2 | 3 | [ previous: [m5](../m5/); next: [m7](../m7/) ] 4 | 5 | ## Overview 6 | 7 | This milestone represents a minimal solution to the cycle problem, but 8 | from C instead of from raw WebAssembly. Instead of using classes as 9 | data abstractions, the C here will use the raw allocation and accessor 10 | imports defined in [milestone 4](../m4). 11 | 12 | ## Interesting files 13 | 14 | The difference from [m4](../m4) is that instead of 15 | [`test.wat`](../m4/test.wat), we have [`test.c`](./test.c). 16 | 17 | ## Results 18 | 19 | No results yet; this milestone is in progress. 20 | -------------------------------------------------------------------------------- /milestones/m4/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LLC?=llc 3 | LD?=wasm-ld 4 | V8?=d8 5 | JSC?=jsc 6 | SPIDERMONKEY?=js 7 | 8 | all: v8.test jsc.test spidermonkey.test 9 | 10 | v8.test: JS?=$(V8) --expose-gc 11 | jsc.test: JS?=$(JSC) 12 | spidermonkey.test: JS?=$(SPIDERMONKEY) 13 | 14 | .PHONY: %.test 15 | %.test: test.js ../lib.js test.wasm 16 | $(JS) $< 17 | 18 | test.wasm: test.o 19 | $(LD) --no-entry --allow-undefined --import-memory -o $@ $^ 20 | 21 | test.o: test.ll 22 | $(LLC) -O2 -mattr=+reference-types -filetype=obj -o $@ $< 23 | 24 | # Alternately: 25 | #test.o: test.wat 26 | # $(WAT2WASM) --enable-all --relocatable -o $@ $< 27 | # or even 28 | #test.o: test.S 29 | # $(CC) -Oz -mreference-types --target=wasm32 -nostdlib -c -o $@ $< 30 | 31 | .PHONY: clean 32 | clean: 33 | rm -f test.o test.wasm 34 | -------------------------------------------------------------------------------- /milestones/m2/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LD?=wasm-ld 3 | V8?=d8 4 | JSC?=jsc 5 | SPIDERMONKEY?=js 6 | 7 | all: v8.test jsc.test spidermonkey.test 8 | 9 | v8.test: JS?=$(V8) --harmony-weak-refs --expose-gc 10 | jsc.test: JS?=$(JSC) --useWeakRefs=true 11 | spidermonkey.test: JS?=$(SPIDERMONKEY) 12 | 13 | .PHONY: %.test 14 | %.test: test.js ../lib.js test.wasm 15 | $(JS) $< 16 | 17 | test.wasm: test.o walloc.o 18 | $(LD) --no-entry --allow-undefined --import-memory -o $@ $^ 19 | 20 | walloc.o: walloc.c 21 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 22 | 23 | test.o: test.S 24 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 25 | 26 | # Alternately: 27 | # test.o: test.wat 28 | # $(WAT2WASM) --enable-all --relocatable -o $@ $< 29 | 30 | .PHONY: clean 31 | clean: 32 | rm -f test.o walloc.o test.wasm 33 | -------------------------------------------------------------------------------- /milestones/m3/Makefile: -------------------------------------------------------------------------------- 1 | CC?=clang 2 | LLC?=llc 3 | LD?=wasm-ld 4 | V8?=d8 5 | JSC?=jsc 6 | SPIDERMONKEY?=js 7 | WAT2WASM?=wat2wasm 8 | 9 | all: v8.test jsc.test spidermonkey.test 10 | 11 | v8.test: JS?=$(V8) --expose-gc 12 | jsc.test: JS?=$(JSC) 13 | spidermonkey.test: JS?=$(SPIDERMONKEY) 14 | 15 | .PHONY: %.test 16 
| %.test: test.js ../lib.js test.wasm 17 | $(JS) $< 18 | 19 | test.wasm: test.o walloc.o 20 | $(LD) --no-entry --import-memory -o $@ $^ 21 | 22 | walloc.o: walloc.c 23 | $(CC) -Oz --target=wasm32 -nostdlib -c -o $@ $< 24 | 25 | test.o: test.ll 26 | $(LLC) -O2 --cost-kind=code-size -mattr=+reference-types -filetype=obj -o $@ $< 27 | 28 | # Alternately: 29 | #test.o: test.wat 30 | # $(WAT2WASM) --enable-all --relocatable -o $@ $< 31 | # or even 32 | #test.o: test.S 33 | # $(CC) -Oz -mreference-types --target=wasm32 -nostdlib -c -o $@ $< 34 | 35 | .PHONY: clean 36 | clean: 37 | rm -f test.o walloc.o test.wasm 38 | -------------------------------------------------------------------------------- /milestones/m4/test.wat: -------------------------------------------------------------------------------- 1 | (func $gc_alloc (import "rt" "gc_alloc") (param i32 i32) (result externref)) 2 | (func $gc_ref_obj (import "rt" "gc_ref_obj") (param externref i32) (result externref)) 3 | (func $gc_set_obj (import "rt" "gc_set_obj") (param externref i32 externref)) 4 | 5 | (func $invoke (import "rt" "invoke") (param externref)) 6 | 7 | (func $make_obj (export "make_obj") (result externref) 8 | (local $obj externref) 9 | (local.set $obj (call $gc_alloc (i32.const 1) (i32.const 0))) 10 | (call $gc_set_obj (local.get $obj) (i32.const 0) (ref.null extern)) 11 | (local.get $obj)) 12 | 13 | (func $attach_callback (export "attach_callback") 14 | (param $obj externref) (param $callback externref) 15 | (call $gc_set_obj (local.get $obj) (i32.const 0) (local.get $callback))) 16 | 17 | (func $invoke_callback (export "invoke_callback") 18 | (param $obj externref) 19 | (call $invoke (call $gc_ref_obj (local.get $obj) (i32.const 0)))) 20 | -------------------------------------------------------------------------------- /milestones/m5/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 5: C manages side table of JavaScript objects 2 | 3 | [ previous: [m4](../m4/); next: [m6](../m6/) ] 4 | 5 | ## Overview 6 | 7 | Having shown where we're going and that milestone 4 solves the cycle 8 | problem, we need to return to the C++-to-WebAssembly problem: we are 9 | lacking a way to represent `externref` values in C++, and support in the 10 | LLVM compiler. As a first step in this direction, we're going to 11 | translate milestone 3's test program back to an extended version of C 12 | that has rudimentary support for reference types. 13 | 14 | See the [Externref in LLVM](./externref-in-llvm.md) design document 15 | for the high-level overview on how we are going to extend LLVM to 16 | support `externref`. 17 | 18 | ## Details 19 | 20 | The difference from [m3](../m3) is that instead of 21 | [`test.wat`](../m3/test.wat), we have [`test.c`](./test.c). 22 | 23 | Running `make` will build `test.wasm` and run the test as in m3. 24 | 25 | ## Results 26 | 27 | No results yet; this milestone is in progress. 
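In the meantime, to give a flavor of the extension, here is a minimal sketch of the kind of C that [`test.c`](./test.c) relies on: the `__externref_t` type plus the `__builtin_wasm_table_*` and `__builtin_wasm_ref_null_extern` builtins. The `intern` and `lookup` helpers below are illustrative names only; the real `test.c` additionally maintains a freelist so that handles can be released and reused.

```c
typedef __externref_t externref;  // opaque host (JS) reference; cannot be stored in linear memory

static externref objects[0];      // declares a WebAssembly table of externref values

// Intern a host value into the table, returning an integer handle that
// *can* be stored in linear memory.
static int intern(externref obj) {
  int handle = __builtin_wasm_table_grow(objects, obj, 1);
  if (handle == -1) __builtin_trap();
  return handle;
}

static externref lookup(int handle) {
  return __builtin_wasm_table_get(objects, handle);
}
```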
28 | -------------------------------------------------------------------------------- /milestones/m6/test.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | typedef __externref_t externref; 16 | 17 | void WASM_IMPORT(rt, invoke)(externref); 18 | externref WASM_IMPORT(rt, gc_alloc)(size_t nobjs, size_t nbytes); 19 | externref WASM_IMPORT(rt, gc_ref_obj)(externref obj, size_t i); 20 | void WASM_IMPORT(rt, gc_set_obj)(externref obj, size_t i, externref val); 21 | 22 | // Useful for debugging. 23 | void WASM_IMPORT(env, wasm_log)(void*); 24 | void WASM_IMPORT(env, wasm_logi)(int); 25 | 26 | externref WASM_EXPORT(make_obj)() { 27 | externref ret = gc_alloc(1, 0); 28 | gc_set_obj(ret, 0, __builtin_wasm_ref_null(externref)); 29 | return ret; 30 | } 31 | 32 | void WASM_EXPORT(attach_callback)(externref obj, externref callback) { 33 | gc_set_obj(obj, 0, callback); 34 | } 35 | 36 | void WASM_EXPORT(invoke_callback)(externref obj) { 37 | invoke(gc_ref_obj(obj, 0)); 38 | } 39 | -------------------------------------------------------------------------------- /milestones/m0/test.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | void WASM_IMPORT(rt, release)(uintptr_t); 16 | void WASM_IMPORT(rt, invoke)(int32_t); 17 | void WASM_IMPORT(rt, out_of_memory)(void); 18 | 19 | // Useful for debugging.
20 | void WASM_IMPORT(env, wasm_log)(void*); 21 | void WASM_IMPORT(env, wasm_logi)(int); 22 | 23 | void *malloc(size_t size); 24 | void free(void *p); 25 | 26 | struct obj { 27 | uintptr_t callback_handle; 28 | }; 29 | 30 | struct obj* WASM_EXPORT(make_obj)() { 31 | struct obj* obj = malloc(sizeof(struct obj)); 32 | if (!obj) { 33 | out_of_memory(); 34 | __builtin_trap(); 35 | } 36 | obj->callback_handle = -1; 37 | return obj; 38 | } 39 | 40 | void WASM_EXPORT(free_obj)(struct obj* obj) 41 | { 42 | release(obj->callback_handle); 43 | free(obj); 44 | } 45 | 46 | void WASM_EXPORT(attach_callback)(struct obj* obj, uintptr_t callback_handle) 47 | { 48 | release(obj->callback_handle); 49 | obj->callback_handle = callback_handle; 50 | } 51 | 52 | void WASM_EXPORT(invoke_callback)(struct obj* obj) 53 | { 54 | invoke(obj->callback_handle); 55 | } 56 | -------------------------------------------------------------------------------- /milestones/m1/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | void WASM_IMPORT(rt, release)(uintptr_t); 16 | void WASM_IMPORT(rt, invoke)(int32_t); 17 | void WASM_IMPORT(rt, out_of_memory)(void); 18 | 19 | // Useful for debugging. 20 | void WASM_IMPORT(env, wasm_log)(void*); 21 | void WASM_IMPORT(env, wasm_logi)(int); 22 | 23 | void *malloc(size_t size); 24 | void free(void *p); 25 | 26 | struct obj { 27 | uintptr_t callback_handle; 28 | }; 29 | 30 | struct obj* WASM_EXPORT(make_obj)() { 31 | struct obj* obj = malloc(sizeof(struct obj)); 32 | if (!obj) { 33 | out_of_memory(); 34 | __builtin_trap(); 35 | } 36 | obj->callback_handle = -1; 37 | return obj; 38 | } 39 | 40 | void WASM_EXPORT(free_obj)(struct obj* obj) 41 | { 42 | release(obj->callback_handle); 43 | free(obj); 44 | } 45 | 46 | void WASM_EXPORT(attach_callback)(struct obj* obj, uintptr_t callback_handle) 47 | { 48 | release(obj->callback_handle); 49 | obj->callback_handle = callback_handle; 50 | } 51 | 52 | void WASM_EXPORT(invoke_callback)(struct obj* obj) 53 | { 54 | invoke(obj->callback_handle); 55 | } 56 | -------------------------------------------------------------------------------- /milestones/m2/test.wat: -------------------------------------------------------------------------------- 1 | (memory $mem (import "env" "__linear_memory") 0) 2 | (func $malloc (import "env" "malloc") (param i32) (result i32)) 3 | (func $free (import "env" "free") (param i32)) 4 | 5 | (func $release (import "rt" "release") (param i32)) 6 | (func $invoke (import "rt" "invoke") (param i32)) 7 | (func $out_of_memory (import "rt" "out_of_memory")) 8 | 9 | (func $make_obj (export "make_obj") (result i32) 10 | (local $obj i32) 11 | 12 | ;; Allocate a new object in linear memory. If that fails, signal the 13 | ;; runtime, and then abort. 14 | (if (i32.eqz (local.tee $obj (call $malloc (i32.const 4)))) 15 | (then (call $out_of_memory) (unreachable))) 16 | 17 | ;; Initialize callback handle -1 (invalid) for fresh object. 18 | (i32.store offset=0 (local.get $obj) (i32.const -1)) 19 | 20 | (local.get $obj)) 21 | 22 | (func $free_obj (export "free_obj") (param $obj i32) 23 | ;; Release the callback handle. 
24 | (call $release (i32.load offset=0 (local.get $obj))) 25 | (call $free (local.get $obj))) 26 | 27 | (func $attach_callback (export "attach_callback") 28 | (param $obj i32) (param $callback_handle i32) 29 | ;; Release old handle. 30 | (call $release (i32.load offset=0 (local.get $obj))) 31 | 32 | ;; Store handle to object. 33 | (i32.store offset=0 34 | (local.get $obj) 35 | (local.get $callback_handle))) 36 | 37 | (func $invoke_callback (export "invoke_callback") 38 | (param $obj i32) 39 | (call $invoke (i32.load offset=0 (local.get $obj)))) 40 | -------------------------------------------------------------------------------- /milestones/m4/test.ll: -------------------------------------------------------------------------------- 1 | target triple = "wasm32-unknown-unknown" 2 | 3 | ; Reference types 4 | %externref = type ptr addrspace(10) 5 | 6 | %funcptr = type void () addrspace(20)* 7 | %funcref = type i8 addrspace(20)* 8 | 9 | ; intrinsic decl 10 | declare %externref @llvm.wasm.ref.null.extern() nounwind readonly 11 | 12 | ; External functions 13 | declare %externref @gc_alloc(i32, i32) local_unnamed_addr #0 14 | declare %externref @gc_ref_obj(%externref, i32) local_unnamed_addr #1 15 | declare void @gc_set_obj(%externref, i32, %externref) local_unnamed_addr #2 16 | declare void @invoke(%externref) local_unnamed_addr #3 17 | 18 | define %externref @make_obj() #4 { 19 | %obj = call %externref @gc_alloc(i32 1, i32 0) 20 | %null = call %externref @llvm.wasm.ref.null.extern() 21 | call void @gc_set_obj(%externref %obj, i32 0, %externref %null) 22 | ret %externref %obj 23 | } 24 | 25 | define void @attach_callback(%externref %obj, %externref %callback) #5 { 26 | call void @gc_set_obj(%externref %obj, i32 0, %externref %callback) 27 | ret void 28 | } 29 | 30 | define void @invoke_callback(%externref %obj) #6 { 31 | %ref = call %externref @gc_ref_obj(%externref %obj, i32 0) 32 | call void @invoke(%externref %ref) 33 | ret void 34 | } 35 | 36 | attributes #0 = { "wasm-import-module"="rt" "wasm-import-name"="gc_alloc" } 37 | attributes #1 = { "wasm-import-module"="rt" "wasm-import-name"="gc_ref_obj" } 38 | attributes #2 = { "wasm-import-module"="rt" "wasm-import-name"="gc_set_obj" } 39 | attributes #3 = { "wasm-import-module"="rt" "wasm-import-name"="invoke" } 40 | 41 | attributes #4 = { "wasm-export-name"="make_obj" } 42 | attributes #5 = { "wasm-export-name"="attach_callback" } 43 | attributes #6 = { "wasm-export-name"="invoke_callback" } 44 | -------------------------------------------------------------------------------- /milestones/m2/test.S: -------------------------------------------------------------------------------- 1 | .functype out_of_memory () -> () 2 | .import_module out_of_memory, rt 3 | .import_name out_of_memory, out_of_memory 4 | .functype release (i32) -> () 5 | .import_module release, rt 6 | .import_name release, release 7 | .functype invoke (i32) -> () 8 | .import_module invoke, rt 9 | .import_name invoke, invoke 10 | 11 | .functype malloc (i32) -> (i32) 12 | .functype free (i32) -> () 13 | 14 | .text 15 | 16 | make_obj: 17 | .hidden make_obj 18 | .globl make_obj 19 | .export_name make_obj, make_obj 20 | .no_dead_strip make_obj 21 | .type make_obj,@function 22 | .functype make_obj () -> (i32) 23 | .local i32 24 | block 25 | i32.const 4 26 | call malloc 27 | local.tee 0 28 | br_if 0 29 | call out_of_memory 30 | unreachable 31 | unreachable 32 | end_block 33 | local.get 0 34 | i32.const -1 35 | i32.store 0 36 | local.get 0 37 | end_function 38 | 39 | 40 | free_obj: 41 | 
.hidden free_obj 42 | .globl free_obj 43 | .export_name free_obj, free_obj 44 | .no_dead_strip free_obj 45 | .type free_obj,@function 46 | .functype free_obj (i32) -> () 47 | local.get 0 48 | i32.load 0 49 | call release 50 | local.get 0 51 | call free 52 | end_function 53 | 54 | 55 | attach_callback: 56 | .hidden attach_callback 57 | .globl attach_callback 58 | .export_name attach_callback, attach_callback 59 | .no_dead_strip attach_callback 60 | .type attach_callback,@function 61 | .functype attach_callback (i32, i32) -> () 62 | local.get 0 63 | i32.load 0 64 | call release 65 | local.get 0 66 | local.get 1 67 | i32.store 0 68 | end_function 69 | 70 | invoke_callback: 71 | .hidden invoke_callback 72 | .globl invoke_callback 73 | .export_name invoke_callback, invoke_callback 74 | .no_dead_strip invoke_callback 75 | .type invoke_callback,@function 76 | .functype invoke_callback (i32) -> () 77 | local.get 0 78 | i32.load 0 79 | call invoke 80 | end_function 81 | -------------------------------------------------------------------------------- /milestones/m4/test.S: -------------------------------------------------------------------------------- 1 | .functype gc_alloc (i32, i32) -> (externref) 2 | .import_module gc_alloc, rt 3 | .import_name gc_alloc, gc_alloc 4 | 5 | .functype gc_ref_obj (externref, i32) -> (externref) 6 | .import_module gc_ref_obj, rt 7 | .import_name gc_ref_obj, gc_ref_obj 8 | 9 | .functype gc_set_obj (externref, i32, externref) -> () 10 | .import_module gc_set_obj, rt 11 | .import_name gc_set_obj, gc_set_obj 12 | 13 | .functype invoke (externref) -> () 14 | .import_module invoke, rt 15 | .import_name invoke, invoke 16 | 17 | make_obj: 18 | .hidden make_obj 19 | .globl make_obj 20 | .export_name make_obj, make_obj 21 | .no_dead_strip make_obj 22 | .type make_obj,@function 23 | .functype make_obj () -> (externref) 24 | .local externref # obj 25 | # Allocate a new object in managed memory. 26 | i32.const 1 # nobjs 27 | i32.const 0 # nbytes 28 | call gc_alloc 29 | local.set 0 30 | 31 | # Initialize callback field. 32 | local.get 0 33 | i32.const 0 34 | ref.null_extern 35 | call gc_set_obj 36 | 37 | local.get 0 38 | end_function 39 | 40 | attach_callback: 41 | .hidden attach_callback 42 | .globl attach_callback 43 | .export_name attach_callback, attach_callback 44 | .no_dead_strip attach_callback 45 | .type attach_callback,@function 46 | .functype attach_callback (externref, externref) -> () 47 | # Set callback field. 
48 | local.get 0 49 | i32.const 0 50 | local.get 1 51 | call gc_set_obj 52 | end_function 53 | 54 | invoke_callback: 55 | .hidden invoke_callback 56 | .globl invoke_callback 57 | .export_name invoke_callback, invoke_callback 58 | .no_dead_strip invoke_callback 59 | .type invoke_callback,@function 60 | .functype invoke_callback (externref) -> () 61 | local.get 0 62 | i32.const 0 63 | call gc_ref_obj 64 | call invoke 65 | end_function 66 | -------------------------------------------------------------------------------- /milestones/m3/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | function invoke(callback) { 28 | return callback() 29 | } 30 | function out_of_memory() { 31 | print('error: out of linear memory'); 32 | quit(1); 33 | } 34 | 35 | let nalloc = 0; 36 | let nfinalized = 0; 37 | let nmax = 0; 38 | class WasmObject { 39 | constructor(wasm) { 40 | this.wasm = wasm 41 | let obj = wasm.exports.make_obj(); 42 | this.obj = obj; 43 | nalloc++; 44 | nmax = Math.max(nalloc - nfinalized, nmax); 45 | let free_obj = this.wasm.exports.free_obj; 46 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 47 | } 48 | attachCallback(f) { 49 | this.wasm.exports.attach_callback(this.obj, f); 50 | } 51 | invokeCallback() { 52 | this.wasm.exports.invoke_callback(this.obj); 53 | } 54 | } 55 | 56 | let bytes = readBinaryFile("test.wasm"); 57 | let mod = new WebAssembly.Module(bytes); 58 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 59 | let rt = { invoke, out_of_memory }; 60 | let imports = { env: memory.env(), rt } 61 | let instance = new WebAssembly.Instance(mod, imports); 62 | 63 | function test(n) { 64 | for (let i = 0; i < n; i++) { 65 | let obj = new WasmObject(instance); 66 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 67 | if (i == 0) obj.invokeCallback(); 68 | } 69 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 70 | } 71 | 72 | async function main() { 73 | await runTestLoop(1e2, test, 1e3); 74 | print(`Success; max ${nmax} objects live.`); 75 | } 76 | main() 77 | -------------------------------------------------------------------------------- /milestones/m5/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 
15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | function invoke(callback) { 28 | return callback() 29 | } 30 | function out_of_memory() { 31 | print('error: out of linear memory'); 32 | quit(1); 33 | } 34 | 35 | let nalloc = 0; 36 | let nfinalized = 0; 37 | let nmax = 0; 38 | class WasmObject { 39 | constructor(wasm) { 40 | this.wasm = wasm 41 | let obj = wasm.exports.make_obj(); 42 | this.obj = obj; 43 | nalloc++; 44 | nmax = Math.max(nalloc - nfinalized, nmax); 45 | let free_obj = this.wasm.exports.free_obj; 46 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 47 | } 48 | attachCallback(f) { 49 | this.wasm.exports.attach_callback(this.obj, f); 50 | } 51 | invokeCallback() { 52 | this.wasm.exports.invoke_callback(this.obj); 53 | } 54 | } 55 | 56 | let bytes = readBinaryFile("test.wasm"); 57 | let mod = new WebAssembly.Module(bytes); 58 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 59 | let rt = { invoke, out_of_memory }; 60 | let imports = { env: memory.env(), rt } 61 | let instance = new WebAssembly.Instance(mod, imports); 62 | 63 | function test(n) { 64 | for (let i = 0; i < n; i++) { 65 | let obj = new WasmObject(instance); 66 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 67 | if (i == 0) obj.invokeCallback(); 68 | } 69 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 70 | } 71 | 72 | async function main() { 73 | await runTestLoop(1e2, test, 1e3); 74 | print(`Success; max ${nmax} objects live.`); 75 | } 76 | main() 77 | -------------------------------------------------------------------------------- /milestones/m6/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let nalloc = 0; 26 | class WasmObject { 27 | constructor(wasm) { 28 | this.wasm = wasm 29 | let obj = wasm.exports.make_obj(); 30 | nalloc++; 31 | this.obj = obj; 32 | } 33 | attachCallback(f) { 34 | this.wasm.exports.attach_callback(this.obj, f); 35 | } 36 | invokeCallback() { 37 | this.wasm.exports.invoke_callback(this.obj); 38 | } 39 | } 40 | 41 | function gc_alloc(nobjs, nbytes) { 42 | return { 43 | objs: nobjs ? new Array(nobjs) : null, 44 | bytes: nbytes ? new ArrayBuffer(nbytes) : null 45 | }; 46 | } 47 | function gc_ref_obj(obj, i) { return obj.objs[i]; } 48 | function gc_set_obj(obj, i, val) { obj.objs[i] = val; } 49 | // Room here for gc_ref_u8, gc_set_f32, etc for obj.bytes. 
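// A sketch of what such byte accessors could look like (hypothetical: this
// module's "rt" imports don't include them yet). They view obj.bytes, the
// ArrayBuffer allocated by gc_alloc, through a typed array.
function gc_ref_u8(obj, i) { return new Uint8Array(obj.bytes)[i]; }
function gc_set_u8(obj, i, val) { new Uint8Array(obj.bytes)[i] = val; }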
50 | 51 | function invoke(cb) { return cb(); } 52 | 53 | let bytes = readBinaryFile("test.wasm"); 54 | let mod = new WebAssembly.Module(bytes); 55 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 56 | let rt = { gc_alloc, gc_ref_obj, gc_set_obj, invoke }; 57 | let imports = { env: memory.env(), rt } 58 | let instance = new WebAssembly.Instance(mod, imports); 59 | 60 | function test(n) { 61 | for (let i = 0; i < n; i++) { 62 | let obj = new WasmObject(instance); 63 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 64 | if (i == 0) obj.invokeCallback(); 65 | } 66 | print(`${nalloc} total allocated.`); 67 | } 68 | 69 | function runTestLoop(n, f, ...args) { 70 | for (let i = 0; i < n; i++) { 71 | f(...args); 72 | } 73 | } 74 | 75 | function main() { 76 | runTestLoop(1e2, test, 1e3); 77 | print(`Success.`); 78 | } 79 | main() 80 | -------------------------------------------------------------------------------- /milestones/m4/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let nalloc = 0; 26 | class WasmObject { 27 | constructor(wasm) { 28 | this.wasm = wasm 29 | let obj = wasm.exports.make_obj(); 30 | nalloc++; 31 | this.obj = obj; 32 | } 33 | attachCallback(f) { 34 | this.wasm.exports.attach_callback(this.obj, f); 35 | } 36 | invokeCallback() { 37 | this.wasm.exports.invoke_callback(this.obj); 38 | } 39 | } 40 | 41 | function gc_alloc(nobjs, nbytes) { 42 | return { 43 | objs: nobjs ? new Array(nobjs) : null, 44 | bytes: nbytes ? new ArrayBuffer(nbytes) : null 45 | }; 46 | } 47 | function gc_ref_obj(obj, i) { return obj.objs[i]; } 48 | function gc_set_obj(obj, i, val) { obj.objs[i] = val; } 49 | // Room here for gc_ref_u8, gc_set_f32, etc for obj.bytes. 
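// A sketch of float accessors over obj.bytes (hypothetical: not part of the
// "rt" imports this test module uses). The index i counts f32 slots.
function gc_ref_f32(obj, i) { return new Float32Array(obj.bytes)[i]; }
function gc_set_f32(obj, i, val) { new Float32Array(obj.bytes)[i] = val; }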
50 | 51 | function invoke(cb) { return cb(); } 52 | 53 | let bytes = readBinaryFile("test.wasm"); 54 | let mod = new WebAssembly.Module(bytes); 55 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 56 | let rt = { gc_alloc, gc_ref_obj, gc_set_obj, invoke }; 57 | let imports = { env: memory.env(), rt } 58 | let instance = new WebAssembly.Instance(mod, imports); 59 | 60 | function test(n) { 61 | for (let i = 0; i < n; i++) { 62 | let obj = new WasmObject(instance); 63 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 64 | if (i == 0) obj.invokeCallback(); 65 | } 66 | print(`${nalloc} total allocated.`); 67 | } 68 | 69 | function runTestSyncLoop(n, f, ...args) { 70 | for (let i = 0; i < n; i++) { 71 | f(...args); 72 | } 73 | } 74 | 75 | function main() { 76 | runTestSyncLoop(1e2, test, 1e3); 77 | print(`Success.`); 78 | } 79 | main() 80 | -------------------------------------------------------------------------------- /milestones/lib.js: -------------------------------------------------------------------------------- 1 | let readBinaryFile = (()=> { 2 | if (typeof read !== 'undefined') 3 | return f => read(f, 'binary'); 4 | if (typeof readFile !== 'undefined') 5 | return f => readFile(f); 6 | let fs = require('fs'); 7 | return f => fs.readFileSync(f); 8 | })(); 9 | 10 | let maybeGC = (()=> { 11 | // Engines will detect finalizable objects when they run GC. They 12 | // run GC according to a number of heuristics, principally among 13 | // them how much GC-managed allocation has happened since the last 14 | // GC. These heuristics are not informed how much off-heap 15 | // allocation has occured; see 16 | // https://github.com/tc39/proposal-weakrefs/issues/87. In 17 | // practice, all engines will fail our tests, because they don't 18 | // detect finalizable objects soon enough. Therefore to make these 19 | // tests reliable, we insert explicit GC calls. This bodes poorly 20 | // for the role of finalizers in robust systems. V8 does not expose 21 | // `gc` by default, though it will if you pass --expose-gc. 22 | if (typeof gc !== 'undefined') 23 | return gc; 24 | print('warning: no gc() exposed; no promptness guarantee for finalization'); 25 | return ()=>{}; 26 | })(); 27 | 28 | let callLater = (()=> { 29 | // Note that d8's `setTimeout` ignores the timeout argument; it just 30 | // queues `f` without any delay. 31 | if (typeof setTimeout !== 'undefined') 32 | return f=>setTimeout(f, 0); 33 | if (typeof enqueueJob !== 'undefined') 34 | return f=>enqueueJob(f); 35 | return f=>f(); 36 | })(); 37 | 38 | let maybeDrainTasks = (()=> { 39 | // Weirdly, SpiderMonkey's (nonstandard) enqueueJob has higher 40 | // priority than finalization tasks. We need to explicitly drain 41 | // tasks there to get finalizers to run. 42 | if (typeof drainJobQueue !== 'undefined') 43 | return drainJobQueue; 44 | return ()=>{}; 45 | })(); 46 | 47 | // Finalizers run in their own task. This microsleep is a simple way to 48 | // force the engine to go to the next turn. 49 | async function microsleep() { 50 | await new Promise((resolve, reject) => callLater(resolve)); 51 | } 52 | 53 | async function allowFinalizersToRun() { 54 | maybeGC(); 55 | maybeDrainTasks(); 56 | await microsleep(); 57 | } 58 | 59 | // We are testing garbage collection and finalizers, for which async 60 | // functions are a natural fit. However we avoid putting the test 61 | // itself in an async function, because of a gnarly SpiderMonkey 62 | // limitation (https://bugzilla.mozilla.org/show_bug.cgi?id=1542660). 
63 | // Basically, in SpiderMonkey, any closure in an async function captures 64 | // *all* local variables, potentially introducing cycles and preventing 65 | // finalization. 66 | async function runTestLoop(n, f, ...args) { 67 | for (let i = 0; i < n; i++) { 68 | f(...args); 69 | await allowFinalizersToRun(); 70 | } 71 | } 72 | 73 | function checkSame(expected, actual, what) { 74 | print(`checking expected ${what}: ${expected}`); 75 | if (expected !== actual) 76 | throw new Error(`unexpected ${what}: ${actual}`); 77 | } 78 | -------------------------------------------------------------------------------- /milestones/m5/test.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | typedef __SIZE_TYPE__ size_t; 4 | 5 | #define WASM_EXPORT(name) \ 6 | __attribute__((export_name(#name))) \ 7 | name 8 | #define WASM_IMPORT(mod, name) \ 9 | __attribute__((import_module(#mod))) \ 10 | __attribute__((import_name(#name))) \ 11 | name 12 | 13 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 14 | 15 | typedef __externref_t externref; 16 | 17 | void WASM_IMPORT(rt, invoke)(externref); 18 | void WASM_IMPORT(rt, out_of_memory)(void); 19 | 20 | // Useful for debugging. 21 | void WASM_IMPORT(env, wasm_log)(void*); 22 | void WASM_IMPORT(env, wasm_logi)(int); 23 | 24 | void *malloc(size_t size); 25 | void free(void *p); 26 | 27 | typedef uint32_t Handle; 28 | 29 | struct freelist { 30 | Handle handle; 31 | struct freelist *next; 32 | }; 33 | 34 | static struct freelist *freelist = 0; 35 | 36 | static Handle freelist_pop (void) { 37 | ASSERT(freelist != 0x0); 38 | struct freelist *head = freelist; 39 | Handle ret = head->handle; 40 | freelist = head->next; 41 | free(head); 42 | return ret; 43 | } 44 | 45 | static void freelist_push (Handle h) { 46 | struct freelist *head = malloc(sizeof(struct freelist)); 47 | if (!head) { 48 | out_of_memory(); 49 | __builtin_trap(); 50 | } 51 | head->handle = h; 52 | head->next = freelist; 53 | freelist = head; 54 | } 55 | 56 | static externref objects[0]; 57 | 58 | static void expand_table (void) { 59 | size_t old_size = __builtin_wasm_table_size(objects); 60 | size_t grow = (old_size >> 1) + 1; 61 | if (__builtin_wasm_table_grow(objects, 62 | __builtin_wasm_ref_null_extern(), 63 | grow) == -1) { 64 | out_of_memory(); 65 | __builtin_trap(); 66 | } 67 | size_t end = __builtin_wasm_table_size(objects); 68 | while (end != old_size) { 69 | freelist_push (--end); 70 | } 71 | } 72 | 73 | static Handle intern(externref obj) { 74 | if (!freelist) expand_table(); 75 | Handle ret = freelist_pop(); 76 | __builtin_wasm_table_set(objects, ret, obj); 77 | return ret; 78 | } 79 | 80 | static void release(Handle h) { 81 | if (h == -1) return; 82 | __builtin_wasm_table_set(objects, h, __builtin_wasm_ref_null_extern()); 83 | freelist_push(h); 84 | } 85 | 86 | static externref handle_value(Handle h) { 87 | return h == -1 88 | ? 
__builtin_wasm_ref_null_extern() 89 | : __builtin_wasm_table_get(objects, h); 90 | } 91 | 92 | struct obj { 93 | uintptr_t callback_handle; 94 | }; 95 | 96 | struct obj* WASM_EXPORT(make_obj)() { 97 | struct obj* obj = malloc(sizeof(struct obj)); 98 | if (!obj) { 99 | out_of_memory(); 100 | __builtin_trap(); 101 | } 102 | obj->callback_handle = -1; 103 | return obj; 104 | } 105 | 106 | void WASM_EXPORT(free_obj)(struct obj* obj) { 107 | release(obj->callback_handle); 108 | free(obj); 109 | } 110 | 111 | void WASM_EXPORT(attach_callback)(struct obj* obj, externref callback) { 112 | release(obj->callback_handle); 113 | obj->callback_handle = intern(callback); 114 | } 115 | 116 | void WASM_EXPORT(invoke_callback)(struct obj* obj) { 117 | invoke(handle_value(obj->callback_handle)); 118 | } 119 | -------------------------------------------------------------------------------- /milestones/m2/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 2: Re-express test.c as raw WebAssembly test.wat 2 | 3 | [ previous: [m1](../m1/); next: [m3](../m3/) ] 4 | 5 | ## Overview 6 | 7 | In [Milestone 1](../m1/), we showed that using side tables and 8 | finalizers for WebAssembly<->JavaScript references caused memory leaks 9 | if the object graph had a cycle traversing the language boundary. We 10 | also showed that such cycles were easy to make, and hard to debug. 11 | 12 | We would like to propose some language extensions to C and C++ to fix 13 | this problem. To keep the discussion concrete, we will first show the 14 | kind of WebAssembly that we would like LLVM to produce, and test it to 15 | show that it solves the cycle problems. This test will also allow us to 16 | examine different characteristics of the proposed solution. 17 | 18 | Therefore this milestone is the same as [milestone 0](../m0/), but with 19 | the [`test.c`](../m0/test.c) translated to WebAssembly via `clang -Ox 20 | --target=wasm32 -nostdlib -S -o test.S test.c`. The resulting 21 | [`test.S`](./test.S) was then cleaned up manually and commented. 22 | 23 | There is also a commented [test.wat](./test.wat) file, for reference. 24 | The [`test.wat`](./test.wat) file and the [`test.S`](./test.S) file are 25 | equivalent. The [`Makefile`](./Makefile) has a rule for alternately 26 | producing `test.o` from `test.wat`, via wabt's `wat2wasm`. 27 | 28 | ## Details 29 | 30 | The difference from [m0](../m0) is that we translated the former 31 | [`test.c`](../m0/test.c) into [`test.wat`](./test.wat). 32 | 33 | Running `make` will build `test.wasm` and run the test as in m0. An 34 | example run: 35 | 36 | ``` 37 | $ make v8.test 38 | ~/src/llvm-project/build/bin/clang --target=wasm32 -nostdlib -c -o test.o test.S 39 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c 40 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --allow-undefined --import-memory -o test.wasm test.o walloc.o 41 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc test.js 42 | Callback after 1 allocated. 43 | 1000 total allocated, 1000 still live. 44 | Callback after 1001 allocated. 45 | 2000 total allocated, 1000 still live. 46 | ... 47 | 99000 total allocated, 1000 still live. 48 | Callback after 99001 allocated. 49 | 100000 total allocated, 1000 still live. 50 | checking expected live object count: 0 51 | Success; max 1000 objects live. 
52 | ``` 53 | 54 | ## Results 55 | 56 | ### Toolchain support for linking C and wat 57 | 58 | The object file format of WebAssembly files, as produced by clang and 59 | consumed by LLD, is just the standard binary WebAssembly format, with a 60 | few conventions, plus a couple of "custom sections". See full details 61 | over at [the tool-conventions Linking 62 | document](https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md). 63 | 64 | By default, the `wat2wasm` tool from `wabt` doesn't produce these 65 | sections. It does have a `--relocatable` option, however, that was 66 | intended for this purpose, but it had a few bugs that made it unusable. 67 | These were fixed in 68 | [#1535](https://github.com/WebAssembly/wabt/pull/1535), 69 | [#1537](https://github.com/WebAssembly/wabt/pull/1537), and 70 | [#1539](https://github.com/WebAssembly/wabt/pull/1539) from us, and 71 | [#1527](https://github.com/WebAssembly/wabt/pull/1527) from an external 72 | contributor. All of these changes are now in the main upstream branch. 73 | 74 | Note that although we initially used `wat2wasm --relocatable`, as later 75 | work focusses on LLVM, we now use the LLVM assembly syntax for the 76 | WebAssembly target. 77 | -------------------------------------------------------------------------------- /milestones/m0/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | class ObjectTable { 28 | constructor() { 29 | this.objects = []; 30 | this.freelist = []; 31 | } 32 | expand() { 33 | let len = this.objects.length; 34 | let grow = (len >> 1) + 1; 35 | let end = len + grow; 36 | this.objects.length = end; 37 | while (end != len) 38 | this.freelist.push(--end); 39 | } 40 | intern(obj) { 41 | if (!this.freelist.length) 42 | this.expand(); 43 | let handle = this.freelist.pop(); 44 | this.objects[handle] = obj; 45 | return handle; 46 | } 47 | release(handle) { 48 | if (handle === -1) return; 49 | this.objects[handle] = null; 50 | this.freelist.push(handle); 51 | } 52 | count() { 53 | return this.objects.length - this.freelist.length; 54 | } 55 | ref(handle) { 56 | return this.objects[handle]; 57 | } 58 | } 59 | 60 | let table = new ObjectTable; 61 | function invoke(handle) { 62 | if (handle === -1) return; 63 | return table.ref(handle)(); 64 | } 65 | function release(handle) { 66 | if (handle === -1) return; 67 | table.release(handle); 68 | } 69 | function out_of_memory() { 70 | print('error: out of linear memory'); 71 | quit(1); 72 | } 73 | 74 | let nalloc = 0; 75 | let nfinalized = 0; 76 | let nmax = 0; 77 | class WasmObject { 78 | constructor(wasm) { 79 | this.wasm = wasm 80 | let obj = wasm.exports.make_obj(); 81 | this.obj = obj; 82 | nalloc++; 83 | nmax = Math.max(nalloc - nfinalized, 
nmax); 84 | let free_obj = this.wasm.exports.free_obj; 85 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 86 | } 87 | attachCallback(f) { 88 | let handle = table.intern(f); 89 | this.wasm.exports.attach_callback(this.obj, handle); 90 | } 91 | invokeCallback() { 92 | this.wasm.exports.invoke_callback(this.obj); 93 | } 94 | } 95 | 96 | let bytes = readBinaryFile("test.wasm"); 97 | let mod = new WebAssembly.Module(bytes); 98 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 99 | let rt = { release, invoke, out_of_memory }; 100 | let imports = { env: memory.env(), rt } 101 | let instance = new WebAssembly.Instance(mod, imports); 102 | 103 | function test(n) { 104 | for (let i = 0; i < n; i++) { 105 | let obj = new WasmObject(instance); 106 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 107 | if (i == 0) obj.invokeCallback(); 108 | } 109 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 110 | } 111 | 112 | async function main() { 113 | await runTestLoop(1e2, test, 1e3); 114 | checkSame(nalloc - nfinalized, table.count(), "live object count"); 115 | print(`Success; max ${nmax} objects live.`); 116 | } 117 | main() 118 | -------------------------------------------------------------------------------- /milestones/m2/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | class ObjectTable { 28 | constructor() { 29 | this.objects = []; 30 | this.freelist = []; 31 | } 32 | expand() { 33 | let len = this.objects.length; 34 | let grow = (len >> 1) + 1; 35 | let end = len + grow; 36 | this.objects.length = end; 37 | while (end != len) 38 | this.freelist.push(--end); 39 | } 40 | intern(obj) { 41 | if (!this.freelist.length) 42 | this.expand(); 43 | let handle = this.freelist.pop(); 44 | this.objects[handle] = obj; 45 | return handle; 46 | } 47 | release(handle) { 48 | if (handle === -1) return; 49 | this.objects[handle] = null; 50 | this.freelist.push(handle); 51 | } 52 | count() { 53 | return this.objects.length - this.freelist.length; 54 | } 55 | ref(handle) { 56 | return this.objects[handle]; 57 | } 58 | } 59 | 60 | let table = new ObjectTable; 61 | function invoke(handle) { 62 | if (handle === -1) return; 63 | return table.ref(handle)(); 64 | } 65 | function release(handle) { 66 | if (handle === -1) return; 67 | table.release(handle); 68 | } 69 | function out_of_memory() { 70 | print('error: out of linear memory'); 71 | quit(1); 72 | } 73 | 74 | let nalloc = 0; 75 | let nfinalized = 0; 76 | let nmax = 0; 77 | class WasmObject { 78 | constructor(wasm) { 79 | this.wasm = wasm 80 | let obj = wasm.exports.make_obj(); 81 | this.obj = obj; 82 | nalloc++; 83 | nmax = Math.max(nalloc - nfinalized, nmax); 84 | 
let free_obj = this.wasm.exports.free_obj; 85 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 86 | } 87 | attachCallback(f) { 88 | let handle = table.intern(f); 89 | this.wasm.exports.attach_callback(this.obj, handle); 90 | } 91 | invokeCallback() { 92 | this.wasm.exports.invoke_callback(this.obj); 93 | } 94 | } 95 | 96 | let bytes = readBinaryFile("test.wasm"); 97 | let mod = new WebAssembly.Module(bytes); 98 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 99 | let rt = { release, invoke, out_of_memory }; 100 | let imports = { env: memory.env(), rt } 101 | let instance = new WebAssembly.Instance(mod, imports); 102 | 103 | function test(n) { 104 | for (let i = 0; i < n; i++) { 105 | let obj = new WasmObject(instance); 106 | obj.attachCallback(() => print(`Callback after ${nalloc} allocated.`)); 107 | if (i == 0) obj.invokeCallback(); 108 | } 109 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 110 | } 111 | 112 | async function main() { 113 | await runTestLoop(1e2, test, 1e3); 114 | checkSame(nalloc - nfinalized, table.count(), "live object count"); 115 | print(`Success; max ${nmax} objects live.`); 116 | } 117 | main() 118 | -------------------------------------------------------------------------------- /milestones/m1/test.js: -------------------------------------------------------------------------------- 1 | load('../lib.js'); 2 | 3 | class LinearMemory { 4 | constructor({initial = 256, maximum = 256}) { 5 | this.memory = new WebAssembly.Memory({ initial, maximum }); 6 | } 7 | read_string(offset) { 8 | let view = new Uint8Array(this.memory.buffer); 9 | let bytes = [] 10 | for (let byte = view[offset]; byte; byte = view[++offset]) 11 | bytes.push(byte); 12 | return String.fromCharCode(...bytes); 13 | } 14 | log(str) { console.log(`wasm log: ${str}`) } 15 | log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) } 16 | env() { 17 | return { 18 | memory: this.memory, 19 | wasm_log: (off) => this.log(this.read_string(off)), 20 | wasm_log_i: (off, i) => this.log_i(this.read_string(off), i) 21 | } 22 | } 23 | } 24 | 25 | let finalizers = new FinalizationRegistry(f => { f(); }); 26 | 27 | class ObjectTable { 28 | constructor() { 29 | this.objects = []; 30 | this.freelist = []; 31 | } 32 | expand() { 33 | let len = this.objects.length; 34 | let grow = (len >> 1) + 1; 35 | let end = len + grow; 36 | this.objects.length = end; 37 | while (end != len) 38 | this.freelist.push(--end); 39 | } 40 | intern(obj) { 41 | if (!this.freelist.length) 42 | this.expand(); 43 | let handle = this.freelist.pop(); 44 | this.objects[handle] = obj; 45 | return handle; 46 | } 47 | release(handle) { 48 | if (handle === -1) return; 49 | this.objects[handle] = null; 50 | this.freelist.push(handle); 51 | } 52 | count() { 53 | return this.objects.length - this.freelist.length; 54 | } 55 | ref(handle) { 56 | return this.objects[handle]; 57 | } 58 | } 59 | 60 | let table = new ObjectTable; 61 | function invoke(handle) { 62 | if (handle === -1) return; 63 | return table.ref(handle)(); 64 | } 65 | function release(handle) { 66 | if (handle === -1) return; 67 | table.release(handle); 68 | } 69 | function out_of_memory() { 70 | print('error: out of linear memory'); 71 | quit(1); 72 | } 73 | 74 | let nalloc = 0; 75 | let nfinalized = 0; 76 | let nmax = 0; 77 | class WasmObject { 78 | constructor(wasm) { 79 | this.wasm = wasm 80 | let obj = wasm.exports.make_obj(); 81 | this.obj = obj; 82 | nalloc++; 83 | nmax = Math.max(nalloc - nfinalized, nmax); 84 | let free_obj 
= this.wasm.exports.free_obj; 85 | finalizers.register(this, () => { nfinalized++; free_obj(obj); }, this); 86 | } 87 | attachCallback(f) { 88 | let handle = table.intern(f); 89 | this.wasm.exports.attach_callback(this.obj, handle); 90 | } 91 | invokeCallback() { 92 | this.wasm.exports.invoke_callback(this.obj); 93 | } 94 | } 95 | 96 | let bytes = readBinaryFile("test.wasm"); 97 | let mod = new WebAssembly.Module(bytes); 98 | let memory = new LinearMemory({ initial: 2, maximum: 10 }); 99 | let rt = { release, invoke, out_of_memory }; 100 | let imports = { env: memory.env(), rt } 101 | let instance = new WebAssembly.Instance(mod, imports); 102 | 103 | function test(n) { 104 | for (let i = 0; i < n; i++) { 105 | let obj = new WasmObject(instance); 106 | obj.attachCallback( 107 | () => print(`Callback from ${obj} after ${nalloc} allocated.`)); 108 | if (i == 0) obj.invokeCallback(); 109 | } 110 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 111 | } 112 | 113 | async function main() { 114 | await runTestLoop(1e2, test, 1e3); 115 | checkSame(nalloc - nfinalized, table.count(), "live object count"); 116 | print(`Success; max ${nmax} objects live.`); 117 | } 118 | main() 119 | -------------------------------------------------------------------------------- /milestones/m3/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 3: Move side table to WebAssembly 2 | 3 | [ previous: [m2](../m2/); next: [m4](../m4/) ] 4 | 5 | ## Overview 6 | 7 | In the previous milestones, the side table associating integer handles 8 | with GC-managed data from JavaScript was maintained in the JavaScript 9 | run-time. However with the [widely-implemented reference types 10 | proposal](https://github.com/WebAssembly/reference-types), it's now 11 | possible to manage that table on the WebAssembly side, reducing the size 12 | of the language-specific run-time. 13 | 14 | Relative to the previous milestone, this milestone moves the 15 | freelist/object table implementation into the [`test.S`](./test.S) 16 | WebAssembly file. The algorithm is the same, with the object table 17 | itself being a WebAssembly table holding `externref` values, and the 18 | freelist being a singly-linked-list of malloc'd nodes. 19 | 20 | For the rest, you can search the `test.S` file for instances of 21 | `externref` -- for example, `attach_callback` now takes an externref as 22 | an argument directly, and handles "interning" it into the object table 23 | without involving JavaScript. 24 | 25 | ## Details 26 | 27 | The difference from [m2](../m2) is that we moved the side-table 28 | implementation into [`test.S`](./test.S) and removed it from 29 | [`test.js`](./test.js). 30 | 31 | This is the first example in which WebAssembly handles externref values, 32 | so the [`Makefile`](./Makefile) arranges to enable the appropriate 33 | feature flags in compiler and in the various engines. 34 | 35 | Running `make` will build `test.wasm` and then run the test as in m2. 36 | 37 | ## Results 38 | 39 | ### LLVM support for reference-types, on assembly level 40 | 41 | As part of this work, Paulo and Andy added support for `externref`, 42 | instructions that operate on `externref` (`table.ref`, `table.set`, 43 | etc), and support for table relocations to LLVM. 44 | 45 | Landed patches: D88815, D89797, D90608, D90948, D91604, D91635, D91637, D91849, D91870, D92215, D92315, D92320, D92321, D92323, D92840, D94075, D94677, D96001, D96770, D96872, D97761, D97843, D97923. 
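As a rough illustration of what these patches make expressible, declaring an `externref` table and storing into it looks something like the following in LLVM's WebAssembly assembly syntax (a sketch only, with made-up symbol names; see [`test.S`](./test.S) for the real code and exact directive spellings):

```
        .tabletype objects, externref   # a table of externref values

set_slot:
        .type set_slot,@function
        .functype set_slot (i32, externref) -> ()
        local.get 0          # i32 index into the table
        local.get 1          # externref value to store
        table.set objects    # relocatable reference to the table symbol
        end_function
```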
68 | 69 | Complete support only exists on the assembly ("MC") layer and in the 70 | linker (`wasm-ld`); support on the IR level is ongoing, and will be 71 | followed by frontends (`clang`). 72 | 73 | ### Handle and side-table mechanism implemented in terms of externref 74 | 75 | In a future where C and C++ programs can reference externref values, 76 | there are going to be many times where you want a C data structure to 77 | reference a GC-managed value, but you can't put a GC-managed value 78 | directly into linear memory. Side tables and handles are the mechanism 79 | by which this will work: the code that has an externref and needs a 80 | handle will intern the object into a table, and store the integer handle 81 | into memory instead. 82 | 83 | This milestone shows the needed WebAssembly to do that. We just have to 84 | figure out how to get the compiler to emit it from C :) 85 | 86 | ### Side tables still lead to uncollectable cycles 87 | 88 | Moving the side table to WebAssembly is convenient in some ways but 89 | doesn't magically solve the cycle problem. Running the tests still 90 | fails, in the same way as in m2. 91 | -------------------------------------------------------------------------------- /milestones/m1/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 1: Cycles between WebAssembly and JavaScript are uncollectable 2 | 3 | [ previous: [m0](../m0/); next: [m2](../m2/) ] 4 | 5 | ## Overview 6 | 7 | This is the same as [m0](../m0/), but with the smallest of changes to 8 | [`test.js`](./test.js): 9 | 10 | ```diff 11 | --- ../m0/test.js 2020-09-14 13:27:24.924663561 +0200 12 | +++ test.js 2020-09-14 13:29:06.658300401 +0200 13 | @@ -103,7 +103,8 @@ 14 | function test(n) { 15 | for (let i = 0; i < n; i++) { 16 | let obj = new WasmObject(instance); 17 | - obj.installCallback(() => print(`Callback after ${nalloc} allocated.`)); 18 | + obj.installCallback( 19 | + () => print(`Callback from ${obj} after ${nalloc} allocated.`)); 20 | if (i == 0) obj.invokeCallback(); 21 | } 22 | print(`${nalloc} total allocated, ${nalloc - nfinalized} still live.`); 23 | ``` 24 | 25 | That is, our callback references the object. Seems like an innocent 26 | change, but it causes the test to fail because it causes a cycle. 27 | 28 | Note that JavaScript garbage collectors are perfectly capable of 29 | collecting cycles. However, while this case exhibits what is logically 30 | a cycle between the callback, the JS wrapper, and the C object, the 31 | garbage collector sees only the JavaScript side of things: the side 32 | table keeps the callback alive, which, as the callback captures the 33 | wrapper, keeps the wrapper alive. 34 | 35 | Because the allocated objects are never collected, this test will 36 | eventually crash because it can't allocate any more memory. In 37 | practice, our example crashes because it runs out of linear memory. 38 | 39 | This is a simplified representation of the use case that we are trying 40 | to fix in this effort, and milestone 1 indicates the problem. 41 | 42 | ## Details 43 | 44 | Running `make` will build `test.wasm` and run the test as in m0. Note 45 | however that it fails with an out-of-memory error, and therefore that 46 | the default `test` target stops after `v8.test` fails. You can verify 47 | that it fails with JSC via `make jsc.test`, and SpiderMonkey via `make 48 | spidermonkey.test`. 
49 | 
50 | An example run:
51 | 
52 | ```
53 | $ make v8.test
54 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o test.o test.c
55 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c
56 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --import-memory -o test.wasm test.o walloc.o
57 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc test.js
58 | Callback from [object Object] after 1 allocated.
59 | 1000 total allocated, 1000 still live.
60 | Callback from [object Object] after 1001 allocated.
61 | 2000 total allocated, 2000 still live.
62 | ...
63 | 55000 total allocated, 55000 still live.
64 | Callback from [object Object] after 55001 allocated.
65 | 56000 total allocated, 56000 still live.
66 | Callback from [object Object] after 56001 allocated.
67 | 57000 total allocated, 57000 still live.
68 | Callback from [object Object] after 57001 allocated.
69 | error: out of linear memory
70 | make: *** [Makefile:15: v8.test] Error 1
71 | ```
72 | 
73 | ## Results
74 | 
75 | ### We have a simple motivating example
76 | 
77 | Although this test program is small and synthetic, it exhibits a pattern
78 | of object relationships in real programs. There is reason to hope that
79 | if we find a solution for the test program, it will enable large
80 | systems to be made more reliable.
81 | 
82 | ### Cycles are easier to make than one might think
83 | 
84 | When constructing this test program, very experienced JS and WebAssembly
85 | engineers spent way longer than they had planned, ensuring that the
86 | leaks were present for the reasons we thought, and absent in the ways
87 | we expected. [Bug
88 | 1664463](https://bugzilla.mozilla.org/show_bug.cgi?id=1664463) is a good
89 | example, but simply allowing finalizers to run was also an issue. See
90 | m0 for a longer discussion of those incidental points.
91 | 
92 | We are now more convinced that "don't do that" is not a good answer to
93 | the cycle problem. Firstly, because it's difficult to know when one
94 | makes a cycle or not. Secondly, because sometimes cycles lead to more
95 | natural program structure. Finally, uncollectable cycles in hybrid
96 | WebAssembly/JS environments have such disastrous consequences, occurring
97 | at such unpredictable times, that from a system reliability point of
98 | view, if we can't avoid cycles in practice, we must make them
99 | benign.
100 | 
101 | ### In practice, the linear memory usually has the stricter limit
102 | 
103 | The failure mechanism of this test is a failure to grow linear memory.
104 | We expect that this will be the failure mechanism for insufficiently
105 | prompt GC and leaky systems, if a maximum is set on linear memory.
106 | 
--------------------------------------------------------------------------------
/milestones/getting-started.md:
--------------------------------------------------------------------------------
1 | # Before starting
2 | 
3 | There are a few dependencies that we use as part of our test setup:
4 | LLVM, wabt, and a JavaScript shell. You should make sure to have them
5 | installed before starting.
6 | 
7 | We assume that all of these projects are checked out in some root
8 | directory bound to the `SRC` environment variable. Like this:
9 | 
10 | ```
11 | export SRC=~/src
12 | ```
13 | 
14 | ## LLVM
15 | 
16 | We use LLVM to compile (Ref) C++ to WebAssembly. We need the latest
17 | upstream LLVM, as some features needed by this work have only landed
18 | recently.
19 | 
20 | ```
21 | cd $SRC
22 | git clone https://github.com/llvm/llvm-project
23 | cd llvm-project
24 | mkdir build && cd build
25 | cmake -G 'Unix Makefiles' \
26 | -DLLVM_TARGETS_TO_BUILD='X86;WebAssembly' \
27 | -DLLVM_ENABLE_PROJECTS="clang;lld" \
28 | -DCMAKE_BUILD_TYPE=Release \
29 | -DLLVM_ENABLE_ASSERTIONS=On \
30 | -DCMAKE_EXPORT_COMPILE_COMMANDS=On \
31 | -DLLVM_BUILD_32_BITS=Off \
32 | -DLLVM_ENABLE_BINDINGS=Off \
33 | ../llvm/
34 | 
35 | # If you need debug information in LLVM, use -DCMAKE_BUILD_TYPE=Debug.
36 | # Note however that the build will take longer and use a lot of memory
37 | # at the link phase.
38 | make -j$(nproc)
39 | ```
40 | 
41 | Once you're done, set the `LLVM` environment variable to the build dir;
42 | usually this is sufficient:
43 | 
44 | ```
45 | export LLVM=$SRC/llvm-project/build/bin
46 | export CC=$LLVM/clang
47 | export LD=$LLVM/wasm-ld
48 | ```
49 | 
50 | You can check that your LLVM is built correctly by running
51 | `$LLVM/llc --version`, which should output something like:
52 | 
53 | ```
54 | LLVM (http://llvm.org/):
55 | LLVM version 12.0.0git
56 | DEBUG build with assertions.
57 | Default target: x86_64-unknown-linux-gnu
58 | Host CPU: skylake-avx512
59 | 
60 | Registered Targets:
61 | wasm32 - WebAssembly 32-bit
62 | wasm64 - WebAssembly 64-bit
63 | x86 - 32-bit X86: Pentium-Pro and above
64 | x86-64 - 64-bit X86: EM64T and AMD64
65 | ```
66 | 
67 | We added the X86 targets assuming you're compiling on an x86 system;
68 | it's useful to compare native and wasm compilation.
69 | 
70 | ## wabt
71 | 
72 | Wabt (pronounced "wabbit") is the WebAssembly Binary Toolkit, and is
73 | used for its `wat2wasm` tool, which translates a textual representation
74 | of a WebAssembly module into a corresponding binary, suitable for
75 | loading into a JavaScript shell. Check it out and build it:
76 | 
77 | ```
78 | cd $SRC
79 | git clone --recursive https://github.com/WebAssembly/wabt
80 | cd wabt
81 | make -j40 gcc-release
82 | ```
83 | 
84 | Then set `WAT2WASM`:
85 | 
86 | ```
87 | export WAT2WASM=$SRC/wabt/out/gcc/Release/wat2wasm
88 | ```
89 | 
90 | You can check that everything is fine by running `$WAT2WASM --version`,
91 | which should print a version.
92 | 
93 | ## A JavaScript shell
94 | 
95 | Ultimately we're targeting WebAssembly as deployed in web browsers, and
96 | specifically in their JavaScript engines: V8 being the engine in Chrome,
97 | and SpiderMonkey in Firefox. However, the underlying "reference types"
98 | WebAssembly feature used by our Ref C++ prototype is not yet included in
99 | standard WebAssembly, so although it's present in both V8 and
100 | SpiderMonkey, it's not on by default. Therefore the easiest way to test
101 | our milestones is with a standalone JavaScript "shell" -- a little
102 | program that just includes the JavaScript / WebAssembly engine from a
103 | web browser.
104 | 
105 | Since we use finalizers as part of some milestones, the best thing is to
106 | build a fresh shell from upstream.
We won't include details here, but
107 | if you have a V8 build you are looking for the `d8` binary:
108 | 
109 | ```
110 | # V8, checked out in ~/src/v8, built in out/x64.release
111 | export D8=$SRC/v8/out/x64.release/d8
112 | ```
113 | 
114 | For SpiderMonkey it's just `js`:
115 | 
116 | ```
117 | # SpiderMonkey, checked out in ~/src/mozilla-unified, built in +js-release
118 | export SPIDERMONKEY=$SRC/mozilla-unified/+js-release/dist/bin/js
119 | ```
120 | 
121 | For JavaScriptCore it's `jsc`:
122 | ```
123 | # WebKit, checked out in ~/src/webkit, --jsc-only build in release mode
124 | export JSC=$SRC/webkit/WebKitBuild/Release/bin/jsc
125 | ```
126 | 
127 | JavaScript shells don't have a standardized interface, but the
128 | different implementations are close enough that our JavaScript code can
129 | paper over the differences. When you type "make" in a milestone, it will
130 | test all shells.
131 | 
132 | ## Summary
133 | 
134 | In the end, if you have built the dependencies as described above, you
135 | should be able to set the expected environment variables like this:
136 | 
137 | ```
138 | export SRC=~/src
139 | export LLVM=$SRC/llvm-project/build/bin
140 | export CC=$LLVM/clang
141 | export LD=$LLVM/wasm-ld
142 | export WAT2WASM=$SRC/wabt/out/gcc/Release/wat2wasm
143 | export V8=$SRC/v8/out/x64.release/d8
144 | export SPIDERMONKEY=$SRC/mozilla-unified/+js-release/dist/bin/js
145 | export JSC=$SRC/webkit/WebKitBuild/Release/bin/jsc
146 | ```
147 | 
--------------------------------------------------------------------------------
/milestones/m3/test.wat:
--------------------------------------------------------------------------------
1 | (memory $mem (import "env" "__linear_memory") 0)
2 | (func $malloc (import "env" "malloc") (param i32) (result i32))
3 | (func $free (import "env" "free") (param i32))
4 | 
5 | (func $invoke (import "rt" "invoke") (param externref))
6 | (func $out_of_memory (import "rt" "out_of_memory"))
7 | 
8 | ;; struct freelist { uint32_t handle; struct freelist *next; };
9 | (global $freelist (mut i32) (i32.const 0))
10 | 
11 | ;; Pop a handle off the freelist. Precondition: freelist non-empty.
12 | (func $freelist_pop (result i32)
13 | ;; Abort if the freelist is empty.
14 | (if (i32.eqz (global.get $freelist)) (then (unreachable)))
15 | 
16 | ;; Return value is the handle from the head of the freelist.
17 | (i32.load offset=0 (global.get $freelist))
18 | 
19 | ;; Push old freelist head.
20 | (global.get $freelist)
21 | 
22 | ;; Set $freelist to the old head's "next" link.
23 | (global.set $freelist (i32.load offset=4 (global.get $freelist)))
24 | 
25 | ;; Free the old head; the handle return value remains on the stack.
26 | (call $free))
27 | 
28 | ;; Push a handle onto the freelist. May call out_of_memory.
29 | (func $freelist_push (param $idx i32)
30 | (local $new_head i32)
31 | 
32 | ;; Allocate a new head.
33 | (local.tee $new_head (call $malloc (i32.const 8)))
34 | 
35 | ;; If the malloc failed, signal the runtime, then abort.
36 | (if (i32.eqz) (then (call $out_of_memory) (unreachable)))
37 | 
38 | ;; Link new freelist head to old freelist.
39 | (i32.store offset=4 (local.get $new_head) (global.get $freelist))
40 | 
41 | ;; Initialize handle for new freelist head.
42 | (i32.store offset=0 (local.get $new_head) (local.get $idx))
43 | 
44 | ;; Set new head as $freelist.
45 | (global.set $freelist (local.get $new_head)))
46 | 
47 | ;; Linear memory references externref values by putting them into the
48 | ;; object table, then referring to them via index.
We use the word 49 | ;; "handle" to refer to an externref's index into the object table. 50 | (table $objects 0 externref) 51 | 52 | ;; When we need to intern a new externref into the object table, we get 53 | ;; the handle by popping it off a freelist. Every free slot in the 54 | ;; table is on the freelist. If the freelist is empty, then first we 55 | ;; expand the table, pushing new handles for each new slot onto the free 56 | ;; list. 57 | (func $expand_table 58 | (local $old_size i32) 59 | (local $end i32) 60 | (local.set $old_size (table.size $objects)) 61 | 62 | ;; Grow the table by (old_size >> 1) + 1. 63 | (table.grow $objects 64 | (ref.null extern) 65 | (i32.add (i32.shr_u (local.get $old_size) 66 | (i32.const 1)) 67 | (i32.const 1))) 68 | 69 | ;; If growing the table failed, signal the runtime, then abort. 70 | (if (i32.eq (i32.const -1)) 71 | (then (call $out_of_memory) (unreachable))) 72 | 73 | ;; Push freelist entries for new slots. 74 | (local.set $end (table.size $objects)) 75 | (loop $loop 76 | (if (i32.eq (local.get $end) (local.get $old_size)) 77 | (then (return))) 78 | (local.set $end (i32.sub (local.get $end) (i32.const 1))) 79 | (call $freelist_push (local.get $end)) 80 | (br $loop))) 81 | 82 | ;; Put $obj into the object table, returning a fresh handle. 83 | (func $intern (param $obj externref) (result i32) 84 | (local $handle i32) 85 | 86 | ;; Expand the table if no slots are free. 87 | (if (i32.eqz (global.get $freelist)) 88 | (then (call $expand_table))) 89 | 90 | ;; Store $obj into the object table and return its handle. 91 | (local.set $handle (call $freelist_pop)) 92 | (table.set $objects (local.get $handle) (local.get $obj)) 93 | (local.get $handle)) 94 | 95 | ;; Release the slot in the object table corresponding to $handle. 96 | (func $release (param $handle i32) 97 | ;; If $handle was -1 (invalid), just ignore it. 98 | (if (i32.eq (local.get $handle) (i32.const -1)) 99 | (then (return))) 100 | (table.set $objects (local.get $handle) (ref.null extern)) 101 | (call $freelist_push (local.get $handle))) 102 | 103 | (func $handle_value (param $handle i32) (result externref) 104 | (if (result externref) 105 | (i32.eq (local.get $handle) (i32.const -1)) 106 | (then (ref.null extern)) 107 | (else (table.get $objects (local.get $handle))))) 108 | 109 | (func $make_obj (export "make_obj") (result i32) 110 | (local $obj i32) 111 | 112 | ;; Allocate a new object in linear memory. If that fails, signal the 113 | ;; runtime, and then abort. 114 | (if (i32.eqz (local.tee $obj (call $malloc (i32.const 4)))) 115 | (then (call $out_of_memory) (unreachable))) 116 | 117 | ;; Initialize callback handle -1 (invalid) for fresh object. 118 | (i32.store offset=0 (local.get $obj) (i32.const -1)) 119 | 120 | (local.get $obj)) 121 | 122 | (func $free_obj (export "free_obj") (param $obj i32) 123 | ;; Release the callback handle. 124 | (call $release (i32.load offset=0 (local.get $obj))) 125 | (call $free (local.get $obj))) 126 | 127 | (func $attach_callback (export "attach_callback") 128 | (param $obj i32) (param $callback externref) 129 | ;; Release old handle. 130 | (call $release (i32.load offset=0 (local.get $obj))) 131 | 132 | ;; Intern the new callback, and store handle to object. 
133 | (i32.store offset=0 134 | (local.get $obj) 135 | (call $intern (local.get $callback)))) 136 | 137 | (func $invoke_callback (export "invoke_callback") 138 | (param $obj i32) 139 | (call $invoke (call $handle_value (i32.load offset=0 (local.get $obj))))) 140 | -------------------------------------------------------------------------------- /milestones/m0/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 0: Finalizers clean up WebAssembly resources of JavaScript wrappers 2 | 3 | [ next: [m1](../m1/) ] 4 | 5 | ## Overview 6 | 7 | Here we define a basic C library which defines a simple `make_object` 8 | facility and which includes extensions to export that interface to its 9 | WebAssembly host. 10 | 11 | We define a JavaScript run-time that tests this facility, wrapping the 12 | raw pointers coming from the `make_object` calls in /wrapper objects/ on 13 | the JavaScript side. 14 | 15 | The exported interface also includes a facility to attach a callback to 16 | the object. We define a simple handle-and-side-table mechanism, 17 | allowing C to refer to JavaScript callbacks by index. The C code 18 | exports a `dispose_object` interface that will release associated 19 | resources from the side table, in addition to freeing linear memory 20 | associated with an object. 21 | 22 | Finally, we attach a finalizer to the JavaScript wrapper of the C 23 | object, allowing the system to automatically call `dispose_object` on C 24 | objects when their associated JavaScript wrappers become unreachable. 25 | 26 | This test illustrates the handle-and-side-table pattern for references 27 | from C to JavaScript. It also shows that the finalizer facility can 28 | clean up "external" resources associated with JavaScript objects, 29 | e.g. calling `free` on allocations from linear memory. 30 | 31 | However, this test also shows that the finalizers interface has a number 32 | of pitfalls in practice. 33 | 34 | ## Details 35 | 36 | The C test program is in [`test.c`](./test.c). The associated 37 | JavaScript run-time and test harness is in [`test.js`](./test.js). 38 | 39 | The C test program needs a malloc implementation; we include 40 | [walloc](https://github.com/wingo/walloc) for that purpose. 41 | 42 | Run the tests via a simple `make` (though check the [getting started 43 | document](../getting-started.md) to make sure you have all the 44 | prerequisite environment variables set). The default `test` target will 45 | run `v8.test`, `jsc.test`, and `spidermonkey.test`, which checks that 46 | our program runs under all engines. An example run looks like: 47 | 48 | ``` 49 | $ make v8.test 50 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o test.o test.c 51 | ~/src/llvm-project/build/bin/clang -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c 52 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --import-memory -o test.wasm test.o walloc.o 53 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc test.js 54 | Callback after 1 allocated. 55 | 1000 total allocated, 1000 still live. 56 | Callback after 1001 allocated. 57 | 2000 total allocated, 1000 still live. 58 | ... 59 | 99000 total allocated, 1000 still live. 60 | Callback after 99001 allocated. 61 | 100000 total allocated, 1000 still live. 62 | checking expected live object count: 0 63 | Success; max 1000 objects live. 
64 | ```
65 | 
66 | ## Results
67 | 
68 | ### Success on all JS engines
69 | 
70 | The test as written runs successfully on V8, JSC, and SpiderMonkey, at
71 | least as of September 2020.
72 | 
73 | ### Reasoning about objects referenced from closures is unspecified
74 | 
75 | The `FinalizationRegistry` facility specifies that if the "held" value
76 | passed to the finalizer callback references the finalizable object, the
77 | object will never be finalized. The registry holds a strong reference
78 | to "held" values.
79 | 
80 | In our case, the "held" value is a closure to call when the object
81 | becomes finalizable. However, the set of objects referenced by a
82 | closure is not specified by JavaScript. Does the callback capture the
83 | newly constructed `this` or not? `this` doesn't appear in its text, so
84 | you would think not; but while developing this test case, we ran into
85 | [cases where `this` was indeed
86 | captured](https://bugzilla.mozilla.org/show_bug.cgi?id=1664463). We
87 | believe that we have avoided these cases, but it is hard to say.
88 | 
89 | ### GC doesn't run often enough
90 | 
91 | Engines will run garbage collection according to a number of heuristics,
92 | principal among them how much GC-managed allocation has happened since
93 | the last GC. These heuristics are not informed about how much
94 | off-heap allocation has occurred; see
95 | https://github.com/tc39/proposal-weakrefs/issues/87 for a discussion.
96 | In practice, without the explicit calls to `gc()`, all engines will fail
97 | our tests, because they don't detect finalizable objects soon enough.
98 | 
99 | Therefore, to make these tests reliable, we insert explicit GC calls.
100 | This bodes poorly for the role of finalizers in robust systems.
101 | 
102 | ### Permissibly late finalization can cause out-of-memory
103 | 
104 | If no object is finalized, then this test will run out of linear memory.
105 | (Remove the finalizer and you will see.) However, we have no useful
106 | guarantee about when finalizers run; this is related to the previous
107 | point. The interesting thing to note is the failure mode: late
108 | finalization -- for whatever reason, perhaps because the program didn't
109 | yield to the next microtask -- can result in out-of-memory conditions,
110 | which we imagine would apply to real programs as well.
111 | 
112 | ### Users need to be careful to yield to allow finalization to happen
113 | 
114 | Related again to the previous point, users need to structure their
115 | programs carefully to allow finalizers to run. Our test runs the
116 | allocation benchmark in a nested loop, yielding to the scheduler within
117 | the outer loop. See
118 | https://github.com/tc39/proposal-weakrefs/issues/78#issuecomment-485838979
119 | for a related issue.
120 | 
121 | ### SpiderMonkey retains too much data for closures within async functions
122 | 
123 | Note that in SpiderMonkey, you can't use the obvious idiom of making the
124 | test an async function, because the callback closure ends up retaining
125 | all local variables. See [bug
126 | 1664463](https://bugzilla.mozilla.org/show_bug.cgi?id=1664463). We have
127 | worked around this bug in the test.
128 | 
--------------------------------------------------------------------------------
/milestones/README.md:
--------------------------------------------------------------------------------
1 | # Ref C++ Milestones
2 | 
3 | This folder holds milestone examples for different phases of the Ref C++
4 | effort.
5 | 6 | ## Before starting 7 | 8 | There are some dependencies that we use as part of our test setup. You 9 | should make sure to have them installed before starting; see [the 10 | getting-started guide](./getting-started.md) to get things set up. In 11 | the end you should be able to run these commands: 12 | 13 | ``` 14 | $LLVM/llc --version 15 | $CC --version 16 | $LD --version 17 | $WAT2WASM --version 18 | $D8 -h 19 | $JSC -h 20 | $SPIDERMONKEY -h 21 | ``` 22 | 23 | ## [Milestone 0: Finalizers clean up WebAssembly resources of JavaScript wrappers](./m0/) 24 | 25 | Here we define a basic C library which defines a simple `make_object` 26 | facility and which includes extensions to export that interface to its 27 | WebAssembly host. 28 | 29 | We define a JavaScript run-time that tests this facility, wrapping the 30 | raw pointers coming from the `make_object` calls in /wrapper objects/ on 31 | the JavaScript side. 32 | 33 | The exported interface also includes a facility to attach a callback to 34 | the object. We define a simple handle-and-side-table mechanism, 35 | allowing C to refer to JavaScript callbacks by index. The C code 36 | exports a `dispose_object` interface that will release associated 37 | resources from the side table, in addition to freeing linear memory 38 | associated with an object. 39 | 40 | Finally, we attach a finalizer to the JavaScript wrapper of the C 41 | object, allowing the system to automatically call `dispose_object` on C 42 | objects when their associated JavaScript wrappers become unreachable. 43 | 44 | This test illustrates the handle-and-side-table pattern for references 45 | from C to JavaScript. It also shows that the finalizer facility can 46 | clean up "external" resources associated with JavaScript objects, 47 | e.g. calling `free` on allocations from linear memory. 48 | 49 | However, this test also shows that the finalizers interface has a number 50 | of pitfalls in practice. 51 | 52 | ## [Milestone 1: Cycles between WebAssembly and JavaScript are uncollectable](./m1/) 53 | 54 | Here we have the same program as in milestone 0, but the callback 55 | attached to the C object captures the JavaScript wrapper, preventing it 56 | from being collected. 57 | 58 | Note that JavaScript garbage collectors are perfectly capable of 59 | collecting cycles. However, while this case exhibits what is logically 60 | a cycle between the callback, the JS wrapper, and the C object, the 61 | garbage collector sees only the JavaScript side of things: the side 62 | table keeps the callback alive, which, as the callback captures the 63 | wrapper, keeps the wrapper alive. 64 | 65 | Because the allocated objects are never collected, it will eventually 66 | crash because it can't allocate any more memory. In practice, our 67 | example crashes because it runs out of linear memory. 68 | 69 | This is a simplified representation of the use case that we are trying 70 | to fix in this effort, and milestone 1 indicates the problem. 71 | 72 | ## [Milestone 2: Re-express test.c as raw WebAssembly](./m2/) 73 | 74 | We would like to propose some language extensions to C and C++ to fix 75 | the cycle problem shown by milestone 1. To keep the discussion 76 | concrete, we will first show the kind of WebAssembly that we would like 77 | LLVM to produce, and test it to show that it solves the cycle problems. 78 | This test will also allow us to examine different characteristics of the 79 | proposed solution. 
80 | 81 | Therefore this milestone is the same as [milestone 0](./m0/), but with 82 | the C program replaced with corresponding WebAssembly, and "compiled" by 83 | wabt's `wat2wasm` instead of by LLVM's `llc`. 84 | 85 | ## [Milestone 3: Move side table to WebAssembly](./m3/) 86 | 87 | In milestones 1 and 2, the C++ or WebAssembly library referred to 88 | JavaScript objects by `i32` index. The actual objects were stored in a 89 | table managed by the JavaScript run-time. Therefore to call a callback, 90 | the WebAssembly would invoke an import from JavaScript, passing it an 91 | `i32` index indicating which object in the table to invoke. 92 | 93 | With `externref`, milestone 3 changes to manage this table on the 94 | WebAssembly side of things, avoiding the need for the JS run-time to 95 | manage the table. Otherwise this milestone still has the same 96 | uncollectable cycle characteristics as milestones 1 and 2. 97 | 98 | ## [Milestone 4: Collectable cycle: WebAssembly allocates GC-managed memory](./m4/) 99 | 100 | The side table in milestones 1 is the essence of our uncollectable cycle 101 | problem: it introduces a reference-counted edge in our object graph, but 102 | the garbage collector doesn't understand how to traverse that edge and 103 | instead sees the cycle as being referred to by the GC roots (the table 104 | itself). 105 | 106 | To fix this problem, we will have the WebAssembly module allocate its 107 | data structures that should participate in GC on the GC-managed heap. 108 | Since only JavaScript has the capability to allocate this memory, we'll 109 | implement the allocation routine in the JS run-time, providing accessor 110 | imports as well. 111 | 112 | ## [Milestone 5: Same as milestone 3, but with C++ compiled to LLVM](./m5/) 113 | 114 | Having shown where we're going and that milestone 4 solves the cycle 115 | problem, we need to return to the C++-to-WebAssembly problem: we are 116 | lacking a way to represent `externref` values in C++, and support in the 117 | LLVM compiler. Although using milestone 3 doesn't solve the cycle 118 | problem, it's a good intermediate step for getting `externref` into 119 | LLVM. 120 | 121 | ## [Milestone 6: Same as milestone 4, but with C++ compiled to LLVM](./m6/) 122 | 123 | This milestone represents a minimal solution to the cycle problem, but 124 | from C++ instead of from raw WebAssembly. Instead of using classes as 125 | data abstractions, the C++ here will use the raw allocation and accessor 126 | imports defined in milestone 4. 127 | 128 | ## [Milestone 7: Minimal Ref C++](./m7/) 129 | 130 | In milestone 6, we solved the cycle problem, but at a low level of 131 | abstraction. Here we will attempt to raise the abstraction level, 132 | defining lowerings of reference-typed classes à la Ref C++ to the same 133 | primitives. This milestone will result in idiomatic C++ that solves the 134 | cycle problem. 135 | -------------------------------------------------------------------------------- /milestones/m4/README.md: -------------------------------------------------------------------------------- 1 | # Milestone 4: Collectable cycle: WebAssembly allocates GC-managed memory 2 | 3 | [ previous: [m3](../m3/); next: [m5](../m5/) ] 4 | 5 | ## Overview 6 | 7 | Here we make progress! In the previous milestones, we had a cycle of 8 | objects across the WebAssembly / JavaScript boundary that wasn't 9 | traceable as such by the garbage collector. 
In this milestone, we solve
10 | this problem by moving the WebAssembly objects from linear memory to
11 | GC-managed memory.
12 | 
13 | As the cycle now exists only in GC-managed memory, it is transparent to
14 | the garbage collector, so we have solved the cycle problem.
15 | Furthermore, there is no need for finalizers, removing an error-prone
16 | component from the system.
17 | 
18 | In this example, there is nothing else in WebAssembly that needs to
19 | hold onto an object once it's allocated, so this example omits the side
20 | table as well. However, we expect that in real programs, we would
21 | occasionally need a side table for references to GC-managed data from
22 | C++. Such handles can refer to any member of a cycle, as long as the
23 | holder of the handle doesn't participate in a cycle.
24 | 
25 | ## Details
26 | 
27 | The difference from [m3](../m3) is that we made [`test.S`](./test.S)
28 | call out to the new `gc_alloc` routines from [`test.js`](./test.js).
29 | The [`test.js`](./test.js) is also trimmer, as it doesn't need
30 | finalizer support.
31 | 
32 | Incidentally, as the test doesn't actually need linear memory, we don't
33 | need to build in a malloc implementation.
34 | 
35 | Running `make` will build `test.wasm` and run the test as in m3. The
36 | reporting is a bit different, though, as we don't have the live-object
37 | telemetry provided by finalizers.
38 | 
39 | ```
40 | ~/src/llvm-project/build/bin/clang -Oz -mreference-types --target=wasm32 -nostdlib -c -o test.o test.S
41 | ~/src/llvm-project/build/bin/wasm-ld --no-entry --allow-undefined --import-memory -o test.wasm test.o
42 | ~/src/v8/out/x64.release/d8 --harmony-weak-refs --expose-gc --experimental-wasm-reftypes test.js
43 | Callback after 1 allocated.
44 | 1000 total allocated.
45 | Callback after 1001 allocated.
46 | 2000 total allocated.
47 | ...
48 | Callback after 98001 allocated.
49 | 99000 total allocated.
50 | Callback after 99001 allocated.
51 | 100000 total allocated.
52 | Success.
53 | ```
54 | 
55 | ## Results
56 | 
57 | ### Heap provided by GC-capable host
58 | 
59 | In this example, the `gc_alloc` function, provided by
60 | [`test.js`](./test.js), allocates an object with slots to hold a fixed
61 | number of reference-typed objects as well as a fixed number of bytes.
62 | 
63 | The `gc_alloc` facility models a heap that can create objects containing
64 | both GC-managed and "raw" fields. To WebAssembly, such a value is
65 | opaque; we need to call out to accessors defined by the host (by
66 | JavaScript) to initialize, reference, and modify object fields.
67 | 
68 | This approach to memory management was first prototyped in the [Schism
69 | self-hosted implementation of Scheme](https://github.com/google/schism).
70 | 
71 | ### Path towards GC proposal
72 | 
73 | Having to call out to the host to allocate objects and access their
74 | fields is evidently suboptimal. Future parts of the [GC
75 | proposal](https://github.com/WebAssembly/gc/blob/master/proposals/gc/Overview.md)
76 | allow for these operations to occur within WebAssembly, which will make
77 | for more efficient systems with smaller demands on the host run-time.
78 | 
79 | ### No need to break up main loop into async function
80 | 
81 | Because finalizers can only run asynchronously, they cause programs to
82 | keep memory alive for longer than they would otherwise.
That's why we 83 | had to break up the earlier test loop into an inner and an outer loop, 84 | running garbage collection in the middle; see the [discussion in 85 | milestone 0](../m0/) for more details. Because this is no longer 86 | necessary without finalizers, this program can be simpler on the 87 | JavaScript side as well. 88 | 89 | ### Less telemetry 90 | 91 | On the other hand, not having finalizers gives us less telemetry into 92 | the total numbers of allocated objects. While this can be added back, 93 | if it is not needed, it is just overhead. 94 | 95 | ### WebKit brought up to date 96 | 97 | In our initial investigations, the `jsc.test` target was failing due to 98 | the recent change in `ref.null` encoding. Dmitry Bezhetskov landed a number of 99 | patches recently to bring WebKit up to date, and now JSC passes all of 100 | these tests. 101 | 102 | For details, see bugs 218331, 218744, 218885, 219192, 219297, 219427, 219595, 219725, 219848, 219943, 220005, 220007, 220009, 220161, 220181, 220235, 220311, 220314, 220323, 220890, 220914, 220918, 222351, 222400, and 222779. 128 | 129 | ### Lower run-time overhead than m0 130 | 131 | It's not precisely a fair comparison, given that we don't have to 132 | explicitly run GC, we don't do finalizers, and we don't need extra 133 | turns, but this test takes less time for all engines than the versions 134 | that have side tables, despite the need for indirect field access via 135 | the `gc_ref_obj` family of functions. 136 | 137 | ### TODO: Impact on memory overhead 138 | 139 | Would be nice to check the process size and compare to m0. Also, we 140 | should be able to run indefinitely without stopping. 141 | -------------------------------------------------------------------------------- /milestones/m3/test.ll: -------------------------------------------------------------------------------- 1 | target triple = "wasm32-unknown-unknown" 2 | 3 | ; Reference types 4 | %externref = type ptr addrspace(10) 5 | 6 | %funcptr = type void () addrspace(20)* 7 | %funcref = type i8 addrspace(20)* 8 | 9 | ; External functions 10 | declare void @free(i8*) local_unnamed_addr 11 | declare hidden i8* @malloc(i32) local_unnamed_addr 12 | declare void @out_of_memory() local_unnamed_addr #1 13 | declare void @invoke(%externref) local_unnamed_addr #7 14 | 15 | ; Intrinsics 16 | declare void @llvm.trap() 17 | declare i32 @llvm.wasm.table.grow.externref(i8 addrspace(1)*, %externref, i32) nounwind readonly 18 | declare %externref @llvm.wasm.ref.null.extern() nounwind readonly 19 | declare %externref @llvm.wasm.table.get.externref(i8 addrspace(1)*, i32) nounwind 20 | declare void @llvm.wasm.table.set.externref(i8 addrspace(1)*, i32, %externref) nounwind 21 | declare i32 @llvm.wasm.table.size(i8 addrspace(1)*) nounwind readonly 22 | 23 | %struct.freelist = type { i32, %struct.freelist* } 24 | @freelist = hidden local_unnamed_addr global %struct.freelist* null 25 | 26 | ; Returns the value at the top of the list and frees the head 27 | ; Reduces the length of list by one. 28 | ; Fails if list is empty. 
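; Roughly equivalent C, for reference (an illustrative sketch of the function
; defined below):
;
;   uint32_t freelist_pop(void) {
;     struct freelist *head = freelist;   // trap if the list is empty
;     uint32_t handle = head->handle;
;     freelist = head->next;
;     free(head);
;     return handle;
;   }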
29 | define i32 @freelist_pop() { 30 | ; Check if the freelist is null 31 | %head = load %struct.freelist*, %struct.freelist** @freelist, align 4 32 | %head_is_null = icmp eq %struct.freelist* %head, null 33 | br i1 %head_is_null, label %null_head, label %good 34 | 35 | ; fail if it is 36 | null_head: 37 | unreachable 38 | 39 | good: 40 | ; Load v in order to return it 41 | %tmp = getelementptr inbounds %struct.freelist, %struct.freelist* %head, i32 0, i32 0 42 | %v = load i32, i32* %tmp 43 | 44 | ; Load a ptr to the next element 45 | %nextptr = getelementptr inbounds %struct.freelist, %struct.freelist* %head, i32 0, i32 1 46 | %next = load %struct.freelist*, %struct.freelist** %nextptr 47 | 48 | ; Store next into freelist 49 | store %struct.freelist* %next, %struct.freelist** @freelist 50 | 51 | ; Free head and return the value that we loaded from head earlier on 52 | %headi8 = bitcast %struct.freelist* %head to i8* 53 | call void @free(i8* nonnull %headi8) 54 | 55 | ret i32 %v 56 | } 57 | 58 | ; Adds an element to the top of the list 59 | ; Increases the length of list by one. 60 | define void @freelist_push(i32 %v) { 61 | ; allocate new node and check that malloc did not return null 62 | %tmp = call align 16 dereferenceable_or_null(8) i8* @malloc(i32 8) 63 | %node = bitcast i8* %tmp to i32* 64 | %is_null = icmp eq i32* %node, null 65 | br i1 %is_null, label %oom, label %good 66 | 67 | oom: 68 | call void @out_of_memory() 69 | unreachable 70 | 71 | good: 72 | ; store v in the new node 73 | store i32 %v, i32* %node 74 | 75 | ; setup pointers to freelist and node->next 76 | %freelist = load %struct.freelist*, %struct.freelist** @freelist 77 | %node_next = getelementptr inbounds i32, i32* %node, i32 1 78 | %node_next_ptr = bitcast i32* %node_next to %struct.freelist** 79 | 80 | ; store freelist in node->next 81 | store %struct.freelist* %freelist, %struct.freelist** %node_next_ptr 82 | ; store node into freelist 83 | store i32* %node, i32** bitcast (%struct.freelist** @freelist to i32**) 84 | 85 | ret void 86 | } 87 | 88 | ; Linear memory references externref values by putting them into the object 89 | ; table, then referring them via index. 90 | @objects = local_unnamed_addr addrspace(1) global [0 x %externref] undef 91 | 92 | define void @expand_table() #3 { 93 | ; get current table size 94 | %sz = call i32 @llvm.wasm.table.size(i8 addrspace(1)* @objects) 95 | 96 | ; grow the table by (old_size >> 1) + 1. 
97 | %shf = lshr i32 %sz, 1 98 | %incsize = add nuw i32 %shf, 1 99 | %null = call %externref @llvm.wasm.ref.null.extern() 100 | %ret = call i32 @llvm.wasm.table.grow.externref(i8 addrspace(1)* @objects, %externref %null, i32 %incsize) 101 | 102 | ; if growing the table failed, signal the runtime, then abort 103 | %failed = icmp eq i32 %ret, -1 104 | br i1 %failed, label %oom, label %good 105 | 106 | oom: 107 | call void @out_of_memory() 108 | unreachable 109 | 110 | good: 111 | %newsize = add i32 %sz, %incsize 112 | br label %loophd 113 | 114 | loophd: 115 | %newi = phi i32 [ %newsize, %good ], [ %i, %loopbody] 116 | %done = icmp eq i32 %newi, %sz 117 | br i1 %done, label %end, label %loopbody 118 | 119 | end: 120 | ret void 121 | 122 | loopbody: 123 | %i = add i32 %newi, -1 124 | call void @freelist_push(i32 %i) 125 | br label %loophd 126 | } 127 | 128 | define i32 @intern(%externref %ref) { 129 | %head = load %struct.freelist*, %struct.freelist** @freelist, align 4 130 | %is_null = icmp eq %struct.freelist* %head, null 131 | br i1 %is_null, label %need_expand, label %do_intern 132 | 133 | need_expand: 134 | call void @expand_table() 135 | br label %do_intern 136 | 137 | do_intern: 138 | %handle = call i32 @freelist_pop() 139 | call void @llvm.wasm.table.set.externref(i8 addrspace(1)* @objects, i32 %handle, %externref %ref) 140 | ret i32 %handle 141 | } 142 | 143 | ; release the slot in the object table corresponding to the handle 144 | define void @release(i32 %handle) { 145 | ; if handle is -1 then ignore it 146 | %is_neg = icmp eq i32 %handle, -1 147 | br i1 %is_neg, label %end, label %body 148 | 149 | body: 150 | %null = call %externref @llvm.wasm.ref.null.extern() 151 | call void @llvm.wasm.table.set.externref(i8 addrspace(1)* @objects, i32 %handle, %externref %null) 152 | call void @freelist_push(i32 %handle) 153 | br label %end 154 | 155 | end: 156 | ret void 157 | } 158 | 159 | define %externref @handle_value(i32 %handle) { 160 | ; if handle is -1 then return null 161 | %is_neg = icmp eq i32 %handle, -1 162 | br i1 %is_neg, label %retnull, label %body 163 | 164 | retnull: 165 | %null = call %externref @llvm.wasm.ref.null.extern() 166 | br label %end 167 | 168 | body: 169 | %v = call %externref @llvm.wasm.table.get.externref(i8 addrspace(1)* @objects, i32 %handle) 170 | br label %end 171 | 172 | end: 173 | %retv = phi %externref [%null, %retnull], [%v, %body] 174 | ret %externref %retv 175 | } 176 | 177 | ; allocates a new object in linear memory and if 178 | ; that fails it signals the runtime and aborts 179 | define i32* @make_obj() #0 { 180 | ; allocates a new i32 representing an obj 181 | %tmp = call dereferenceable_or_null(4) i8* @malloc(i32 4) 182 | %ptr = bitcast i8* %tmp to i32* 183 | %is_null = icmp eq i32* %ptr, null 184 | br i1 %is_null, label %oom, label %body 185 | 186 | oom: 187 | call void @out_of_memory() 188 | unreachable 189 | 190 | body: 191 | ; initialize callback handle -1 (invalid) for a fresh object 192 | store i32 -1, i32* %ptr 193 | ret i32* %ptr 194 | } 195 | 196 | define void @free_obj(i32* %obj) #8 { 197 | %v = load i32, i32* %obj 198 | call void @release(i32 %v) 199 | %ptr = bitcast i32* %obj to i8* 200 | call void @free(i8* %ptr) 201 | ret void 202 | } 203 | 204 | define void @attach_callback(i32* %obj, %externref %callback) #2 { 205 | ; release old handle 206 | %oldhandle = load i32, i32* %obj 207 | call void @release(i32 %oldhandle) 208 | ; intern the new callback, and store handle to object 209 | %handle = call i32 @intern(%externref %callback) 210 
| store i32 %handle, i32* %obj 211 | ret void 212 | } 213 | 214 | define void @invoke_callback(i32* %obj) #6 { 215 | %handle = load i32, i32* %obj 216 | %v = call %externref @handle_value(i32 %handle) 217 | call void @invoke(%externref %v) 218 | ret void 219 | } 220 | 221 | attributes #0 = { "wasm-export-name"="make_obj" } 222 | attributes #8 = { "wasm-export-name"="free_obj" } 223 | attributes #1 = { noreturn "wasm-import-module"="rt" "wasm-import-name"="out_of_memory" } 224 | attributes #2 = { "wasm-export-name"="attach_callback" } 225 | attributes #3 = { "wasm-export-name"="expand_table" } 226 | attributes #6 = { "wasm-export-name"="invoke_callback" } 227 | attributes #7 = { "wasm-import-module"="rt" "wasm-import-name"="invoke" } 228 | -------------------------------------------------------------------------------- /milestones/m3/test.S: -------------------------------------------------------------------------------- 1 | .functype out_of_memory () -> () 2 | .import_module out_of_memory, rt 3 | .import_name out_of_memory, out_of_memory 4 | .functype invoke (externref) -> () 5 | .import_module invoke, rt 6 | .import_name invoke, invoke 7 | 8 | .functype malloc (i32) -> (i32) 9 | .functype free (i32) -> () 10 | 11 | .text 12 | 13 | freelist: 14 | # struct freelist { uint32_t handle; struct freelist *next; }; 15 | .globaltype freelist, i32 16 | 17 | ## Pop a handle off the freelist. Precondition: freelist non-empty. 18 | freelist_pop: 19 | .hidden freelist_pop 20 | .globl freelist_pop 21 | .type freelist_pop,@function 22 | .functype freelist_pop () -> (i32) 23 | # Abort if the freelist is empty. 24 | global.get freelist 25 | i32.eqz 26 | if 27 | unreachable 28 | end_if 29 | # Return value is the handle from the head of the freelist. 30 | global.get freelist 31 | i32.load 0 32 | # Push old freelist head. 33 | global.get freelist 34 | # Set $freelist to the old head's "next" link. 35 | global.get freelist 36 | i32.load 4 37 | global.set freelist 38 | # Free the old head; the handle return value remains on the stack. 39 | call free 40 | end_function 41 | 42 | ## Push a handle onto the freelist. May call out_of_memory. 43 | freelist_push: 44 | .hidden freelist_push 45 | .globl freelist_push 46 | .type freelist_push,@function 47 | .functype freelist_push (i32) -> () 48 | .local i32 # new_head 49 | # Allocate a new head. 50 | i32.const 8 51 | call malloc 52 | local.tee 1 53 | 54 | # If the malloc failed, signal the runtime, then abort. 55 | i32.eqz 56 | if 57 | call out_of_memory 58 | unreachable 59 | end_if 60 | 61 | # Link new freelist head to old freelist. 62 | local.get 1 63 | global.get freelist 64 | i32.store 4 65 | 66 | # Initialize handle for new freelist head. 67 | local.get 1 68 | local.get 0 69 | i32.store 0 70 | 71 | # Set new head as $freelist. 72 | local.get 1 73 | global.set freelist 74 | end_function 75 | 76 | # Linear memory references externref values by putting them into the 77 | # object table, then referring to them via index. We use the word 78 | # "handle" to refer to an externref's index into the object table. 79 | objects: 80 | .tabletype objects,externref 81 | 82 | # When we need to intern a new externref into the object table, we get 83 | # the handle by popping it off a freelist. Every free slot in the 84 | # table is on the freelist. If the freelist is empty, then first we 85 | # expand the table, pushing new handles for each new slot onto the free 86 | # list. 
87 | expand_table: 88 | .hidden expand_table 89 | .globl expand_table 90 | .type expand_table,@function 91 | .functype expand_table () -> () 92 | .local i32,i32 # old_size, end 93 | table.size objects 94 | local.set 0 95 | # Grow the table by (old_size >> 1) + 1. 96 | ref.null_extern 97 | local.get 0 98 | i32.const 1 99 | i32.shr_u 100 | i32.const 1 101 | i32.add 102 | table.grow objects 103 | # If growing the table failed, signal the runtime, then abort. 104 | i32.const -1 105 | i32.eq 106 | if 107 | call out_of_memory 108 | unreachable 109 | end_if 110 | # Push freelist entries for new slots. 111 | table.size objects 112 | local.set 1 113 | loop 114 | local.get 1 115 | local.get 0 116 | i32.eq 117 | if 118 | return 119 | end_if 120 | local.get 1 121 | i32.const 1 122 | i32.sub 123 | local.set 1 124 | local.get 1 125 | call freelist_push 126 | br 0 127 | end_loop 128 | end_function 129 | 130 | ## Put $obj into the object table, returning a fresh handle. 131 | intern: 132 | .hidden intern 133 | .globl intern 134 | .type intern,@function 135 | .functype intern (externref) -> (i32) 136 | .local i32 # handle 137 | global.get freelist 138 | i32.eqz 139 | if 140 | call expand_table 141 | end_if 142 | call freelist_pop 143 | local.set 1 144 | local.get 1 145 | local.get 0 146 | table.set objects 147 | local.get 1 148 | end_function 149 | 150 | ## Release the slot in the object table corresponding to $handle. 151 | release: 152 | .hidden release 153 | .globl release 154 | .type release,@function 155 | .functype release (i32) -> () 156 | # If $handle was -1 (invalid), just ignore it. 157 | local.get 0 158 | i32.const -1 159 | i32.eq 160 | if 161 | return 162 | end_if 163 | local.get 0 164 | ref.null_extern 165 | table.set objects 166 | local.get 0 167 | call freelist_push 168 | end_function 169 | 170 | handle_value: 171 | .hidden handle_value 172 | .globl handle_value 173 | .type handle_value,@function 174 | .functype handle_value (i32) -> (externref) 175 | local.get 0 176 | i32.const -1 177 | i32.eq 178 | if externref 179 | ref.null_extern 180 | else 181 | local.get 0 182 | table.get objects 183 | end_if 184 | end_function 185 | 186 | make_obj: 187 | .hidden make_obj 188 | .globl make_obj 189 | .export_name make_obj, make_obj 190 | .no_dead_strip make_obj 191 | .type make_obj,@function 192 | .functype make_obj () -> (i32) 193 | .local i32 # obj 194 | # Allocate a new object in linear memory. If that fails, 195 | # signal the runtime, and then abort. 196 | i32.const 4 197 | call malloc 198 | i32.eqz 199 | if 200 | call out_of_memory 201 | unreachable 202 | end_if 203 | # Initialize callback handle -1 (invalid) for fresh object. 204 | local.get 0 205 | i32.const -1 206 | i32.store 0 207 | local.get 0 208 | end_function 209 | 210 | 211 | free_obj: 212 | .hidden free_obj 213 | .globl free_obj 214 | .export_name free_obj, free_obj 215 | .no_dead_strip free_obj 216 | .type free_obj,@function 217 | .functype free_obj (i32) -> () 218 | # Release the callback handle. 219 | local.get 0 220 | i32.load 0 221 | call release 222 | local.get 0 223 | call free 224 | end_function 225 | 226 | 227 | attach_callback: 228 | .hidden attach_callback 229 | .globl attach_callback 230 | .export_name attach_callback, attach_callback 231 | .no_dead_strip attach_callback 232 | .type attach_callback,@function 233 | .functype attach_callback (i32, externref) -> () 234 | # Release old handle. 235 | local.get 0 236 | i32.load 0 237 | call release 238 | # Intern the new callback, and store handle to object. 
239 | local.get 0 240 | local.get 1 241 | call intern 242 | i32.store 0 243 | end_function 244 | 245 | invoke_callback: 246 | .hidden invoke_callback 247 | .globl invoke_callback 248 | .export_name invoke_callback, invoke_callback 249 | .no_dead_strip invoke_callback 250 | .type invoke_callback,@function 251 | .functype invoke_callback (i32) -> () 252 | local.get 0 253 | i32.load 0 254 | call handle_value 255 | call invoke 256 | end_function 257 | -------------------------------------------------------------------------------- /milestones/m3/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 
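// With GRANULE_SIZE == 8, the classes below correspond to payloads of 8, 16,
// 24, 32, 40, 48, 64, 80, 128, and 256 bytes; larger allocations are handled
// as large objects.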
73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_CHUNK = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is LARGE_OBJECT, then the pointer is a large object, 113 | // otherwise the kind indicates the size in granules of the objects in the 114 | // chunk. 115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 
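// On this first call, any whole pages already present between __heap_base
// (rounded up to a page boundary) and the current memory size are claimed
// for the walloc heap, without calling memory.grow.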
171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) 198 | { 199 | page->header.chunk_kinds[idx] = kind; 200 | return page->chunks[idx].data; 201 | } 202 | 203 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 204 | // large object with a header, aligned on a chunk boundary, whose payload size 205 | // may be larger than SIZE, and whose total size (header included) is 206 | // chunk-aligned. Either a suitable allocation is found in the large object 207 | // freelist, or we ask the OS for some more pages and treat those pages as a 208 | // large object. If the allocation fits in that large object and there's more 209 | // than an aligned chunk's worth of data free at the end, the large object is 210 | // split. 211 | // 212 | // The return value's corresponding chunk in the page as starting a large 213 | // object. 214 | static struct large_object* 215 | allocate_large_object(size_t size) { 216 | struct large_object *best = NULL, **best_prev = &large_objects; 217 | size_t best_size = -1; 218 | for (struct large_object *prev = NULL, *walk = large_objects; 219 | walk; 220 | prev = walk, walk = walk->next) { 221 | if (walk->size >= size && walk->size < best_size) { 222 | best_size = walk->size; 223 | best = walk; 224 | if (prev) best_prev = &prev->next; 225 | if (best_size + LARGE_OBJECT_HEADER_SIZE 226 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 227 | // Not going to do any better than this; just return it. 228 | break; 229 | } 230 | } 231 | 232 | if (!best) { 233 | // The large object freelist doesn't have an object big enough for this 234 | // allocation. Allocate one or more pages from the OS, and treat that new 235 | // sequence of pages as a fresh large object. It will be split if 236 | // necessary. 237 | size_t size_with_header = size + sizeof(struct large_object); 238 | size_t n_allocated = 0; 239 | struct page *page = allocate_pages(size_with_header, &n_allocated); 240 | if (!page) { 241 | return NULL; 242 | } 243 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 244 | best = (struct large_object *)ptr; 245 | size_t page_header = ptr - ((char*) page); 246 | best->next = large_objects; 247 | best->size = best_size = 248 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 249 | ASSERT(best_size >= size_with_header); 250 | } 251 | 252 | struct large_object *next = best->next; 253 | *best_prev = next; 254 | 255 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 256 | if (tail_size) { 257 | // The best-fitting object has 1 or more aligned chunks free after the 258 | // requested allocation; split the tail off into a fresh aligned object. 
259 | struct page *start_page = get_page(best); 260 | char *start = get_large_object_payload(best); 261 | char *end = start + best_size; 262 | 263 | if (start_page == get_page(end - tail_size - 1)) { 264 | // The allocation does not span a page boundary; yay. 265 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 266 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 267 | // If the allocation itself smaller than a page, split off the head, then 268 | // fall through to maybe split the tail. 269 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 270 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 271 | struct large_object *head = best; 272 | head->size = first_page_size; 273 | head->next = large_objects; 274 | large_objects = head; 275 | 276 | struct page *next_page = start_page + 1; 277 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 278 | best = (struct large_object *) ptr; 279 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE; 280 | ASSERT(best_size >= size); 281 | start = get_large_object_payload(best); 282 | tail_size = (best_size - size) & ~CHUNK_MASK; 283 | } else { 284 | // A large object that spans more than one page will consume all of its 285 | // tail pages. Therefore if the split traverses a page boundary, round up 286 | // to page size. 287 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 288 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 289 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 290 | size = first_page_size + tail_pages_size; 291 | tail_size = best_size - size; 292 | } 293 | best->size -= tail_size; 294 | 295 | unsigned tail_idx = get_chunk_index(end - tail_size); 296 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 297 | // We would be splitting in a page header; don't do that. 
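      // (That is: if the tail would begin at chunk 0 of a page -- the chunk
      // reserved for the page header -- shrink it by one chunk so that it
      // starts at the first allocatable chunk of that page instead.)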
298 | tail_size -= CHUNK_SIZE; 299 | tail_idx++; 300 | } 301 | 302 | if (tail_size) { 303 | struct page *page = get_page(end - tail_size); 304 | char *tail_ptr = allocate_chunk(page, tail_idx, LARGE_OBJECT); 305 | struct large_object *tail = (struct large_object *) tail_ptr; 306 | tail->next = large_objects; 307 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 308 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 309 | large_objects = tail; 310 | } 311 | } 312 | 313 | return best; 314 | } 315 | 316 | static struct freelist* 317 | obtain_small_objects(enum chunk_kind kind) { 318 | struct large_object *obj = allocate_large_object(0); 319 | if (!obj) { 320 | return NULL; 321 | } 322 | char *ptr = allocate_chunk(get_page(obj), get_chunk_index(obj), kind); 323 | char *end = ptr + CHUNK_SIZE; 324 | struct freelist *next = NULL; 325 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 326 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 327 | struct freelist *head = (struct freelist*) (end - i); 328 | head->next = next; 329 | next = head; 330 | } 331 | return next; 332 | } 333 | 334 | static inline size_t size_to_granules(size_t size) { 335 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 336 | } 337 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 338 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 339 | return &small_object_freelists[kind]; 340 | } 341 | 342 | static void* 343 | allocate_small(enum chunk_kind kind) { 344 | struct freelist **loc = get_small_object_freelist(kind); 345 | if (!*loc) { 346 | struct freelist *freelist = obtain_small_objects(kind); 347 | if (!freelist) { 348 | return NULL; 349 | } 350 | *loc = freelist; 351 | } 352 | struct freelist *ret = *loc; 353 | *loc = ret->next; 354 | return (void *) ret; 355 | } 356 | 357 | static void* 358 | allocate_large(size_t size) { 359 | struct large_object *obj = allocate_large_object(size); 360 | return obj ? get_large_object_payload(obj) : NULL; 361 | } 362 | 363 | void* 364 | malloc(size_t size) { 365 | size_t granules = size_to_granules(size); 366 | enum chunk_kind kind = granules_to_chunk_kind(granules); 367 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); 368 | } 369 | 370 | void 371 | free(void *ptr) { 372 | if (!ptr) return; 373 | struct page *page = get_page(ptr); 374 | unsigned chunk = get_chunk_index(ptr); 375 | uint8_t kind = page->header.chunk_kinds[chunk]; 376 | if (kind == LARGE_OBJECT) { 377 | struct large_object *obj = get_large_object(ptr); 378 | obj->next = large_objects; 379 | large_objects = obj; 380 | } else { 381 | size_t granules = kind; 382 | struct freelist **loc = get_small_object_freelist(granules); 383 | struct freelist *obj = ptr; 384 | obj->next = *loc; 385 | *loc = obj; 386 | } 387 | } 388 | -------------------------------------------------------------------------------- /milestones/m5/externref-in-llvm.md: -------------------------------------------------------------------------------- 1 | # Externref in Clang/LLVM: Design Document 2 | 3 | ## Problem statement 4 | 5 | ### Externrefs in WebAssembly 6 | 7 | Externref values are weird. 8 | 9 | From a low-level perspective, externref values are unlike any other 10 | value in LLVM. They can't be stored to linear memory at all -- not to 11 | the heap, not to data, not to the external stack. 
12 | 13 | WebAssembly has its own *internal* stack, of course, consisting of local 14 | variables and ephemeral temporary values inside function activations. 15 | There, we can have externref values, flowing into functions as 16 | parameters, out as results, and within as mutable locals and ephemeral 17 | temporaries. But when compiling C++, we often find that a function 18 | needs to allocate with automatic duration, and that goes out to the 19 | *external* stack in linear memory. Externrefs can't go there. 20 | 21 | Externrefs can't be created by a WebAssembly program; they can only be 22 | passed into it from the outside. This can happen in four ways: 23 | 24 | * Returned from a call to an imported function. 25 | ```wasm 26 | (func $foo (import "rt" "make_foo") (result externref)) 27 | (func $bar (result externref) 28 | (call $foo)) 29 | ``` 30 | * As an argument to an exported function. 31 | ```wasm 32 | (func $take_foo (export "take_foo") (param externref) (result externref) 33 | (local.get 0)) 34 | ``` 35 | * Loaded from a table via `table.get`. The table can be populated 36 | externally, or the WebAssembly program can store externref values 37 | there. 38 | ```wasm 39 | (table $objects 0 externref) 40 | (func $get_foo (result externref) 41 | (table.get $objects (i32.const 0))) 42 | ``` 43 | * Loaded from a global of type `externref`. As with tables, this 44 | variable can be populated externally or internally. 45 | ```wasm 46 | (global $the_foo (mut externref) (ref.null extern)) 47 | (func $get_the_foo (result externref) 48 | (global.get $the_foo)) 49 | ``` 50 | 51 | ### Externrefs don't exist in C 52 | 53 | Let's assume that we would like to be able to make C programs that can 54 | compile to these example WebAssembly snippets. What should the C 55 | programs look like? How should they compile? 56 | 57 | A first important thing to note is that there is no such C code "out 58 | there" right now. Unlike the original goal for emscripten, which was to 59 | allow existing C and C++ programs to run in web browsers, there is no C 60 | or C++ program out there now that deals in these externref values. The 61 | reason being, the low-level particularies of externrefs do not translate 62 | well into C. 63 | 64 | To illustrate this, assume the existence of an externref type in C. 65 | Consider: 66 | 67 | ```c 68 | struct a { int32_t b; externref c; }; 69 | 70 | struct a *a = malloc(sizeof struct a); // What is a->c ? 71 | a->b = 42; 72 | a->c = get_foo(); // Error! 73 | ``` 74 | 75 | What is the byte representation of an externref value? All C++ types 76 | can be written to memory, but we know nothing about externrefs, neither 77 | from a semantics perspective nor from a low-level WebAssembly 78 | perspective -- there are no operations that can tell you how to 79 | represent it as bytes. 80 | 81 | (We expect externref values to be implemented as GC-managed values in 82 | WebAssembly implementations, as indeed they are in browsers. But we 83 | have no way of accessing the bits of the value. We're careful not to 84 | say "managed pointer" and "pointer address" here, because the GC-managed 85 | value could be a tagged immediate without an associated address.) 86 | 87 | ## Externrefs are a problem for LLVM 88 | 89 | Let's take another look at the first example. 
If we were working with 90 | `i32` values instead, we'd have: 91 | 92 | ```c 93 | int foo(void); 94 | int bar(void) { 95 | int value = foo(); 96 | return value; 97 | } 98 | ``` 99 | 100 | If we compile with -emit-llvm, we can see what clang does to produce 101 | LLVM IR for this function: 102 | 103 | ```llvm 104 | declare i32 @foo() 105 | 106 | define i32 @bar() { 107 | %1 = alloca i32, align 4 108 | %2 = call i32 @foo() 109 | store i32 %2, i32* %1, align 4 110 | %3 = load i32, i32* %1, align 4 111 | ret i32 %3 112 | } 113 | ``` 114 | 115 | What we can see is that the clang frontend will `alloca` some bytes for 116 | each local. Initializing `value` stores a value to that location, and 117 | referencing `value` performs the corresponding memory load. Clang 118 | relies on the LLVM optimizer to turn these memory operations into SSA 119 | values that don't need memory (the 120 | [mem2reg](https://llvm.org/docs/Passes.html#mem2reg-promote-memory-to-register) 121 | pass). 122 | 123 | As a compilation strategy, this doesn't work for raw WebAssembly 124 | externref values. If we assume an `externref` type, and we have: 125 | 126 | ```c 127 | externref foo(void); 128 | externref bar(void) { 129 | externref value = foo(); 130 | return value; 131 | } 132 | ``` 133 | 134 | Then clang would lower it as 135 | 136 | ``` 137 | %externref = type opaque ; ???? 138 | 139 | define %externref @bar() { 140 | %1 = alloca %externref ; ???? 141 | %2 = call %externref @foo() ; OK 142 | store %externref %2, %externref* %1 ; ???? 143 | %3 = load %externref, %externref* %1 ; ???? 144 | ret %externref %3 ; OK 145 | } 146 | ``` 147 | 148 | How could we `alloca` memory for a value whose representation is opaque? 149 | 150 | How could we load and store values into that memory? 151 | 152 | Externrefs are a problem for clang, in the sense that its strategy of 153 | using `alloca` for locals assumes that all locals can be stored to 154 | linear memory. 155 | 156 | ## Solutions 157 | 158 | ### Idea 1: Indirection 159 | 160 | Externref values have an impedance mismatch with C and with LLVM. One 161 | way to fix this would be to introduce an indirection. 162 | 163 | When externref values enter WebAssembly, via one of the four methods 164 | described earlier, we arrange to make the compiler write them to a 165 | table. All values of type `externref` in the C program are transformed 166 | to operate on indices into this table. 167 | 168 | The table becomes a parallel store of externref values -- like 169 | linear memory, but for externrefs. Like the linear memory heap, some 170 | values have static duration (the data section), some have automatic 171 | duration (the stack), and some have dynamic duration (the heap). The 172 | compiler and the linker will need to be extended in the usual ways to 173 | manage this heap, allocating static objects, determining a 174 | `__heap_base`, a stack size, and so on. 175 | 176 | The compiler will have to emit corresponding operations to free values 177 | from the externref heap when their duration ends. When a function 178 | activation with `alloca`'d externrefs returns, we'd have to reset the 179 | "externref stack pointer", and to avoid retaining garbage, also null out 180 | the freed entries in the stack part of the externref heap. 181 | 182 | To C, an externref would then be like a pointer -- but not a pointer to 183 | linear memory. 
Dereferencing it doesn't really make sense, because 184 | we're hiding externref values from C; the whole point of the indirection 185 | is to prevent raw externref values from leaking to C. So maybe a 186 | pointer to an opaque type. But, it's a pointer that can't really be 187 | cast to any other pointer; so, perhaps a pointer in a different address 188 | space, using the `__attribute__((address_space(N)))` facility originally 189 | introduced for OpenCL and other partitioned-heap languages. While we're 190 | doing that, we could use the `N` in the address space to indicate 191 | precisely which table the externref is stored in; it might be a neat 192 | trick. 193 | 194 | If we look at our example program, it could compile as: 195 | 196 | ```llvm 197 | %externref = type opaque 198 | %indirect_externref = %externref * 199 | 200 | declare %indirect_externref @foo() 201 | 202 | define %indirect_externref @bar() { 203 | %1 = alloca %indirect_externref 204 | %2 = call %indirect_externref @foo() 205 | store %indirect_externref %2, %indirect_externref* %1 206 | %3 = load %indirect_externref, %indirect_externref* %1 207 | ret %indirect_externref %3 208 | } 209 | ``` 210 | 211 | But, perhaps this is a bit magical. Who actually produces the indirect 212 | externrefs? Who puts the externref values in a table? Who is 213 | responsible for freeing the values when their duration ends? 214 | 215 | Perhaps it's more illustrative to consider clang as performing the 216 | indirection at all function boundaries, not just imported functions. In 217 | that case, `foo` returns a raw externref value, and `bar` would have to 218 | intern it, then reload it before returning. We're putting a large 219 | burden on the optimizer here, but let's see where this takes us: 220 | 221 | ```llvm 222 | %externref = type opaque ; ??? 223 | %opaque = type opaque 224 | %indirect_externref = %opaque * 225 | 226 | ; Push one value on the externref stack, initializing to 227 | ; (ref.null extern). Return its index. 228 | declare i32 @externref_alloca() 229 | 230 | ; Set the externref corresponding to a table index. 231 | declare void @externref_store(i32 %0, %externref %1); 232 | 233 | ; Retrieve the externref corresponding to a table index. 234 | declare %externref @externref_load(i32 %0); 235 | 236 | ; Free one value from the externref stack, popping the 237 | ; externref stack pointer. 238 | declare void @externref_freea() 239 | 240 | ; @foo and @bar return raw (not indirected) externrefs. 241 | declare %externref @foo() 242 | 243 | define %externref @bar() { 244 | %1 = alloca %indirect_externref 245 | %2 = call %externref @foo() 246 | 247 | ; Intern the raw externref %2 to the externref table. 248 | %3 = call i32 @externref_alloca() 249 | call @externref_store(%3, %2) 250 | 251 | ; Treat the index into the externref table as a pointer. 252 | ; This is the visible-to-C value of the externref. 253 | %4 = bitcast i32 %3 to %indirect_externref 254 | 255 | ; Since clang put this local on the stack, write it there. 256 | store %indirect_externref %4, %indirect_externref* %1 257 | 258 | ; To reference the local, load it from the stack. 259 | %5 = load %indirect_externref, %indirect_externref* %1 260 | 261 | ; In this example, functions take and return raw externref 262 | ; values, so strip the indirection, freeing the table slot. 
263 | %6 = bitcast %indirect_externref %5 to i32 264 | %7 = call @externref_load(%6) 265 | call @externref_freea() 266 | 267 | ret %externref %7 268 | } 269 | ``` 270 | 271 | There are a few things that pop out at us in this unoptimized code 272 | listing. 273 | 274 | 1. We now have a double indirection: one, that the %indirect_externref 275 | starts life in memory, and must be lifted to SSA values; and two, 276 | that the raw externref values themselves are also indirected into 277 | their own heap. 278 | 279 | 2. Unlike `alloca`, we need to insert explicit `externref_freea` calls 280 | on exit paths, and presumably on unwinds as well. 281 | 282 | 3. There are three types here; we need the "opaque" type to indicate 283 | that %indirect_externref values can't be usefully dereferenced. 284 | 285 | 4. For heap data with dynamic duration, it's not immediately clear 286 | where the compiler should insert `externref_free` calls. 287 | 288 | 5. You can do pointer arithmetic on %indirect_externref values. This 289 | is not a good thing! 290 | 291 | 6. How would you go about defining your own externref tables, or 292 | accessing values in those tables? Would such values be 293 | doubly-indirected? 294 | 295 | 7. We can use address spaces, but I am not sure what it buys us besides 296 | ensuring that %indirect_externref values aren't interpreted by the 297 | optimizer as aliasing anything on the linear heap. 298 | 299 | ### Idea 2: Restrictions and builtins 300 | 301 | Let's back up here a bit. WebAssembly has instructions to operate on 302 | the linear heap, like `i32.load` and `i8.store`. You could imagine a 303 | compilation strategy whereby LLVM expected user programs to include 304 | builtin calls every time they accessed linear memory; a load would be a 305 | call to `__builtin_wasm_i32_load(i32 offset)`, stores via 306 | `__builtin_wasm_i8_store(i32 offset, i8 val)`, and so on. But that 307 | would be silly. Instead, LLVM treats C pointers as denoting locations 308 | on the linear heap, and the compiler arranges to emit `i32.load`, 309 | `i8.store`, and so on for pointer access. There is very little 310 | impedance mismatch here betwen C, LLVM, and WebAssembly. 311 | 312 | With reference types, WebAssembly now also has instructions to operate 313 | on tables, which are a kind of heap, but for externrefs. The "indirect" 314 | strategy above tries to treat table access as pointers, in analogy to 315 | how access to the linear heap is compiled. But does it make sense to 316 | take this approach? We can't dereference the pointers, and arithmetic 317 | makes no sense. The externref values we're pointing to don't have a 318 | linear representation, and it seems like clang's need to `alloca` local 319 | variable storage is a tail-wagging-the-dog situation: it is what is 320 | causing us problems. 321 | 322 | What if, instead, we treat externrefs as a new class of value. There is 323 | no code out there that uses them right now, so this is an option open to 324 | us. You can't take their address, you can't dereference them, in fact 325 | you can't do anything on them other than use them as function 326 | parameters, return values, and locals. They have nothing to do with 327 | tables, so forcing them to be a pointer to a table seems a needless 328 | complication. 329 | 330 | With this approach, if we wanted to store an externref value to a 331 | table, we'd call a builtin to do so. If we need the null value, we call 332 | a builtin. 
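As a rough sketch of what this programming model might look like -- with the
caveat that the `externref` type, the table declaration, and the builtin
spellings below are all placeholders rather than settled design -- user code
could read something like:

```c
// Hypothetical surface syntax: `externref` is an opaque value type, `objects`
// stands in for "an externref table" (declaration syntax TBD), and the
// builtin names are illustrative only.
extern externref_table objects;

externref get_foo(void);              // imported; returns a raw externref

void stash_foo(int idx) {
  externref v = get_foo();            // fine: externref as a local
  if (!__builtin_wasm_ref_is_null(v)) // null test goes through a builtin
    __builtin_wasm_table_set(objects, idx, v);  // table store via a builtin
  // Not allowed, and rejected by the front-end: &v, sizeof v, or storing v
  // through a pointer into linear memory.
}
```

The important property is that an externref only ever lives in a parameter, a
local, or a builtin call; nothing in this program ever needs a byte
representation for it.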
333 | 334 | If we need to store an externref to the (linear) heap -- well, you just 335 | can't do that. It's a restriction. We require the user program to 336 | build its own side table if needed. We'd need explicit support in the 337 | compiler for working with tables, so that the user could implement their 338 | side table, but that's fine. 339 | 340 | There is an open question about how we would represent raw externref 341 | values in LLVM, but we also have that issue with the indirect strategy, 342 | at the boundaries where values are stored and loaded. 343 | 344 | Finally, we return to the `alloca` problem. The reason clang takes this 345 | approach is that locals are mutable; it needs to do a pass to determine 346 | where to place phi variables. We should adapt to this. Locals of type 347 | `externref` should be allocated not with standard `alloca`, but rather 348 | something like `externref_alloca`. The front-end would insert the 349 | corresponding `externref_free` calls as well. Therefore the IR coming 350 | out of the front-end for our example would look like: 351 | 352 | ```llvm 353 | define %externref @bar() { 354 | %1 = call %externref @foo() 355 | %2 = call i32 @externref_alloca() 356 | call @externref_store(%2, %1) 357 | %3 = call @externref_load(%2) 358 | call @externref_freea() 359 | ret %externref %3 360 | } 361 | ``` 362 | 363 | We would then add a `mem2reg`-like optimizer pass that could reason 364 | about `externref_alloca` and friends, allowing minimal optimization to 365 | result in: 366 | 367 | ```llvm 368 | define %externref @bar() { 369 | %1 = call %externref @foo() 370 | ret %externref %1 371 | } 372 | ``` 373 | 374 | In summary, we would need to: 375 | 1. Add an opaque type for `externref` to LLVM 376 | 2. Add the externref alloca transformation to the C / C++ front-end 377 | 3. Add front-end restrictions on where externrefs can be stored 378 | 4. Add a mem2reg optimization pass for the externref stack 379 | 380 | ### Synthesis? 381 | 382 | These two ideas approach each other from opposite directions. If we 383 | start from the perspective that externref values should be tightly 384 | integrated with C, we arrive at the indirect approach. If we start from 385 | the code that we would like to generate, we arrive at the restrictive 386 | approach; but we still need some run-time indirection facilities for 387 | handling the `alloca` transformation. But if we do support automatic 388 | storage duration, why not add more transformations, for example to 389 | support static durations? And if we want good code, we already need a 390 | `mem2reg` pass for the externref heap; could this result in good code 391 | coming from the indirect side? 392 | 393 | We don't know the answer, but we think that the incremental way to start 394 | is to begin with the restrictive approach. If the indirect approach is 395 | possible (and a good idea), it can be reached next. 396 | -------------------------------------------------------------------------------- /milestones/m0/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 
3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 
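// (With GRANULE_SIZE of 8, these granule counts correspond to size classes of
// 8, 16, 24, 32, 40, 48, 64, 80, 128, and 256 bytes.)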
73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 
171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. 
Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page as starting a large 274 | // object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it. 290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 
333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that. 365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct 
freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /milestones/m1/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? 
b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 
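// (Concretely, as computed by get_page() and get_chunk_index() below: for a
// pointer P = 0x20140, the page header lives at P & ~PAGE_MASK = 0x20000 and
// the chunk index is (P & PAGE_MASK) / CHUNK_SIZE = 0x140 / 256 = 1.)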
115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 
205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page as starting a large 274 | // object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it. 
290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that. 
365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? 
allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /milestones/m2/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? 
b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 
115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 
205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page is marked as starting a 274 | // large object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it.
290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself is smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that.
365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? 
allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /milestones/m5/walloc.c: -------------------------------------------------------------------------------- 1 | // walloc.c: a small malloc implementation for use in WebAssembly targets 2 | // Copyright (c) 2020 Igalia, S.L. 3 | // 4 | // Permission is hereby granted, free of charge, to any person obtaining a 5 | // copy of this software and associated documentation files (the 6 | // "Software"), to deal in the Software without restriction, including 7 | // without limitation the rights to use, copy, modify, merge, publish, 8 | // distribute, sublicense, and/or sell copies of the Software, and to 9 | // permit persons to whom the Software is furnished to do so, subject to 10 | // the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included 13 | // in all copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | typedef __SIZE_TYPE__ size_t; 24 | typedef __UINTPTR_TYPE__ uintptr_t; 25 | typedef __UINT8_TYPE__ uint8_t; 26 | 27 | #define NULL ((void *) 0) 28 | 29 | #define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") 30 | 31 | #ifndef NDEBUG 32 | #define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0) 33 | #else 34 | #define ASSERT(x) do { } while (0) 35 | #endif 36 | #define ASSERT_EQ(a,b) ASSERT((a) == (b)) 37 | 38 | static inline size_t max(size_t a, size_t b) { 39 | return a < b ? 
b : a; 40 | } 41 | static inline uintptr_t align(uintptr_t val, uintptr_t alignment) { 42 | return (val + alignment - 1) & ~(alignment - 1); 43 | } 44 | #define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y)) 45 | 46 | #define CHUNK_SIZE 256 47 | #define CHUNK_SIZE_LOG_2 8 48 | #define CHUNK_MASK (CHUNK_SIZE - 1) 49 | STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2); 50 | 51 | #define PAGE_SIZE 65536 52 | #define PAGE_SIZE_LOG_2 16 53 | #define PAGE_MASK (PAGE_SIZE - 1) 54 | STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2); 55 | 56 | #define CHUNKS_PER_PAGE 256 57 | STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE); 58 | 59 | #define GRANULE_SIZE 8 60 | #define GRANULE_SIZE_LOG_2 3 61 | #define LARGE_OBJECT_THRESHOLD 256 62 | #define LARGE_OBJECT_GRANULE_THRESHOLD 32 63 | 64 | STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); 65 | STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, 66 | LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); 67 | 68 | struct chunk { 69 | char data[CHUNK_SIZE]; 70 | }; 71 | 72 | // There are small object pages for allocations of these sizes. 73 | #define FOR_EACH_SMALL_OBJECT_GRANULES(M) \ 74 | M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32) 75 | 76 | enum chunk_kind { 77 | #define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i, 78 | FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND) 79 | #undef DEFINE_SMALL_OBJECT_CHUNK_KIND 80 | 81 | SMALL_OBJECT_CHUNK_KINDS, 82 | FREE_LARGE_OBJECT = 254, 83 | LARGE_OBJECT = 255 84 | }; 85 | 86 | static const uint8_t small_object_granule_sizes[] = 87 | { 88 | #define SMALL_OBJECT_GRANULE_SIZE(i) i, 89 | FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE) 90 | #undef SMALL_OBJECT_GRANULE_SIZE 91 | }; 92 | 93 | static enum chunk_kind granules_to_chunk_kind(unsigned granules) { 94 | #define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i; 95 | FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE); 96 | #undef TEST_GRANULE_SIZE 97 | return LARGE_OBJECT; 98 | } 99 | 100 | static unsigned chunk_kind_to_granules(enum chunk_kind kind) { 101 | switch (kind) { 102 | #define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i; 103 | FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE); 104 | #undef CHUNK_KIND_GRANULE_SIZE 105 | default: 106 | return -1; 107 | } 108 | } 109 | 110 | // Given a pointer P returned by malloc(), we get a header pointer via 111 | // P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNKS_PER_PAGE. If 112 | // chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large 113 | // object, otherwise the kind indicates the size in granules of the objects in 114 | // the chunk. 
115 | struct page_header { 116 | uint8_t chunk_kinds[CHUNKS_PER_PAGE]; 117 | }; 118 | 119 | struct page { 120 | union { 121 | struct page_header header; 122 | struct chunk chunks[CHUNKS_PER_PAGE]; 123 | }; 124 | }; 125 | 126 | #define PAGE_HEADER_SIZE (sizeof (struct page_header)) 127 | #define FIRST_ALLOCATABLE_CHUNK 1 128 | STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE); 129 | 130 | static struct page* get_page(void *ptr) { 131 | return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK); 132 | } 133 | static unsigned get_chunk_index(void *ptr) { 134 | return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE; 135 | } 136 | 137 | struct freelist { 138 | struct freelist *next; 139 | }; 140 | 141 | struct large_object { 142 | struct large_object *next; 143 | size_t size; 144 | }; 145 | 146 | #define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object)) 147 | 148 | static inline void* get_large_object_payload(struct large_object *obj) { 149 | return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE; 150 | } 151 | static inline struct large_object* get_large_object(void *ptr) { 152 | return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE); 153 | } 154 | 155 | static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS]; 156 | static struct large_object *large_objects; 157 | 158 | extern void __heap_base; 159 | static size_t walloc_heap_size; 160 | 161 | static struct page* 162 | allocate_pages(size_t payload_size, size_t *n_allocated) { 163 | size_t needed = payload_size + PAGE_HEADER_SIZE; 164 | size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE; 165 | uintptr_t base = heap_size; 166 | uintptr_t preallocated = 0, grow = 0; 167 | 168 | if (!walloc_heap_size) { 169 | // We are allocating the initial pages, if any. We skip the first 64 kB, 170 | // then take any additional space up to the memory size. 171 | uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE); 172 | preallocated = heap_size - heap_base; // Preallocated pages. 173 | walloc_heap_size = preallocated; 174 | base -= preallocated; 175 | } 176 | 177 | if (preallocated < needed) { 178 | // Always grow the walloc heap at least by 50%. 179 | grow = align(max(walloc_heap_size / 2, needed - preallocated), 180 | PAGE_SIZE); 181 | ASSERT(grow); 182 | if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) { 183 | return NULL; 184 | } 185 | walloc_heap_size += grow; 186 | } 187 | 188 | struct page *ret = (struct page *)base; 189 | size_t size = grow + preallocated; 190 | ASSERT(size); 191 | ASSERT_ALIGNED(size, PAGE_SIZE); 192 | *n_allocated = size / PAGE_SIZE; 193 | return ret; 194 | } 195 | 196 | static char* 197 | allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) { 198 | page->header.chunk_kinds[idx] = kind; 199 | return page->chunks[idx].data; 200 | } 201 | 202 | // It's possible for splitting to produce a large object of size 248 (256 minus 203 | // the header size) -- i.e. spanning a single chunk. In that case, push the 204 | // chunk back on the GRANULES_32 small object freelist. 
205 | static void maybe_repurpose_single_chunk_large_objects_head(void) { 206 | if (large_objects->size < CHUNK_SIZE) { 207 | unsigned idx = get_chunk_index(large_objects); 208 | char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32); 209 | large_objects = large_objects->next; 210 | struct freelist* head = (struct freelist *)ptr; 211 | head->next = small_object_freelists[GRANULES_32]; 212 | small_object_freelists[GRANULES_32] = head; 213 | } 214 | } 215 | 216 | // If there have been any large-object frees since the last large object 217 | // allocation, go through the freelist and merge any adjacent objects. 218 | static int pending_large_object_compact = 0; 219 | static struct large_object** 220 | maybe_merge_free_large_object(struct large_object** prev) { 221 | struct large_object *obj = *prev; 222 | while (1) { 223 | char *end = get_large_object_payload(obj) + obj->size; 224 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 225 | unsigned chunk = get_chunk_index(end); 226 | if (chunk < FIRST_ALLOCATABLE_CHUNK) { 227 | // Merging can't create a large object that newly spans the header chunk. 228 | // This check also catches the end-of-heap case. 229 | return prev; 230 | } 231 | struct page *page = get_page(end); 232 | if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) { 233 | return prev; 234 | } 235 | struct large_object *next = (struct large_object*) end; 236 | 237 | struct large_object **prev_prev = &large_objects, *walk = large_objects; 238 | while (1) { 239 | ASSERT(walk); 240 | if (walk == next) { 241 | obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size; 242 | *prev_prev = walk->next; 243 | if (prev == &walk->next) { 244 | prev = prev_prev; 245 | } 246 | break; 247 | } 248 | prev_prev = &walk->next; 249 | walk = walk->next; 250 | } 251 | } 252 | } 253 | static void 254 | maybe_compact_free_large_objects(void) { 255 | if (pending_large_object_compact) { 256 | pending_large_object_compact = 0; 257 | struct large_object **prev = &large_objects; 258 | while (*prev) { 259 | prev = &(*maybe_merge_free_large_object(prev))->next; 260 | } 261 | } 262 | } 263 | 264 | // Allocate a large object with enough space for SIZE payload bytes. Returns a 265 | // large object with a header, aligned on a chunk boundary, whose payload size 266 | // may be larger than SIZE, and whose total size (header included) is 267 | // chunk-aligned. Either a suitable allocation is found in the large object 268 | // freelist, or we ask the OS for some more pages and treat those pages as a 269 | // large object. If the allocation fits in that large object and there's more 270 | // than an aligned chunk's worth of data free at the end, the large object is 271 | // split. 272 | // 273 | // The return value's corresponding chunk in the page is marked as starting a 274 | // large object. 275 | static struct large_object* 276 | allocate_large_object(size_t size) { 277 | maybe_compact_free_large_objects(); 278 | struct large_object *best = NULL, **best_prev = &large_objects; 279 | size_t best_size = -1; 280 | for (struct large_object **prev = &large_objects, *walk = large_objects; 281 | walk; 282 | prev = &walk->next, walk = walk->next) { 283 | if (walk->size >= size && walk->size < best_size) { 284 | best_size = walk->size; 285 | best = walk; 286 | best_prev = prev; 287 | if (best_size + LARGE_OBJECT_HEADER_SIZE 288 | == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE)) 289 | // Not going to do any better than this; just return it.
290 | break; 291 | } 292 | } 293 | 294 | if (!best) { 295 | // The large object freelist doesn't have an object big enough for this 296 | // allocation. Allocate one or more pages from the OS, and treat that new 297 | // sequence of pages as a fresh large object. It will be split if 298 | // necessary. 299 | size_t size_with_header = size + sizeof(struct large_object); 300 | size_t n_allocated = 0; 301 | struct page *page = allocate_pages(size_with_header, &n_allocated); 302 | if (!page) { 303 | return NULL; 304 | } 305 | char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 306 | best = (struct large_object *)ptr; 307 | size_t page_header = ptr - ((char*) page); 308 | best->next = large_objects; 309 | best->size = best_size = 310 | n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE; 311 | ASSERT(best_size >= size_with_header); 312 | } 313 | 314 | allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT); 315 | 316 | struct large_object *next = best->next; 317 | *best_prev = next; 318 | 319 | size_t tail_size = (best_size - size) & ~CHUNK_MASK; 320 | if (tail_size) { 321 | // The best-fitting object has 1 or more aligned chunks free after the 322 | // requested allocation; split the tail off into a fresh aligned object. 323 | struct page *start_page = get_page(best); 324 | char *start = get_large_object_payload(best); 325 | char *end = start + best_size; 326 | 327 | if (start_page == get_page(end - tail_size - 1)) { 328 | // The allocation does not span a page boundary; yay. 329 | ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE); 330 | } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) { 331 | // If the allocation itself is smaller than a page, split off the head, then 332 | // fall through to maybe split the tail. 333 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 334 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 335 | struct large_object *head = best; 336 | allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT); 337 | head->size = first_page_size; 338 | head->next = large_objects; 339 | large_objects = head; 340 | 341 | maybe_repurpose_single_chunk_large_objects_head(); 342 | 343 | struct page *next_page = start_page + 1; 344 | char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT); 345 | best = (struct large_object *) ptr; 346 | best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE; 347 | ASSERT(best_size >= size); 348 | start = get_large_object_payload(best); 349 | tail_size = (best_size - size) & ~CHUNK_MASK; 350 | } else { 351 | // A large object that spans more than one page will consume all of its 352 | // tail pages. Therefore if the split traverses a page boundary, round up 353 | // to page size. 354 | ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE); 355 | size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK); 356 | size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE); 357 | size = first_page_size + tail_pages_size; 358 | tail_size = best_size - size; 359 | } 360 | best->size -= tail_size; 361 | 362 | unsigned tail_idx = get_chunk_index(end - tail_size); 363 | while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) { 364 | // We would be splitting in a page header; don't do that.
365 | tail_size -= CHUNK_SIZE; 366 | tail_idx++; 367 | } 368 | 369 | if (tail_size) { 370 | struct page *page = get_page(end - tail_size); 371 | char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT); 372 | struct large_object *tail = (struct large_object *) tail_ptr; 373 | tail->next = large_objects; 374 | tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE; 375 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE); 376 | large_objects = tail; 377 | 378 | maybe_repurpose_single_chunk_large_objects_head(); 379 | } 380 | } 381 | 382 | ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE); 383 | return best; 384 | } 385 | 386 | static struct freelist* 387 | obtain_small_objects(enum chunk_kind kind) { 388 | struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32]; 389 | void *chunk; 390 | if (*whole_chunk_freelist) { 391 | chunk = *whole_chunk_freelist; 392 | *whole_chunk_freelist = (*whole_chunk_freelist)->next; 393 | } else { 394 | chunk = allocate_large_object(0); 395 | if (!chunk) { 396 | return NULL; 397 | } 398 | } 399 | char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind); 400 | char *end = ptr + CHUNK_SIZE; 401 | struct freelist *next = NULL; 402 | size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE; 403 | for (size_t i = size; i <= CHUNK_SIZE; i += size) { 404 | struct freelist *head = (struct freelist*) (end - i); 405 | head->next = next; 406 | next = head; 407 | } 408 | return next; 409 | } 410 | 411 | static inline size_t size_to_granules(size_t size) { 412 | return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; 413 | } 414 | static struct freelist** get_small_object_freelist(enum chunk_kind kind) { 415 | ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS); 416 | return &small_object_freelists[kind]; 417 | } 418 | 419 | static void* 420 | allocate_small(enum chunk_kind kind) { 421 | struct freelist **loc = get_small_object_freelist(kind); 422 | if (!*loc) { 423 | struct freelist *freelist = obtain_small_objects(kind); 424 | if (!freelist) { 425 | return NULL; 426 | } 427 | *loc = freelist; 428 | } 429 | struct freelist *ret = *loc; 430 | *loc = ret->next; 431 | return (void *) ret; 432 | } 433 | 434 | static void* 435 | allocate_large(size_t size) { 436 | struct large_object *obj = allocate_large_object(size); 437 | return obj ? get_large_object_payload(obj) : NULL; 438 | } 439 | 440 | void* 441 | malloc(size_t size) { 442 | size_t granules = size_to_granules(size); 443 | enum chunk_kind kind = granules_to_chunk_kind(granules); 444 | return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind); 445 | } 446 | 447 | void 448 | free(void *ptr) { 449 | if (!ptr) return; 450 | struct page *page = get_page(ptr); 451 | unsigned chunk = get_chunk_index(ptr); 452 | uint8_t kind = page->header.chunk_kinds[chunk]; 453 | if (kind == LARGE_OBJECT) { 454 | struct large_object *obj = get_large_object(ptr); 455 | obj->next = large_objects; 456 | large_objects = obj; 457 | allocate_chunk(page, chunk, FREE_LARGE_OBJECT); 458 | pending_large_object_compact = 1; 459 | } else { 460 | size_t granules = kind; 461 | struct freelist **loc = get_small_object_freelist(granules); 462 | struct freelist *obj = ptr; 463 | obj->next = *loc; 464 | *loc = obj; 465 | } 466 | } 467 | --------------------------------------------------------------------------------
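The walloc.c copies above expose only malloc and free. As a minimal sketch of how those two entry points might be exercised from C, the following hypothetical smoke test (the file, its export name, and the chosen sizes are illustrative and not part of any milestone) assumes a wasm32 C compiler with no standard library, linked together with walloc.c:

// smoke.c: hypothetical smoke test for walloc's malloc/free (illustration only).
typedef __SIZE_TYPE__ size_t;

void *malloc(size_t size);
void free(void *ptr);

__attribute__((export_name("smoke_test")))
int smoke_test(void) {
  void *small  = malloc(16);      // 2 granules -> GRANULES_2 small-object chunk
  void *medium = malloc(200);     // 25 granules -> GRANULES_32 small-object chunk
  void *large  = malloc(100000);  // above LARGE_OBJECT_THRESHOLD; spans several 64 kB pages
  if (!small || !medium || !large)
    return 1;
  free(large);                    // pushed back on the large-object freelist
  free(medium);                   // pushed back on its small-object freelist
  free(small);
  return 0;
}

The three sizes are picked so that a small-object path, the whole-chunk GRANULES_32 path, and the multi-page large-object path are each hit at least once.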