├── .github
│   └── workflows
│       └── test.yml
├── Makefile
├── test.c
├── LICENSE.md
├── test.js
├── README.md
└── walloc.c

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Test

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-20.04

    steps:
    - uses: actions/checkout@v2
    - name: test
      run: make test CC=clang LD="wasm-ld-10 --allow-undefined" JS=node

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
all: test

CC?=clang
LD?=wasm-ld
JS?=node

.PHONY: test

test: test.js test.wasm
	$(JS) $<

%.o: %.c
	$(CC) -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o $@ $<

test.wasm: test.o walloc.o
	$(LD) --no-entry --import-memory -o $@ $^

.PHONY: clean
clean:
	rm -f *.o *.wasm

--------------------------------------------------------------------------------
/test.c:
--------------------------------------------------------------------------------
typedef __SIZE_TYPE__ size_t;

// Declare a function as a wasm export under the given name.
#define WASM_EXPORT(name) \
  __attribute__((export_name(#name))) \
  name

// Pull these in from walloc.c.
void *malloc(size_t size);
void free(void *p);

void* WASM_EXPORT(walloc)(size_t size) {
  return malloc(size);
}

void WASM_EXPORT(wfree)(void* ptr) {
  free(ptr);
}

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
# License

walloc is available under an MIT-style license, the text of which
follows.

```
Copyright (c) 2020 Igalia, S.L.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
```

--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
if (typeof read !== 'undefined') {
  function readBinaryFile(f) { return read(f, 'binary'); }
} else if (typeof readFile !== 'undefined') {
  function readBinaryFile(f) { return readFile(f); }
} else if (typeof require !== 'undefined') {
  let fs = require('fs');
  function readBinaryFile(f) { return fs.readFileSync(f); }
} else {
  throw "no way to read a binary file";
}

function assert(c, msg) { if (!c) throw new Error(msg); }
function power_of_two(x) { return x && (x & (x - 1)) == 0; }
function assert_power_of_two(x) {
  assert(power_of_two(x), `not power of two: ${x}`);
}
function aligned(x, y) {
  assert_power_of_two(y);
  return (x & (y - 1)) == 0;
}
function assert_aligned(x, y) {
  assert(aligned(x, y), `bad alignment: ${x} % ${y}`);
}
function round_up(x, y) {
  assert_power_of_two(y);
  return (x + y - 1) & ~(y - 1);
}

let granule_size = 8;
let bits_per_byte = 8;
let bits_per_byte_log2 = 3;

// Shadow the wasm heap with a bitmap of 8-byte granules, so that the test
// can detect overlapping or double allocations.
class HeapVerifier {
  constructor(maxbytes) {
    this.maxwords = maxbytes / granule_size;
    this.state = new Uint8Array(this.maxwords / bits_per_byte);
    this.allocations = new Map;
  }
  acquire(offset, len) {
    assert_aligned(offset, granule_size);
    for (let i = 0; i < len; i += granule_size) {
      let bit = (offset + i) / granule_size;
      let byte = bit >> bits_per_byte_log2;
      let mask = 1 << (bit & (bits_per_byte - 1));
      assert((this.state[byte] & mask) == 0, "word in use");
      this.state[byte] |= mask;
    }
    this.allocations.set(offset, len);
  }
  release(offset) {
    assert(this.allocations.has(offset))
    let len = this.allocations.get(offset);
    this.allocations.delete(offset);
    for (let i = 0; i < len; i += granule_size) {
      let bit = (offset + i) / granule_size;
      let byte = bit >> bits_per_byte_log2;
      let mask = 1 << (bit & (bits_per_byte - 1));
      this.state[byte] &= ~mask;
    }
  }
}

class LinearMemory {
  constructor({initial = 256, maximum = 256}) {
    this.memory = new WebAssembly.Memory({ initial, maximum });
    this.verifier = new HeapVerifier(maximum * 65536);
  }
  record_malloc(ptr, len) { this.verifier.acquire(ptr, len); }
  record_free(ptr) { this.verifier.release(ptr); }
  read_string(offset) {
    let view = new Uint8Array(this.memory.buffer);
    let bytes = []
    for (let byte = view[offset]; byte; byte = view[++offset])
      bytes.push(byte);
    return String.fromCharCode(...bytes);
  }
  log(str) { console.log(`wasm log: ${str}`) }
  log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) }
  env() {
    return {
      memory: this.memory,
      wasm_log: (off) => this.log(this.read_string(off)),
      wasm_log_i: (off, i) => this.log_i(this.read_string(off), i)
    }
  }
}

function randu(x, max) { return Math.floor(x * max); }
function sys_rand32() { return randu(Math.random(), 2**32); }
function xoshiro128ss(a, b, c, d) {
  console.log(`Seeding RNG with [${a}, ${b}, ${c}, ${d}].`)
  return function() {
    var t = b << 9, r = a * 5; r = (r << 7 | r >>> 25) * 9;
    c ^= a; d ^= b;
    b ^= c; a ^= d; c ^= t;
    d = d << 11 | d >>> 21;
    return (r >>> 0) / 4294967296;
  }
}
let rand = xoshiro128ss(sys_rand32(), sys_rand32(), sys_rand32(),
                        sys_rand32());

let bytes = readBinaryFile("test.wasm");
let mod = new WebAssembly.Module(bytes);
let memory = new LinearMemory({ initial: 2, maximum: 256 });
let imports = { env: memory.env() }
let instance = new WebAssembly.Instance(mod, imports);
let {walloc, wfree} = instance.exports;

for (let j = 0; j < 40; j++) {
  let allocs = [];
  console.log(`Allocating 2 MB, iteration ${j}.`)
  let count = 0;
  for (let allocated = 0; allocated < 2e6; count++) {
    let size = randu(rand(), 2000);
    let free_priority = rand();
    let ptr = walloc(size);
    assert((ptr % 8) == 0, "unaligned result");
    memory.record_malloc(ptr, size);
    allocs.push([free_priority, ptr]);
    allocated += size;
  }
  console.log(`Freeing ${count} allocations.`)
  allocs.sort(([p1,ptr1], [p2,ptr2]) => (p1 - p2));
  for (let [p, ptr] of allocs) {
    memory.record_free(ptr);
    wfree(ptr)
  }
}
console.log(`Success.`)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# walloc

walloc is a bare-bones implementation of `malloc` for use by C
programs when targeting WebAssembly. It is a single-file
implementation with no dependencies: no stdlib, no JavaScript imports,
no emscripten.

Walloc was designed with the following priorities, in order:
1. Standalone. No stdlib needed; no emscripten. Can be included in a
   project without pulling in anything else.
2. Reasonable allocation speed and fragmentation/overhead.
3. Small size, to minimize download time.
4. Standard interface: a drop-in replacement for malloc.
5. Single-threaded (currently, anyway).

Emscripten includes a couple of good malloc implementations
([dlmalloc](https://github.com/emscripten-core/emscripten/blob/master/system/lib/dlmalloc.c)
and
[emmalloc](https://github.com/emscripten-core/emscripten/blob/master/system/lib/emmalloc.cpp));
perhaps consider using one of those? But if you are really looking for
a bare-bones malloc, walloc is fine.

## Test

```
$ make CC=$LLVM/clang LD=$LLVM/wasm-ld JS=node test
clang -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o test.o test.c
clang -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c
wasm-ld --no-entry --import-memory -o test.wasm test.o walloc.o
node test.js
Seeding RNG with [2959819678, 4094888344, 3121363251, 822200628].
Allocating 2 MB, iteration 0.
Freeing 2031 allocations.
Allocating 2 MB, iteration 1.
Freeing 1956 allocations.
Allocating 2 MB, iteration 2.
Freeing 2000 allocations.
Allocating 2 MB, iteration 3.
Freeing 2037 allocations.
...
Allocating 2 MB, iteration 38.
Freeing 2029 allocations.
Allocating 2 MB, iteration 39.
Freeing 2023 allocations.
Success.
```

You can link `walloc.c` into your program just by adding it to your link
line, as above.
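A minimal client looks like `test.c` in this repository: since a
`-nostdlib` build has no `<stdlib.h>`, you declare the `malloc` and
`free` prototypes yourself, and export whatever wrappers your host
needs. A sketch (the file and export names here are just for
illustration):

```c
// hello.c (illustrative): a minimal walloc client, built like test.c above:
//   clang -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o hello.o hello.c
//   wasm-ld --no-entry --import-memory -o hello.wasm hello.o walloc.o
typedef __SIZE_TYPE__ size_t;

// Provided by walloc.c; declared by hand since there is no stdlib.
void *malloc(size_t size);
void free(void *p);

__attribute__((export_name("alloc_buffer")))
void *alloc_buffer(size_t size) {
  return malloc(size); // Returns NULL (0) if walloc cannot grow the memory.
}

__attribute__((export_name("free_buffer")))
void free_buffer(void *p) {
  free(p);
}
```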
## Size

The resulting wasm file is about 2 kB (uncompressed).

Walloc isn't the smallest allocator out there. A simple bump-pointer
allocator that never frees is the fastest thing you can have. There is
also an alternate allocator for Rust,
[wee_alloc](https://github.com/rustwasm/wee_alloc), which is said to be
smaller than walloc, though it is less space-efficient for small
objects. But still, walloc is pretty small.

## Design

When a C program is compiled to WebAssembly, the resulting wasm module
(usually) has an associated linear memory. It can be linked in a way
that the memory is created by the module when it's instantiated, or in
a way that the module is given a memory by its host. The above example
passed `--import-memory` to the linker, allowing the host to bound
memory usage for the module instance.

The linear memory has the usual data, stack, and heap segments. The
data and stack are placed first. The heap starts at the `&__heap_base`
symbol. (This symbol is computed and defined by the linker.) All bytes
above `&__heap_base` can be used by the wasm program as it likes. So
`&__heap_base` is the lower bound of memory managed by walloc.

```
                                       memory growth ->
+----------------+-----------+-------------+-------------+----
| data and stack | alignment | walloc page | walloc page | ...
+----------------+-----------+-------------+-------------+----
^ 0              ^ &__heap_base            ^ 64 kB aligned
```

The upper bound of memory managed by walloc is the total size of the
memory, which is aligned on 64-kilobyte boundaries. (WebAssembly
ensures this alignment.) Walloc manages memory in 64-kB pages as well.
It starts with whatever memory is initially given to the module, and
will expand the memory if it runs out. The host can specify a maximum
memory size, in pages; if no more pages are available, walloc's `malloc`
will simply return `NULL`; handling out-of-memory is up to the caller.
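On the host side, bounding memory is just a matter of creating the
`WebAssembly.Memory` yourself and passing it in, as `test.js` does. A
minimal sketch, assuming a module whose only import is the memory:

```js
// Instantiate a walloc-using module with a host-bounded memory.
// readBinaryFile is the shell/node helper defined in test.js.
let memory = new WebAssembly.Memory({ initial: 2, maximum: 256 }); // 64 kB pages
let module = new WebAssembly.Module(readBinaryFile("test.wasm"));
let instance = new WebAssembly.Instance(module, { env: { memory } });
// Growth requests from inside walloc can never take the memory past
// `maximum` (here 16 MB); once growth fails, malloc returns NULL.
```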
Walloc has two allocation strategies: small and large objects.

### Large objects

A large object is more than 256 bytes.

There is a global freelist of available large objects, each of which has
a header indicating its size. When allocating, walloc does a best-fit
search through that list.

```c
struct large_object {
  struct large_object *next;
  size_t size;
  char payload[0];
};
struct large_object* large_object_free_list;
```

Large object allocations are rounded up to 256-byte boundaries,
including the header.

If there is no object on the freelist that can satisfy an allocation,
walloc will expand the heap by the size of the allocation, or by half of
the current walloc heap size, whichever is larger. The resulting page
or pages form a large object that can satisfy the allocation.

If the best object on the freelist has more than a chunk of space on the
end, it is split, and the tail put back on the freelist. A chunk is 256
bytes.

```
+-------------+---------+---------+-----+-----------+
| page header | chunk 1 | chunk 2 | ... | chunk 255 |
+-------------+---------+---------+-----+-----------+
^ +0          ^ +256    ^ +512          ^ +64 kB
```

As each page is 65536 bytes, and each chunk is 256 bytes, there are
therefore 256 chunks in a page. The first chunk in a page that holds
allocated objects, large or small, is a header chunk: the page header
has a byte for each of the 256 chunks in the page. The byte is 255 if
the corresponding chunk starts a large object; otherwise the byte
indicates the size class for packed small-object allocations (see
below).

```
+-------------+---------+---------+----------+-----------+
| page header | large object 1    | large object 2 ...   |
+-------------+---------+---------+----------+-----------+
^ +0          ^ +256    ^ +512               ^ +64 kB
```

When splitting large objects, we avoid starting a new large object on a
page header chunk. A large object can only span where a page header
chunk would be if it includes the entire page.

Freeing a large object pushes it on the global freelist. We know a
pointer is a large object by looking at the page header. We know the
size of the allocation, because the large object header precedes the
allocation. When the next large object allocation happens after a free,
the freelist will be compacted by merging adjacent large objects.
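That page-header lookup is plain address arithmetic; it is essentially
the following, from `walloc.c` (lightly abridged):

```c
// How walloc gets from a malloc'd pointer to its page and chunk kind.
static struct page* get_page(void *ptr) {
  return (struct page*) (((uintptr_t) ptr) & ~PAGE_MASK);
}
static unsigned get_chunk_index(void *ptr) {
  return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE;
}
// A pointer P starts a large object precisely when
// get_page(P)->header.chunk_kinds[get_chunk_index(P)] == LARGE_OBJECT (255).
```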
### Small objects

Small objects are allocated from segregated freelists. The granule size
is 8 bytes. Small object allocations are packed in a chunk of uniform
allocation size. There are size classes for allocations of each size
from 1 to 6 granules, then 8, 10, 16, and 32 granules; 10 sizes in all.
An allocation of 12 granules, for example, will be satisfied from a
16-granule chunk. Each size class has its own free list.

```c
struct small_object_freelist {
  struct small_object_freelist *next;
};
struct small_object_freelist small_object_freelists[10];
```

When allocating, if there is nothing on the corresponding freelist,
walloc will allocate a new large object, then change its chunk kind in
the page header to the size class. It then goes through the fresh
chunk, threading the objects through each other onto a free list.

```
+-------------+---------+---------+------------+---------------------+
| page header | large object 1    | granules=4 | large object 2' ... |
+-------------+---------+---------+------------+---------------------+
^ +0          ^ +256    ^ +512    ^ +768       ^ +1024        ^ +64 kB
```

In this example, we imagine that the 4-granules freelist was empty, and
that the large object freelist contained only large object 2, running
all the way to the end of the page. We allocated a new 4-granules
chunk, splitting the first chunk off the large object, and pushing the
newly trimmed large object back onto the large object freelist, updating
the page header appropriately. We then thread the 4-granules (32-byte)
allocations in the fresh chunk together (the chunk has room for 8 of
them), treating them as if they were instances of `struct freelist`,
pushing them onto the global freelist for 4-granules allocations.

```
in fresh chunk, next link for object N points to object N+1
                                 /--------\
                                 |        |
            +------------------+-^--------v-----+----------+
granules=4: | (padding, maybe) | object 0 | ... | object 7 |
            +------------------+----------+-----+----------+
            ^ 4-granule freelist now points here
```

The size classes were chosen so that any wasted space (padding) is less
than the size class.

Freeing a small object pushes it back on its size class's free list.
Given a pointer, we know its size class by looking at the chunk kind in
the page header.
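Mapping a request size to its size class is similarly cheap;
schematically, following `size_to_granules` and `granules_to_chunk_kind`
in `walloc.c`:

```c
// Round a byte count up to 8-byte granules, then pick the first size class
// that fits: 1, 2, 3, 4, 5, 6, 8, 10, 16, or 32 granules.
static inline size_t size_to_granules(size_t size) {
  return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}
// For example, a 90-byte request is 12 granules, which lands in the
// 16-granule class; anything over 32 granules (256 bytes) is a large object.
```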
## License

`walloc` is available under a permissive MIT-style license. See
[LICENSE.md](./LICENSE.md) for full details.

--------------------------------------------------------------------------------
/walloc.c:
--------------------------------------------------------------------------------
// walloc.c: a small malloc implementation for use in WebAssembly targets
// Copyright (c) 2020 Igalia, S.L.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

typedef __SIZE_TYPE__ size_t;
typedef __UINTPTR_TYPE__ uintptr_t;
typedef __UINT8_TYPE__ uint8_t;

#define NULL ((void *) 0)

#define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq")

#ifndef NDEBUG
#define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
#define ASSERT_EQ(a,b) ASSERT((a) == (b))

static inline size_t max(size_t a, size_t b) {
  return a < b ? b : a;
}
static inline uintptr_t align(uintptr_t val, uintptr_t alignment) {
  return (val + alignment - 1) & ~(alignment - 1);
}
#define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y))

#define CHUNK_SIZE 256
#define CHUNK_SIZE_LOG_2 8
#define CHUNK_MASK (CHUNK_SIZE - 1)
STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2);

#define PAGE_SIZE 65536
#define PAGE_SIZE_LOG_2 16
#define PAGE_MASK (PAGE_SIZE - 1)
STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2);

#define CHUNKS_PER_PAGE 256
STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE);

#define GRANULE_SIZE 8
#define GRANULE_SIZE_LOG_2 3
#define LARGE_OBJECT_THRESHOLD 256
#define LARGE_OBJECT_GRANULE_THRESHOLD 32

STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2);
STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD,
                 LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);

struct chunk {
  char data[CHUNK_SIZE];
};

// There are small object pages for allocations of these sizes.
#define FOR_EACH_SMALL_OBJECT_GRANULES(M) \
  M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32)

enum chunk_kind {
#define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i,
  FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND)
#undef DEFINE_SMALL_OBJECT_CHUNK_KIND

  SMALL_OBJECT_CHUNK_KINDS,
  FREE_LARGE_OBJECT = 254,
  LARGE_OBJECT = 255
};

static const uint8_t small_object_granule_sizes[] =
{
#define SMALL_OBJECT_GRANULE_SIZE(i) i,
  FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE)
#undef SMALL_OBJECT_GRANULE_SIZE
};

static enum chunk_kind granules_to_chunk_kind(unsigned granules) {
#define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i;
  FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE);
#undef TEST_GRANULE_SIZE
  return LARGE_OBJECT;
}

static unsigned chunk_kind_to_granules(enum chunk_kind kind) {
  switch (kind) {
#define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i;
    FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE);
#undef CHUNK_KIND_GRANULE_SIZE
  default:
    return -1;
  }
}

// Given a pointer P returned by malloc(), we get a header pointer via
// P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNK_SIZE. If
// chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large
// object, otherwise the kind indicates the size in granules of the objects in
// the chunk.
struct page_header {
  uint8_t chunk_kinds[CHUNKS_PER_PAGE];
};

struct page {
  union {
    struct page_header header;
    struct chunk chunks[CHUNKS_PER_PAGE];
  };
};

#define PAGE_HEADER_SIZE (sizeof (struct page_header))
#define FIRST_ALLOCATABLE_CHUNK 1
STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE);

static struct page* get_page(void *ptr) {
  return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK);
}
static unsigned get_chunk_index(void *ptr) {
  return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE;
}

struct freelist {
  struct freelist *next;
};

struct large_object {
  struct large_object *next;
  size_t size;
};

#define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object))

static inline void* get_large_object_payload(struct large_object *obj) {
  return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE;
}
static inline struct large_object* get_large_object(void *ptr) {
  return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE);
}

static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS];
static struct large_object *large_objects;

extern void __heap_base;
static size_t walloc_heap_size;

static struct page*
allocate_pages(size_t payload_size, size_t *n_allocated) {
  size_t needed = payload_size + PAGE_HEADER_SIZE;
  size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE;
  uintptr_t base = heap_size;
  uintptr_t preallocated = 0, grow = 0;

  if (!walloc_heap_size) {
    // We are allocating the initial pages, if any. We skip the first 64 kB,
    // then take any additional space up to the memory size.
    uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE);
    preallocated = heap_size - heap_base; // Preallocated pages.
    walloc_heap_size = preallocated;
    base -= preallocated;
  }

  if (preallocated < needed) {
    // Always grow the walloc heap at least by 50%.
    grow = align(max(walloc_heap_size / 2, needed - preallocated),
                 PAGE_SIZE);
    ASSERT(grow);
    if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) {
      return NULL;
    }
    walloc_heap_size += grow;
  }

  struct page *ret = (struct page *)base;
  size_t size = grow + preallocated;
  ASSERT(size);
  ASSERT_ALIGNED(size, PAGE_SIZE);
  *n_allocated = size / PAGE_SIZE;
  return ret;
}
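// Worked example (illustrative; not from the original source): if the walloc
// heap is currently 64 kB and a request needs 8 kB more than is preallocated,
// we grow by align(max(64 kB / 2, 8 kB), PAGE_SIZE) = 64 kB, one fresh page.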
static char*
allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) {
  page->header.chunk_kinds[idx] = kind;
  return page->chunks[idx].data;
}

// It's possible for splitting to produce a large object of size 248 (256 minus
// the header size) -- i.e. spanning a single chunk. In that case, push the
// chunk back on the GRANULES_32 small object freelist.
static void maybe_repurpose_single_chunk_large_objects_head(void) {
  if (large_objects->size < CHUNK_SIZE) {
    unsigned idx = get_chunk_index(large_objects);
    char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32);
    large_objects = large_objects->next;
    struct freelist* head = (struct freelist *)ptr;
    head->next = small_object_freelists[GRANULES_32];
    small_object_freelists[GRANULES_32] = head;
  }
}

// If there have been any large-object frees since the last large object
// allocation, go through the freelist and merge any adjacent objects.
static int pending_large_object_compact = 0;
static struct large_object**
maybe_merge_free_large_object(struct large_object** prev) {
  struct large_object *obj = *prev;
  while (1) {
    char *end = get_large_object_payload(obj) + obj->size;
    ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE);
    unsigned chunk = get_chunk_index(end);
    if (chunk < FIRST_ALLOCATABLE_CHUNK) {
      // Merging can't create a large object that newly spans the header chunk.
      // This check also catches the end-of-heap case.
      return prev;
    }
    struct page *page = get_page(end);
    if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) {
      return prev;
    }
    struct large_object *next = (struct large_object*) end;

    struct large_object **prev_prev = &large_objects, *walk = large_objects;
    while (1) {
      ASSERT(walk);
      if (walk == next) {
        obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size;
        *prev_prev = walk->next;
        if (prev == &walk->next) {
          prev = prev_prev;
        }
        break;
      }
      prev_prev = &walk->next;
      walk = walk->next;
    }
  }
}
static void
maybe_compact_free_large_objects(void) {
  if (pending_large_object_compact) {
    pending_large_object_compact = 0;
    struct large_object **prev = &large_objects;
    while (*prev) {
      prev = &(*maybe_merge_free_large_object(prev))->next;
    }
  }
}
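// Worked example (illustrative): if free objects A and B are physically
// adjacent, that is, A's payload ends exactly where B's header begins, then
// A absorbs B: A->size += LARGE_OBJECT_HEADER_SIZE + B->size, and B is
// unlinked from the freelist.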
// Allocate a large object with enough space for SIZE payload bytes. Returns a
// large object with a header, aligned on a chunk boundary, whose payload size
// may be larger than SIZE, and whose total size (header included) is
// chunk-aligned. Either a suitable allocation is found in the large object
// freelist, or we ask the OS for some more pages and treat those pages as a
// large object. If the allocation fits in that large object and there's more
// than an aligned chunk's worth of data free at the end, the large object is
// split.
//
// The return value's corresponding chunk in the page is marked as starting a
// large object.
static struct large_object*
allocate_large_object(size_t size) {
  maybe_compact_free_large_objects();
  struct large_object *best = NULL, **best_prev = &large_objects;
  size_t best_size = -1;
  for (struct large_object **prev = &large_objects, *walk = large_objects;
       walk;
       prev = &walk->next, walk = walk->next) {
    if (walk->size >= size && walk->size < best_size) {
      best_size = walk->size;
      best = walk;
      best_prev = prev;
      if (best_size + LARGE_OBJECT_HEADER_SIZE
          == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE))
        // Not going to do any better than this; just return it.
        break;
    }
  }

  if (!best) {
    // The large object freelist doesn't have an object big enough for this
    // allocation. Allocate one or more pages from the OS, and treat that new
    // sequence of pages as a fresh large object. It will be split if
    // necessary.
    size_t size_with_header = size + sizeof(struct large_object);
    size_t n_allocated = 0;
    struct page *page = allocate_pages(size_with_header, &n_allocated);
    if (!page) {
      return NULL;
    }
    char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT);
    best = (struct large_object *)ptr;
    size_t page_header = ptr - ((char*) page);
    best->next = large_objects;
    best->size = best_size =
      n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE;
    ASSERT(best_size >= size_with_header);
  }

  allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT);

  struct large_object *next = best->next;
  *best_prev = next;

  size_t tail_size = (best_size - size) & ~CHUNK_MASK;
  if (tail_size) {
    // The best-fitting object has 1 or more aligned chunks free after the
    // requested allocation; split the tail off into a fresh aligned object.
    struct page *start_page = get_page(best);
    char *start = get_large_object_payload(best);
    char *end = start + best_size;

    if (start_page == get_page(end - tail_size - 1)) {
      // The allocation does not span a page boundary; yay.
      ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE);
    } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) {
      // If the allocation itself is smaller than a page, split off the head,
      // then fall through to maybe split the tail.
      ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE);
      size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK);
      struct large_object *head = best;
      allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT);
      head->size = first_page_size;
      head->next = large_objects;
      large_objects = head;

      maybe_repurpose_single_chunk_large_objects_head();

      struct page *next_page = start_page + 1;
      char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT);
      best = (struct large_object *) ptr;
      best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE;
      ASSERT(best_size >= size);
      start = get_large_object_payload(best);
      tail_size = (best_size - size) & ~CHUNK_MASK;
    } else {
      // A large object that spans more than one page will consume all of its
      // tail pages. Therefore if the split traverses a page boundary, round up
      // to page size.
      ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE);
      size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK);
      size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE);
      size = first_page_size + tail_pages_size;
      tail_size = best_size - size;
    }
    best->size -= tail_size;

    unsigned tail_idx = get_chunk_index(end - tail_size);
    while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) {
      // We would be splitting in a page header; don't do that.
      tail_size -= CHUNK_SIZE;
      tail_idx++;
    }

    if (tail_size) {
      struct page *page = get_page(end - tail_size);
      char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT);
      struct large_object *tail = (struct large_object *) tail_ptr;
      tail->next = large_objects;
      tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE;
      ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE);
      large_objects = tail;

      maybe_repurpose_single_chunk_large_objects_head();
    }
  }

  ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE);
  return best;
}
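// Worked example (illustrative, on wasm32 where the header is 8 bytes): take
// a free object with a 1016-byte payload (8-byte header + 1016 bytes = 4
// chunks) and a request for 300 bytes, all within one page. tail_size =
// (1016 - 300) & ~CHUNK_MASK = 512, so the allocation keeps a 504-byte
// payload (2 chunks including the header) and the other 2 chunks go back on
// the freelist as a free object with a 504-byte payload of its own.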
static struct freelist*
obtain_small_objects(enum chunk_kind kind) {
  struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32];
  void *chunk;
  if (*whole_chunk_freelist) {
    chunk = *whole_chunk_freelist;
    *whole_chunk_freelist = (*whole_chunk_freelist)->next;
  } else {
    chunk = allocate_large_object(0);
    if (!chunk) {
      return NULL;
    }
  }
  char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind);
  char *end = ptr + CHUNK_SIZE;
  struct freelist *next = NULL;
  size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE;
  for (size_t i = size; i <= CHUNK_SIZE; i += size) {
    struct freelist *head = (struct freelist*) (end - i);
    head->next = next;
    next = head;
  }
  return next;
}
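// Worked example (illustrative): for kind GRANULES_4 (32-byte objects), the
// loop above threads CHUNK_SIZE / 32 = 8 objects from the end of the chunk
// backwards, so the returned freelist head is the object at the chunk's
// lowest address. For a kind like GRANULES_3 (24 bytes), 10 objects fit and
// the first 16 bytes of the chunk are left as padding.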
static inline size_t size_to_granules(size_t size) {
  return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}
static struct freelist** get_small_object_freelist(enum chunk_kind kind) {
  ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS);
  return &small_object_freelists[kind];
}

static void*
allocate_small(enum chunk_kind kind) {
  struct freelist **loc = get_small_object_freelist(kind);
  if (!*loc) {
    struct freelist *freelist = obtain_small_objects(kind);
    if (!freelist) {
      return NULL;
    }
    *loc = freelist;
  }
  struct freelist *ret = *loc;
  *loc = ret->next;
  return (void *) ret;
}

static void*
allocate_large(size_t size) {
  struct large_object *obj = allocate_large_object(size);
  return obj ? get_large_object_payload(obj) : NULL;
}

void*
malloc(size_t size) {
  size_t granules = size_to_granules(size);
  enum chunk_kind kind = granules_to_chunk_kind(granules);
  return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind);
}

void
free(void *ptr) {
  if (!ptr) return;
  struct page *page = get_page(ptr);
  unsigned chunk = get_chunk_index(ptr);
  uint8_t kind = page->header.chunk_kinds[chunk];
  if (kind == LARGE_OBJECT) {
    struct large_object *obj = get_large_object(ptr);
    obj->next = large_objects;
    large_objects = obj;
    allocate_chunk(page, chunk, FREE_LARGE_OBJECT);
    pending_large_object_compact = 1;
  } else {
    size_t granules = kind;
    struct freelist **loc = get_small_object_freelist(granules);
    struct freelist *obj = ptr;
    obj->next = *loc;
    *loc = obj;
  }
}

--------------------------------------------------------------------------------