├── .github
│   └── workflows
│       └── test.yml
├── Makefile
├── test.c
├── LICENSE.md
├── test.js
├── README.md
└── walloc.c

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Test

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-20.04

    steps:
    - uses: actions/checkout@v2
    - name: test
      run: make test CC=clang LD="wasm-ld-10 --allow-undefined" JS=node

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
all: test

CC?=clang
LD?=wasm-ld
JS?=node

.PHONY: test

test: test.js test.wasm
	$(JS) $<

%.o: %.c
	$(CC) -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o $@ $<

test.wasm: test.o walloc.o
	$(LD) --no-entry --import-memory -o $@ $^

.PHONY: clean
clean:
	rm -f *.o *.wasm

--------------------------------------------------------------------------------
/test.c:
--------------------------------------------------------------------------------
typedef __SIZE_TYPE__ size_t;

// Declare a function as a wasm export under the given name.
#define WASM_EXPORT(name) \
  __attribute__((export_name(#name))) \
  name

// Pull these in from walloc.c.
void *malloc(size_t size);
void free(void *p);

void* WASM_EXPORT(walloc)(size_t size) {
  return malloc(size);
}

void WASM_EXPORT(wfree)(void* ptr) {
  free(ptr);
}

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
# License

walloc is available under an MIT-style license, the text of which
follows.

```
Copyright (c) 2020 Igalia, S.L.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
```

--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
if (typeof read !== 'undefined') {
  function readBinaryFile(f) { return read(f, 'binary'); }
} else if (typeof readFile !== 'undefined') {
  function readBinaryFile(f) { return readFile(f); }
} else if (typeof require !== 'undefined') {
  let fs = require('fs');
  function readBinaryFile(f) { return fs.readFileSync(f); }
} else {
  throw "no way to read a binary file";
}

function assert(c, msg) { if (!c) throw new Error(msg); }
function power_of_two(x) { return x && (x & (x - 1)) == 0; }
function assert_power_of_two(x) {
  assert(power_of_two(x), `not power of two: ${x}`);
}
function aligned(x, y) {
  assert_power_of_two(y);
  return (x & (y - 1)) == 0;
}
function assert_aligned(x, y) {
  assert(aligned(x, y), `bad alignment: ${x} % ${y}`);
}
function round_up(x, y) {
  assert_power_of_two(y);
  return (x + y - 1) & ~(y - 1);
}

let granule_size = 8;
let bits_per_byte = 8;
let bits_per_byte_log2 = 3;

// Shadow the wasm heap with a bitmap of 8-byte granules, so that the test
// can detect overlapping or double allocations.
class HeapVerifier {
  constructor(maxbytes) {
    this.maxwords = maxbytes / granule_size;
    this.state = new Uint8Array(this.maxwords / bits_per_byte);
    this.allocations = new Map;
  }
  acquire(offset, len) {
    assert_aligned(offset, granule_size);
    for (let i = 0; i < len; i += granule_size) {
      let bit = (offset + i) / granule_size;
      let byte = bit >> bits_per_byte_log2;
      let mask = 1 << (bit & (bits_per_byte - 1));
      assert((this.state[byte] & mask) == 0, "word in use");
      this.state[byte] |= mask;
    }
    this.allocations.set(offset, len);
  }
  release(offset) {
    assert(this.allocations.has(offset))
    let len = this.allocations.get(offset);
    this.allocations.delete(offset);
    for (let i = 0; i < len; i += granule_size) {
      let bit = (offset + i) / granule_size;
      let byte = bit >> bits_per_byte_log2;
      let mask = 1 << (bit & (bits_per_byte - 1));
      this.state[byte] &= ~mask;
    }
  }
}

class LinearMemory {
  constructor({initial = 256, maximum = 256}) {
    this.memory = new WebAssembly.Memory({ initial, maximum });
    this.verifier = new HeapVerifier(maximum * 65536);
  }
  record_malloc(ptr, len) { this.verifier.acquire(ptr, len); }
  record_free(ptr) { this.verifier.release(ptr); }
  read_string(offset) {
    let view = new Uint8Array(this.memory.buffer);
    let bytes = []
    for (let byte = view[offset]; byte; byte = view[++offset])
      bytes.push(byte);
    return String.fromCharCode(...bytes);
  }
  log(str) { console.log(`wasm log: ${str}`) }
  log_i(str, i) { console.log(`wasm log: ${str}: ${i}`) }
  env() {
    return {
      memory: this.memory,
      wasm_log: (off) => this.log(this.read_string(off)),
      wasm_log_i: (off, i) => this.log_i(this.read_string(off), i)
    }
  }
}

function randu(x, max) { return Math.floor(x * max); }
function sys_rand32() { return randu(Math.random(), 2**32); }
function xoshiro128ss(a, b, c, d) {
  console.log(`Seeding RNG with [${a}, ${b}, ${c}, ${d}].`)
  return function() {
    var t = b << 9, r = a * 5; r = (r << 7 | r >>> 25) * 9;
    c ^= a; d ^= b;
    b ^= c; a ^= d; c ^= t;
    d = d << 11 | d >>> 21;
    return (r >>> 0) / 4294967296;
  }
}
let rand = xoshiro128ss(sys_rand32(), sys_rand32(), sys_rand32(),
                        sys_rand32());

let bytes = readBinaryFile("test.wasm");
let mod = new WebAssembly.Module(bytes);
let memory = new LinearMemory({ initial: 2, maximum: 256 });
let imports = { env: memory.env() }
let instance = new WebAssembly.Instance(mod, imports);
let {walloc, wfree} = instance.exports;

for (let j = 0; j < 40; j++) {
  let allocs = [];
  console.log(`Allocating 2 MB, iteration ${j}.`)
  let count = 0;
  for (let allocated = 0; allocated < 2e6; count++) {
    let size = randu(rand(), 2000);
    let free_priority = rand();
    let ptr = walloc(size);
    assert((ptr % 8) == 0, "unaligned result");
    memory.record_malloc(ptr, size);
    allocs.push([free_priority, ptr]);
    allocated += size;
  }
  console.log(`Freeing ${count} allocations.`)
  allocs.sort(([p1,ptr1], [p2,ptr2]) => (p1 - p2));
  for (let [p, ptr] of allocs) {
    memory.record_free(ptr);
    wfree(ptr)
  }
}
console.log(`Success.`)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# walloc

walloc is a bare-bones implementation of `malloc` for use by C
programs when targeting WebAssembly. It is a single-file
implementation with no dependencies: no stdlib, no JavaScript imports,
no emscripten.

Walloc was designed with the following priorities, in order:
1. Standalone. No stdlib needed; no emscripten. Can be included in a
   project without pulling in anything else.
2. Reasonable allocation speed and fragmentation/overhead.
3. Small size, to minimize download time.
4. Standard interface: a drop-in replacement for malloc.
5. Single-threaded (currently, anyway).

Emscripten includes a couple of good malloc implementations
([dlmalloc](https://github.com/emscripten-core/emscripten/blob/master/system/lib/dlmalloc.c)
and
[emmalloc](https://github.com/emscripten-core/emscripten/blob/master/system/lib/emmalloc.cpp));
perhaps consider using one of those? But if you are really looking for
a bare-bones malloc, walloc is fine.

## Test

```
$ make CC=$LLVM/clang LD=$LLVM/wasm-ld JS=node test
clang -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o test.o test.c
clang -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o walloc.o walloc.c
wasm-ld --no-entry --import-memory -o test.wasm test.o walloc.o
node test.js
Seeding RNG with [2959819678, 4094888344, 3121363251, 822200628].
Allocating 2 MB, iteration 0.
Freeing 2031 allocations.
Allocating 2 MB, iteration 1.
Freeing 1956 allocations.
Allocating 2 MB, iteration 2.
Freeing 2000 allocations.
Allocating 2 MB, iteration 3.
Freeing 2037 allocations.
...
Allocating 2 MB, iteration 38.
Freeing 2029 allocations.
Allocating 2 MB, iteration 39.
Freeing 2023 allocations.
Success.
```

You can link `walloc.c` into your program just by adding it to your link
line, as above.
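A minimal client looks like `test.c` in this repository: since a
`-nostdlib` build has no `<stdlib.h>`, you declare the `malloc` and
`free` prototypes yourself, and export whatever wrappers your host
needs. A sketch (the file and export names here are just for
illustration):

```c
// hello.c (illustrative): a minimal walloc client, built like test.c above:
//   clang -DNDEBUG -Oz --target=wasm32 -nostdlib -c -o hello.o hello.c
//   wasm-ld --no-entry --import-memory -o hello.wasm hello.o walloc.o
typedef __SIZE_TYPE__ size_t;

// Provided by walloc.c; declared by hand since there is no stdlib.
void *malloc(size_t size);
void free(void *p);

__attribute__((export_name("alloc_buffer")))
void *alloc_buffer(size_t size) {
  return malloc(size); // Returns NULL (0) if walloc cannot grow the memory.
}

__attribute__((export_name("free_buffer")))
void free_buffer(void *p) {
  free(p);
}
```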
## Size

The resulting wasm file is about 2 kB (uncompressed).

Walloc isn't the smallest allocator out there. A simple bump-pointer
allocator that never frees is the fastest thing you can have. There is
also an alternate allocator for Rust,
[wee_alloc](https://github.com/rustwasm/wee_alloc), which is said to be
smaller than walloc, though it is less space-efficient for small
objects. But still, walloc is pretty small.

## Design

When a C program is compiled to WebAssembly, the resulting wasm module
(usually) has an associated linear memory. It can be linked in a way
that the memory is created by the module when it's instantiated, or in
a way that the module is given a memory by its host. The above example
passed `--import-memory` to the linker, allowing the host to bound
memory usage for the module instance.

The linear memory has the usual data, stack, and heap segments. The
data and stack are placed first. The heap starts at the `&__heap_base`
symbol. (This symbol is computed and defined by the linker.) All bytes
above `&__heap_base` can be used by the wasm program as it likes. So
`&__heap_base` is the lower bound of memory managed by walloc.

```
                                       memory growth ->
+----------------+-----------+-------------+-------------+----
| data and stack | alignment | walloc page | walloc page | ...
+----------------+-----------+-------------+-------------+----
^ 0              ^ &__heap_base            ^ 64 kB aligned
```

The upper bound of memory managed by walloc is the total size of the
memory, which is aligned on 64-kilobyte boundaries. (WebAssembly
ensures this alignment.) Walloc manages memory in 64-kB pages as well.
It starts with whatever memory is initially given to the module, and
will expand the memory if it runs out. The host can specify a maximum
memory size, in pages; if no more pages are available, walloc's `malloc`
will simply return `NULL`; handling out-of-memory is up to the caller.
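On the host side, bounding memory is just a matter of creating the
`WebAssembly.Memory` yourself and passing it in, as `test.js` does. A
minimal sketch, assuming a module whose only import is the memory:

```js
// Instantiate a walloc-using module with a host-bounded memory.
// readBinaryFile is the shell/node helper defined in test.js.
let memory = new WebAssembly.Memory({ initial: 2, maximum: 256 }); // 64 kB pages
let module = new WebAssembly.Module(readBinaryFile("test.wasm"));
let instance = new WebAssembly.Instance(module, { env: { memory } });
// Growth requests from inside walloc can never take the memory past
// `maximum` (here 16 MB); once growth fails, malloc returns NULL.
```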
Walloc has two allocation strategies: small and large objects.

### Large objects

A large object is more than 256 bytes.

There is a global freelist of available large objects, each of which has
a header indicating its size. When allocating, walloc does a best-fit
search through that list.

```c
struct large_object {
  struct large_object *next;
  size_t size;
  char payload[0];
};
struct large_object* large_object_free_list;
```

Large object allocations are rounded up to 256-byte boundaries,
including the header.

If there is no object on the freelist that can satisfy an allocation,
walloc will expand the heap by the size of the allocation, or by half of
the current walloc heap size, whichever is larger. The resulting page
or pages form a large object that can satisfy the allocation.

If the best object on the freelist has more than a chunk of space on the
end, it is split, and the tail put back on the freelist. A chunk is 256
bytes.

```
+-------------+---------+---------+-----+-----------+
| page header | chunk 1 | chunk 2 | ... | chunk 255 |
+-------------+---------+---------+-----+-----------+
^ +0          ^ +256    ^ +512          ^ +64 kB
```

As each page is 65536 bytes, and each chunk is 256 bytes, there are
therefore 256 chunks in a page. The first chunk in a page that holds
allocated objects, large or small, is a header chunk: the page header
has a byte for each of the 256 chunks in the page. The byte is 255 if
the corresponding chunk starts a large object; otherwise the byte
indicates the size class for packed small-object allocations (see
below).

```
+-------------+---------+---------+----------+-----------+
| page header | large object 1    | large object 2 ...   |
+-------------+---------+---------+----------+-----------+
^ +0          ^ +256    ^ +512               ^ +64 kB
```

When splitting large objects, we avoid starting a new large object on a
page header chunk. A large object can only span where a page header
chunk would be if it includes the entire page.

Freeing a large object pushes it on the global freelist. We know a
pointer is a large object by looking at the page header. We know the
size of the allocation, because the large object header precedes the
allocation. When the next large object allocation happens after a free,
the freelist will be compacted by merging adjacent large objects.
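That page-header lookup is plain address arithmetic; it is essentially
the following, from `walloc.c` (lightly abridged):

```c
// How walloc gets from a malloc'd pointer to its page and chunk kind.
static struct page* get_page(void *ptr) {
  return (struct page*) (((uintptr_t) ptr) & ~PAGE_MASK);
}
static unsigned get_chunk_index(void *ptr) {
  return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE;
}
// A pointer P starts a large object precisely when
// get_page(P)->header.chunk_kinds[get_chunk_index(P)] == LARGE_OBJECT (255).
```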
### Small objects

Small objects are allocated from segregated freelists. The granule size
is 8 bytes. Small object allocations are packed in a chunk of uniform
allocation size. There are size classes for allocations of each size
from 1 to 6 granules, then 8, 10, 16, and 32 granules; 10 sizes in all.
An allocation of 12 granules, for example, will be satisfied from a
16-granule chunk. Each size class has its own free list.

```c
struct small_object_freelist {
  struct small_object_freelist *next;
};
struct small_object_freelist small_object_freelists[10];
```

When allocating, if there is nothing on the corresponding freelist,
walloc will allocate a new large object, then change its chunk kind in
the page header to the size class. It then goes through the fresh
chunk, threading the objects through each other onto a free list.

```
+-------------+---------+---------+------------+---------------------+
| page header | large object 1    | granules=4 | large object 2' ... |
+-------------+---------+---------+------------+---------------------+
^ +0          ^ +256    ^ +512    ^ +768       ^ +1024        ^ +64 kB
```

In this example, we imagine that the 4-granules freelist was empty, and
that the large object freelist contained only large object 2, running
all the way to the end of the page. We allocated a new 4-granules
chunk, splitting the first chunk off the large object, and pushing the
newly trimmed large object back onto the large object freelist, updating
the page header appropriately. We then thread the 4-granules (32-byte)
allocations in the fresh chunk together (the chunk has room for 8 of
them), treating them as if they were instances of `struct freelist`,
pushing them onto the global freelist for 4-granules allocations.

```
in fresh chunk, next link for object N points to object N+1
                                 /--------\
                                 |        |
            +------------------+-^--------v-----+----------+
granules=4: | (padding, maybe) | object 0 | ... | object 7 |
            +------------------+----------+-----+----------+
            ^ 4-granule freelist now points here
```

The size classes were chosen so that any wasted space (padding) is less
than the size class.

Freeing a small object pushes it back on its size class's free list.
Given a pointer, we know its size class by looking at the chunk kind in
the page header.
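Mapping a request size to its size class is similarly cheap;
schematically, following `size_to_granules` and `granules_to_chunk_kind`
in `walloc.c`:

```c
// Round a byte count up to 8-byte granules, then pick the first size class
// that fits: 1, 2, 3, 4, 5, 6, 8, 10, 16, or 32 granules.
static inline size_t size_to_granules(size_t size) {
  return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}
// For example, a 90-byte request is 12 granules, which lands in the
// 16-granule class; anything over 32 granules (256 bytes) is a large object.
```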
## License

`walloc` is available under a permissive MIT-style license. See
[LICENSE.md](./LICENSE.md) for full details.

--------------------------------------------------------------------------------
/walloc.c:
--------------------------------------------------------------------------------
// walloc.c: a small malloc implementation for use in WebAssembly targets
// Copyright (c) 2020 Igalia, S.L.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

typedef __SIZE_TYPE__ size_t;
typedef __UINTPTR_TYPE__ uintptr_t;
typedef __UINT8_TYPE__ uint8_t;

#define NULL ((void *) 0)

#define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq")

#ifndef NDEBUG
#define ASSERT(x) do { if (!(x)) __builtin_trap(); } while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
#define ASSERT_EQ(a,b) ASSERT((a) == (b))

static inline size_t max(size_t a, size_t b) {
  return a < b ? b : a;
}
static inline uintptr_t align(uintptr_t val, uintptr_t alignment) {
  return (val + alignment - 1) & ~(alignment - 1);
}
#define ASSERT_ALIGNED(x, y) ASSERT((x) == align((x), y))

#define CHUNK_SIZE 256
#define CHUNK_SIZE_LOG_2 8
#define CHUNK_MASK (CHUNK_SIZE - 1)
STATIC_ASSERT_EQ(CHUNK_SIZE, 1 << CHUNK_SIZE_LOG_2);

#define PAGE_SIZE 65536
#define PAGE_SIZE_LOG_2 16
#define PAGE_MASK (PAGE_SIZE - 1)
STATIC_ASSERT_EQ(PAGE_SIZE, 1 << PAGE_SIZE_LOG_2);

#define CHUNKS_PER_PAGE 256
STATIC_ASSERT_EQ(PAGE_SIZE, CHUNK_SIZE * CHUNKS_PER_PAGE);

#define GRANULE_SIZE 8
#define GRANULE_SIZE_LOG_2 3
#define LARGE_OBJECT_THRESHOLD 256
#define LARGE_OBJECT_GRANULE_THRESHOLD 32

STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2);
STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD,
                 LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);

struct chunk {
  char data[CHUNK_SIZE];
};

// There are small object pages for allocations of these sizes.
#define FOR_EACH_SMALL_OBJECT_GRANULES(M) \
  M(1) M(2) M(3) M(4) M(5) M(6) M(8) M(10) M(16) M(32)

enum chunk_kind {
#define DEFINE_SMALL_OBJECT_CHUNK_KIND(i) GRANULES_##i,
  FOR_EACH_SMALL_OBJECT_GRANULES(DEFINE_SMALL_OBJECT_CHUNK_KIND)
#undef DEFINE_SMALL_OBJECT_CHUNK_KIND

  SMALL_OBJECT_CHUNK_KINDS,
  FREE_LARGE_OBJECT = 254,
  LARGE_OBJECT = 255
};

static const uint8_t small_object_granule_sizes[] =
{
#define SMALL_OBJECT_GRANULE_SIZE(i) i,
  FOR_EACH_SMALL_OBJECT_GRANULES(SMALL_OBJECT_GRANULE_SIZE)
#undef SMALL_OBJECT_GRANULE_SIZE
};

static enum chunk_kind granules_to_chunk_kind(unsigned granules) {
#define TEST_GRANULE_SIZE(i) if (granules <= i) return GRANULES_##i;
  FOR_EACH_SMALL_OBJECT_GRANULES(TEST_GRANULE_SIZE);
#undef TEST_GRANULE_SIZE
  return LARGE_OBJECT;
}

static unsigned chunk_kind_to_granules(enum chunk_kind kind) {
  switch (kind) {
#define CHUNK_KIND_GRANULE_SIZE(i) case GRANULES_##i: return i;
    FOR_EACH_SMALL_OBJECT_GRANULES(CHUNK_KIND_GRANULE_SIZE);
#undef CHUNK_KIND_GRANULE_SIZE
  default:
    return -1;
  }
}

// Given a pointer P returned by malloc(), we get a header pointer via
// P&~PAGE_MASK, and a chunk index via (P&PAGE_MASK)/CHUNK_SIZE. If
// chunk_kinds[chunk_idx] is [FREE_]LARGE_OBJECT, then the pointer is a large
// object, otherwise the kind indicates the size in granules of the objects in
// the chunk.
struct page_header {
  uint8_t chunk_kinds[CHUNKS_PER_PAGE];
};

struct page {
  union {
    struct page_header header;
    struct chunk chunks[CHUNKS_PER_PAGE];
  };
};

#define PAGE_HEADER_SIZE (sizeof (struct page_header))
#define FIRST_ALLOCATABLE_CHUNK 1
STATIC_ASSERT_EQ(PAGE_HEADER_SIZE, FIRST_ALLOCATABLE_CHUNK * CHUNK_SIZE);

static struct page* get_page(void *ptr) {
  return (struct page*) (char*) (((uintptr_t) ptr) & ~PAGE_MASK);
}
static unsigned get_chunk_index(void *ptr) {
  return (((uintptr_t) ptr) & PAGE_MASK) / CHUNK_SIZE;
}

struct freelist {
  struct freelist *next;
};

struct large_object {
  struct large_object *next;
  size_t size;
};

#define LARGE_OBJECT_HEADER_SIZE (sizeof (struct large_object))

static inline void* get_large_object_payload(struct large_object *obj) {
  return ((char*) obj) + LARGE_OBJECT_HEADER_SIZE;
}
static inline struct large_object* get_large_object(void *ptr) {
  return (struct large_object*) (((char*) ptr) - LARGE_OBJECT_HEADER_SIZE);
}

static struct freelist *small_object_freelists[SMALL_OBJECT_CHUNK_KINDS];
static struct large_object *large_objects;

extern void __heap_base;
static size_t walloc_heap_size;

static struct page*
allocate_pages(size_t payload_size, size_t *n_allocated) {
  size_t needed = payload_size + PAGE_HEADER_SIZE;
  size_t heap_size = __builtin_wasm_memory_size(0) * PAGE_SIZE;
  uintptr_t base = heap_size;
  uintptr_t preallocated = 0, grow = 0;

  if (!walloc_heap_size) {
    // We are allocating the initial pages, if any. We skip the first 64 kB,
    // then take any additional space up to the memory size.
    uintptr_t heap_base = align((uintptr_t)&__heap_base, PAGE_SIZE);
    preallocated = heap_size - heap_base; // Preallocated pages.
    walloc_heap_size = preallocated;
    base -= preallocated;
  }

  if (preallocated < needed) {
    // Always grow the walloc heap at least by 50%.
    grow = align(max(walloc_heap_size / 2, needed - preallocated),
                 PAGE_SIZE);
    ASSERT(grow);
    if (__builtin_wasm_memory_grow(0, grow >> PAGE_SIZE_LOG_2) == -1) {
      return NULL;
    }
    walloc_heap_size += grow;
  }

  struct page *ret = (struct page *)base;
  size_t size = grow + preallocated;
  ASSERT(size);
  ASSERT_ALIGNED(size, PAGE_SIZE);
  *n_allocated = size / PAGE_SIZE;
  return ret;
}
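// Worked example (illustrative; not from the original source): if the walloc
// heap is currently 64 kB and a request needs 8 kB more than is preallocated,
// we grow by align(max(64 kB / 2, 8 kB), PAGE_SIZE) = 64 kB, one fresh page.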
static char*
allocate_chunk(struct page *page, unsigned idx, enum chunk_kind kind) {
  page->header.chunk_kinds[idx] = kind;
  return page->chunks[idx].data;
}

// It's possible for splitting to produce a large object of size 248 (256 minus
// the header size) -- i.e. spanning a single chunk. In that case, push the
// chunk back on the GRANULES_32 small object freelist.
static void maybe_repurpose_single_chunk_large_objects_head(void) {
  if (large_objects->size < CHUNK_SIZE) {
    unsigned idx = get_chunk_index(large_objects);
    char *ptr = allocate_chunk(get_page(large_objects), idx, GRANULES_32);
    large_objects = large_objects->next;
    struct freelist* head = (struct freelist *)ptr;
    head->next = small_object_freelists[GRANULES_32];
    small_object_freelists[GRANULES_32] = head;
  }
}

// If there have been any large-object frees since the last large object
// allocation, go through the freelist and merge any adjacent objects.
static int pending_large_object_compact = 0;
static struct large_object**
maybe_merge_free_large_object(struct large_object** prev) {
  struct large_object *obj = *prev;
  while (1) {
    char *end = get_large_object_payload(obj) + obj->size;
    ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE);
    unsigned chunk = get_chunk_index(end);
    if (chunk < FIRST_ALLOCATABLE_CHUNK) {
      // Merging can't create a large object that newly spans the header chunk.
      // This check also catches the end-of-heap case.
      return prev;
    }
    struct page *page = get_page(end);
    if (page->header.chunk_kinds[chunk] != FREE_LARGE_OBJECT) {
      return prev;
    }
    struct large_object *next = (struct large_object*) end;

    struct large_object **prev_prev = &large_objects, *walk = large_objects;
    while (1) {
      ASSERT(walk);
      if (walk == next) {
        obj->size += LARGE_OBJECT_HEADER_SIZE + walk->size;
        *prev_prev = walk->next;
        if (prev == &walk->next) {
          prev = prev_prev;
        }
        break;
      }
      prev_prev = &walk->next;
      walk = walk->next;
    }
  }
}
static void
maybe_compact_free_large_objects(void) {
  if (pending_large_object_compact) {
    pending_large_object_compact = 0;
    struct large_object **prev = &large_objects;
    while (*prev) {
      prev = &(*maybe_merge_free_large_object(prev))->next;
    }
  }
}
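// Worked example (illustrative): if free objects A and B are physically
// adjacent, that is, A's payload ends exactly where B's header begins, then
// A absorbs B: A->size += LARGE_OBJECT_HEADER_SIZE + B->size, and B is
// unlinked from the freelist.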
// Allocate a large object with enough space for SIZE payload bytes. Returns a
// large object with a header, aligned on a chunk boundary, whose payload size
// may be larger than SIZE, and whose total size (header included) is
// chunk-aligned. Either a suitable allocation is found in the large object
// freelist, or we ask the OS for some more pages and treat those pages as a
// large object. If the allocation fits in that large object and there's more
// than an aligned chunk's worth of data free at the end, the large object is
// split.
//
// The return value's corresponding chunk in the page is marked as starting a
// large object.
static struct large_object*
allocate_large_object(size_t size) {
  maybe_compact_free_large_objects();
  struct large_object *best = NULL, **best_prev = &large_objects;
  size_t best_size = -1;
  for (struct large_object **prev = &large_objects, *walk = large_objects;
       walk;
       prev = &walk->next, walk = walk->next) {
    if (walk->size >= size && walk->size < best_size) {
      best_size = walk->size;
      best = walk;
      best_prev = prev;
      if (best_size + LARGE_OBJECT_HEADER_SIZE
          == align(size + LARGE_OBJECT_HEADER_SIZE, CHUNK_SIZE))
        // Not going to do any better than this; just return it.
        break;
    }
  }

  if (!best) {
    // The large object freelist doesn't have an object big enough for this
    // allocation. Allocate one or more pages from the OS, and treat that new
    // sequence of pages as a fresh large object. It will be split if
    // necessary.
    size_t size_with_header = size + sizeof(struct large_object);
    size_t n_allocated = 0;
    struct page *page = allocate_pages(size_with_header, &n_allocated);
    if (!page) {
      return NULL;
    }
    char *ptr = allocate_chunk(page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT);
    best = (struct large_object *)ptr;
    size_t page_header = ptr - ((char*) page);
    best->next = large_objects;
    best->size = best_size =
      n_allocated * PAGE_SIZE - page_header - LARGE_OBJECT_HEADER_SIZE;
    ASSERT(best_size >= size_with_header);
  }

  allocate_chunk(get_page(best), get_chunk_index(best), LARGE_OBJECT);

  struct large_object *next = best->next;
  *best_prev = next;

  size_t tail_size = (best_size - size) & ~CHUNK_MASK;
  if (tail_size) {
    // The best-fitting object has 1 or more aligned chunks free after the
    // requested allocation; split the tail off into a fresh aligned object.
    struct page *start_page = get_page(best);
    char *start = get_large_object_payload(best);
    char *end = start + best_size;

    if (start_page == get_page(end - tail_size - 1)) {
      // The allocation does not span a page boundary; yay.
      ASSERT_ALIGNED((uintptr_t)end, CHUNK_SIZE);
    } else if (size < PAGE_SIZE - LARGE_OBJECT_HEADER_SIZE - CHUNK_SIZE) {
      // If the allocation itself is smaller than a page, split off the head,
      // then fall through to maybe split the tail.
      ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE);
      size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK);
      struct large_object *head = best;
      allocate_chunk(start_page, get_chunk_index(start), FREE_LARGE_OBJECT);
      head->size = first_page_size;
      head->next = large_objects;
      large_objects = head;

      maybe_repurpose_single_chunk_large_objects_head();

      struct page *next_page = start_page + 1;
      char *ptr = allocate_chunk(next_page, FIRST_ALLOCATABLE_CHUNK, LARGE_OBJECT);
      best = (struct large_object *) ptr;
      best->size = best_size = best_size - first_page_size - CHUNK_SIZE - LARGE_OBJECT_HEADER_SIZE;
      ASSERT(best_size >= size);
      start = get_large_object_payload(best);
      tail_size = (best_size - size) & ~CHUNK_MASK;
    } else {
      // A large object that spans more than one page will consume all of its
      // tail pages. Therefore if the split traverses a page boundary, round up
      // to page size.
      ASSERT_ALIGNED((uintptr_t)end, PAGE_SIZE);
      size_t first_page_size = PAGE_SIZE - (((uintptr_t)start) & PAGE_MASK);
      size_t tail_pages_size = align(size - first_page_size, PAGE_SIZE);
      size = first_page_size + tail_pages_size;
      tail_size = best_size - size;
    }
    best->size -= tail_size;

    unsigned tail_idx = get_chunk_index(end - tail_size);
    while (tail_idx < FIRST_ALLOCATABLE_CHUNK && tail_size) {
      // We would be splitting in a page header; don't do that.
      tail_size -= CHUNK_SIZE;
      tail_idx++;
    }

    if (tail_size) {
      struct page *page = get_page(end - tail_size);
      char *tail_ptr = allocate_chunk(page, tail_idx, FREE_LARGE_OBJECT);
      struct large_object *tail = (struct large_object *) tail_ptr;
      tail->next = large_objects;
      tail->size = tail_size - LARGE_OBJECT_HEADER_SIZE;
      ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(tail) + tail->size), CHUNK_SIZE);
      large_objects = tail;

      maybe_repurpose_single_chunk_large_objects_head();
    }
  }

  ASSERT_ALIGNED((uintptr_t)(get_large_object_payload(best) + best->size), CHUNK_SIZE);
  return best;
}
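// Worked example (illustrative, on wasm32 where the header is 8 bytes): take
// a free object with a 1016-byte payload (8-byte header + 1016 bytes = 4
// chunks) and a request for 300 bytes, all within one page. tail_size =
// (1016 - 300) & ~CHUNK_MASK = 512, so the allocation keeps a 504-byte
// payload (2 chunks including the header) and the other 2 chunks go back on
// the freelist as a free object with a 504-byte payload of its own.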
static struct freelist*
obtain_small_objects(enum chunk_kind kind) {
  struct freelist** whole_chunk_freelist = &small_object_freelists[GRANULES_32];
  void *chunk;
  if (*whole_chunk_freelist) {
    chunk = *whole_chunk_freelist;
    *whole_chunk_freelist = (*whole_chunk_freelist)->next;
  } else {
    chunk = allocate_large_object(0);
    if (!chunk) {
      return NULL;
    }
  }
  char *ptr = allocate_chunk(get_page(chunk), get_chunk_index(chunk), kind);
  char *end = ptr + CHUNK_SIZE;
  struct freelist *next = NULL;
  size_t size = chunk_kind_to_granules(kind) * GRANULE_SIZE;
  for (size_t i = size; i <= CHUNK_SIZE; i += size) {
    struct freelist *head = (struct freelist*) (end - i);
    head->next = next;
    next = head;
  }
  return next;
}
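// Worked example (illustrative): for kind GRANULES_4 (32-byte objects), the
// loop above threads CHUNK_SIZE / 32 = 8 objects from the end of the chunk
// backwards, so the returned freelist head is the object at the chunk's
// lowest address. For a kind like GRANULES_3 (24 bytes), 10 objects fit and
// the first 16 bytes of the chunk are left as padding.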
static inline size_t size_to_granules(size_t size) {
  return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}
static struct freelist** get_small_object_freelist(enum chunk_kind kind) {
  ASSERT(kind < SMALL_OBJECT_CHUNK_KINDS);
  return &small_object_freelists[kind];
}

static void*
allocate_small(enum chunk_kind kind) {
  struct freelist **loc = get_small_object_freelist(kind);
  if (!*loc) {
    struct freelist *freelist = obtain_small_objects(kind);
    if (!freelist) {
      return NULL;
    }
    *loc = freelist;
  }
  struct freelist *ret = *loc;
  *loc = ret->next;
  return (void *) ret;
}

static void*
allocate_large(size_t size) {
  struct large_object *obj = allocate_large_object(size);
  return obj ? get_large_object_payload(obj) : NULL;
}

void*
malloc(size_t size) {
  size_t granules = size_to_granules(size);
  enum chunk_kind kind = granules_to_chunk_kind(granules);
  return (kind == LARGE_OBJECT) ? allocate_large(size) : allocate_small(kind);
}

void
free(void *ptr) {
  if (!ptr) return;
  struct page *page = get_page(ptr);
  unsigned chunk = get_chunk_index(ptr);
  uint8_t kind = page->header.chunk_kinds[chunk];
  if (kind == LARGE_OBJECT) {
    struct large_object *obj = get_large_object(ptr);
    obj->next = large_objects;
    large_objects = obj;
    allocate_chunk(page, chunk, FREE_LARGE_OBJECT);
    pending_large_object_compact = 1;
  } else {
    size_t granules = kind;
    struct freelist **loc = get_small_object_freelist(granules);
    struct freelist *obj = ptr;
    obj->next = *loc;
    *loc = obj;
  }
}

--------------------------------------------------------------------------------