├── .gitignore
├── LICENSE
├── README.md
├── benchmark
│   └── main.zig
├── build.zig
├── build.zig.zon
└── src
    └── rpmalloc.zig

/.gitignore:
--------------------------------------------------------------------------------
1 | zig-cache
2 | zig-out
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2022 Trevor Berrange Sanchez
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # rpmalloc zig port
2 | This project is an attempt to make a fast general-purpose allocator for Zig. It is mostly derived from [rpmalloc](https://github.com/mjansson/rpmalloc), using the same general structure and retaining the essential strategies that make it fast, though with many options and facets stripped down or modified to suit Zig.
3 | 
4 | ## WIP
5 | This project is under development, does not guarantee a stable API, and lacks documentation and tests.
6 | Contributions, in the form of PRs and relevant resources, are greatly appreciated.
7 | 
8 | ## Usage
9 | If you cloned the repository, or vendor it as a git submodule or similar, you can just add the main source file as a module like so:
10 | ```zig
11 | exe.addAnonymousModule("rpmalloc", .{ .source_file = .{ .path = "<path to the repository>/src/rpmalloc.zig" } });
12 | ```
13 | However, if you want to use the Zig package manager, the recommended process (at the time of writing) is as follows:
14 | 1. Get the SHA of the commit you want to depend on.
15 | 2. Make sure you have something akin to the following in your build.zig.zon file:
16 | ```zig
17 | .dependencies = .{
18 |     // -- snip --
19 |     .@"rpmalloc-zig-port" = .{
20 |         .url = "https://github.com/InKryption/rpmalloc-zig-port/archive/<commit SHA>.tar.gz",
21 |         .hash = "<dependency hash>", // you can get the expected value by running `zig build` while omitting this field.
22 |     },
23 | },
24 | ```
25 | 3. Add something akin to the following to your build.zig file:
26 | ```zig
27 | const rpmalloc_dep = b.dependency("rpmalloc-zig-port", .{});
28 | const rpmalloc_module = rpmalloc_dep.module("rpmalloc");
29 | exe.addModule("rpmalloc", rpmalloc_module);
30 | ```
31 | 
32 | and then import and use it:
33 | ```zig
34 | const rpmalloc = @import("rpmalloc");
35 | const Rp = rpmalloc.RPMalloc(.{});
36 | 
37 | pub fn main() !void {
38 |     try Rp.init(null, .{});
39 |     defer Rp.deinit();
40 | 
41 |     const allocator = Rp.allocator();
42 |     // -- snip --
43 | }
44 | ```
45 | Note that this allocator is a singleton, much like in the original C source; the important distinction is that you can concurrently use different permutations of it, one for each distinct configuration.
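To illustrate that last point, here is a small sketch (not taken from the original documentation; it only relies on the `RPMallocOptions` fields declared in `src/rpmalloc.zig`) of two differently-configured permutations being used side by side:
```zig
const rpmalloc = @import("rpmalloc");

// Each distinct `RPMallocOptions` value instantiates its own type,
// and therefore its own independent singleton state.
const RpDefault = rpmalloc.RPMalloc(.{});
const RpNoThreadCache = rpmalloc.RPMalloc(.{ .thread_cache = false });

pub fn main() !void {
    try RpDefault.init(null, .{});
    defer RpDefault.deinit();

    try RpNoThreadCache.init(null, .{});
    defer RpNoThreadCache.deinit();

    // Two independent allocators, each backed by its own configuration.
    const a = RpDefault.allocator();
    const b = RpNoThreadCache.allocator();
    _ = a;
    _ = b;
}
```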
46 | 
47 | ## Notes
48 | * There are a good few TODO comments in the code, consisting mostly of uncertainties as to the benefits or semantics of certain parts of the code.
49 | * At the time of writing, the port runs marginally slower than the original C source in the benchmark when linked statically, and notably slower when linked dynamically.
50 | * I've opted to remove most code related to partial unmapping, as it's not a pattern that is well-suited to Zig (or at least not one that I could figure out how to map to Zig's prototypical allocator patterns).
51 | 
--------------------------------------------------------------------------------
/benchmark/main.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const assert = std.debug.assert;
3 | 
4 | const build_options = @import("build-options");
5 | const compile_err = struct {
6 |     const unknown_impl: noreturn = @compileError("unknown allocator implementation '" ++ @tagName(build_options.impl) ++ "'.\n");
7 |     const dont_reference: noreturn = @compileError("don't reference.\n");
8 |     const todo: noreturn = @compileError("TODO: implement for '" ++ @tagName(build_options.impl) ++ "'.\n");
9 | };
10 | 
11 | pub const log_level = build_options.log_level;
12 | 
13 | const rp = if (build_options.impl == .@"rp-zig") @import("rpmalloc") else compile_err.dont_reference;
14 | const Rp = rp.RPMalloc(.{});
15 | 
16 | var gpa: std.heap.GeneralPurposeAllocator(.{}) = if (build_options.impl == .gpa) (.{}) else compile_err.dont_reference;
17 | 
18 | pub fn main() !void {
19 |     const CmdArgs = struct {
20 |         seed: u64,
21 |         loop_count: u64,
22 |         min_size: u64,
23 |         max_size: u64,
24 |     };
25 |     const cmd_args: CmdArgs = struct {
26 |         var buf: [@max(1, build_options.cmd_args_buffer_size orelse 4096)]u8 = undefined;
27 |         var fba = std.heap.FixedBufferAllocator.init(&buf);
28 |         inline fn cmdArgs() !CmdArgs {
29 |             const ArgResults = std.enums.EnumFieldStruct(std.meta.FieldEnum(CmdArgs), ?u64, @as(?u64, null));
30 |             const ArgName = comptime ArgName: {
31 |                 const old_fields = @typeInfo(std.meta.FieldEnum(CmdArgs)).Enum.fields;
32 |                 var fields: [old_fields.len]std.builtin.Type.EnumField = old_fields[0..].*;
33 | 
34 |                 for (&fields) |*field| {
35 |                     const newFieldName = replaceScalarComptime(field.name, '_', '-');
36 |                     field.name = (newFieldName ++ .{ 0 })[0..newFieldName.len:0];
37 |                 }
38 |                 break :ArgName @Type(.{ .Enum = std.builtin.Type.Enum{
39 |                     .tag_type = std.math.IntFittingRange(0, fields.len - 1),
40 |                     .fields = &fields,
41 |                     .decls = &.{},
42 |                     .is_exhaustive = true,
43 |                 } });
44 |             };
45 | 
46 |             var results: ArgResults = .{};
47 | 
48 |             var args_iter = try std.process.argsWithAllocator(fba.allocator());
49 |             defer args_iter.deinit();
50 | 
51 |             if (!args_iter.skip()) @panic("command line arguments don't contain executable path\n");
52 | 
53 |             while (args_iter.next()) |whole_str| {
54 |                 if (whole_str.len == 0) {
55 |                     std.log.warn("Empty command line argument token", .{});
56 |                     continue;
57 |                 }
58 |                 if (!std.mem.startsWith(u8, whole_str, "--")) {
59 |                     if (whole_str[0] == '-') {
60 |                         std.log.err("Argument name must be preceded by two dashes, but there's only one in `{s}`.\n", .{whole_str});
61 |                     } else {
62 |                         std.log.err("Expected `--[name]=[value]`, got `{s}`.\n", .{whole_str});
63 |                     }
64 |                     return error.InvalidPositional;
65 |                 }
66 |                 const kv_string = whole_str["--".len..];
67 |                 if (kv_string.len == 0 or kv_string[0] == '=') {
68 |                     std.log.err("Expected `--[name]=[value]`, got `{s}`.\n", .{whole_str});
69 |                     return error.MissingArgumentName;
70 |                 }
71 |                 const name_str: []const u8 = kv_string[0 .. std.mem.indexOfScalar(u8, kv_string, '=') orelse kv_string.len];
72 |                 const value_str: ?[]const u8 = if (name_str.len == kv_string.len) null else kv_string[name_str.len + 1 ..];
73 | 
74 |                 const name: ArgName = std.meta.stringToEnum(ArgName, name_str) orelse {
75 |                     std.log.err("Unrecognized argument name '{s}'.\n", .{name_str});
76 |                     return error.UnrecognizedArgumentName;
77 |                 };
78 |                 switch (name) {
79 |                     inline else => |iname| {
80 |                         const iname_str = @tagName(iname);
81 |                         const field_name = replaceScalarComptime(iname_str, '-', '_');
82 |                         if (@field(results, field_name) != null) {
83 |                             std.log.err("Specified argument '{s}' twice.\n", .{iname_str});
84 |                             return error.DuplicateArgument;
85 |                         }
86 | 
87 |                         const val_str = value_str orelse {
88 |                             std.log.err("Missing value for argument '{s}'.\n", .{iname_str});
89 |                             return error.MissingArgumentValue;
90 |                         };
91 |                         if (val_str.len == 0) {
92 |                             std.log.err("Missing value for argument '{s}'.\n", .{iname_str});
93 |                             return error.MissingArgumentValue;
94 |                         }
95 |                         @field(results, field_name) = std.fmt.parseUnsigned(u64, val_str, 0) catch |err| {
96 |                             std.log.err("({s}) Couldn't parse '{s}' as value for '{s}', of type {s}.\n", .{
97 |                                 @errorName(err),
98 |                                 val_str,
99 |                                 iname_str,
100 |                                 @typeName(@TypeOf(@field(@as(CmdArgs, undefined), field_name))),
101 |                             });
102 |                             return error.InvalidArgumentValue;
103 |                         };
104 |                     },
105 |                 }
106 |             }
107 | 
108 |             var cmd_args = CmdArgs{
109 |                 .seed = results.seed orelse int: {
110 |                     var int: u64 = 0;
111 |                     try std.os.getrandom(std.mem.asBytes(&int));
112 |                     break :int int;
113 |                 },
114 |                 .loop_count = results.loop_count orelse 1000,
115 |                 .min_size = results.min_size orelse 8,
116 |                 .max_size = results.max_size orelse 4096,
117 |             };
118 | 
119 |             if (cmd_args.loop_count == 0) {
120 |                 const default_loop_count = 1000;
121 |                 std.log.warn("Loop count must be greater than 0. 
Defaulting to {d}\n", .{default_loop_count}); 122 | cmd_args.loop_count = default_loop_count; 123 | } 124 | if (cmd_args.min_size > cmd_args.max_size) { 125 | std.log.err("Minimum size must be less than or equal to maximum size.\n", .{}); 126 | return error.IncompatibleArgumentValues; 127 | } 128 | 129 | return cmd_args; 130 | } 131 | }.cmdArgs() catch return; 132 | 133 | std.log.debug("Command line arguments:", .{}); 134 | inline for (@typeInfo(CmdArgs).Struct.fields) |field| { 135 | std.log.debug(" {s} = {any}", .{ field.name, @field(cmd_args, field.name) }); 136 | } 137 | 138 | const PrngImpl = if (build_options.prng) |prng| @field(std.rand, @tagName(prng)) else struct { 139 | inline fn init(s: u64) @This() { 140 | _ = s; 141 | return .{}; 142 | } 143 | 144 | inline fn random(this: *@This()) std.rand.Random { 145 | return std.rand.Random{ 146 | .ptr = this, 147 | .fillFn = noopFill, 148 | }; 149 | } 150 | 151 | fn noopFill(ptr: *anyopaque, bytes: []u8) void { 152 | _ = ptr; 153 | _ = bytes; 154 | unreachable; 155 | } 156 | }; 157 | var prng = PrngImpl.init(cmd_args.seed); 158 | const random: std.rand.Random = prng.random(); 159 | _ = random; 160 | 161 | // initialise 162 | switch (build_options.impl) { 163 | .@"rp-zig" => try Rp.init(null, .{}), 164 | .@"rp-c" => compile_err.todo, 165 | .gpa => gpa = .{}, 166 | else => compile_err.unknown_impl, 167 | } 168 | // deinitialise 169 | defer switch (build_options.impl) { 170 | .@"rp-zig" => Rp.deinit(), 171 | .@"rp-c" => @compileError("todo"), 172 | .gpa => _ = gpa.deinit(), 173 | else => compile_err.unknown_impl, 174 | }; 175 | 176 | const allocator: std.mem.Allocator = switch (build_options.impl) { 177 | .@"rp-zig" => Rp.allocator(), 178 | .@"rp-c" => compile_err.todo, 179 | .gpa => gpa.allocator(), 180 | else => compile_err.unknown_impl, 181 | }; 182 | _ = allocator; 183 | 184 | @panic("TODO: Do benchmarking"); 185 | } 186 | 187 | inline fn replaceScalarComptime(comptime input: []const u8, comptime needle: u8, comptime replacement: u8) *const [input.len]u8 { 188 | comptime { 189 | var result: [input.len]u8 = input[0..].*; 190 | for (&result) |*c| { 191 | if (c.* == needle) c.* = replacement; 192 | } 193 | return &result; 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /build.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | pub fn build(b: *std.Build) void { 4 | const target = b.standardTargetOptions(.{}); 5 | const optimize = b.standardOptimizeOption(.{}); 6 | 7 | const rpmalloc_mod = b.addModule("rpmalloc", .{ 8 | .root_source_file = .{ .path = "src/rpmalloc.zig" }, 9 | }); 10 | 11 | const link_libc = b.option(bool, "link-c", "Force generated executables to link to C") orelse false; 12 | const options = .{ 13 | .strip = b.option(bool, "strip", "Strip generated executables"), 14 | .sanitize_thread = b.option(bool, "sanitize-thread", "Enable thread sanitizer") orelse false, 15 | .sanitize_c = !(b.option(bool, "no-sanitize-c", "Disable C UBSAN") orelse false), 16 | .valgrind = b.option(bool, "valgrind-support", "Force valgrind support on or off."), 17 | }; 18 | const setOptions = struct { 19 | fn setOptions(leo: *std.Build.Module, opts: @TypeOf(options)) void { 20 | inline for (@typeInfo(@TypeOf(opts)).Struct.fields) |field| { 21 | @field(leo, field.name) = @field(opts, field.name); 22 | } 23 | } 24 | }.setOptions; 25 | 26 | const unit_tests_leo = b.addTest(.{ 27 | .root_source_file = .{ .path = 
"src/rpmalloc.zig" }, 28 | .target = target, 29 | .optimize = optimize, 30 | }); 31 | setOptions(&unit_tests_leo.root_module, options); 32 | if (link_libc) unit_tests_leo.linkLibC(); 33 | 34 | const unit_tests_tls = b.step("unit-tests", "Run the unit tests"); 35 | unit_tests_tls.dependOn(&unit_tests_leo.step); 36 | 37 | // const bench_exe_leo = b.addExecutable("bench", "benchmark/main.zig"); 38 | const bench_exe_leo = b.addExecutable(.{ 39 | .name = "bench", 40 | .root_source_file = .{ .path = "benchmark/main.zig" }, 41 | .target = target, 42 | .optimize = optimize, 43 | }); 44 | setOptions(&bench_exe_leo.root_module, options); 45 | bench_exe_leo.root_module.addImport("rpmalloc", rpmalloc_mod); 46 | if (link_libc) bench_exe_leo.linkLibC(); 47 | b.installArtifact(bench_exe_leo); 48 | 49 | 50 | const bench_exe_options = b.addOptions(); 51 | bench_exe_leo.root_module.addOptions("build-options", bench_exe_options); 52 | bench_exe_options.addOption(?comptime_int, "cmd_args_buffer_size", null); 53 | 54 | const BenchImpl = enum { 55 | @"rp-zig", 56 | @"rp-c", 57 | gpa, 58 | }; 59 | const BenchPrng = enum { 60 | Xoshiro256, 61 | Xoroshiro128, 62 | Pcg, 63 | RomuTrio, 64 | Sfc64, 65 | Isaac64, 66 | }; 67 | 68 | const bench_log_level = b.option(std.log.Level, "bench-log", "Log level for benchmark") orelse .debug; 69 | const bench_impl = b.option(BenchImpl, "bench", "Which allocator to benchmark") orelse .@"rp-zig"; 70 | const bench_prng = b.option(BenchPrng, "bench-prng", "Name of PRNG to use"); 71 | 72 | bench_exe_options.contents.writer().print( 73 | \\pub const impl = .{s}; 74 | \\pub const prng: ?@TypeOf(.enum_literal) = {?s}; 75 | \\pub const log_level: @import("std").log.Level = .{s}; 76 | \\ 77 | , .{ 78 | std.zig.fmtId(@tagName(bench_impl)), 79 | if (bench_prng) |tag| switch (tag) { 80 | inline else => |itag| "." 
++ @tagName(itag),
81 |         } else null,
82 |         @tagName(bench_log_level),
83 |     }) catch unreachable;
84 | 
85 |     const bench_exe_run = b.addRunArtifact(bench_exe_leo);
86 |     bench_exe_run.step.dependOn(b.getInstallStep());
87 |     if (b.args) |args| {
88 |         bench_exe_run.addArgs(args);
89 |     }
90 | 
91 |     const bench_exe_run_tls = b.step("bench", "Run the benchmark");
92 |     bench_exe_run_tls.dependOn(&bench_exe_run.step);
93 | }
94 | 
--------------------------------------------------------------------------------
/build.zig.zon:
--------------------------------------------------------------------------------
1 | .{
2 |     .name = "rpmalloc-zig-port",
3 |     .version = "0.1.1",
4 |     .dependencies = .{},
5 |     .paths = .{""},
6 | }
7 | 
--------------------------------------------------------------------------------
/src/rpmalloc.zig:
--------------------------------------------------------------------------------
1 | const std = @import("std");
2 | const builtin = @import("builtin");
3 | const Allocator = std.mem.Allocator;
4 | const assert = std.debug.assert;
5 | 
6 | test {
7 |     comptime var options: RPMallocOptions = .{ .backing_allocator = null };
8 |     try testRPMalloc(options, std.testing.allocator, .{}, .{});
9 | 
10 |     options.backing_allocator = &std.testing.allocator;
11 |     try testRPMalloc(options, null, .{}, .{});
12 |     try testRPMalloc(options, null, .{}, .{});
13 | 
14 |     options.configurable_sizes = true;
15 |     try testRPMalloc(options, null, .{ .span_size = .pow12 }, .{});
16 |     try testRPMalloc(options, null, .{ .span_size = .pow18 }, .{});
17 | 
18 |     options.configurable_sizes = false;
19 |     options.global_cache = false;
20 |     try testRPMalloc(options, null, .{}, .{});
21 | 
22 |     options.thread_cache = false;
23 |     try testRPMalloc(options, null, .{}, .{});
24 | }
25 | 
26 | fn testRPMalloc(
27 |     comptime options: RPMallocOptions,
28 |     ally: if (options.backing_allocator != null) ?noreturn else Allocator,
29 |     init_config: RPMalloc(options).InitConfig,
30 |     comptime extra: struct {
31 |         min_align: comptime_int = 1,
32 |         max_align: comptime_int = 16,
33 |     },
34 | ) !void {
35 |     const min_align = extra.min_align;
36 |     const max_align = extra.max_align;
37 | 
38 |     // powers of two
39 |     assert(min_align > 0 and @popCount(@as(std.math.IntFittingRange(0, min_align), min_align)) == 1);
40 |     assert(max_align > 0 and @popCount(@as(std.math.IntFittingRange(0, max_align), max_align)) == 1);
41 | 
42 |     const Rp = RPMalloc(options);
43 |     try Rp.init(ally, init_config);
44 |     defer Rp.deinit();
45 | 
46 |     comptime var alignment = min_align;
47 |     inline while (alignment <= max_align) : (alignment *= 2) {
48 |         var list = std.ArrayListAligned(u8, alignment).init(Rp.allocator());
49 |         defer list.deinit();
50 | 
51 |         try list.append(33);
52 |         try list.appendNTimes(71, 22);
53 |         list.shrinkAndFree(2);
54 |         try list.ensureUnusedCapacity(1024 * 64);
55 |         list.appendSliceAssumeCapacity(&[1]u8{96} ** (1024 * 64));
56 |         list.shrinkAndFree(3);
57 |         var i: u32 = 1;
58 |         while (i < std.math.maxInt(u32)) : (i *|= 2) {
59 |             try list.append(@as(u8, @intCast((i - 1) % std.math.maxInt(u8))));
60 |         }
61 | 
62 |         // brief check for any obvious memory corruption
63 |         try list.resize(6);
64 |         try std.testing.expectEqualSlices(u8, list.items, &.{ 33, 71, 96, 0, 1, 3 });
65 |         try list.appendNTimes(11, 1024);
66 |         try std.testing.expectEqualSlices(u8, list.items, &[_]u8{ 33, 71, 96, 0, 1, 3 } ++ [_]u8{11} ** 1024);
67 |     }
68 | }
69 | 
70 | pub const RPMallocOptions = struct {
71 |     /// Enable configuring sizes at runtime. 
Will introduce a very small 72 | /// overhead due to some size calculations not being compile time constants 73 | configurable_sizes: bool = false, 74 | /// Enable per-thread cache 75 | thread_cache: bool = true, 76 | /// Enable global cache shared between all threads, requires thread cache 77 | global_cache: bool = true, 78 | /// Enable some slightly more expensive safety checks. 79 | assertions: bool = std.debug.runtime_safety, 80 | /// Disable unmapping memory pages (also enables unlimited cache) 81 | never_unmap: bool = false, 82 | /// Enable unlimited global cache (no unmapping until finalization) 83 | unlimited_cache: bool = false, 84 | /// Default number of spans to map in call to map more virtual memory (default values yield 4MiB here) 85 | default_span_map_count: usize = 64, 86 | /// Size of heap hashmap 87 | heap_array_size: usize = 47, 88 | /// Multiplier for global cache 89 | global_cache_multiplier: usize = 8, 90 | /// Either a pointer to a comptime-known pointer to an allocator interface, or null to indicate 91 | /// that the backing allocator will be supplied during initialisation. 92 | backing_allocator: ?*const Allocator = &std.heap.page_allocator, 93 | }; 94 | pub fn RPMalloc(comptime options: RPMallocOptions) type { 95 | const configurable_sizes = options.configurable_sizes; 96 | 97 | if (options.never_unmap and !options.global_cache) { 98 | @compileError("Must use global cache if unmap is disabled"); 99 | } 100 | 101 | if (options.never_unmap and !options.unlimited_cache) { 102 | var new_options: RPMallocOptions = options; 103 | new_options.unlimited_cache = true; 104 | return RPMalloc(new_options); 105 | } 106 | 107 | if (!options.global_cache and options.unlimited_cache) { 108 | var new_options = options; 109 | new_options.unlimited_cache = false; 110 | return RPMalloc(new_options); 111 | } 112 | 113 | const enable_thread_cache = options.thread_cache; 114 | const enable_global_cache = options.global_cache; 115 | const never_unmap = options.never_unmap; 116 | const enable_unlimited_cache = options.unlimited_cache; 117 | const default_span_map_count = options.default_span_map_count; 118 | const global_cache_multiplier = options.global_cache_multiplier; 119 | 120 | const known_allocator = options.backing_allocator != null; 121 | 122 | return struct { 123 | pub inline fn allocator() Allocator { 124 | comptime return Allocator{ 125 | .ptr = undefined, 126 | .vtable = &Allocator.VTable{ 127 | .alloc = alloc, 128 | .resize = resize, 129 | .free = free, 130 | }, 131 | }; 132 | } 133 | 134 | pub const InitConfig = struct { 135 | /// Size of a span of memory blocks. MUST be a power of two, and in [4096,262144] 136 | /// range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE 137 | /// is defined to 1. 138 | span_size: if (configurable_sizes) SpanSize else enum { default } = .default, 139 | /// Number of spans to map at each request to map new virtual memory blocks. This can 140 | /// be used to minimize the system call overhead at the cost of virtual memory address 141 | /// space. The extra mapped pages will not be written until actually used, so physical 142 | /// committed memory should not be affected in the default implementation. Will be 143 | /// aligned to a multiple of spans that match memory page size in case of huge pages. 
144 |         span_map_count: usize = 0,
145 | 
146 |         pub const SpanSize = enum(usize) {
147 |             default = 0,
148 |             pow12 = 1 << 12,
149 |             pow13 = 1 << 13,
150 |             pow14 = 1 << 14,
151 |             pow15 = 1 << 15,
152 |             pow16 = 1 << 16,
153 |             pow17 = 1 << 17,
154 |             pow18 = 1 << 18,
155 |         };
156 |     };
157 | 
158 |     /// Initialize the allocator and setup global data.
159 |     pub fn init(
160 |         ally: if (known_allocator) ?noreturn else Allocator,
161 |         config: InitConfig,
162 |     ) error{OutOfMemory}!void {
163 |         @setCold(true);
164 |         assert(!initialized);
165 |         initialized = true;
166 |         if (!known_allocator) {
167 |             backing_allocator_mut = ally;
168 |         }
169 | 
170 |         const min_span_size: usize = 256;
171 |         const max_page_size: usize = if (std.math.maxInt(usize) > 0xFFFF_FFFF)
172 |             (4096 * 1024 * 1024)
173 |         else
174 |             (4 * 1024 * 1024);
175 |         // _memory_page_size = std.math.clamp(_memory_page_size, min_span_size, max_page_size);
176 |         comptime assert(page_size >= min_span_size and page_size <= max_page_size);
177 | 
178 |         if (config.span_size != .default) {
179 |             comptime assert(configurable_sizes);
180 |             span_size_mut = @intFromEnum(config.span_size);
181 |             span_size_shift_mut = log2(span_size.*);
182 |             span_mask_mut = calculateSpanMask(span_size.*);
183 |         } // otherwise, they're either not configurable, or they're already set to default values.
184 | 
185 |         span_map_count = if (config.span_map_count != 0)
186 |             config.span_map_count
187 |         else
188 |             default_span_map_count;
189 |         if ((span_size.* * span_map_count) < page_size) {
190 |             span_map_count = (page_size / span_size.*);
191 |         }
192 |         if ((page_size >= span_size.*) and ((span_map_count * span_size.*) % page_size) != 0) {
193 |             span_map_count = (page_size / span_size.*);
194 |         }
195 |         heap_reserve_count = if (span_map_count > default_span_map_count) default_span_map_count else span_map_count;
196 | 
197 |         // Setup all small and medium size classes
198 |         if (configurable_sizes) {
199 |             globalSmallSizeClassesInit(&global_size_classes, span_size);
200 |         } else if (comptime builtin.mode == .Debug) {
201 |             comptime var expected: [SIZE_CLASS_COUNT]SizeClass = std.mem.zeroes([SIZE_CLASS_COUNT]SizeClass);
202 |             comptime globalSmallSizeClassesInit(&expected, span_size);
203 |             inline for (global_size_classes[0..SMALL_CLASS_COUNT], 0..) |sz_class, i| {
204 |                 assert(std.meta.eql(sz_class, expected[i]));
205 |             }
206 |         }
207 | 
208 |         if (configurable_sizes) {
209 |             // At least two blocks per span, then fall back to large allocations
210 |             medium_size_limit_runtime_mut = calculateMediumSizeLimitRuntime(span_size.*);
211 |         }
212 |         var iclass: usize = 0;
213 |         while (iclass < MEDIUM_CLASS_COUNT) : (iclass += 1) {
214 |             const size: usize = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
215 |             if (size > medium_size_limit_runtime.*) break;
216 |             global_size_classes[SMALL_CLASS_COUNT + iclass].block_size = @as(u32, @intCast(size));
217 |             adjustSizeClass(SMALL_CLASS_COUNT + iclass, &global_size_classes, span_size);
218 |         }
219 | 
220 |         try threadInitialize(); // initialise this thread after everything else is set up.
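        // Note (an observation inferred from initThread/deinitThread below, not an
        // upstream comment): init() only sets up the calling thread's heap. Any other
        // thread that allocates through this allocator must call initThread() itself
        // first, and deinitThread() before it exits.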
221 | } 222 | pub inline fn initThread() error{OutOfMemory}!void { 223 | comptime if (builtin.single_threaded) return; 224 | assert(getThreadId() != main_thread_id); 225 | assert(!isThreadInitialized()); 226 | try threadInitialize(); 227 | } 228 | 229 | /// Finalize the allocator 230 | pub fn deinit() void { 231 | assert(initialized); 232 | threadFinalize(true); 233 | 234 | if (global_reserve != null) { 235 | _ = @atomicRmw(u32, &global_reserve_master.?.remaining_spans, .Sub, @as(u32, @intCast(global_reserve_count)), .monotonic); 236 | global_reserve_master = null; 237 | global_reserve_count = 0; 238 | global_reserve = null; 239 | } 240 | @atomicStore(Lock, &global_lock, .unlocked, .release); // TODO: Is unconditionally setting this OK? 241 | 242 | { // Free all thread caches and fully free spans 243 | var list_idx: usize = 0; 244 | while (list_idx < all_heaps.len) : (list_idx += 1) { 245 | var maybe_heap: ?*Heap = all_heaps[list_idx]; 246 | while (maybe_heap != null) { 247 | const next_heap: ?*Heap = maybe_heap.?.next_heap; 248 | maybe_heap.?.finalize = 1; 249 | heapGlobalFinalize(maybe_heap.?); 250 | maybe_heap = next_heap; 251 | } 252 | } 253 | } 254 | 255 | if (enable_global_cache) { 256 | // Free global caches 257 | var iclass: usize = 0; 258 | while (iclass < LARGE_CLASS_COUNT) : (iclass += 1) { 259 | globalCacheFinalize(&global_span_cache[iclass]); 260 | } 261 | } 262 | 263 | orphan_heaps = null; 264 | initialized = false; 265 | } 266 | pub inline fn deinitThread(release_caches: bool) void { 267 | comptime if (builtin.single_threaded) return; 268 | assert(getThreadId() != main_thread_id); 269 | assert(isThreadInitialized()); 270 | threadFinalize(release_caches); 271 | } 272 | 273 | fn alloc(state_ptr: *anyopaque, len: usize, ptr_align_log2: u8, ret_addr: usize) ?[*]u8 { 274 | _ = state_ptr; 275 | _ = ret_addr; 276 | 277 | const result_ptr = alignedAllocate( 278 | thread_heap.?, 279 | @as(u6, @intCast(ptr_align_log2)), 280 | len, 281 | ) orelse return null; 282 | 283 | if (options.assertions) { 284 | const usable_size = usableSize(result_ptr); 285 | assert(len <= usable_size); 286 | } 287 | return @as([*]u8, @ptrCast(result_ptr)); 288 | } 289 | fn resize(state_ptr: *anyopaque, buf: []u8, buf_align: u8, new_len: usize, ret_addr: usize) bool { 290 | _ = state_ptr; 291 | _ = ret_addr; 292 | 293 | const usable_size = usableSize(buf.ptr); 294 | assert(buf.len <= usable_size); 295 | if (options.assertions) { 296 | assert(std.mem.isAligned(@intFromPtr(buf.ptr), std.math.shl(usize, 1, buf_align))); 297 | } 298 | 299 | return usable_size >= new_len; 300 | } 301 | fn free(state_ptr: *anyopaque, buf: []u8, buf_align: u8, ret_addr: usize) void { 302 | _ = state_ptr; 303 | _ = ret_addr; 304 | if (options.assertions) { 305 | const usable_size = usableSize(buf.ptr); 306 | assert(buf.len <= usable_size); 307 | assert(std.mem.isAligned(@intFromPtr(buf.ptr), std.math.shl(usize, 1, buf_align))); 308 | } 309 | const span: *Span = getSpanPtr(buf.ptr).?; 310 | if (span.size_class < SIZE_CLASS_COUNT) { 311 | @setCold(false); 312 | deallocateSmallOrMedium(span, @alignCast(@as(*anyopaque, @ptrCast(buf.ptr)))); 313 | } else if (span.size_class == SIZE_CLASS_LARGE) { 314 | deallocateLarge(span); 315 | } else { 316 | deallocateHuge(span); 317 | } 318 | } 319 | 320 | /// Maximum allocation size to avoid integer overflow 321 | inline fn maxAllocSize() @TypeOf(span_size.*) { 322 | return std.math.maxInt(usize) - span_size.*; 323 | } 324 | 325 | /// A span can either represent a single span of memory pages with 
size declared by span_map_count configuration variable, 326 | /// or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to both a single 327 | /// span or a super span. A super span can further be divided into multiple spans (or this, super spans), where the first 328 | /// (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans 329 | /// that are still alive and mapped in virtual memory, and once all subspans and master have been unmapped the entire 330 | /// superspan region is released and unmapped (on Windows for example, the entire superspan range has to be released 331 | /// in the same call to release the virtual memory range, but individual subranges can be decommitted individually 332 | /// to reduce physical memory use). 333 | const Span = extern struct { 334 | /// Free list 335 | free_list: ?*align(SMALL_GRANULARITY) anyopaque align(SMALL_GRANULARITY), 336 | /// Total block count of size class 337 | block_count: u32, 338 | /// Size class 339 | size_class: u32, 340 | /// Index of last block initialized in free list 341 | free_list_limit: u32, 342 | /// Number of used blocks remaining when in partial state 343 | used_count: u32, 344 | /// Deferred free list 345 | free_list_deferred: ?*align(SMALL_GRANULARITY) anyopaque, // atomic 346 | /// Size of deferred free list, or list of spans when part of a cache list 347 | list_size: u32, 348 | /// Size of a block 349 | block_size: u32, 350 | /// Flags and counters 351 | flags: SpanFlags, 352 | /// Number of spans 353 | span_count: u32, 354 | /// Total span counter for master spans 355 | total_spans: u32, 356 | /// Offset from master span for subspans 357 | offset_from_master: u32, 358 | /// Remaining span counter, for master spans 359 | remaining_spans: u32, // atomic 360 | /// Alignment offset 361 | align_offset: u32, 362 | /// Owning heap 363 | heap: *Heap, 364 | /// Next span 365 | next: ?*Span, 366 | /// Previous span 367 | prev: ?*Span, 368 | }; 369 | 370 | comptime { 371 | if (@sizeOf(Span) > SPAN_HEADER_SIZE) @compileError("span size mismatch"); 372 | } 373 | 374 | const SpanCache = extern struct { 375 | count: usize, 376 | span: [MAX_THREAD_SPAN_CACHE]*Span, 377 | }; 378 | 379 | const SpanLargeCache = extern struct { 380 | count: usize, 381 | span: [MAX_THREAD_SPAN_LARGE_CACHE]*Span, 382 | }; 383 | 384 | const HeapSizeClass = extern struct { 385 | /// Free list of active span 386 | free_list: ?*align(SMALL_GRANULARITY) anyopaque, 387 | /// Double linked list of partially used spans with free blocks. 388 | /// Previous span pointer in head points to tail span of list. 
389 | partial_span: ?*Span, 390 | /// Early level cache of fully free spans 391 | cache: ?*Span, 392 | }; 393 | 394 | /// Control structure for a heap, either a thread heap or a first class heap if enabled 395 | const Heap = extern struct { 396 | /// Owning thread ID 397 | owner_thread: if (builtin.single_threaded) [0]u8 else ThreadId, 398 | /// Free lists for each size class 399 | size_class: [SIZE_CLASS_COUNT]HeapSizeClass, 400 | /// Arrays of fully freed spans, single span 401 | span_cache: if (enable_thread_cache) SpanCache else [0]u8, 402 | /// List of deferred free spans (single linked list) 403 | span_free_deferred: ?*Span, // atomic 404 | /// Number of full spans 405 | full_span_count: usize, 406 | /// Mapped but unused spans 407 | span_reserve: ?*Span, 408 | /// Master span for mapped but unused spans 409 | span_reserve_master: ?*Span, 410 | /// Number of mapped but unused spans 411 | spans_reserved: u32, 412 | /// Child count 413 | child_count: u32, // atomic 414 | /// Next heap in id list 415 | next_heap: ?*Heap, 416 | /// Next heap in orphan list 417 | next_orphan: ?*Heap, 418 | /// Heap ID 419 | id: u32, 420 | /// Finalization state flag 421 | finalize: i8, 422 | /// Master heap owning the memory pages 423 | master_heap: ?*Heap, 424 | 425 | /// Arrays of fully freed spans, large spans with > 1 span count 426 | span_large_cache: if (enable_thread_cache) ([LARGE_CLASS_COUNT - 1]SpanLargeCache) else [0]u8, 427 | }; 428 | 429 | /// Size class for defining a block size bucket 430 | const SizeClass = extern struct { 431 | /// Size of blocks in this class 432 | block_size: u32, 433 | /// Number of blocks in each chunk 434 | block_count: u16, 435 | /// Class index this class is merged with 436 | class_idx: u16, 437 | }; 438 | 439 | comptime { 440 | if (@sizeOf(SizeClass) != 8) @compileError("Size class size mismatch"); 441 | } 442 | 443 | const GlobalCache = extern struct { 444 | /// Cache lock 445 | lock: Lock, // atomic 446 | /// Cache count 447 | count: u32, 448 | /// Cached spans 449 | span: [global_cache_multiplier * MAX_THREAD_SPAN_CACHE]*Span, 450 | /// Unlimited cache overflow 451 | overflow: ?*Span, 452 | }; 453 | 454 | /// Default span size (64KiB) 455 | const default_span_size = 64 * 1024; 456 | const default_span_size_shift = log2(default_span_size); 457 | inline fn calculateSpanMask(input_span_size: anytype) @TypeOf(input_span_size) { 458 | assert(input_span_size > 0); 459 | const T = @TypeOf(input_span_size); 460 | const SmallestInt = if (T == comptime_int) std.math.IntFittingRange(0, input_span_size) else T; 461 | if (T == comptime_int) { 462 | assert(@popCount(@as(SmallestInt, input_span_size)) == 1); 463 | } else {} 464 | return ~@as(usize, input_span_size - 1); 465 | } 466 | 467 | // Global data 468 | 469 | /// Pointer to backing allocator. If one is specified at comptime, 470 | /// this is a pointer to a comptime-known read-only interface. 471 | /// Otherwise, this is actually a mutable pointer. 
472 | const backing_allocator: *const Allocator = options.backing_allocator orelse &backing_allocator_mut; 473 | var backing_allocator_mut: std.mem.Allocator = if (known_allocator) @compileError("Don't reference") else undefined; 474 | 475 | var initialized: bool = false; 476 | var main_thread_id: ThreadId = 0; 477 | const page_size: usize = std.mem.page_size; 478 | /// Shift to divide by page size 479 | const page_size_shift: std.math.Log2Int(usize) = log2(page_size); 480 | /// Granularity at which memory pages are mapped by OS 481 | const map_granularity: usize = page_size; 482 | 483 | /// Returns `*const Int` if `configurable_sizes`. Otherwise returns `*const comptime_int`. 484 | fn ConfigurableIntPtr(comptime Int: type) type { 485 | if (configurable_sizes) return *const Int; 486 | return *const comptime_int; 487 | } 488 | 489 | /// Size of a span of memory pages 490 | const span_size: ConfigurableIntPtr(usize) = if (!configurable_sizes) &default_span_size else &span_size_mut; 491 | var span_size_mut: usize = if (configurable_sizes) default_span_size else @compileError("Don't reference"); 492 | 493 | /// Shift to divide by span size 494 | const span_size_shift: ConfigurableIntPtr(std.math.Log2Int(usize)) = if (!configurable_sizes) &default_span_size_shift else &span_size_shift_mut; 495 | var span_size_shift_mut: std.math.Log2Int(usize) = if (configurable_sizes) default_span_size_shift else @compileError("Don't reference"); 496 | 497 | /// Mask to get to start of a memory span 498 | const span_mask: ConfigurableIntPtr(usize) = if (!configurable_sizes) &calculateSpanMask(span_size.*) else &span_mask_mut; 499 | var span_mask_mut: usize = if (configurable_sizes) calculateSpanMask(default_span_size) else @compileError("Don't reference"); 500 | 501 | /// Number of spans to map in each map call 502 | var span_map_count: usize = 0; 503 | 504 | /// Number of spans to keep reserved in each heap 505 | var heap_reserve_count: usize = 0; 506 | var global_size_classes: [SIZE_CLASS_COUNT]SizeClass = blk: { 507 | var global_size_classes_init = [_]SizeClass{.{ .block_size = 0, .block_count = 0, .class_idx = 0 }} ** SIZE_CLASS_COUNT; 508 | if (!configurable_sizes) { 509 | globalSmallSizeClassesInit(&global_size_classes_init, span_size); 510 | } 511 | break :blk global_size_classes_init; 512 | }; 513 | 514 | /// Run-time size limit of medium blocks 515 | const medium_size_limit_runtime: ConfigurableIntPtr(usize) = if (!configurable_sizes) &calculateMediumSizeLimitRuntime(span_size.*) else &medium_size_limit_runtime_mut; 516 | var medium_size_limit_runtime_mut: usize = if (configurable_sizes) undefined else @compileError("Don't reference"); 517 | 518 | var heap_id_counter: u32 = 0; // atomic 519 | 520 | var global_span_cache = if (enable_global_cache) ([_]GlobalCache{.{ 521 | .lock = .unlocked, 522 | .count = 0, 523 | .span = undefined, 524 | .overflow = null, 525 | }} ** LARGE_CLASS_COUNT) else @compileError(""); 526 | 527 | var global_reserve: ?*Span = null; 528 | var global_reserve_count: usize = 0; 529 | var global_reserve_master: ?*Span = null; 530 | var all_heaps: [options.heap_array_size]?*Heap = .{null} ** options.heap_array_size; 531 | // TODO: Is this comment accurate? If so, does that mean that 532 | // this isn't needed if we're not supporting huge pages? 
533 | /// Used to restrict access to mapping memory for huge pages 534 | var global_lock: Lock = .unlocked; // atomic 535 | /// Orphaned heaps 536 | var orphan_heaps: ?*Heap = null; 537 | 538 | /// Thread local heap and ID 539 | threadlocal var thread_heap: ?*Heap = null; 540 | 541 | /// Fast thread ID 542 | const ThreadId = if (builtin.single_threaded) u0 else std.Thread.Id; 543 | inline fn getThreadId() ThreadId { 544 | comptime if (builtin.single_threaded) return 0; 545 | return std.Thread.getCurrentId(); 546 | } 547 | 548 | /// Set the current thread heap 549 | inline fn setThreadHeap(heap: ?*Heap) void { 550 | thread_heap = heap; 551 | if (!builtin.single_threaded) { 552 | if (heap != null) { 553 | heap.?.owner_thread = getThreadId(); 554 | } 555 | } 556 | } 557 | 558 | // Low level memory map/unmap 559 | 560 | /// Map more virtual memory 561 | /// size is number of bytes to map 562 | /// offset receives the offset in bytes from start of mapped region 563 | /// returns address to start of mapped region to use 564 | inline fn memoryMap(size: usize, offset: *usize) ?*align(page_size) anyopaque { 565 | assert(size != 0); // invalid mmap size 566 | assert(size % page_size == 0); // invalid mmap size 567 | // Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity 568 | const padding: usize = if (size >= span_size.* and span_size.* > map_granularity) span_size.* else 0; 569 | var ptr: *align(page_size) anyopaque = blk: { 570 | const ptr = backing_allocator.rawAlloc( 571 | size + padding, 572 | comptime log2(page_size), 573 | @returnAddress(), 574 | ) orelse return null; 575 | break :blk @alignCast(@ptrCast(ptr)); 576 | }; 577 | if (padding != 0) { 578 | const final_padding: usize = padding - (@intFromPtr(ptr) & ~@as(usize, span_mask.*)); 579 | assert(final_padding <= span_size.*); 580 | assert(final_padding % 8 == 0); 581 | ptr = @alignCast(@ptrCast(@as([*]u8, @ptrCast(ptr)) + final_padding)); 582 | offset.* = final_padding >> 3; 583 | } 584 | assert(size < span_size.* or (@intFromPtr(ptr) & ~@as(usize, span_mask.*)) == 0); 585 | return ptr; 586 | } 587 | 588 | /// Unmap virtual memory 589 | /// address is the memory address to unmap, as returned from _memory_map 590 | /// size is the number of bytes to unmap, which might be less than full region for a partial unmap 591 | /// offset is the offset in bytes to the actual mapped region, as set by _memory_map 592 | /// release is set to 0 for partial unmap, or size of entire range for a full unmap 593 | inline fn memoryUnmap(address_init: ?*anyopaque, offset: usize, release_init: usize) void { 594 | var address: *anyopaque = address_init orelse return; 595 | var release = release_init; 596 | 597 | // I don't think we want to/can do partial unmappings, and it 598 | // seems like the zig stdlib discourages it as well. 
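            // (Inferred from the std.mem.Allocator contract rather than stated upstream:
            // rawFree must be handed the full buffer that rawAlloc returned, with its
            // original alignment, so a partial unmap of a sub-range cannot be expressed
            // through a generic backing allocator.)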
599 | assert(release != 0); 600 | // TODO: Investigate why this causes issues in configs other than the default 601 | if (false) { 602 | assert(offset != 0); 603 | } 604 | assert(release >= page_size); // Invalid unmap size 605 | assert(release % page_size == 0); // Invalid unmap size 606 | 607 | address = @as([*]u8, @ptrCast(address)) - (offset << 3); 608 | if ((release >= span_size.*) and (span_size.* > map_granularity)) { 609 | // Padding is always one span size 610 | release += span_size.*; 611 | } 612 | 613 | if (!never_unmap) { 614 | backing_allocator.rawFree(@as([*]u8, @ptrCast(address))[0..release], page_size_shift, @returnAddress()); 615 | } 616 | } 617 | 618 | /// Declare the span to be a subspan and store distance from master span and span count 619 | inline fn spanMarkAsSubspanUnlessMaster(master: *Span, subspan: *Span, span_count: usize) void { 620 | assert(subspan != master or subspan.flags.master); // Span master pointer and/or flag mismatch 621 | if (subspan != master) { 622 | subspan.flags = .{ .subspan = true }; 623 | assert(@intFromPtr(subspan) > @intFromPtr(master)); 624 | subspan.offset_from_master = @as(u32, @intCast((@intFromPtr(subspan) - @intFromPtr(master)) >> span_size_shift.*)); 625 | subspan.align_offset = 0; 626 | } 627 | subspan.span_count = @as(u32, @intCast(span_count)); 628 | } 629 | 630 | /// Use global reserved spans to fulfill a memory map request (reserve size must be checked by caller) 631 | inline fn globalGetReservedSpans(span_count: usize) ?*Span { 632 | const span: *Span = global_reserve.?; 633 | spanMarkAsSubspanUnlessMaster(global_reserve_master.?, span, span_count); 634 | global_reserve_count -= span_count; 635 | if (global_reserve_count != 0) { 636 | global_reserve = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(span)) + (span_count << span_size_shift.*)); 637 | } else { 638 | global_reserve = null; 639 | } 640 | return span; 641 | } 642 | 643 | /// Store the given spans as global reserve (must only be called from within new heap allocation, not thread safe) 644 | inline fn globalSetReservedSpans(master: *Span, reserve: *Span, reserve_span_count: usize) void { 645 | global_reserve_master = master; 646 | global_reserve_count = reserve_span_count; 647 | global_reserve = reserve; 648 | } 649 | 650 | // Span linked list management 651 | 652 | /// Add a span to double linked list at the head 653 | inline fn spanDoubleLinkListAdd(head: *?*Span, span: *Span) void { 654 | if (head.* != null) { 655 | head.*.?.prev = span; 656 | } 657 | span.next = head.*; 658 | head.* = span; 659 | } 660 | 661 | /// Pop head span from double linked list 662 | inline fn spanDoubleLinkListPopHead(head: *?*Span, span: *Span) void { 663 | assert(head.* == span); // Linked list corrupted 664 | const old_head: *Span = head.*.?; 665 | head.* = old_head.next; 666 | } 667 | 668 | /// Remove a span from double linked list 669 | inline fn spanDoubleLinkListRemove(maybe_head: *?*Span, span: *Span) void { 670 | assert(maybe_head.* != null); // Linked list corrupted 671 | const head = maybe_head; 672 | if (head.* == span) { 673 | head.* = span.next; 674 | return; 675 | } 676 | 677 | const maybe_next_span: ?*Span = span.next; 678 | const prev_span: *Span = span.prev.?; 679 | prev_span.next = maybe_next_span; 680 | if (maybe_next_span != null) { 681 | @setCold(false); 682 | maybe_next_span.?.prev = prev_span; 683 | } 684 | } 685 | 686 | // Span control 687 | 688 | inline fn getSpanPtr(ptr: *anyopaque) ?*Span { 689 | const span_addr = @intFromPtr(ptr) & span_mask.*; 690 | return 
@as(?*Span, @ptrFromInt(span_addr)); 691 | } 692 | 693 | /// Use reserved spans to fulfill a memory map request (reserve size must be checked by caller) 694 | inline fn spanMapFromReserve(heap: *Heap, span_count: usize) ?*Span { 695 | //Update the heap span reserve 696 | const span: ?*Span = heap.span_reserve; 697 | heap.span_reserve = ptrAndAlignCast(?*Span, @as([*]u8, @ptrCast(span)) + (span_count * span_size.*)); 698 | heap.spans_reserved -= @as(u32, @intCast(span_count)); 699 | spanMarkAsSubspanUnlessMaster(heap.span_reserve_master.?, span.?, span_count); 700 | return span; 701 | } 702 | 703 | /// Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size 704 | inline fn spanAlignCount(span_count: usize) usize { 705 | var request_count: usize = if (span_count > span_map_count) span_count else span_map_count; 706 | if ((page_size > span_size.*) and ((request_count * span_size.*) % page_size) != 0) { 707 | request_count += span_map_count - (request_count % span_map_count); 708 | } 709 | return request_count; 710 | } 711 | 712 | /// Setup a newly mapped span 713 | inline fn spanInitialize(span: *Span, total_span_count: usize, span_count: usize, align_offset: usize) void { 714 | span.total_spans = @as(u32, @intCast(total_span_count)); 715 | span.span_count = @as(u32, @intCast(span_count)); 716 | span.align_offset = @as(u32, @intCast(align_offset)); 717 | span.flags = .{ .master = true }; 718 | assert(@as(u32, @bitCast(span.flags)) == 1); 719 | // TODO: Is there a reason for this to be atomic? 720 | // Intuitively it seems like there wouldn't be, since the span in question has presumably 721 | // just been mapped, and thus wouldn't be accessible by any other thread at present. 722 | @atomicStore(u32, &span.remaining_spans, @as(u32, @intCast(total_span_count)), .monotonic); 723 | } 724 | 725 | /// Map an aligned set of spans, taking configured mapping granularity and the page size into account 726 | fn spanMapAlignedCount(heap: *Heap, span_count: usize) ?*Span { 727 | // If we already have some, but not enough, reserved spans, release those to heap cache and map a new 728 | // full set of spans. Otherwise we would waste memory if page size > span size (huge pages) 729 | const aligned_span_count: usize = spanAlignCount(span_count); 730 | var align_offset: usize = 0; 731 | const span: *Span = @as(?*Span, @ptrCast(memoryMap(aligned_span_count * span_size.*, &align_offset))) orelse return null; 732 | spanInitialize(span, aligned_span_count, span_count, align_offset); 733 | if (aligned_span_count > span_count) { 734 | const reserved_spans: *Span = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(span)) + (span_count * span_size.*)); 735 | var reserved_count: usize = aligned_span_count - span_count; 736 | if (heap.spans_reserved != 0) { 737 | spanMarkAsSubspanUnlessMaster(heap.span_reserve_master.?, heap.span_reserve.?, heap.spans_reserved); 738 | heapCacheInsert(heap, heap.span_reserve.?); 739 | } 740 | // TODO: Is this ever true? Empirically it seems like no, and if the comment on global_lock is true, 741 | // then the assumed precondition of this branch would indicate that it is never allowed to happen anyways. 
742 |                 if (reserved_count > heap_reserve_count) {
743 |                     // If huge pages or eager span map count, the global reserve spin lock is held by caller, spanMap
744 |                     if (options.assertions) {
745 |                         assert(@atomicLoad(Lock, &global_lock, .monotonic) == .locked); // Global spin lock not held as expected
746 |                     }
747 |                     const remain_count: usize = reserved_count - heap_reserve_count;
748 |                     reserved_count = heap_reserve_count;
749 |                     const remain_span: *Span = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(reserved_spans)) + (reserved_count * span_size.*));
750 | 
751 |                     if (global_reserve != null) {
752 |                         spanMarkAsSubspanUnlessMaster(global_reserve_master.?, global_reserve.?, global_reserve_count);
753 |                         spanUnmap(global_reserve.?);
754 |                     }
755 |                     globalSetReservedSpans(span, remain_span, remain_count);
756 |                 }
757 |                 heapSetReservedSpans(heap, span, reserved_spans, @as(u32, @intCast(reserved_count)));
758 |             }
759 |             return span;
760 |         }
761 | 
762 |         /// Map in memory pages for the given number of spans (or use previously reserved pages)
763 |         inline fn spanMap(heap: *Heap, span_count: usize) ?*Span {
764 |             @setCold(true);
765 |             if (span_count <= heap.spans_reserved)
766 |                 return spanMapFromReserve(heap, span_count);
767 |             var span: ?*Span = null;
768 |             const use_global_reserve: bool = (page_size > span_size.*) or (span_map_count > heap_reserve_count);
769 |             if (use_global_reserve) {
770 |                 // If huge pages, make sure only one thread maps more memory to avoid bloat
771 |                 global_lock.acquire();
772 |                 if (global_reserve_count >= span_count) {
773 |                     var reserve_count: usize = if (heap.spans_reserved == 0) heap_reserve_count else span_count;
774 |                     reserve_count = @min(reserve_count, global_reserve_count);
775 |                     span = globalGetReservedSpans(reserve_count);
776 |                     if (span != null) {
777 |                         if (reserve_count > span_count) {
778 |                             const reserved_span: *Span = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(span)) + (span_count << span_size_shift.*));
779 |                             heapSetReservedSpans(heap, global_reserve_master, reserved_span, @as(u32, @intCast(reserve_count - span_count)));
780 |                         }
781 |                         // Already marked as subspan in globalGetReservedSpans
782 |                         span.?.span_count = @as(u32, @intCast(span_count));
783 |                     }
784 |                 }
785 |             }
786 |             defer if (use_global_reserve) global_lock.release();
787 | 
788 |             if (span == null) {
789 |                 span = spanMapAlignedCount(heap, span_count);
790 |             }
791 |             return span;
792 |         }
793 | 
794 |         /// Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
795 |         fn spanUnmap(span: *Span) void {
796 |             assert(span.flags.master or span.flags.subspan); // Span flag corrupted
797 |             assert(!span.flags.master or !span.flags.subspan); // Span flag corrupted
798 | 
799 |             const is_master = span.flags.master;
800 |             const master: *Span = if (!is_master)
801 |                 ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(span)) - (span.offset_from_master * span_size.*))
802 |             else
803 |                 span;
804 |             assert(is_master or span.flags.subspan); // Span flag corrupted
805 |             assert(master.flags.master); // Span flag corrupted
806 | 
807 |             if (!is_master) {
808 |                 assert(span.align_offset == 0); // Span align offset corrupted
809 |                 // TODO: partial unmapping doesn't really work with a generic backing allocator,
810 |                 // and it seems like the zig stdlib discourages it as well.
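                // (Observation, not an upstream comment: instead of decommitting subspans
                // individually as the disabled branch below would, this port releases the
                // whole super span in a single memoryUnmap call once the master's
                // remaining_spans counter drops to zero; see the end of this function.)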
811 | 812 | if (false) { 813 | // Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master) 814 | if (span_size.* >= page_size) { 815 | memoryUnmap(span, 0, 0); 816 | } 817 | } 818 | } else { 819 | // Special double flag to denote an unmapped master 820 | // It must be kept in memory since span header must be used 821 | @as(*SpanFlags.BackingInt, @ptrCast(&span.flags)).* |= comptime @as(SpanFlags.BackingInt, @bitCast(SpanFlags{ 822 | .aligned_blocks = false, 823 | .master = true, 824 | .subspan = true, 825 | .unmapped_master = true, 826 | })); 827 | } 828 | 829 | std.debug.assert(span.span_count != 0); 830 | const prev_remaining_spans: i64 = @atomicRmw(u32, &master.remaining_spans, .Sub, span.span_count, .monotonic); 831 | if (prev_remaining_spans - span.span_count <= 0) { 832 | // Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span 833 | assert(master.flags.master and master.flags.subspan); // Span flag corrupted 834 | memoryUnmap(master, master.align_offset, @as(usize, master.total_spans) * span_size.*); 835 | } 836 | } 837 | 838 | /// Move the span (used for small or medium allocations) to the heap thread cache 839 | inline fn spanReleaseToCache(heap: *Heap, span: *Span) void { 840 | assert(heap == span.heap); // Span heap pointer corrupted 841 | assert(span.size_class < SIZE_CLASS_COUNT); // Invalid span size class 842 | assert(span.span_count == 1); // Invalid span count 843 | if (heap.finalize == 0) { 844 | if (heap.size_class[span.size_class].cache != null) { 845 | heapCacheInsert(heap, heap.size_class[span.size_class].cache.?); 846 | } 847 | heap.size_class[span.size_class].cache = span; 848 | } else { 849 | spanUnmap(span); 850 | } 851 | } 852 | 853 | /// Initialize a (partial) free list up to next system memory page, while reserving the first block 854 | /// as allocated, returning number of blocks in list 855 | fn freeListPartialInit(list: *?*anyopaque, first_block: *?*anyopaque, page_start: *anyopaque, block_start: *anyopaque, block_count_init: u32, block_size: u32) u32 { 856 | var block_count = block_count_init; 857 | assert(block_count != 0); // Internal failure 858 | first_block.* = block_start; 859 | if (block_count > 1) { 860 | var free_block = ptrAndAlignCast(*align(SMALL_GRANULARITY) anyopaque, @as([*]u8, @ptrCast(block_start)) + block_size); 861 | var block_end = ptrAndAlignCast(*align(SMALL_GRANULARITY) anyopaque, @as([*]u8, @ptrCast(block_start)) + (@as(usize, block_size) * block_count)); 862 | // If block size is less than half a memory page, bound init to next memory page boundary 863 | if (block_size < (page_size >> 1)) { 864 | const page_end = ptrAndAlignCast(*align(SMALL_GRANULARITY) anyopaque, @as([*]u8, @ptrCast(page_start)) + page_size); 865 | if (@intFromPtr(page_end) < @intFromPtr(block_end)) { 866 | block_end = page_end; 867 | } 868 | } 869 | list.* = free_block; 870 | block_count = 2; 871 | var next_block = ptrAndAlignCast(*align(SMALL_GRANULARITY) anyopaque, @as([*]u8, @ptrCast(free_block)) + block_size); 872 | while (@intFromPtr(next_block) < @intFromPtr(block_end)) { 873 | ptrAndAlignCast(*?*anyopaque, free_block).* = next_block; 874 | free_block = next_block; 875 | block_count += 1; 876 | next_block = @as(*align(SMALL_GRANULARITY) anyopaque, @alignCast(@as(*anyopaque, @ptrCast(@as([*]u8, @ptrCast(next_block)) + block_size)))); 877 | } 878 | ptrAndAlignCast(*?*anyopaque, free_block).* = null; 879 | } else { 880 | list.* = null; 881 | } 882 | return 
block_count; 883 | } 884 | 885 | /// Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list 886 | fn spanInitializeNew(heap: *Heap, heap_size_class: *HeapSizeClass, span: *Span, class_idx: u32) ?*align(SMALL_GRANULARITY) anyopaque { 887 | assert(span.span_count == 1); // Internal failure 888 | const size_class: *SizeClass = &global_size_classes[class_idx]; 889 | span.size_class = class_idx; 890 | span.heap = heap; 891 | // span.flags &= ~SPAN_FLAG_ALIGNED_BLOCKS; 892 | @as(*SpanFlags.BackingInt, @ptrCast(&span.flags)).* &= comptime @as(u32, @bitCast(SpanFlags{ 893 | .master = true, 894 | .subspan = true, 895 | .aligned_blocks = false, 896 | .unmapped_master = true, 897 | })); 898 | span.block_size = size_class.block_size; 899 | span.block_count = size_class.block_count; 900 | span.free_list = null; 901 | span.list_size = 0; 902 | atomicStorePtrRelease(&span.free_list_deferred, null); 903 | 904 | //Setup free list. Only initialize one system page worth of free blocks in list 905 | var block: ?*align(SMALL_GRANULARITY) anyopaque = undefined; 906 | span.free_list_limit = freeListPartialInit( 907 | &heap_size_class.free_list, 908 | &block, 909 | span, 910 | @as([*]align(SMALL_GRANULARITY) u8, @ptrCast(span)) + SPAN_HEADER_SIZE, 911 | size_class.block_count, 912 | size_class.block_size, 913 | ); 914 | // Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized 915 | if (span.free_list_limit < span.block_count) { 916 | spanDoubleLinkListAdd(&heap_size_class.partial_span, span); 917 | span.used_count = span.free_list_limit; 918 | } else { 919 | heap.full_span_count += 1; 920 | span.used_count = span.block_count; 921 | } 922 | return block; 923 | } 924 | 925 | fn spanExtractFreeListDeferred(span: *Span) void { 926 | // We need acquire semantics on the CAS operation since we are interested in the list size 927 | // Refer to deallocateDeferSmallOrMedium for further comments on this dependency 928 | 929 | // TODO: is this OK? 
According to Protty `@atomicRmw` is already a loop like the one below
930 |             span.free_list = atomicExchangePtrAcquire(&span.free_list_deferred, INVALID_POINTER);
931 |             if (false) while (true) {
932 |                 span.free_list = atomicExchangePtrAcquire(&span.free_list_deferred, INVALID_POINTER);
933 |                 if (span.free_list != INVALID_POINTER) break;
934 |             };
935 |             span.used_count -= span.list_size;
936 |             span.list_size = 0;
937 |             atomicStorePtrRelease(&span.free_list_deferred, null);
938 |         }
939 | 
940 |         inline fn spanIsFullyUtilized(span: *Span) bool {
941 |             assert(span.free_list_limit <= span.block_count); // Span free list corrupted
942 |             return span.free_list == null and (span.free_list_limit == span.block_count);
943 |         }
944 | 
945 |         fn spanFinalize(heap: *Heap, iclass: usize, span: *Span, list_head: ?*?*Span) bool {
946 |             const free_list = heap.size_class[iclass].free_list.?;
947 |             const class_span: ?*Span = getSpanPtr(free_list);
948 |             if (span == class_span) {
949 |                 // Adopt the heap class free list back into the span free list
950 |                 var block: ?*align(SMALL_GRANULARITY) anyopaque = span.free_list;
951 |                 var last_block: @TypeOf(block) = null;
952 |                 while (block != null) {
953 |                     last_block = block;
954 |                     block = @as(*@TypeOf(block), @ptrCast(block)).*;
955 |                 }
956 |                 var free_count: u32 = 0;
957 |                 block = free_list;
958 |                 while (block != null) {
959 |                     free_count += 1;
960 |                     block = @as(*@TypeOf(block), @ptrCast(block)).*;
961 |                 }
962 |                 if (last_block != null) {
963 |                     @as(*@TypeOf(last_block), @ptrCast(last_block)).* = free_list;
964 |                 } else {
965 |                     span.free_list = free_list;
966 |                 }
967 |                 heap.size_class[iclass].free_list = null;
968 |                 span.used_count -= free_count;
969 |             }
970 |             // TODO: should this leak check be kept? And should it be an assertion?
971 |             if (false) {
972 |                 assert(span.list_size == span.used_count); // If this assert triggers you have memory leaks
973 |             }
974 |             if (span.list_size == span.used_count) {
975 |                 // This function only used for spans in double linked lists
976 |                 if (list_head != null) {
977 |                     spanDoubleLinkListRemove(list_head.?, span);
978 |                 }
979 |                 spanUnmap(span);
980 |                 return true;
981 |             }
982 |             return false;
983 |         }
984 | 
985 |         // Global cache
986 | 
987 |         /// Finalize a global cache
988 |         fn globalCacheFinalize(cache: *GlobalCache) void {
989 |             comptime assert(enable_global_cache);
990 | 
991 |             cache.lock.acquire();
992 |             defer cache.lock.release();
993 | 
994 |             for (@as([*]*Span, &cache.span)[0..cache.count]) |span| {
995 |                 spanUnmap(span);
996 |             }
997 |             cache.count = 0;
998 | 
999 |             while (cache.overflow) |overflow_span| {
1000 |                 cache.overflow = overflow_span.next;
1001 |                 spanUnmap(overflow_span);
1002 |             }
1003 |         }
1004 | 
1005 |         fn globalCacheInsertSpans(span: [*]*Span, span_count: usize, count: usize) void {
1006 |             comptime assert(enable_global_cache);
1007 | 
1008 |             const cache_limit: usize = if (span_count == 1)
1009 |                 global_cache_multiplier * MAX_THREAD_SPAN_CACHE
1010 |             else
1011 |                 global_cache_multiplier * (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1));
1012 | 
1013 |             const cache: *GlobalCache = &global_span_cache[span_count - 1];
1014 | 
1015 |             var insert_count: usize = count;
1016 |             {
1017 |                 cache.lock.acquire();
1018 |                 defer cache.lock.release();
1019 | 
1020 |                 if ((cache.count + insert_count) > cache_limit)
1021 |                     insert_count = cache_limit - cache.count;
1022 | 
1023 |                 // memcpy(cache->span + cache->count, span, sizeof(Span*) * insert_count);
1024 |                 for ((@as([*]*Span, &cache.span) + cache.count)[0..insert_count], 0..) |*dst, i| {
1024 |             for ((@as([*]*Span, &cache.span) + cache.count)[0..insert_count], 0..) |*dst, i| {
1025 |                 dst.* = span[i];
1026 |             }
1027 |             cache.count += @as(u32, @intCast(insert_count));
1028 |
1029 |             while ( // zig fmt: off
1030 |                 if (comptime enable_unlimited_cache)
1031 |                     (insert_count < count)
1032 |                 else
1033 |                     // Enable unlimited cache if huge pages, or we will leak since it is unlikely that an entire huge page
1034 |                     // will be unmapped, and we're unable to partially decommit a huge page
1035 |                     ((page_size > span_size.*) and (insert_count < count))
1036 |                 // zig fmt: on
1037 |             ) {
1038 |                 const current_span: *Span = span[insert_count];
1039 |                 insert_count += 1;
1040 |                 current_span.next = cache.overflow;
1041 |                 cache.overflow = current_span;
1042 |             }
1043 |         }
1044 |
1045 |         var keep: ?*Span = null;
1046 |         for (span[insert_count..count]) |current_span| {
1047 |             // Keep master spans that have remaining subspans to avoid dangling them
1048 |             if (current_span.flags.master and (@atomicLoad(u32, &current_span.remaining_spans, .monotonic) > current_span.span_count)) {
1049 |                 current_span.next = keep;
1050 |                 keep = current_span;
1051 |             } else {
1052 |                 spanUnmap(current_span);
1053 |             }
1054 |         }
1055 |
1056 |         if (keep != null) {
1057 |             cache.lock.acquire();
1058 |             defer cache.lock.release();
1059 |
1060 |             var islot: usize = 0;
1061 |             while (keep != null) {
1062 |                 while (islot < cache.count) : (islot += 1) {
1063 |                     const current_span: *Span = cache.span[islot];
1064 |                     if (!current_span.flags.master or
1065 |                         (current_span.flags.master and (@atomicLoad(u32, &current_span.remaining_spans, .monotonic) <= current_span.span_count)))
1066 |                     {
1067 |                         spanUnmap(current_span);
1068 |                         cache.span[islot] = keep.?;
1069 |                         break;
1070 |                     }
1071 |                 }
1072 |                 if (islot == cache.count) break;
1073 |                 keep = keep.?.next;
1074 |             }
1075 |
1076 |             if (keep != null) {
1077 |                 var tail: *Span = keep.?;
1078 |                 while (tail.next != null) {
1079 |                     tail = tail.next.?;
1080 |                 }
1081 |                 tail.next = cache.overflow;
1082 |                 cache.overflow = keep;
1083 |             }
1084 |         }
1085 |     }
1086 |
1087 |     fn globalCacheExtractSpans(span: [*]*Span, span_count: usize, count: usize) usize {
1088 |         comptime assert(enable_global_cache);
1089 |
1090 |         const cache: *GlobalCache = &global_span_cache[span_count - 1];
1091 |
1092 |         var extract_count: usize = 0;
1093 |         cache.lock.acquire();
1094 |         defer cache.lock.release();
1095 |
1096 |         const want = @as(u32, @intCast(@min(count - extract_count, cache.count)));
1097 |
1098 |         // memcpy(span + extract_count, cache->span + (cache->count - want), sizeof(span_t*) * want);
1099 |         for ((@as([*]*Span, &cache.span) + (cache.count - want))[0..want], 0..)
|src, i| { 1100 | (span + extract_count)[i] = src; 1101 | } 1102 | 1103 | cache.count -= want; 1104 | extract_count += want; 1105 | 1106 | while (extract_count < count) { 1107 | const current_span: *Span = cache.overflow orelse break; 1108 | span[extract_count] = current_span; 1109 | extract_count += 1; 1110 | cache.overflow = current_span.next; 1111 | } 1112 | 1113 | if (options.assertions) { 1114 | for (span[0..extract_count]) |span_elem| { 1115 | assert(span_elem.span_count == span_count); 1116 | } 1117 | } 1118 | 1119 | return extract_count; 1120 | } 1121 | 1122 | // Heap control 1123 | 1124 | /// Store the given spans as reserve in the given heap 1125 | inline fn heapSetReservedSpans(heap: *Heap, master: ?*Span, reserve: ?*Span, reserve_span_count: u32) void { 1126 | heap.span_reserve_master = master; 1127 | heap.span_reserve = reserve; 1128 | heap.spans_reserved = reserve_span_count; 1129 | } 1130 | 1131 | /// Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use 1132 | fn heapCacheAdoptDeferred(heap: *Heap, single_span: ?*?*Span) void { 1133 | var maybe_span: ?*Span = atomicExchangePtrAcquire(&heap.span_free_deferred, null); 1134 | while (maybe_span != null) { 1135 | const next_span: ?*Span = @as(?*Span, @ptrCast(maybe_span.?.free_list)); 1136 | assert(maybe_span.?.heap == heap); // Span heap pointer corrupted 1137 | 1138 | if (maybe_span.?.size_class < SIZE_CLASS_COUNT) { 1139 | @setCold(false); 1140 | assert(heap.full_span_count != 0); // Heap span counter corrupted 1141 | heap.full_span_count -= 1; 1142 | if (single_span != null and single_span.?.* == null) { 1143 | @as(*?*Span, @ptrCast(single_span)).* = maybe_span.?; 1144 | } else { 1145 | heapCacheInsert(heap, maybe_span.?); 1146 | } 1147 | } else { 1148 | if (maybe_span.?.size_class == SIZE_CLASS_HUGE) { 1149 | deallocateHuge(maybe_span.?); 1150 | } else { 1151 | assert(maybe_span.?.size_class == SIZE_CLASS_LARGE); // Span size class invalid 1152 | assert(heap.full_span_count != 0); // Heap span counter corrupted 1153 | heap.full_span_count -= 1; 1154 | const idx: u32 = maybe_span.?.span_count - 1; 1155 | if (idx == 0 and single_span != null and single_span.?.* == null) { 1156 | single_span.?.* = maybe_span.?; 1157 | } else { 1158 | heapCacheInsert(heap, maybe_span.?); 1159 | } 1160 | } 1161 | } 1162 | 1163 | maybe_span = next_span; 1164 | } 1165 | } 1166 | 1167 | fn heapUnmap(heap: *Heap) void { 1168 | const master_heap = heap.master_heap orelse { 1169 | if (heap.finalize > 1 and @atomicLoad(u32, &heap.child_count, .monotonic) == 0) { 1170 | const span: *Span = getSpanPtr(heap).?; 1171 | spanUnmap(span); 1172 | } 1173 | return; 1174 | }; 1175 | if (@atomicRmw(u32, &master_heap.child_count, .Sub, 1, .monotonic) - 1 == 0) { 1176 | return @call(.always_tail, heapUnmap, .{master_heap}); 1177 | } 1178 | } 1179 | 1180 | inline fn heapGlobalFinalize(heap: *Heap) void { 1181 | if (heap.finalize > 1) return; 1182 | heap.finalize += 1; 1183 | 1184 | heapFinalize(heap); 1185 | 1186 | if (enable_thread_cache) { 1187 | const helper = struct { 1188 | inline fn unmapCache(span_cache: *SpanCache) void { 1189 | for (@as([*]*Span, &span_cache.span)[0..span_cache.count]) |cached_span| { 1190 | spanUnmap(cached_span); 1191 | } 1192 | span_cache.count = 0; 1193 | } 1194 | }; 1195 | 1196 | helper.unmapCache(&heap.span_cache); 1197 | for (&heap.span_large_cache) |*span_large_cache| { 1198 | helper.unmapCache(@as(*SpanCache, @ptrCast(span_large_cache))); 1199 | } 1200 | } 1201 | 1202 | if 
(heap.full_span_count != 0) { 1203 | heap.finalize -= 1; 1204 | return; 1205 | } 1206 | 1207 | for (&heap.size_class) |size_class| { 1208 | if (size_class.free_list != null or size_class.partial_span != null) { 1209 | heap.finalize -= 1; 1210 | return; 1211 | } 1212 | } 1213 | 1214 | // Heap is now completely free, unmap and remove from heap list 1215 | const list_idx: usize = heap.id % all_heaps.len; 1216 | var list_heap: ?*Heap = all_heaps[list_idx].?; 1217 | if (list_heap == heap) { 1218 | all_heaps[list_idx] = heap.next_heap; 1219 | } else { 1220 | while (list_heap.?.next_heap != heap) { 1221 | list_heap = list_heap.?.next_heap; 1222 | } 1223 | list_heap.?.next_heap = heap.next_heap; 1224 | } 1225 | 1226 | heapUnmap(heap); 1227 | } 1228 | 1229 | /// Insert a single span into thread heap cache, releasing to global cache if overflow 1230 | fn heapCacheInsert(heap: *Heap, span: *Span) void { 1231 | if (heap.finalize != 0) { 1232 | spanUnmap(span); 1233 | heapGlobalFinalize(heap); 1234 | return; 1235 | } else { 1236 | @setCold(false); 1237 | } 1238 | if (enable_thread_cache) { 1239 | const span_count: usize = span.span_count; 1240 | if (span_count == 1) { 1241 | const span_cache: *SpanCache = &heap.span_cache; 1242 | span_cache.span[span_cache.count] = span; 1243 | span_cache.count += 1; 1244 | 1245 | if (span_cache.count == MAX_THREAD_SPAN_CACHE) { 1246 | const remain_count: usize = MAX_THREAD_SPAN_CACHE - THREAD_SPAN_CACHE_TRANSFER; 1247 | if (enable_global_cache) { 1248 | globalCacheInsertSpans(@as([*]*Span, &span_cache.span) + remain_count, span_count, THREAD_SPAN_CACHE_TRANSFER); 1249 | } else { 1250 | var ispan: usize = 0; 1251 | while (ispan < THREAD_SPAN_CACHE_TRANSFER) : (ispan += 1) { 1252 | spanUnmap(span_cache.span[remain_count + ispan]); 1253 | } 1254 | } 1255 | span_cache.count = remain_count; 1256 | } 1257 | } else { 1258 | const cache_idx: usize = span_count - 2; 1259 | const span_cache: *SpanLargeCache = &heap.span_large_cache[cache_idx]; 1260 | span_cache.span[span_cache.count] = span; 1261 | span_cache.count += 1; 1262 | 1263 | const cache_limit: usize = (MAX_THREAD_SPAN_LARGE_CACHE - (span_count >> 1)); 1264 | if (span_cache.count == cache_limit) { 1265 | const transfer_limit: usize = 2 + (cache_limit >> 2); 1266 | const transfer_count: usize = if (THREAD_SPAN_LARGE_CACHE_TRANSFER <= transfer_limit) THREAD_SPAN_LARGE_CACHE_TRANSFER else transfer_limit; 1267 | const remain_count: usize = cache_limit - transfer_count; 1268 | if (enable_global_cache) { 1269 | globalCacheInsertSpans(@as([*]*Span, &span_cache.span) + remain_count, span_count, transfer_count); 1270 | } else { 1271 | var ispan: usize = 0; 1272 | while (ispan < transfer_count) : (ispan += 1) { 1273 | spanUnmap(span_cache.span[remain_count + ispan]); 1274 | } 1275 | } 1276 | span_cache.count = remain_count; 1277 | } 1278 | } 1279 | } else { 1280 | spanUnmap(span); 1281 | } 1282 | } 1283 | 1284 | /// Extract the given number of spans from the different cache levels 1285 | inline fn heapThreadCacheExtract(heap: *Heap, span_count: usize) ?*Span { 1286 | if (enable_thread_cache) { 1287 | assert(span_count != 0); 1288 | const span_cache: *SpanCache = if (span_count == 1) 1289 | &heap.span_cache 1290 | else 1291 | @as(*SpanCache, @ptrCast(&heap.span_large_cache[span_count - 2])); 1292 | 1293 | if (span_cache.count != 0) { 1294 | span_cache.count -= 1; 1295 | return span_cache.span[span_cache.count]; 1296 | } 1297 | } 1298 | return null; 1299 | } 1300 | 1301 | inline fn heapThreadCacheDeferredExtract(heap: *Heap, 
span_count: usize) ?*Span { 1302 | var span: ?*Span = null; 1303 | if (span_count == 1) { 1304 | heapCacheAdoptDeferred(heap, &span); 1305 | } else { 1306 | heapCacheAdoptDeferred(heap, null); 1307 | span = heapThreadCacheExtract(heap, span_count); 1308 | } 1309 | return span; 1310 | } 1311 | 1312 | inline fn heapReservedExtract(heap: *Heap, span_count: usize) ?*Span { 1313 | if (heap.spans_reserved >= span_count) { 1314 | return spanMapFromReserve(heap, span_count); 1315 | } 1316 | return null; 1317 | } 1318 | 1319 | /// Extract a span from the global cache 1320 | inline fn heapGlobalCacheExtract(heap: *Heap, span_count: usize) ?*Span { 1321 | if (enable_global_cache) { 1322 | assert(span_count != 0); 1323 | if (enable_thread_cache) { 1324 | var span_cache: *SpanCache = undefined; 1325 | var wanted_count: usize = undefined; 1326 | if (span_count == 1) { 1327 | span_cache = &heap.span_cache; 1328 | wanted_count = THREAD_SPAN_CACHE_TRANSFER; 1329 | } else { 1330 | span_cache = @as(*SpanCache, @ptrCast(&heap.span_large_cache[span_count - 2])); 1331 | wanted_count = THREAD_SPAN_LARGE_CACHE_TRANSFER; 1332 | } 1333 | span_cache.count = globalCacheExtractSpans(&span_cache.span, span_count, wanted_count); 1334 | if (span_cache.count != 0) { 1335 | span_cache.count -= 1; 1336 | return span_cache.span[span_cache.count]; 1337 | } 1338 | } else { 1339 | var span: *Span = undefined; 1340 | const count: usize = globalCacheExtractSpans(@as(*[1]*Span, @ptrCast(&span)), span_count, 1); 1341 | if (count != 0) { 1342 | return span; 1343 | } 1344 | } 1345 | } 1346 | return null; 1347 | } 1348 | 1349 | /// Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory 1350 | inline fn heapExtractNewSpan(heap: *Heap, maybe_heap_size_class: ?*HeapSizeClass, span_count_init: usize) ?*Span { 1351 | if (enable_thread_cache) cached_blk: { 1352 | const heap_size_class: *HeapSizeClass = maybe_heap_size_class orelse break :cached_blk; 1353 | const span: *Span = heap_size_class.cache orelse break :cached_blk; 1354 | heap_size_class.cache = null; 1355 | if (heap.span_cache.count != 0) { 1356 | heap.span_cache.count -= 1; 1357 | heap_size_class.cache = heap.span_cache.span[heap.span_cache.count]; 1358 | } 1359 | return span; 1360 | } 1361 | 1362 | var span_count = span_count_init; 1363 | 1364 | // Allow 50% overhead to increase cache hits 1365 | const base_span_count: usize = span_count; 1366 | var limit_span_count: usize = if (span_count > 2) (span_count + (span_count >> 1)) else span_count; 1367 | if (limit_span_count > LARGE_CLASS_COUNT) { 1368 | limit_span_count = LARGE_CLASS_COUNT; 1369 | } 1370 | while (true) { 1371 | if (heapThreadCacheExtract(heap, span_count)) |span| { 1372 | @setCold(false); 1373 | return span; 1374 | } 1375 | if (heapThreadCacheDeferredExtract(heap, span_count)) |span| { 1376 | @setCold(false); 1377 | return span; 1378 | } 1379 | if (heapReservedExtract(heap, span_count)) |span| { 1380 | @setCold(false); 1381 | return span; 1382 | } 1383 | if (heapGlobalCacheExtract(heap, span_count)) |span| { 1384 | @setCold(false); 1385 | return span; 1386 | } 1387 | span_count += 1; 1388 | if (span_count > limit_span_count) break; 1389 | } 1390 | // Final fallback, map in more virtual memory 1391 | return spanMap(heap, base_span_count); 1392 | } 1393 | 1394 | inline fn heapInitialize(heap: *Heap) void { 1395 | heap.* = comptime Heap{ 1396 | .owner_thread = if (builtin.single_threaded) undefined else 0, 1397 | .size_class = [_]HeapSizeClass{.{ .free_list = 
null, .partial_span = null, .cache = null }} ** SIZE_CLASS_COUNT, 1398 | .span_cache = if (enable_thread_cache) SpanCache{ .count = 0, .span = undefined } else .{}, 1399 | .span_free_deferred = null, 1400 | .full_span_count = 0, 1401 | .span_reserve = null, 1402 | .span_reserve_master = null, 1403 | .spans_reserved = 0, 1404 | .child_count = 0, 1405 | .next_heap = null, 1406 | .next_orphan = null, 1407 | .id = 0, 1408 | .finalize = 0, 1409 | .master_heap = null, 1410 | .span_large_cache = if (enable_thread_cache) [_]SpanLargeCache{.{ .count = 0, .span = undefined }} ** (LARGE_CLASS_COUNT - 1) else .{}, 1411 | }; 1412 | // TODO: In the original code this used a function which returned the old value of heap_id_counter plus 1, 1413 | // and then also added one, which caused the first id to ever be assigned to be '2', instead of '0' like it is here. 1414 | // Need to investigate whether this is in any way significant. 1415 | heap.id = @atomicRmw(u32, &heap_id_counter, .Add, 1, .monotonic); 1416 | 1417 | //Link in heap in heap ID map 1418 | const list_idx: usize = heap.id % all_heaps.len; 1419 | heap.next_heap = all_heaps[list_idx]; 1420 | all_heaps[list_idx] = heap; 1421 | } 1422 | 1423 | inline fn heapOrphan(heap: *Heap) void { 1424 | if (!builtin.single_threaded) { 1425 | heap.owner_thread = std.math.maxInt(ThreadId); 1426 | } 1427 | const heap_list: *?*Heap = &orphan_heaps; 1428 | heap.next_orphan = heap_list.*; 1429 | heap_list.* = heap; 1430 | } 1431 | 1432 | /// Allocate a new heap from newly mapped memory pages 1433 | inline fn heapAllocateNew() ?*Heap { 1434 | // Map in pages for a 16 heaps. If page size is greater than required size for this, map a page and 1435 | // use first part for heaps and remaining part for spans for allocations. Adds a lot of complexity, 1436 | // but saves a lot of memory on systems where page size > 64 spans (4MiB) 1437 | const aligned_heap_size: usize = 16 * ((@sizeOf(Heap) + 15) / 16); 1438 | var request_heap_count: usize = 16; 1439 | var heap_span_count: usize = ((aligned_heap_size * request_heap_count) + @sizeOf(Span) + span_size.* - 1) / span_size.*; 1440 | 1441 | var span_count: usize = heap_span_count; 1442 | const span: *Span = span_init: { 1443 | // If there are global reserved spans, use these first 1444 | if (global_reserve_count >= heap_span_count) { 1445 | break :span_init globalGetReservedSpans(heap_span_count).?; 1446 | } 1447 | 1448 | var block_size: usize = span_size.* * heap_span_count; 1449 | if (page_size > block_size) { 1450 | span_count = page_size / span_size.*; 1451 | block_size = page_size; 1452 | // If using huge pages, make sure to grab enough heaps to avoid reallocating a huge page just to serve new heaps 1453 | const possible_heap_count: usize = (block_size - @sizeOf(Span)) / aligned_heap_size; 1454 | if (possible_heap_count >= (request_heap_count * 16)) { 1455 | request_heap_count *= 16; 1456 | } else if (possible_heap_count < request_heap_count) { 1457 | request_heap_count = possible_heap_count; 1458 | } 1459 | heap_span_count = ((aligned_heap_size * request_heap_count) + @sizeOf(Span) + span_size.* - 1) / span_size.*; 1460 | } 1461 | 1462 | var align_offset: usize = 0; 1463 | const span: *Span = @as(*Span, @ptrCast(memoryMap(block_size, &align_offset) orelse return null)); 1464 | 1465 | // Master span will contain the heaps 1466 | spanInitialize(span, span_count, heap_span_count, align_offset); 1467 | 1468 | break :span_init span; 1469 | }; 1470 | 1471 | const remain_size: usize = span_size.* - @sizeOf(Span); 1472 | const 
heap: *Heap = @as(*Heap, @ptrCast(@as([*]Span, @ptrCast(span)) + 1)); 1473 | heapInitialize(heap); 1474 | 1475 | // Put extra heaps as orphans 1476 | var num_heaps: usize = @max(remain_size / aligned_heap_size, request_heap_count); 1477 | @atomicStore(u32, &heap.child_count, @as(u32, @intCast(num_heaps - 1)), .monotonic); 1478 | var extra_heap: *Heap = @as(*Heap, @ptrCast(@as([*]align(@alignOf(Heap)) u8, @ptrCast(heap)) + aligned_heap_size)); 1479 | while (num_heaps > 1) { 1480 | heapInitialize(extra_heap); 1481 | extra_heap.master_heap = heap; 1482 | heapOrphan(extra_heap); 1483 | extra_heap = @as(*Heap, @ptrCast(@as([*]align(@alignOf(Heap)) u8, @ptrCast(extra_heap)) + aligned_heap_size)); 1484 | num_heaps -= 1; 1485 | } 1486 | 1487 | if (span_count > heap_span_count) { 1488 | // Cap reserved spans 1489 | const remain_count: usize = span_count - heap_span_count; 1490 | var reserve_count: usize = if (remain_count > heap_reserve_count) heap_reserve_count else remain_count; 1491 | var remain_span: *Span = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(span)) + (heap_span_count * span_size.*)); 1492 | heapSetReservedSpans(heap, span, remain_span, @as(u32, @intCast(reserve_count))); 1493 | 1494 | if (remain_count > reserve_count) { 1495 | // Set to global reserved spans 1496 | remain_span = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(remain_span)) + (reserve_count * span_size.*)); 1497 | reserve_count = remain_count - reserve_count; 1498 | globalSetReservedSpans(span, remain_span, @as(u32, @intCast(reserve_count))); 1499 | } 1500 | } 1501 | 1502 | return heap; 1503 | } 1504 | 1505 | inline fn heapExtractOrphan(heap_list: *?*Heap) ?*Heap { 1506 | const heap: ?*Heap = heap_list.*; 1507 | heap_list.* = if (heap) |h| h.next_orphan else null; 1508 | return heap; 1509 | } 1510 | 1511 | /// Allocate a new heap, potentially reusing a previously orphaned heap 1512 | inline fn heapAllocate() ?*Heap { 1513 | global_lock.acquire(); 1514 | defer global_lock.release(); 1515 | const maybe_heap = heapExtractOrphan(&orphan_heaps) orelse heapAllocateNew(); 1516 | if (maybe_heap != null) heapCacheAdoptDeferred(maybe_heap.?, null); 1517 | return maybe_heap; 1518 | } 1519 | 1520 | inline fn heapRelease(heap: *Heap, release_cache: bool) void { 1521 | // Release thread cache spans back to global cache 1522 | heapCacheAdoptDeferred(heap, null); 1523 | if (enable_thread_cache) { 1524 | if (release_cache or heap.finalize != 0) { 1525 | const helper = struct { 1526 | inline fn releaseSpan(p_heap: *Heap, span_cache: *SpanCache, iclass: usize) void { 1527 | if (span_cache.count == 0) return; 1528 | if (enable_global_cache) { 1529 | if (p_heap.finalize != 0) { 1530 | var ispan: usize = 0; 1531 | while (ispan < span_cache.count) : (ispan += 1) { 1532 | spanUnmap(span_cache.span[ispan]); 1533 | } 1534 | } else { 1535 | globalCacheInsertSpans(&span_cache.span, iclass + 1, span_cache.count); 1536 | } 1537 | } else { 1538 | var ispan: usize = 0; 1539 | while (ispan < span_cache.count) : (ispan += 1) { 1540 | spanUnmap(span_cache.span[ispan]); 1541 | } 1542 | } 1543 | span_cache.count = 0; 1544 | } 1545 | }; 1546 | 1547 | helper.releaseSpan(heap, &heap.span_cache, 0); 1548 | for (&heap.span_large_cache, 0..) 
|*span_large_cache, @"iclass-1"| { 1549 | helper.releaseSpan(heap, @as(*SpanCache, @ptrCast(span_large_cache)), @"iclass-1" + 1); 1550 | } 1551 | } 1552 | } 1553 | 1554 | if (thread_heap == heap) { 1555 | setThreadHeap(null); 1556 | } 1557 | 1558 | // If we are forcibly terminating with _exit the state of the 1559 | // lock atomic is unknown and it's best to just go ahead and exit 1560 | if (getThreadId() != main_thread_id) { 1561 | global_lock.acquire(); 1562 | } 1563 | heapOrphan(heap); 1564 | // TODO: the original source does this unconditionally, despite 1565 | // the lock being acquired conditionally, but I don't understand 1566 | // why or whether it's a good idea to do this. 1567 | global_lock.release(); 1568 | } 1569 | 1570 | inline fn heapFinalize(heap: *Heap) void { 1571 | if (heap.spans_reserved != 0) { 1572 | const span: *Span = spanMapFromReserve(heap, heap.spans_reserved).?; 1573 | spanUnmap(span); 1574 | assert(heap.spans_reserved == 0); 1575 | } 1576 | 1577 | heapCacheAdoptDeferred(heap, null); 1578 | 1579 | { 1580 | var iclass: usize = 0; 1581 | while (iclass < SIZE_CLASS_COUNT) : (iclass += 1) { 1582 | if (heap.size_class[iclass].cache != null) { 1583 | spanUnmap(heap.size_class[iclass].cache.?); 1584 | } 1585 | heap.size_class[iclass].cache = null; 1586 | var maybe_span: ?*Span = heap.size_class[iclass].partial_span; 1587 | while (maybe_span != null) { 1588 | const next: ?*Span = maybe_span.?.next; 1589 | _ = spanFinalize(heap, iclass, maybe_span.?, &heap.size_class[iclass].partial_span); 1590 | maybe_span = next; 1591 | } 1592 | // If class still has a free list it must be a full span 1593 | if (heap.size_class[iclass].free_list != null) { 1594 | const class_span: *Span = getSpanPtr(heap.size_class[iclass].free_list.?).?; 1595 | 1596 | heap.full_span_count -= 1; 1597 | if (!spanFinalize(heap, iclass, class_span, null)) { 1598 | spanDoubleLinkListAdd(&heap.size_class[iclass].partial_span, class_span); 1599 | } 1600 | } 1601 | } 1602 | } 1603 | 1604 | if (enable_thread_cache) { 1605 | var iclass: usize = 0; 1606 | while (iclass < LARGE_CLASS_COUNT) : (iclass += 1) { 1607 | const span_cache: *SpanCache = if (iclass == 0) &heap.span_cache else @as(*SpanCache, @ptrCast(&heap.span_large_cache[iclass - 1])); 1608 | var ispan: usize = 0; 1609 | while (ispan < span_cache.count) : (ispan += 1) { 1610 | spanUnmap(span_cache.span[ispan]); 1611 | } 1612 | span_cache.count = 0; 1613 | } 1614 | } 1615 | if (options.assertions) { 1616 | assert(@atomicLoad(?*Span, &heap.span_free_deferred, .monotonic) == null); // Heaps still active during finalization 1617 | } 1618 | } 1619 | 1620 | // Allocation entry points 1621 | 1622 | /// Pop first block from a free list 1623 | inline fn freeListPop(list: *?*align(SMALL_GRANULARITY) anyopaque) ?*align(SMALL_GRANULARITY) anyopaque { 1624 | const block = list.*; 1625 | list.* = @as(*?*align(SMALL_GRANULARITY) anyopaque, @ptrCast(block)).*; 1626 | return block; 1627 | } 1628 | 1629 | /// Allocate a small/medium sized memory block from the given heap 1630 | inline fn allocateFromHeapFallback(heap: *Heap, heap_size_class: *HeapSizeClass, class_idx: u32) ?*align(SMALL_GRANULARITY) anyopaque { 1631 | var span = heap_size_class.partial_span; 1632 | if (span != null) { 1633 | @setCold(false); 1634 | assert(span.?.block_count == global_size_classes[span.?.size_class].block_count); // Span block count corrupted 1635 | assert(!spanIsFullyUtilized(span.?)); // Internal failure 1636 | var block: *align(SMALL_GRANULARITY) anyopaque = undefined; 1637 | if 
(span.?.free_list != null) { 1638 | // Span local free list is not empty, swap to size class free list 1639 | block = freeListPop(&span.?.free_list).?; 1640 | heap_size_class.free_list = span.?.free_list; 1641 | span.?.free_list = null; 1642 | } else { 1643 | // If the span did not fully initialize free list, link up another page worth of blocks 1644 | const block_start = @as([*]u8, @ptrCast(span)) + (SPAN_HEADER_SIZE + (span.?.free_list_limit * span.?.block_size)); 1645 | span.?.free_list_limit += freeListPartialInit( 1646 | &heap_size_class.free_list, 1647 | @as(*?*anyopaque, @ptrCast(&block)), 1648 | @as(*anyopaque, @ptrFromInt(@intFromPtr(block_start) & ~(page_size - 1))), 1649 | block_start, 1650 | span.?.block_count - span.?.free_list_limit, 1651 | span.?.block_size, 1652 | ); 1653 | } 1654 | assert(span.?.free_list_limit <= span.?.block_count); // Span block count corrupted 1655 | span.?.used_count = span.?.free_list_limit; 1656 | 1657 | // Swap in deferred free list if present 1658 | if (@atomicLoad(?*align(SMALL_GRANULARITY) anyopaque, &span.?.free_list_deferred, .monotonic) != null) { 1659 | spanExtractFreeListDeferred(span.?); 1660 | } 1661 | 1662 | // If span is still not fully utilized keep it in partial list and early return block 1663 | if (!spanIsFullyUtilized(span.?)) return block; 1664 | 1665 | // The span is fully utilized, unlink from partial list and add to fully utilized list 1666 | spanDoubleLinkListPopHead(&heap_size_class.partial_span, span.?); 1667 | heap.full_span_count += 1; 1668 | return block; 1669 | } 1670 | 1671 | //Find a span in one of the cache levels 1672 | span = heapExtractNewSpan(heap, heap_size_class, 1); 1673 | if (span != null) { 1674 | @setCold(false); 1675 | //Mark span as owned by this heap and set base data, return first block 1676 | return spanInitializeNew(heap, heap_size_class, span.?, class_idx); 1677 | } 1678 | 1679 | return null; 1680 | } 1681 | 1682 | /// Allocate a small sized memory block from the given heap 1683 | inline fn allocateSmall(heap: *Heap, size: usize) ?*align(SMALL_GRANULARITY) anyopaque { 1684 | // Small sizes have unique size classes 1685 | const class_idx: u32 = @as(u32, @intCast((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT)); 1686 | const heap_size_class: *HeapSizeClass = &heap.size_class[class_idx]; 1687 | if (heap_size_class.free_list != null) { 1688 | @setCold(false); 1689 | return freeListPop(&heap_size_class.free_list); 1690 | } 1691 | return allocateFromHeapFallback(heap, heap_size_class, class_idx); 1692 | } 1693 | 1694 | /// Allocate a medium sized memory block from the given heap 1695 | inline fn allocateMedium(heap: *Heap, size: usize) ?*align(SMALL_GRANULARITY) anyopaque { 1696 | // Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes) 1697 | const base_idx: u32 = @as(u32, @intCast(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT))); 1698 | const class_idx: u32 = global_size_classes[base_idx].class_idx; 1699 | const heap_size_class: *HeapSizeClass = &heap.size_class[class_idx]; 1700 | if (heap_size_class.free_list != null) { 1701 | @setCold(false); 1702 | return freeListPop(&heap_size_class.free_list); 1703 | } 1704 | return allocateFromHeapFallback(heap, heap_size_class, class_idx); 1705 | } 1706 | 1707 | /// Allocate a large sized memory block from the given heap 1708 | inline fn allocateLarge(heap: *Heap, size_init: usize) ?*align(SMALL_GRANULARITY) anyopaque { 1709 | var size = size_init; 1710 | 1711 | 
// Calculate number of needed max sized spans (including header) 1712 | // Since this function is never called if size > calculateLargeSizeLimit(span_size.*) 1713 | // the span_count is guaranteed to be <= LARGE_CLASS_COUNT 1714 | size += SPAN_HEADER_SIZE; 1715 | var span_count: usize = size >> span_size_shift.*; 1716 | if (size & (span_size.* - 1) != 0) { 1717 | span_count += 1; 1718 | } 1719 | 1720 | // Find a span in one of the cache levels 1721 | const span: *Span = heapExtractNewSpan(heap, null, span_count) orelse return null; 1722 | 1723 | // Mark span as owned by this heap and set base data 1724 | assert(span.span_count >= span_count); // Internal failure 1725 | span.size_class = SIZE_CLASS_LARGE; 1726 | span.heap = heap; 1727 | heap.full_span_count += 1; 1728 | 1729 | return @as([*]align(SMALL_GRANULARITY) u8, @ptrCast(span)) + SPAN_HEADER_SIZE; 1730 | } 1731 | 1732 | /// Allocate a huge block by mapping memory pages directly 1733 | inline fn allocateHuge(heap: *Heap, size_init: usize) ?*align(SMALL_GRANULARITY) anyopaque { 1734 | var size = size_init; 1735 | 1736 | heapCacheAdoptDeferred(heap, null); 1737 | size += SPAN_HEADER_SIZE; 1738 | var num_pages: usize = size >> page_size_shift; 1739 | if (size & (page_size - 1) != 0) { 1740 | num_pages += 1; 1741 | } 1742 | var align_offset: usize = 0; 1743 | const span: *Span = @as(*Span, @ptrCast(memoryMap(num_pages * page_size, &align_offset) orelse return null)); 1744 | 1745 | // Store page count in span_count 1746 | span.size_class = SIZE_CLASS_HUGE; 1747 | span.span_count = @as(u32, @intCast(num_pages)); 1748 | span.align_offset = @as(u32, @intCast(align_offset)); 1749 | span.heap = heap; 1750 | heap.full_span_count += 1; 1751 | 1752 | return @as([*]align(SMALL_GRANULARITY) u8, @ptrCast(span)) + SPAN_HEADER_SIZE; 1753 | } 1754 | 1755 | /// Allocate a block of the given size 1756 | inline fn allocate(heap: *Heap, size: usize) ?*align(SMALL_GRANULARITY) anyopaque { 1757 | if (size <= SMALL_SIZE_LIMIT) { 1758 | @setCold(false); 1759 | return allocateSmall(heap, size); 1760 | } 1761 | if (size <= medium_size_limit_runtime.*) return allocateMedium(heap, size); 1762 | if (size <= calculateLargeSizeLimit(span_size.*)) return allocateLarge(heap, size); 1763 | return allocateHuge(heap, size); 1764 | } 1765 | 1766 | inline fn alignedAllocate(heap: *Heap, align_log2: u6, size: usize) ?*align(SMALL_GRANULARITY) anyopaque { 1767 | if (size > maxAllocSize()) return null; 1768 | if (align_log2 <= SMALL_GRANULARITY_SHIFT) { 1769 | return allocate(heap, size); 1770 | } 1771 | const alignment = @as(usize, 1) << align_log2; 1772 | 1773 | if ((alignment <= SPAN_HEADER_SIZE) and (size < medium_size_limit_runtime.*)) { 1774 | // If alignment is less or equal to span header size (which is power of two), 1775 | // and size aligned to span header size multiples is less than size + alignment, 1776 | // then use natural alignment of blocks to provide alignment 1777 | const multiple_size: usize = if (size != 0) (size + (SPAN_HEADER_SIZE - 1)) & ~@as(usize, SPAN_HEADER_SIZE - 1) else SPAN_HEADER_SIZE; 1778 | if (options.assertions) { 1779 | assert(multiple_size % SPAN_HEADER_SIZE == 0); // Failed alignment calculation 1780 | } 1781 | if (multiple_size <= (size + alignment)) { 1782 | return allocate(heap, multiple_size); 1783 | } 1784 | } 1785 | 1786 | const align_mask: usize = alignment - 1; 1787 | assert(alignment <= page_size); // this is imposed by the stdlib, so may as well take advantage here. 
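        // The branch below over-allocates by `alignment` bytes and, when the returned block is
        // not already aligned, rounds it up with (ptr & ~align_mask) + alignment. The round-up
        // advances at most alignment - 1 bytes, so the extra `alignment` bytes requested always
        // keep the aligned pointer plus `size` bytes inside the block. Illustrative example
        // (not a real trace): with alignment == 64 and a block ending in address bits 0x30, the
        // pointer advances 16 bytes to the next 64-byte boundary, well within the 64 extra bytes.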
1788 | if (true or alignment <= page_size) { 1789 | var ptr = allocate(heap, size + alignment); 1790 | if (@intFromPtr(ptr) & align_mask != 0) { 1791 | ptr = @as(*align(SMALL_GRANULARITY) anyopaque, @ptrFromInt((@intFromPtr(ptr) & ~@as(usize, align_mask)) + alignment)); 1792 | // Mark as having aligned blocks 1793 | const span: *Span = getSpanPtr(ptr.?).?; 1794 | span.flags.aligned_blocks = true; 1795 | } 1796 | return ptr; 1797 | } 1798 | 1799 | // TODO: To delete, or not to delete the rest of this code, that is the question. 1800 | 1801 | // Fallback to mapping new pages for this request. Since pointers passed 1802 | // to rpfree must be able to reach the start of the span by bitmasking of 1803 | // the address with the span size, the returned aligned pointer from this 1804 | // function must be with a span size of the start of the mapped area. 1805 | // In worst case this requires us to loop and map pages until we get a 1806 | // suitable memory address. It also means we can never align to span size 1807 | // or greater, since the span header will push alignment more than one 1808 | // span size away from span start (thus causing pointer mask to give us 1809 | // an invalid span start on free) 1810 | if (options.assertions) { 1811 | assert(alignment & align_mask == 0); 1812 | assert(alignment < span_size.*); 1813 | } 1814 | 1815 | const extra_pages: usize = alignment / page_size; 1816 | 1817 | // Since each span has a header, we will at least need one extra memory page 1818 | var num_pages: usize = 1 + (size / page_size) + 1819 | @intFromBool(size & (page_size - 1) != 0); 1820 | if (num_pages < extra_pages) { 1821 | num_pages = 1 + extra_pages; 1822 | } 1823 | 1824 | const original_pages: usize = num_pages; 1825 | // var limit_pages: usize = (span_size.* / page_size) * 2; 1826 | // if (limit_pages < (original_pages * 2)) { 1827 | // limit_pages = original_pages * 2; 1828 | // } 1829 | const limit_pages: usize = 2 * @max(span_size.* / page_size, original_pages); 1830 | 1831 | var ptr: *align(SMALL_GRANULARITY) anyopaque = undefined; 1832 | var mapped_size: usize = undefined; 1833 | var align_offset: usize = undefined; 1834 | var span: *Span = undefined; 1835 | 1836 | retry: while (true) { 1837 | align_offset = 0; 1838 | mapped_size = num_pages * page_size; 1839 | 1840 | span = @as(*Span, @ptrCast(memoryMap(mapped_size, &align_offset) orelse return null)); 1841 | ptr = @as([*]align(SMALL_GRANULARITY) u8, @ptrCast(span)) + SPAN_HEADER_SIZE; 1842 | 1843 | if (@intFromPtr(ptr) & align_mask != 0) { 1844 | ptr = @as(*align(SMALL_GRANULARITY) anyopaque, @ptrFromInt((@intFromPtr(ptr) & ~@as(usize, align_mask)) + alignment)); 1845 | } 1846 | 1847 | if ((@intFromPtr(ptr) - @intFromPtr(span)) >= span_size.* or 1848 | (@intFromPtr(ptr) + size) > (@intFromPtr(span) + mapped_size) or 1849 | ((@intFromPtr(ptr) & span_mask.*) != @intFromPtr(span))) 1850 | { 1851 | memoryUnmap(span, align_offset, mapped_size); 1852 | num_pages += 1; 1853 | if (num_pages > limit_pages) return null; 1854 | continue :retry; 1855 | } 1856 | 1857 | break; 1858 | } 1859 | 1860 | // Store page count in span_count 1861 | span.size_class = SIZE_CLASS_HUGE; 1862 | span.span_count = @as(u32, @intCast(num_pages)); 1863 | span.align_offset = @as(u32, @intCast(align_offset)); 1864 | span.heap = heap; 1865 | heap.full_span_count += 1; 1866 | 1867 | return ptr; 1868 | } 1869 | 1870 | // Deallocation entry points 1871 | 1872 | /// Deallocate the given small/medium memory block in the current thread local heap 1873 | inline fn 
deallocateDirectSmallOrMedium(span: *Span, block: *align(SMALL_GRANULARITY) anyopaque) void { 1874 | const heap: *Heap = span.heap; 1875 | if (!builtin.single_threaded and options.assertions) { 1876 | assert(heap.finalize != 0 or heap.owner_thread == 0 or heap.owner_thread == getThreadId()); // Internal failure 1877 | } 1878 | // Add block to free list 1879 | if (spanIsFullyUtilized(span)) { 1880 | span.used_count = span.block_count; 1881 | spanDoubleLinkListAdd(&heap.size_class[span.size_class].partial_span, span); 1882 | heap.full_span_count -= 1; 1883 | } else { 1884 | @setCold(false); 1885 | } 1886 | @as(*?*anyopaque, @ptrCast(block)).* = span.free_list; 1887 | span.used_count -= 1; 1888 | span.free_list = block; 1889 | if (span.used_count == span.list_size) { 1890 | spanDoubleLinkListRemove(&heap.size_class[span.size_class].partial_span, span); 1891 | spanReleaseToCache(heap, span); 1892 | } else { 1893 | @setCold(false); 1894 | } 1895 | } 1896 | 1897 | inline fn deallocateDeferFreeSpan(heap: *Heap, span: *Span) void { 1898 | // This list does not need ABA protection, no mutable side state 1899 | while (true) { 1900 | span.free_list = @alignCast(@as(?*anyopaque, @ptrCast(@atomicLoad(?*Span, &heap.span_free_deferred, .monotonic)))); 1901 | const dst = &heap.span_free_deferred; 1902 | const val = span; 1903 | const ref = @as(?*Span, @ptrCast(span.free_list)); 1904 | if (@cmpxchgWeak(@TypeOf(dst.*), dst, ref, val, .monotonic, .monotonic) == null) break; 1905 | } 1906 | } 1907 | 1908 | /// Put the block in the deferred free list of the owning span 1909 | inline fn deallocateDeferSmallOrMedium(span: *Span, block: *align(SMALL_GRANULARITY) anyopaque) void { 1910 | const free_list = blk: { 1911 | // TODO: is this OK? According to Protty `@atomicRmw` is already a loop like the one below 1912 | if (true) break :blk atomicExchangePtrAcquire(&span.free_list_deferred, INVALID_POINTER); 1913 | 1914 | // The memory ordering here is a bit tricky, to avoid having to ABA protect 1915 | // the deferred free list to avoid desynchronization of list and list size 1916 | // we need to have acquire semantics on successful CAS of the pointer to 1917 | // guarantee the list_size variable validity + release semantics on pointer store 1918 | var free_list: ?*anyopaque = undefined; 1919 | while (true) { 1920 | free_list = atomicExchangePtrAcquire(&span.free_list_deferred, INVALID_POINTER); 1921 | if (free_list != INVALID_POINTER) break; 1922 | } 1923 | }; 1924 | @as(*?*anyopaque, @ptrCast(block)).* = free_list; 1925 | 1926 | span.list_size += 1; 1927 | const free_count: u32 = span.list_size; 1928 | 1929 | const all_deferred_free = free_count == span.block_count; 1930 | atomicStorePtrRelease(&span.free_list_deferred, block); 1931 | if (all_deferred_free) { 1932 | // Span was completely freed by this block. Due to the INVALID_POINTER spin lock 1933 | // no other thread can reach this state simultaneously on this span. 
1934 | // Safe to move to owner heap deferred cache 1935 | deallocateDeferFreeSpan(span.heap, span); 1936 | } 1937 | } 1938 | 1939 | inline fn deallocateSmallOrMedium(span: *Span, ptr: *align(SMALL_GRANULARITY) anyopaque) void { 1940 | const block = if (span.flags.aligned_blocks) blk: { 1941 | // Realign pointer to block start 1942 | const blocks_start: *align(SMALL_GRANULARITY) anyopaque = @as([*]align(SMALL_GRANULARITY) u8, @ptrCast(span)) + SPAN_HEADER_SIZE; 1943 | const block_offset = @intFromPtr(ptr) - @intFromPtr(blocks_start); 1944 | const offset_mod_size = @as(u32, @intCast(block_offset % span.block_size)); 1945 | break :blk ptrAndAlignCast(*align(SMALL_GRANULARITY) anyopaque, @as([*]u8, @ptrCast(ptr)) - offset_mod_size); 1946 | } else ptr; 1947 | 1948 | // Check if block belongs to this heap or if deallocation should be deferred 1949 | const defer_dealloc: bool = span.heap.finalize == 0 and (if (builtin.single_threaded) false else span.heap.owner_thread != getThreadId()); 1950 | if (!defer_dealloc) { 1951 | deallocateDirectSmallOrMedium(span, block); 1952 | } else { 1953 | deallocateDeferSmallOrMedium(span, block); 1954 | } 1955 | } 1956 | 1957 | /// Deallocate the given large memory block to the current heap 1958 | inline fn deallocateLarge(span: *Span) void { 1959 | @setCold(true); 1960 | assert(span.size_class == SIZE_CLASS_LARGE); // Bad span size class 1961 | assert(!span.flags.master or !span.flags.subspan); // Span flag corrupted 1962 | assert(span.flags.master or span.flags.subspan); // Span flag corrupted 1963 | //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap 1964 | const defer_dealloc: bool = span.heap.finalize == 0 and (if (builtin.single_threaded) false else span.heap.owner_thread != getThreadId()); 1965 | 1966 | if (defer_dealloc) { 1967 | deallocateDeferFreeSpan(span.heap, span); 1968 | return; 1969 | } 1970 | assert(span.heap.full_span_count != 0); // Heap span counter corrupted 1971 | span.heap.full_span_count -= 1; 1972 | 1973 | const heap: *Heap = span.heap; 1974 | 1975 | const set_as_reserved = if (enable_thread_cache) 1976 | ((span.span_count > 1) and (heap.span_cache.count == 0) and heap.finalize == 0 and heap.spans_reserved == 0) 1977 | else 1978 | ((span.span_count > 1) and heap.finalize == 0 and heap.spans_reserved == 0); 1979 | 1980 | if (set_as_reserved) { 1981 | heap.span_reserve = span; 1982 | heap.spans_reserved = span.span_count; 1983 | if (span.flags.master) { 1984 | heap.span_reserve_master = span; 1985 | } else { //SPAN_FLAG_SUBSPAN 1986 | const master = ptrAndAlignCast(*Span, @as([*]u8, @ptrCast(span)) - (span.offset_from_master * span_size.*)); 1987 | heap.span_reserve_master = master; 1988 | if (options.assertions) { 1989 | assert(master.flags.master); // Span flag corrupted 1990 | assert(@atomicLoad(u32, &master.remaining_spans, .monotonic) >= span.span_count); // Master span count corrupted 1991 | } 1992 | } 1993 | } else { 1994 | // Insert into cache list 1995 | heapCacheInsert(heap, span); 1996 | } 1997 | } 1998 | 1999 | /// Deallocate the given huge span 2000 | inline fn deallocateHuge(span: *Span) void { 2001 | @setCold(true); 2002 | const defer_dealloc: bool = span.heap.finalize == 0 and (if (builtin.single_threaded) false else span.heap.owner_thread != getThreadId()); 2003 | if (defer_dealloc) { 2004 | deallocateDeferFreeSpan(span.heap, span); 2005 | return; 2006 | } 2007 | assert(span.heap.full_span_count != 0); // Heap span counter corrupted 2008 | 
span.heap.full_span_count -= 1; 2009 | 2010 | // Oversized allocation, page count is stored in span_count 2011 | const num_pages: usize = span.span_count; 2012 | memoryUnmap(span, span.align_offset, num_pages * page_size); 2013 | } 2014 | 2015 | /// Deallocate the given block 2016 | inline fn deallocate(p_unaligned: *anyopaque) void { 2017 | const p = @as(*align(SMALL_GRANULARITY) anyopaque, @alignCast(p_unaligned)); 2018 | // Grab the span (always at start of span, using span alignment) 2019 | const span: *Span = getSpanPtr(p).?; 2020 | if (span.size_class < SIZE_CLASS_COUNT) { 2021 | @setCold(false); 2022 | deallocateSmallOrMedium(span, p); 2023 | } else if (span.size_class == SIZE_CLASS_LARGE) { 2024 | deallocateLarge(span); 2025 | } else { 2026 | deallocateHuge(span); 2027 | } 2028 | } 2029 | 2030 | // Initialization, finalization and utility 2031 | 2032 | /// Get the usable size of the given block 2033 | inline fn usableSize(p: *anyopaque) usize { 2034 | // Grab the span using guaranteed span alignment 2035 | const span: *Span = getSpanPtr(p).?; 2036 | if (span.size_class < SIZE_CLASS_COUNT) { 2037 | // Small/medium block 2038 | const blocks_start: *anyopaque = @as([*]align(@alignOf(Span)) u8, @ptrCast(span)) + SPAN_HEADER_SIZE; 2039 | return span.block_size - ((@intFromPtr(p) - @intFromPtr(blocks_start)) % span.block_size); 2040 | } 2041 | if (span.size_class == SIZE_CLASS_LARGE) { 2042 | // Large block 2043 | const current_spans: usize = span.span_count; 2044 | return (current_spans * span_size.*) - (@intFromPtr(p) - @intFromPtr(span)); 2045 | } 2046 | // Oversized block, page count is stored in span_count 2047 | const current_pages: usize = span.span_count; 2048 | return (current_pages * page_size) - (@intFromPtr(p) - @intFromPtr(span)); 2049 | } 2050 | 2051 | /// Adjust and optimize the size class properties for the given class 2052 | inline fn adjustSizeClass( 2053 | iclass: usize, 2054 | comptime size_classes: *[SIZE_CLASS_COUNT]SizeClass, 2055 | comptime input_span_size: *const @TypeOf(span_size.*), 2056 | ) void { 2057 | comptime assert(input_span_size == span_size); 2058 | 2059 | const block_size: usize = size_classes[iclass].block_size; 2060 | const block_count: usize = (input_span_size.* - SPAN_HEADER_SIZE) / block_size; 2061 | 2062 | size_classes[iclass].block_count = @as(u16, @intCast(block_count)); 2063 | size_classes[iclass].class_idx = @as(u16, @intCast(iclass)); 2064 | 2065 | //Check if previous size classes can be merged 2066 | if (iclass >= SMALL_CLASS_COUNT) { 2067 | var prevclass: usize = iclass; 2068 | while (prevclass > 0) { 2069 | prevclass -= 1; 2070 | //A class can be merged if number of pages and number of blocks are equal 2071 | if (size_classes[prevclass].block_count == size_classes[iclass].block_count) { 2072 | size_classes[prevclass] = size_classes[iclass]; 2073 | } else { 2074 | break; 2075 | } 2076 | } 2077 | } 2078 | } 2079 | /// Initializes the small size classes of the given array. 
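    /// Small classes are spaced SMALL_GRANULARITY (16 bytes) apart: class `i` serves blocks of
    /// `i * SMALL_GRANULARITY` bytes, with class 0 rounded up to a single granule. For example,
    /// a 40-byte request maps to class `(40 + 15) >> SMALL_GRANULARITY_SHIFT == 3`, i.e. a 48-byte block.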
2080 | inline fn globalSmallSizeClassesInit( 2081 | comptime p_size_classes: *[SIZE_CLASS_COUNT]SizeClass, 2082 | comptime input_span_size: *const @TypeOf(span_size.*), 2083 | ) void { 2084 | comptime assert(input_span_size == span_size); 2085 | p_size_classes[0].block_size = SMALL_GRANULARITY; 2086 | adjustSizeClass(0, p_size_classes, input_span_size); 2087 | var iclass: usize = 1; 2088 | while (iclass < SMALL_CLASS_COUNT) : (iclass += 1) { 2089 | const size: usize = iclass * SMALL_GRANULARITY; 2090 | p_size_classes[iclass].block_size = @as(u32, @intCast(size)); 2091 | adjustSizeClass(iclass, p_size_classes, input_span_size); 2092 | } 2093 | } 2094 | 2095 | /// Initialize thread, assign heap 2096 | inline fn threadInitialize() error{OutOfMemory}!void { 2097 | const heap = heapAllocate() orelse return error.OutOfMemory; 2098 | setThreadHeap(heap); 2099 | } 2100 | 2101 | /// Finalize thread, orphan heap 2102 | inline fn threadFinalize(release_caches: bool) void { 2103 | if (thread_heap != null) { 2104 | heapRelease(thread_heap.?, release_caches); 2105 | setThreadHeap(null); 2106 | } 2107 | } 2108 | 2109 | inline fn isThreadInitialized() bool { 2110 | return thread_heap != null; 2111 | } 2112 | }; 2113 | } 2114 | 2115 | inline fn atomicStorePtrRelease(dst: anytype, val: @TypeOf(dst.*)) void { 2116 | @atomicStore(@TypeOf(dst.*), dst, val, .release); 2117 | } 2118 | inline fn atomicExchangePtrAcquire(dst: anytype, val: @TypeOf(dst.*)) @TypeOf(dst.*) { 2119 | return @atomicRmw(@TypeOf(dst.*), dst, .Xchg, val, .acquire); 2120 | } 2121 | inline fn atomicCasPtr(dst: anytype, val: @TypeOf(dst.*), ref: @TypeOf(dst.*)) bool { 2122 | return @cmpxchgWeak(@TypeOf(dst.*), dst, ref, val, .monotonic, .monotonic) == null; 2123 | } 2124 | 2125 | const Lock = enum(u32) { 2126 | unlocked = 0, 2127 | locked = 1, 2128 | 2129 | inline fn acquire(lock: *Lock) void { 2130 | while (@cmpxchgWeak(Lock, lock, .unlocked, .locked, .acquire, .monotonic) != null) { 2131 | std.atomic.spinLoopHint(); 2132 | } 2133 | } 2134 | inline fn release(lock: *Lock) void { 2135 | if (builtin.mode == .Debug) { 2136 | const old_value = @atomicRmw(Lock, lock, .Xchg, .unlocked, .release); 2137 | assert(old_value == .locked); 2138 | return; 2139 | } 2140 | @atomicStore(Lock, lock, .unlocked, .release); 2141 | } 2142 | }; 2143 | 2144 | inline fn log2(x: anytype) switch (@typeInfo(@TypeOf(x))) { 2145 | .Int => std.math.Log2Int(@TypeOf(x)), 2146 | .ComptimeInt => comptime_int, 2147 | else => @compileError("can't get log₂ of type " ++ @typeName(@TypeOf(x))), 2148 | } { 2149 | const T = @TypeOf(x); 2150 | return switch (@typeInfo(T)) { 2151 | .Int => std.math.log2_int(T, x), 2152 | .ComptimeInt => std.math.log2(x), 2153 | else => @compileError("can't get log₂ of type " ++ @typeName(T)), 2154 | }; 2155 | } 2156 | 2157 | const INVALID_POINTER = @as(*align(SMALL_GRANULARITY) anyopaque, @ptrFromInt(std.mem.alignBackward(usize, std.math.maxInt(usize), SMALL_GRANULARITY))); 2158 | const SIZE_CLASS_LARGE = SIZE_CLASS_COUNT; 2159 | const SIZE_CLASS_HUGE = std.math.maxInt(u32); 2160 | 2161 | // Preconfigured limits and sizes 2162 | 2163 | /// Granularity of a small allocation block (must be power of two) 2164 | const SMALL_GRANULARITY = 16; 2165 | /// Small granularity shift count 2166 | const SMALL_GRANULARITY_SHIFT = log2(SMALL_GRANULARITY); 2167 | /// Number of small block size classes 2168 | const SMALL_CLASS_COUNT = 65; 2169 | /// Maximum size of a small block 2170 | const SMALL_SIZE_LIMIT = (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1)); 2171 | /// 
Granularity of a medium allocation block 2172 | const MEDIUM_GRANULARITY = 512; 2173 | /// Medium granularity shift count 2174 | const MEDIUM_GRANULARITY_SHIFT = 9; 2175 | /// Number of medium block size classes 2176 | const MEDIUM_CLASS_COUNT = 61; 2177 | /// Total number of small + medium size classes 2178 | const SIZE_CLASS_COUNT = (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT); 2179 | /// Number of large block size classes 2180 | const LARGE_CLASS_COUNT = 63; 2181 | /// Maximum size of a medium block 2182 | const MEDIUM_SIZE_LIMIT = (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT)); 2183 | inline fn calculateMediumSizeLimitRuntime(input_span_size: anytype) @TypeOf(input_span_size) { 2184 | return @min(MEDIUM_SIZE_LIMIT, (input_span_size - SPAN_HEADER_SIZE) >> 1); 2185 | } 2186 | /// Maximum size of a large block 2187 | inline fn calculateLargeSizeLimit(span_size: anytype) @TypeOf(span_size) { 2188 | return ((LARGE_CLASS_COUNT * span_size) - SPAN_HEADER_SIZE); 2189 | } 2190 | /// Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two) 2191 | const SPAN_HEADER_SIZE = 128; 2192 | /// Number of spans in thread cache 2193 | const MAX_THREAD_SPAN_CACHE = 400; 2194 | /// Number of spans to transfer between thread and global cache 2195 | const THREAD_SPAN_CACHE_TRANSFER = 64; 2196 | /// Number of spans in thread cache for large spans (must be greater than LARGE_CLASS_COUNT / 2) 2197 | const MAX_THREAD_SPAN_LARGE_CACHE = 100; 2198 | /// Number of spans to transfer between thread and global cache for large spans 2199 | const THREAD_SPAN_LARGE_CACHE_TRANSFER = 6; 2200 | 2201 | comptime { 2202 | if (@popCount(@as(std.math.IntFittingRange(0, SMALL_GRANULARITY), SMALL_GRANULARITY)) != 1) @compileError("Small granularity must be power of two"); 2203 | if ((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) != 0) @compileError("Span header size must be power of two"); 2204 | assert(SPAN_HEADER_SIZE % SMALL_GRANULARITY == 0); 2205 | } 2206 | 2207 | const SpanFlags = packed struct(u32) { 2208 | const BackingInt = @typeInfo(SpanFlags).Struct.backing_integer.?; 2209 | /// Flag indicating span is the first (master) span of a split superspan 2210 | master: bool = false, 2211 | /// Flag indicating span is a secondary (sub) span of a split superspan 2212 | subspan: bool = false, 2213 | /// Flag indicating span has blocks with increased alignment 2214 | aligned_blocks: bool = false, 2215 | /// Flag indicating an unmapped master span 2216 | unmapped_master: bool = false, 2217 | 2218 | _pad: enum(u28) { unset } = .unset, 2219 | }; 2220 | 2221 | inline fn ptrAndAlignCast(comptime T: type, ptr: anytype) T { 2222 | return @as(T, @ptrCast(@alignCast(ptr))); 2223 | } 2224 | --------------------------------------------------------------------------------