├── .gitignore ├── LICENSE ├── README.md ├── build.zig └── src ├── Atom.zig ├── Cld.zig ├── Coff.zig └── main.zig /.gitignore: -------------------------------------------------------------------------------- 1 | zig-out 2 | zig-cache 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License Copyright (c) 2022 Luuk de Gram 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice (including the next 11 | paragraph) shall be included in all copies or substantial portions of the 12 | Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS 17 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 | OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cld 2 | 3 | Linker for the Coff object file format. This project is meant as the base for [zig](https://ziglang.org)'s self-hosted linker. 
This repository will probably not contain a full-featured linker as the main work will move to the Zig repository at one point,
as well as upstreamed into [zld](https://github.com/kubkon/zld).
For those reasons, the code within this repository will be closely inline with the structure of the other linkers of the Zig project.

## building

Cld will always closely follow the master branch of the Zig programming language.
Building `Cld` itself will be as simple as running:
```sh
zig build
```
This will create a `cld` binary in the `zig-out/bin` folder.

To enable debug logging, the CLI flag `-Denable-logging` can be supplied to the `zig build` command.
This will enable logging for the built binary, meaning it must be re-compiled to disable logging once again.

--------------------------------------------------------------------------------
/build.zig:
--------------------------------------------------------------------------------
const std = @import("std");

/// Build script entry point: declares the `cld` executable built from
/// `src/main.zig`, the `enable-logging` build option and a `run` step.
pub fn build(b: *std.build.Builder) void {
    // Target and release mode as selected on the `zig build` command line.
    const target = b.standardTargetOptions(.{});
    const mode = b.standardReleaseOptions();
    // `-Denable-logging`; falls back to `false` when the flag is not supplied.
    const enable_logging = b.option(bool, "enable-logging", "Enables logging to stderr [default: false]") orelse false;
    const exe = b.addExecutable("cld", "src/main.zig");
    exe.setTarget(target);
    exe.setBuildMode(mode);
    // Make the flag available to the sources as `build_flags.enable_logging`.
    const options = b.addOptions();
    options.addOption(bool, "enable_logging", enable_logging);
    exe.addOptions("build_flags", options);
    exe.install();

    // The run command depends on the install step and forwards any
    // arguments given after `--` to the binary.
    const run_cmd = exe.run();
    run_cmd.step.dependOn(b.getInstallStep());
    if (b.args) |args| {
        run_cmd.addArgs(args);
    }

    const run_step = b.step("run", "Run the app");
    run_step.dependOn(&run_cmd.step);
}

--------------------------------------------------------------------------------
/src/Atom.zig:
--------------------------------------------------------------------------------
//! An Atom is the smallest unit of linkage.
//! It may stand for the section of a function, or for the
//! data that belongs to a global integer.
const Atom = @This();
const Cld = @import("Cld.zig");
const Relocation = @import("Coff.zig").Relocation;
const std = @import("std");

/// Index, within the owning object file, of the Symbol
/// that represents this Atom.
sym_index: u32,
/// Index of the object file that owns this atom.
file: u16,
/// Offset of the atom within its target section.
offset: u32,
/// Alignment of this atom. When compiling to a PE image file this
/// is always the highest alignment found within the same section.
/// For a relocatable object file the alignment targets the Atom itself.
alignment: u32,
/// Relocations to perform within this Atom; `code` is rewritten
/// with values taken from each Relocation's target.
relocations: []const Relocation,
/// The bytes that make up this atom.
code: std.ArrayListUnmanaged(u8) = .{},
/// Size of this atom. Accounts for alignment and can
/// therefore exceed the length of `code`.
size: u32,

/// The atom that follows this one.
/// 'null' means this atom is the last one of the chain.
next: ?*Atom,
/// The atom that precedes this one.
/// 'null' means this atom is the first one of the chain.
prev: ?*Atom,

/// Symbols that live inside this Atom
contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
/// Indexes of symbols that alias this Atom's symbol
aliases: std.ArrayListUnmanaged(u32) = .{},

/// A symbol index paired with an offset.
pub const SymbolAtOffset = struct {
    sym_index: u32,
    offset: u32,
};

/// Heap-allocates an `Atom` and fills it with default values.
/// The caller owns the returned memory and releases it with `destroy`.
pub fn create(gpa: std.mem.Allocator) !*Atom {
    const defaults: Atom = .{
        .sym_index = 0,
        .file = 0,
        .offset = 0,
        .alignment = 0,
        .relocations = &.{},
        .size = 0,
        .next = null,
        .prev = null,
    };
    const result = try gpa.create(Atom);
    result.* = defaults;
    return result;
}

/// Releases every resource held by this `Atom`, then the `Atom` itself.
pub fn destroy(atom: *Atom, gpa: std.mem.Allocator) void {
    atom.aliases.deinit(gpa);
    atom.contained.deinit(gpa);
    atom.code.deinit(gpa);
    gpa.destroy(atom);
}

/// Follows the `prev` links starting at `atom` and returns the
/// atom that has no predecessor.
pub fn getFirst(atom: *Atom) *Atom {
    var current = atom;
    while (true) {
        current = current.prev orelse return current;
    }
}

/// Builds the symbol location (symbol index + file index) of the given Atom.
pub fn symLoc(atom: Atom) Cld.SymbolWithLoc {
    const location: Cld.SymbolWithLoc = .{ .file = atom.file, .index = atom.sym_index };
    return location;
}

--------------------------------------------------------------------------------
/src/Cld.zig:
--------------------------------------------------------------------------------
//! Cld links one or multiple Coff object files
//! into a single PE binary file. The order of supplying
//! the object files is important to the output.
const Cld = @This();
const std = @import("std");
const Coff = @import("Coff.zig");
const Allocator = std.mem.Allocator;
const Atom = @import("Atom.zig");

/// The Cld-scoped logger
const log = std.log.scoped(.cld);

/// Allocator handed to `openPath`; used for the linker's own allocations.
gpa: Allocator,
/// Name of the final binary, also its output path
name: []const u8,
/// User-provided options which influence the final output
options: Options,
/// File descriptor of the output binary
file: std.fs.File,
/// Represents the coff file header, instructs the image file
/// the data layout of the coff sections
coff_header: Coff.Header,
/// The optional header provides information to the loader.
/// While named optional it's not optional for the final binary
/// when building an image file (PE).
optional_header: Coff.OptionalHeader,
/// A list of all Coff object files to be linked
objects: std.ArrayListUnmanaged(Coff) = .{},
/// List of synthetic symbols
synthetic_symbols: std.ArrayListUnmanaged(Coff.Symbol) = .{},
/// A mapping between a symbol's old location, with its replacement
/// location. i.e. when a weak symbol is overwritten by a stronger symbol.
discarded: std.AutoHashMapUnmanaged(SymbolWithLoc, SymbolWithLoc) = .{},
/// A mapping for all symbols which have been resolved
resolved_symbols: std.AutoHashMapUnmanaged(SymbolWithLoc, void) = .{},
/// Mapping between global's names and their symbol location
/// NOTE(review): keyed by a `u32`; presumably a name index or string-table offset — confirm.
globals: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{},

/// Contains all section headers (unordered)
section_headers: std.ArrayListUnmanaged(Coff.SectionHeader) = .{},
/// Mapping from section name to their index.
/// The keys are owned by this map and are freed in `deinit`.
section_mapping: std.StringHashMapUnmanaged(u16) = .{},
/// The table with all strings occupying more than 8 bytes.
string_table: std.ArrayListUnmanaged(u8) = .{},
/// Maps section indexes to the last atom of that section.
atoms: std.AutoHashMapUnmanaged(u16, *Atom) = .{},
/// Tracks all atoms created from various object files,
/// used to clean up all resources.
managed_atoms: std.ArrayListUnmanaged(*Atom) = .{},

/// Possible user configuration options
const Options = struct {};

/// Number of data-directory entries that follow the optional header.
const number_of_data_directory = 16;
/// Size in bytes of the DOS header together with the DOS stub program.
pub const dos_stub_size = @sizeOf(Coff.DosHeader) + @sizeOf(@TypeOf(dos_program));
comptime {
    // The DOS header is written to the file as raw bytes, so its size must be exact.
    std.debug.assert(@sizeOf(Coff.DosHeader) == 64);
}

/// Dos stub that prints "This program cannot be run in DOS mode."
/// This stub will be inserted at the start of the binary, before all other sections.
pub const dos_program = [_]u8{
    0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd,
    0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68,
    0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72,
    0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f,
    0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e,
    0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20,
    0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00,
};

/// Identifies a symbol by its index and the object file it lives in.
pub const SymbolWithLoc = struct {
    /// Index of the symbol entry within the object file
    index: u32,
    /// When file is 'null', this symbol is populated from outside an object file.
    /// i.e. a synthetic symbol
    file: ?u16,

    /// Returns a pointer to the symbol this location refers to.
    /// A location recorded in `cld.discarded` is first replaced by its
    /// replacement location (followed recursively). The returned pointer
    /// points into the symbol list of the object file, or into
    /// `cld.synthetic_symbols` when `file` is 'null'.
    pub fn getSymbol(sym_loc: SymbolWithLoc, cld: *const Cld) *Coff.Symbol {
        if (cld.discarded.get(sym_loc)) |new_loc| {
            return new_loc.getSymbol(cld);
        }
        if (sym_loc.file) |object_index| {
            return &cld.objects.items[object_index].symbols.items[sym_loc.index];
        }
        return &cld.synthetic_symbols.items[sym_loc.index];
    }
};

/// Creates a new binary file, overwriting any existing file with the corresponding name.
/// Then initializes all default values.
///
/// Cld has exclusive access to the output file, meaning it cannot be accessed by outside
/// processes until `deinit` is called and all resources are deallocated.
///
/// `path` is stored as-is (not copied) and must outlive the returned instance.
/// Returns any error from creating the file or from querying its metadata;
/// in the latter case the file handle is closed again before returning.
pub fn openPath(allocator: Allocator, path: []const u8, options: Options) !Cld {
    const file = try std.fs.cwd().createFile(path, .{ .lock = .Exclusive });
    // Do not leak the (exclusively locked) handle when `stat` fails.
    errdefer file.close();
    const stat = try file.stat();
    const time_stamp = @divFloor(stat.ctime, std.time.ns_per_s);

    return Cld{
        .gpa = allocator,
        .name = path,
        .options = options,
        .file = file,
        .coff_header = .{
            .machine = std.coff.MachineType.X64, // TODO: Make this dynamic, based on target
            .number_of_sections = 0,
            .timedate_stamp = @truncate(u32, @intCast(u64, time_stamp)),
            .pointer_to_symbol_table = 0,
            .number_of_symbols = 0,
            // 112 bytes of fixed PE32+ optional header fields, followed by the data directories.
            .size_of_optional_header = 112 + @sizeOf(Coff.DataDirectory) * number_of_data_directory,
            .characteristics = 0,
        },
        .optional_header = .{
            .magic = 0x20b, // PE32+, TODO: Make this dynamic, based on target
            .major_version = 14, // Output from VS2015. When this is '0' it fails to validate on Windows 7.
            .minor_version = 0,
            .size_of_code = 0,
            .size_of_initialized_data = 0,
            .size_of_uninitialized_data = 0,
            .address_of_entry_point = 0,
            .base_of_code = 0,
            .image_base = 0x140000000,
            .section_alignment = 4096,
            .file_alignment = 512,
            .major_os_version = 6,
            .minor_os_version = 0,
            .major_img_version = 0,
            .minor_img_version = 0,
            .major_sub_version = 6,
            .minor_sub_version = 0,
            .win32_version = 0,
            .size_of_image = 0,
            .size_of_headers = 0,
            .checksum = 0,
            .subsystem = 0,
            .dll_characteristics = 0,
            .size_of_stack_reserve = 1024 * 1024,
            .size_of_stack_commit = 4096,
            .size_of_heap_reserve = 1024 * 1024,
            .size_of_heap_commit = 4096,
            .loader_flags = 0,
            // Must match the number of data directories that are emitted after the
            // optional header and accounted for in `size_of_optional_header`.
            .number_of_rva_and_sizes = number_of_data_directory,
        },
    };
}

/// Closes the file handle to the PE binary file,
/// deallocates all resources related to the linking process,
/// and invalidates the passed `cld` instance.
pub fn deinit(cld: *Cld) void {
    cld.file.close();
    for (cld.objects.items) |*obj| {
        obj.file.close();
        obj.deinit();
    }
    cld.objects.deinit(cld.gpa);
    cld.synthetic_symbols.deinit(cld.gpa);
    cld.discarded.deinit(cld.gpa);
    cld.resolved_symbols.deinit(cld.gpa);
    cld.globals.deinit(cld.gpa);
    cld.section_headers.deinit(cld.gpa);
    // The section names used as keys are owned by the map.
    var header_names_it = cld.section_mapping.keyIterator();
    while (header_names_it.next()) |name| {
        cld.gpa.free(name.*);
    }
    cld.section_mapping.deinit(cld.gpa);
    cld.string_table.deinit(cld.gpa);
    cld.atoms.deinit(cld.gpa);
    for (cld.managed_atoms.items) |atom| {
        atom.destroy(cld.gpa);
    }
    cld.managed_atoms.deinit(cld.gpa);
    cld.* = undefined;
}

/// Appends one or multiple Coff object files that will be linked into the final binary.
/// Skips the file when the given path is not a Coff object file.
///
/// TODO: Make this work for archive files as well as dynamic libraries.
///
/// Files that are skipped, or that fail to parse, are closed and released
/// again. Objects appended before an error occurred stay in `cld.objects`.
pub fn addObjects(cld: *Cld, paths: []const []const u8) !void {
    for (paths) |path| {
        const file = try std.fs.cwd().openFile(path, .{});
        errdefer file.close();
        var coff = Coff.init(cld.gpa, file, path);
        errdefer coff.deinit();

        if (!(try coff.parse())) {
            // Not a Coff object file. Nothing else will ever own this object,
            // so release it here. `Cld.deinit` closes `obj.file` separately from
            // `obj.deinit()`, so the handle is closed explicitly as well.
            coff.deinit();
            file.close();
            continue;
        }

        try cld.objects.append(cld.gpa, coff);
        log.debug("Appended Coff object '{s}'", .{path});
    }
}

/// Performs the link: resolves the symbols of every object, parses the
/// objects into atoms, sorts the sections, allocates the atoms and
/// finally emits the image file.
pub fn flush(cld: *Cld) !void {
    for (cld.objects.items) |_, idx| {
        try resolveSymbolsInObject(cld, @intCast(u16, idx));
    }

    // TODO: Emit unresolved symbols and error out

    for (cld.objects.items) |object, idx| {
        try Coff.parseIntoAtoms(object, cld, @intCast(u16, idx));
    }

    try sortSections(cld);
    try allocateAtoms(cld);
    try emitImageFile(cld);
}

/// Resolves symbols in given object file index.
fn resolveSymbolsInObject(cld: *Cld, index: u16) !void {
    const object: Coff = cld.objects.items[index];
    var sym_index: u32 = 0;
    while (sym_index < object.header.number_of_symbols) : (sym_index += 1) {
        const symbol: Coff.Symbol = object.symbols.items[sym_index];
        // Runs at the end of the iteration, before the `+= 1` of the loop.
        defer sym_index += symbol.number_aux_symbols; // skip auxiliary symbols

        // Add all symbols to resolved list for now
        // TODO: Actually resolve symbols correctly.
        try cld.resolved_symbols.putNoClobber(cld.gpa, .{ .file = index, .index = sym_index }, {});
    }
}

/// Returns the index of the output section that corresponds with section
/// `section_index` of object `object_index`, creating a new section header
/// when no section with that name exists yet.
/// Returns 'null' for sections flagged as removable or discardable.
///
/// On error, neither `section_mapping` nor `section_headers` is modified.
pub fn getMatchingSection(cld: *Cld, object_index: u16, section_index: u16) !?u16 {
    const object: Coff = cld.objects.items[object_index];
    const sec_header: Coff.SectionHeader = object.section_table.items[section_index];
    const sec_name = object.getString(sec_header.name);
    const flags = sec_header.characteristics;

    if (flags & Coff.SectionHeader.flags.IMAGE_SCN_LNK_REMOVE != 0) return null;
    if (flags & Coff.SectionHeader.flags.IMAGE_SCN_MEM_DISCARDABLE != 0) return null;

    // Look up first: the name is only duplicated for sections that are new,
    // so no copy is leaked when the section already exists.
    if (cld.section_mapping.get(sec_name)) |existing_index| return existing_index;

    const current_index = @intCast(u16, cld.section_headers.items.len);
    const owned_name = try cld.gpa.dupe(u8, sec_name);
    errdefer cld.gpa.free(owned_name);

    // Perform everything that can fail before touching the map or the list.
    try cld.section_mapping.ensureUnusedCapacity(cld.gpa, 1);
    try cld.section_headers.ensureUnusedCapacity(cld.gpa, 1);
    const header_name = try cld.makeString(owned_name, .header);

    // The map takes ownership of `owned_name`; it is freed in `deinit`.
    cld.section_mapping.putAssumeCapacityNoClobber(owned_name, current_index);
    cld.section_headers.appendAssumeCapacity(.{
        .name = header_name,
        .virtual_size = 0,
        .virtual_address = 0,
        .size_of_raw_data = 0,
        .pointer_to_raw_data = 0,
        .pointer_to_relocations = 0,
        .pointer_to_line_numbers = 0,
        .number_of_relocations = 0,
        .number_of_line_numbers = 0,
        .characteristics = flags,
        .alignment = 0,
    });
    return current_index;
}

/// Makes a Coff-formatted string by storing it directly when smaller or equal to 8 bytes,
/// or else store it in the string table and write the offset into that table in the 8 bytes
/// of the returned array. The layout of this array is determined based on given `string_type`.
/// Strings placed in the string table are null-terminated so that `getString` can find their end.
/// Returns `error.StringTableTooLarge` when a header offset no longer fits in 7 decimal digits.
fn makeString(cld: *Cld, string: []const u8, string_type: enum { symbol, header }) ![8]u8 {
    var buf = [_]u8{0} ** 8;
    if (string.len <= 8) {
        std.mem.copy(u8, &buf, string);
        return buf;
    }

    // NOTE(review): the offset is relative to `string_table.items`. The table written by
    // `writeStringtable` is preceded by a 4-byte size field; confirm whether the on-disk
    // offsets need to be biased by those 4 bytes.
    const offset = @intCast(u32, cld.string_table.items.len);

    // Encode the offset before growing the table, so a failure leaves the table untouched.
    switch (string_type) {
        .symbol => std.mem.writeIntLittle(u32, buf[4..8], offset),
        .header => {
            buf[0] = '/';
            _ = std.fmt.bufPrint(buf[1..], "{d}", .{offset}) catch return error.StringTableTooLarge;
        },
    }

    try cld.string_table.ensureUnusedCapacity(cld.gpa, string.len + 1);
    cld.string_table.appendSliceAssumeCapacity(string);
    cld.string_table.appendAssumeCapacity(0);
    return buf;
}

/// Returns the corresponding string from a given 8-byte buffer
///
/// Three encodings are recognised: '/' followed by a decimal string-table offset,
/// four zero bytes followed by a little-endian string-table offset, or the name
/// stored inline. An unparsable or out-of-range offset yields an empty string.
///
/// NOTE(review): for an inline name the returned slice points into the by-value
/// `buf` parameter, so it must not be kept past the expression it is used in.
/// Fixing this requires taking `buf` by pointer, which changes the interface.
pub fn getString(cld: Cld, buf: [8]u8) []const u8 {
    const offset = if (buf[0] == '/') blk: {
        const digits_len = std.mem.indexOfScalar(u8, buf[1..], 0) orelse 7;
        const parsed = std.fmt.parseInt(u32, buf[1..][0..digits_len], 10) catch return "";
        break :blk parsed;
    } else if (std.mem.eql(u8, buf[0..4], &.{ 0, 0, 0, 0 })) blk: {
        break :blk std.mem.readIntLittle(u32, buf[4..8]);
    } else return std.mem.sliceTo(&buf, 0);

    const table = cld.string_table.items;
    if (offset >= table.len) return "";
    // Bounded by the table's length, also when a terminator would be missing.
    return std.mem.sliceTo(table[offset..], 0);
}

/// Sorts sections into the most optimal order
///
/// Updates `section_mapping` and the keys of `atoms` so they refer to the new
/// indexes. All allocations happen before anything is modified, so on error
/// the linker state is left as it was.
fn sortSections(cld: *Cld) !void {
    log.debug("Sorting sections. Old order:", .{});
    for (cld.section_headers.items) |hdr, index| {
        log.debug(" {d: >2} {s: >9}", .{ index, cld.getString(hdr.name) });
    }

    // Maps the old index of a section to its index after sorting.
    var old_mapping = std.AutoArrayHashMap(u16, u16).init(cld.gpa);
    defer old_mapping.deinit();
    try old_mapping.ensureUnusedCapacity(cld.section_headers.items.len);

    var new_atoms: std.AutoHashMapUnmanaged(u16, *Atom) = .{};
    errdefer new_atoms.deinit(cld.gpa);
    try new_atoms.ensureUnusedCapacity(cld.gpa, cld.atoms.count());

    // Nothing below this point can fail.

    // Sort sections based on their name. When the section is grouped,
    // we ordinally order the corresponding sections based on alphabetic order.
    const ctx: SectionSortContext = .{ .cld = cld };
    std.sort.sort(Coff.SectionHeader, cld.section_headers.items, ctx, SectionSortContext.lessThan);

    // replace old section mapping indexes with the name indexes
    for (cld.section_headers.items) |hdr, index| {
        const value = cld.section_mapping.getPtr(cld.getString(hdr.name)).?;
        const new_index = @intCast(u16, index);
        old_mapping.putAssumeCapacityNoClobber(value.*, new_index);
        value.* = new_index;
    }

    var it = cld.atoms.iterator();
    while (it.next()) |entry| {
        const old_index = entry.key_ptr.*;
        const new_index = old_mapping.get(old_index).?;
        new_atoms.putAssumeCapacityNoClobber(new_index, entry.value_ptr.*);
    }

    cld.atoms.deinit(cld.gpa);
    cld.atoms = new_atoms;

    log.debug("Sorted sections. New order:", .{});
    for (cld.section_headers.items) |hdr, index| {
        log.debug(" {d: >2} {s: >9}", .{ index, cld.getString(hdr.name) });
    }
}

/// Ordering used by `sortSections`: first by the kind of section,
/// then by the group suffix (the part of the name after '$').
const SectionSortContext = struct {
    cld: *const Cld,

    /// Section name prefixes in the order in which the sections are emitted.
    const ordered_prefixes = [_][]const u8{
        ".text", ".data", ".bss", ".xdata", ".rdata", ".tls", ".debug", ".pdata",
    };

    /// Returns the rank of the section; a lower rank sorts first.
    /// Panics for a section name without a known prefix.
    fn value(ctx: SectionSortContext, header: Coff.SectionHeader) u16 {
        const name = ctx.cld.getString(header.name);
        for (ordered_prefixes) |prefix, rank| {
            if (std.mem.startsWith(u8, name, prefix)) return @intCast(u16, rank);
        }
        std.debug.panic("TODO: value of section named: '{s}'\n", .{name});
    }

    /// Returns the part of `name` after the first '$', or an empty
    /// string when the section is not grouped.
    fn groupSuffix(name: []const u8) []const u8 {
        const dollar = std.mem.indexOfScalar(u8, name, '$') orelse return "";
        return name[dollar + 1 ..];
    }

    /// Returns true when `lhs` must be placed before `rhs` based on their group
    /// suffixes, which are compared lexicographically. A section without a
    /// suffix sorts before any section that has one; equal suffixes yield false,
    /// which keeps the comparison a strict ordering.
    fn isGroupedFirst(ctx: SectionSortContext, lhs: Coff.SectionHeader, rhs: Coff.SectionHeader) bool {
        const lhs_name = ctx.cld.getString(lhs.name);
        const rhs_name = ctx.cld.getString(rhs.name);
        return std.mem.lessThan(u8, groupSuffix(lhs_name), groupSuffix(rhs_name));
    }

    fn lessThan(ctx: SectionSortContext, lhs: Coff.SectionHeader, rhs: Coff.SectionHeader) bool {
        const lhs_val = ctx.value(lhs);
        const rhs_val = ctx.value(rhs);
        if (lhs_val == rhs_val) {
            return ctx.isGroupedFirst(lhs, rhs);
        }
        return lhs_val < rhs_val;
    }
};

/// From a given section name, returns the short section name.
/// This is useful to determine which section a grouped section belongs to.
/// e.g. .text$X belonging to the .text section.
///
/// Panics when `name` does not start with a known section prefix.
fn sectionShortName(name: []const u8) []const u8 {
    const known_prefixes = [_][]const u8{
        ".text", ".data", ".bss", ".xdata", ".rdata", ".tls", ".debug", ".pdata",
    };
    for (known_prefixes) |prefix| {
        if (std.mem.startsWith(u8, name, prefix)) return prefix;
    }
    std.debug.panic("TODO: shortname of section named: '{s}'\n", .{name});
}

/// Assigns a virtual address and file offset to every section that contains
/// atoms, and a value and section number to the symbol of every atom
/// (including its aliases and contained symbols). Also fills in the size and
/// address fields of the optional header.
///
/// Sections are laid out in the order of `section_headers`, so the addresses
/// ascend with the section index.
fn allocateAtoms(cld: *Cld) !void {
    const alignForward = std.mem.alignForwardGeneric;
    const file_alignment = cld.optional_header.file_alignment;
    const section_alignment = cld.optional_header.section_alignment;

    var offset: u32 = dos_stub_size +
        @sizeOf(@TypeOf(Coff.pe_magic)) +
        @sizeOf(Coff.Header) +
        cld.coff_header.size_of_optional_header;

    // NOTE(review): grouped sections are not counted here, but `writeSections`
    // still emits a header for every entry of `section_headers` — confirm once
    // grouped sections are merged into their parent section.
    for (cld.section_headers.items) |hdr| {
        if (hdr.isGrouped()) {
            continue;
        }
        offset += 40; // each header takes up 40 bytes
        cld.coff_header.number_of_sections += 1;
    }

    offset = alignForward(u32, offset, file_alignment);
    cld.optional_header.size_of_headers = offset;
    log.debug("allocating sections, starting at offset: 0x{x:0>8}", .{offset});

    var file_size = offset;
    var rva = alignForward(u32, offset, section_alignment);

    for (cld.section_headers.items) |*hdr, index| {
        const section_index = @intCast(u16, index);
        // Sections without any atom do not take part in the layout.
        const last_atom = cld.atoms.get(section_index) orelse continue;
        hdr.virtual_address = rva;

        var atom: *Atom = last_atom.getFirst();
        var virtual_size: u32 = 0;

        log.debug("allocating atoms in section '{s}'", .{cld.getString(hdr.name)});

        while (true) {
            // A freshly created atom has an alignment of '0', which is not a valid alignment.
            const alignment = if (atom.alignment == 0) 1 else atom.alignment;
            virtual_size = alignForward(u32, virtual_size, alignment);

            const symbol = atom.symLoc().getSymbol(cld);
            symbol.value = rva + virtual_size;
            virtual_size += atom.size;

            symbol.section_number = @intCast(i16, section_index + 1); // section numbers are 1-indexed.

            log.debug(" atom '{s}' allocated from 0x{x:0>8} to 0x{x:0>8}", .{
                cld.objects.items[atom.file].getString(symbol.name),
                symbol.value,
                symbol.value + atom.size,
            });

            const coff = &cld.objects.items[atom.file];
            for (atom.aliases.items) |sym_index| {
                const alias = &coff.symbols.items[sym_index];
                alias.value = symbol.value;
                alias.section_number = symbol.section_number;
            }

            for (atom.contained.items) |sym_at_offset| {
                const contained_sym = &coff.symbols.items[sym_at_offset.sym_index];
                contained_sym.value = symbol.value + sym_at_offset.offset;
                contained_sym.section_number = symbol.section_number;
            }

            atom = atom.next orelse break;
        }

        const raw_size = alignForward(u32, virtual_size, file_alignment);
        hdr.virtual_size = virtual_size;
        hdr.size_of_raw_data = raw_size;
        if (raw_size != 0) {
            hdr.pointer_to_raw_data = file_size;
        }
        rva += alignForward(u32, virtual_size, section_alignment);
        file_size += raw_size; // already a multiple of the file alignment

        const hdr_name = cld.getString(hdr.name);
        if (std.mem.eql(u8, hdr_name, ".text")) {
            // The base of code is an address relative to the image base, not a file offset.
            cld.optional_header.base_of_code = hdr.virtual_address;
        }
        if (hdr.characteristics & Coff.SectionHeader.flags.IMAGE_SCN_CNT_CODE != 0) {
            cld.optional_header.size_of_code += hdr.size_of_raw_data;
        } else if (hdr.characteristics & Coff.SectionHeader.flags.IMAGE_SCN_CNT_INITIALIZED_DATA != 0) {
            cld.optional_header.size_of_initialized_data += hdr.size_of_raw_data;
        } else if (hdr.characteristics & Coff.SectionHeader.flags.IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0) {
            cld.optional_header.size_of_uninitialized_data += hdr.size_of_raw_data;
        }
    }

    cld.optional_header.size_of_image = alignForward(u32, rva, section_alignment);
}

/// Serialises the complete image into memory and writes it to the output file.
fn emitImageFile(cld: *Cld) !void {
    // Magic number of the optional header of a PE32+ image.
    const pe32_plus_magic = 0x20b;

    var writer_list = std.ArrayList(u8).init(cld.gpa);
    defer writer_list.deinit();
    const writer = writer_list.writer();

    // no linker-errors, so update flags
    cld.coff_header.characteristics |= std.coff.IMAGE_FILE_EXECUTABLE_IMAGE;
    if (cld.optional_header.magic == pe32_plus_magic) {
        cld.coff_header.characteristics |= std.coff.IMAGE_FILE_LARGE_ADDRESS_AWARE;
    }

    try writeDosHeader(writer);
    try writeFileHeader(cld.coff_header, writer);
    try writeOptionalHeader(cld.*, writer);
    try writeSections(cld.*, writer);
    try writeStringtable(cld.*, writer);

    try cld.file.writeAll(writer_list.items);
}

/// Writes the DOS header followed by the DOS stub program.
fn writeDosHeader(writer: anytype) !void {
    var header: Coff.DosHeader = std.mem.zeroInit(Coff.DosHeader, .{});
    header.magic = .{ 'M', 'Z' };
    header.used_bytes_last_page = dos_stub_size % 512;
    header.file_size_pages = try std.math.divCeil(u16, dos_stub_size, 512);
    header.header_size_paragraphs = @sizeOf(Coff.DosHeader) / 16;
    header.address_of_relocation_table = @sizeOf(Coff.DosHeader);
    // The PE signature directly follows the DOS stub.
    header.address_of_header = dos_stub_size;

    // TODO: Byteswap the header when target compilation is big-endian
    try writer.writeAll(std.mem.asBytes(&header));
    try writer.writeAll(&dos_program);
}

/// Writes the PE signature followed by the Coff file header.
fn writeFileHeader(header: Coff.Header, writer: anytype) !void {
    try writer.writeAll(&Coff.pe_magic);
    try writer.writeAll(std.mem.asBytes(&header));
}

/// Writes the 112 bytes of the PE32+ optional header, followed by the
/// (currently zeroed) data directories.
///
/// Every field is written individually in little-endian order, so the output
/// does not depend on the in-memory layout of `Coff.OptionalHeader`.
fn writeOptionalHeader(cld: Cld, writer: anytype) !void {
    const hdr = cld.optional_header;
    try writer.writeIntLittle(u16, hdr.magic);
    try writer.writeByte(hdr.major_version);
    try writer.writeByte(hdr.minor_version);
    try writer.writeIntLittle(u32, hdr.size_of_code);
    try writer.writeIntLittle(u32, hdr.size_of_initialized_data);
    try writer.writeIntLittle(u32, hdr.size_of_uninitialized_data);
    try writer.writeIntLittle(u32, hdr.address_of_entry_point);
    try writer.writeIntLittle(u32, hdr.base_of_code);
    // Windows-Specific fields
    try writer.writeIntLittle(u64, hdr.image_base);
    try writer.writeIntLittle(u32, hdr.section_alignment);
    try writer.writeIntLittle(u32, hdr.file_alignment);
    try writer.writeIntLittle(u16, hdr.major_os_version);
    try writer.writeIntLittle(u16, hdr.minor_os_version);
    try writer.writeIntLittle(u16, hdr.major_img_version);
    try writer.writeIntLittle(u16, hdr.minor_img_version);
    try writer.writeIntLittle(u16, hdr.major_sub_version);
    try writer.writeIntLittle(u16, hdr.minor_sub_version);
    try writer.writeIntLittle(u32, hdr.win32_version);
    try writer.writeIntLittle(u32, hdr.size_of_image);
    try writer.writeIntLittle(u32, hdr.size_of_headers);
    try writer.writeIntLittle(u32, hdr.checksum);
    try writer.writeIntLittle(u16, hdr.subsystem);
    try writer.writeIntLittle(u16, hdr.dll_characteristics);
    try writer.writeIntLittle(u64, hdr.size_of_stack_reserve);
    try writer.writeIntLittle(u64, hdr.size_of_stack_commit);
    try writer.writeIntLittle(u64, hdr.size_of_heap_reserve);
    try writer.writeIntLittle(u64, hdr.size_of_heap_commit);
    try writer.writeIntLittle(u32, hdr.loader_flags);
    try writer.writeIntLittle(u32, hdr.number_of_rva_and_sizes);

    // TODO: Actually write to each directory when data is known
    try writer.writeByteNTimes(0, @sizeOf(Coff.DataDirectory) * number_of_data_directory);
}

/// Writes all section headers, followed by the code of every atom.
/// Atoms are written section by section, in the order of `section_headers`,
/// which is the order in which `allocateAtoms` laid them out.
fn writeSections(cld: Cld, writer: anytype) !void {
    for (cld.section_headers.items) |hdr| {
        try writer.writeAll(&hdr.name);
        try writer.writeIntLittle(u32, hdr.virtual_size);
        try writer.writeIntLittle(u32, hdr.virtual_address);
        try writer.writeIntLittle(u32, hdr.size_of_raw_data);
        try writer.writeIntLittle(u32, hdr.pointer_to_raw_data);
        try writer.writeIntLittle(u32, hdr.pointer_to_relocations);
        try writer.writeIntLittle(u32, hdr.pointer_to_line_numbers);
        try writer.writeIntLittle(u16, hdr.number_of_relocations);
        try writer.writeIntLittle(u16, hdr.number_of_line_numbers);
        try writer.writeIntLittle(u32, hdr.characteristics);
    }

    // TODO: Pad up to each section's `pointer_to_raw_data` and `size_of_raw_data`,
    // so the file offsets match the values written in the headers above.
    for (cld.section_headers.items) |_, index| {
        const last_atom = cld.atoms.get(@intCast(u16, index)) orelse continue;
        var atom: *Atom = last_atom.getFirst();
        while (true) {
            // A freshly created atom has an alignment of '0', which is not a valid alignment.
            const alignment = if (atom.alignment == 0) 1 else atom.alignment;
            const size = std.mem.alignForwardGeneric(u32, atom.size, alignment);
            // TODO: Perform relocations before writing
            try writer.writeAll(atom.code.items);
            if (size > atom.size) {
                try writer.writeByteNTimes(0, size - atom.size);
            }
            atom = atom.next orelse break;
        }
    }
}

/// Writes the string table: its total size (which includes the 4-byte size
/// field itself) followed by the strings.
fn writeStringtable(cld: Cld, writer: anytype) !void {
    const size = @intCast(u32, cld.string_table.items.len) + 4; // 4 bytes for size field itself
    try writer.writeIntLittle(u32, size);
    if (size == 4) return;
    try writer.writeAll(cld.string_table.items);
}

--------------------------------------------------------------------------------
/src/Coff.zig:
--------------------------------------------------------------------------------
//! Represents the object file format for Windows.
//! This contains the structure as well as the ability
//! to parse such file into this structure.
const Coff = @This();

const std = @import("std");
const Cld = @import("Cld.zig");
const Atom = @import("Atom.zig");
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.cld);

allocator: Allocator,
/// Handle of the object file. It is closed by the owner of this
/// object (see `Cld.deinit`).
file: std.fs.File,
name: []const u8,

header: Header,
section_table: std.ArrayListUnmanaged(SectionHeader) = .{},
sections: std.ArrayListUnmanaged(Section) = .{},
relocations: std.AutoHashMapUnmanaged(u16, []const Relocation) = .{},
symbols: std.ArrayListUnmanaged(Symbol) = .{},
string_table: []const u8,

/// The Coff file header. Written to the image as raw bytes,
/// hence the `extern` layout.
pub const Header = extern struct {
    machine: std.coff.MachineType,
    number_of_sections: u16,
    timedate_stamp: u32,
    pointer_to_symbol_table: u32,
    number_of_symbols: u32,
    size_of_optional_header: u16,
    characteristics: u16,
};

/// The MS-DOS header found at the very start of an image file.
/// Written to the image as raw bytes; `Cld` asserts its size to be 64 bytes.
pub const DosHeader = extern struct {
    magic: [2]u8,
    used_bytes_last_page: u16,
    file_size_pages: u16,
    numberOfRelocationItems: u16,
    header_size_paragraphs: u16,
    minimum_extra_paragaphs: u16,
    maximum_extra_paragraphs: u16,
    initial_relative_ss: u16,
    initial_sp: u16,
    checksum: u16,
    initial_ip: u16,
    initial_relative_cs: u16,
    address_of_relocation_table: u16,
    overlay_number: u16,
    reserved: [4]u16,
    oem_id: u16,
    oem_info: u16,
    reserved2: [10]u16,
    /// File offset of the PE signature
    address_of_header: u32,
};

/// The fixed part of the PE32+ optional header (without the data directories).
///
/// Declared `extern` so the field order and the size are well-defined:
/// the layout of a regular `struct` is unspecified and must not be
/// relied upon when the header is serialised.
pub const OptionalHeader = extern struct {
    magic: u16,
    major_version: u8,
    minor_version: u8,
    size_of_code: u32,
    size_of_initialized_data: u32,
    size_of_uninitialized_data: u32,
    address_of_entry_point: u32,
    base_of_code: u32,
    // Windows-Specific fields
    image_base: u64,
    section_alignment: u32,
    file_alignment: u32 = 512,
    major_os_version: u16,
    minor_os_version: u16,
    major_img_version: u16,
    minor_img_version: u16,
    major_sub_version: u16,
    minor_sub_version: u16,
    /// Reserved and must always be set to '0'
    win32_version: u32 = 0,
    size_of_image: u32,
    size_of_headers: u32,
    checksum: u32,
    subsystem: u16,
    dll_characteristics: u16,
    size_of_stack_reserve: u64,
    size_of_stack_commit: u64,
    size_of_heap_reserve: u64,
    size_of_heap_commit: u64,
    /// Reserved and must always be set to '0'
    loader_flags: u32 = 0,
    /// Number of data-directory entries in the remainder of the
    /// optional header, of which each describes a location and size.
    number_of_rva_and_sizes: u32,
};

comptime {
    // The fixed part of a PE32+ optional header is exactly 112 bytes.
    std.debug.assert(@sizeOf(OptionalHeader) == 112);
}

/// A single data-directory entry of the optional header.
pub const DataDirectory = extern struct {
    virtual_address: u32,
    size: u32,
};

/// Signature that precedes the Coff file header in an image file.
pub const pe_magic: [4]u8 = .{ 'P', 'E', 0, 0 };

/// The raw bytes of a section, stored as a pointer and a 32-bit size.
pub const Section = struct {
    ptr: [*]const u8,
    size: u32,

    /// Returns the bytes of the section as a slice.
    fn slice(section: Section) []const u8 {
        return section.ptr[0..section.size];
    }

    /// Creates a `Section` that refers to `buf`. The memory is not copied.
    fn fromSlice(buf: []const u8) Section {
        return .{ .ptr = buf.ptr, .size = @intCast(u32, buf.len) };
    }
};

pub const Relocation = struct {
    virtual_address: u32,
    symbol_table_index: u32,
    tag: u16,
};

pub const Symbol = extern struct {
    name: [8]u8,
    value: u32,
    section_number: i16,
    sym_type: u16,
    storage_class: Class,
    number_aux_symbols: u8,

    pub fn complexType(symbol: Symbol) ComplexType {
        return @intToEnum(ComplexType, @truncate(u8, symbol.sym_type >> 4));
    }

    pub fn baseType(symbol: Symbol) BaseType {
        return @intToEnum(BaseType, @truncate(u8, symbol.sym_type >> 8));
132
| } 133 | 134 | pub fn isFunction(symbol: Symbol) bool { 135 | return symbol.sym_type == 0x20; 136 | } 137 | 138 | pub fn isUndefined(symbol: Symbol) bool { 139 | return symbol.section_number == 0; 140 | } 141 | 142 | pub fn isWeak(symbol: Symbol) bool { 143 | return symbol.storage_class == .IMAGE_SYM_CLASS_EXTERNAL and 144 | symbol.section_number == 0 and 145 | symbol.value == 0; 146 | } 147 | 148 | const ComplexType = enum(u8) { 149 | /// No derived type; the symbol is a simple scalar variable. 150 | IMAGE_SYM_DTYPE_NULL = 0, 151 | /// The symbol is a pointer to base type. 152 | IMAGE_SYM_DTYPE_POINTER = 1, 153 | /// The symbol is a function that returns a base type. 154 | IMAGE_SYM_DTYPE_FUNCTION = 2, 155 | /// The symbol is an array of base type. 156 | IMAGE_SYM_DTYPE_ARRAY = 3, 157 | }; 158 | 159 | pub const BaseType = enum(u8) { 160 | /// No type information or unknown base type. Microsoft tools use this setting 161 | IMAGE_SYM_TYPE_NULL = 0, 162 | /// No valid type; used with void pointers and functions 163 | IMAGE_SYM_TYPE_VOID = 1, 164 | /// A character (signed byte) 165 | IMAGE_SYM_TYPE_CHAR = 2, 166 | /// A 2-byte signed integer 167 | IMAGE_SYM_TYPE_SHORT = 3, 168 | /// A natural integer type (normally 4 bytes in Windows) 169 | IMAGE_SYM_TYPE_INT = 4, 170 | /// A 4-byte signed integer 171 | IMAGE_SYM_TYPE_LONG = 5, 172 | /// A 4-byte floating-point number 173 | IMAGE_SYM_TYPE_FLOAT = 6, 174 | /// An 8-byte floating-point number 175 | IMAGE_SYM_TYPE_DOUBLE = 7, 176 | /// A structure 177 | IMAGE_SYM_TYPE_STRUCT = 8, 178 | /// A union 179 | IMAGE_SYM_TYPE_UNION = 9, 180 | /// An enumerated type 181 | IMAGE_SYM_TYPE_ENUM = 10, 182 | /// A member of enumeration (a specific value) 183 | IMAGE_SYM_TYPE_MOE = 11, 184 | /// A byte; unsigned 1-byte integer 185 | IMAGE_SYM_TYPE_BYTE = 12, 186 | /// A word; unsigned 2-byte integer 187 | IMAGE_SYM_TYPE_WORD = 13, 188 | /// An unsigned integer of natural size (normally, 4 bytes) 189 | IMAGE_SYM_TYPE_UINT = 14, 190 | 
/// An unsigned 4-byte integer 191 | IMAGE_SYM_TYPE_DWORD = 15, 192 | }; 193 | 194 | pub const Class = enum(u8) { 195 | /// No assigned storage class. 196 | IMAGE_SYM_CLASS_NULL = 0, 197 | /// The automatic (stack) variable. The Value field specifies the stack frame offset. 198 | IMAGE_SYM_CLASS_AUTOMATIC = 1, 199 | /// A value that Microsoft tools use for external symbols. The Value field indicates the size if the section number is IMAGE_SYM_UNDEFINED (0). If the section number is not zero, then the Value field specifies the offset within the section. 200 | IMAGE_SYM_CLASS_EXTERNAL = 2, 201 | /// The offset of the symbol within the section. If the Value field is zero, then the symbol represents a section name. 202 | IMAGE_SYM_CLASS_STATIC = 3, 203 | /// A register variable. The Value field specifies the register number. 204 | IMAGE_SYM_CLASS_REGISTER = 4, 205 | /// A symbol that is defined externally. 206 | IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, 207 | /// A code label that is defined within the module. The Value field specifies the offset of the symbol within the section. 208 | IMAGE_SYM_CLASS_LABEL = 6, 209 | /// A reference to a code label that is not defined. 210 | IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, 211 | /// The structure member. The Value field specifies the n th member. 212 | IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, 213 | /// A formal argument (parameter) of a function. The Value field specifies the n th argument. 214 | IMAGE_SYM_CLASS_ARGUMENT = 9, 215 | /// The structure tag-name entry. 216 | IMAGE_SYM_CLASS_STRUCT_TAG = 10, 217 | /// A union member. The Value field specifies the n th member. 218 | IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, 219 | /// The Union tag-name entry. 220 | IMAGE_SYM_CLASS_UNION_TAG = 12, 221 | /// A Typedef entry. 222 | IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, 223 | /// A static data declaration. 224 | IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, 225 | /// An enumerated type tagname entry. 
226 | IMAGE_SYM_CLASS_ENUM_TAG = 15, 227 | /// A member of an enumeration. The Value field specifies the n th member. 228 | IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, 229 | /// A register parameter. 230 | IMAGE_SYM_CLASS_REGISTER_PARAM = 17, 231 | /// A bit-field reference. The Value field specifies the n th bit in the bit field. 232 | IMAGE_SYM_CLASS_BIT_FIELD = 18, 233 | /// A .bb (beginning of block) or .eb (end of block) record. The Value field is the relocatable address of the code location. 234 | IMAGE_SYM_CLASS_BLOCK = 100, 235 | /// A value that Microsoft tools use for symbol records that define the extent of a function: begin function (.bf ), end function ( .ef ), and lines in function ( .lf ). For .lf records, the Value field gives the number of source lines in the function. For .ef records, the Value field gives the size of the function code. 236 | IMAGE_SYM_CLASS_FUNCTION = 101, 237 | /// An end-of-structure entry. 238 | IMAGE_SYM_CLASS_END_OF_STRUCT = 102, 239 | /// A value that Microsoft tools, as well as traditional COFF format, use for the source-file symbol record. The symbol is followed by auxiliary records that name the file. 240 | IMAGE_SYM_CLASS_FILE = 103, 241 | /// A definition of a section (Microsoft tools use STATIC storage class instead). 242 | IMAGE_SYM_CLASS_SECTION = 104, 243 | /// A weak external. For more information, see Auxiliary Format 3: Weak Externals. 244 | IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, 245 | /// A CLR token symbol. The name is an ASCII string that consists of the hexadecimal value of the token. For more information, see CLR Token Definition (Object Only). 246 | IMAGE_SYM_CLASS_CLR_TOKEN = 107, 247 | // A special symbol that represents the end of function, for debugging purposes. 
248 | IMAGE_SYM_CLASS_END_OF_FUNCTION = 0xFF, 249 | }; 250 | }; 251 | 252 | pub const SectionHeader = struct { 253 | name: [8]u8, 254 | virtual_size: u32, 255 | virtual_address: u32, 256 | size_of_raw_data: u32, 257 | pointer_to_raw_data: u32, 258 | pointer_to_relocations: u32, 259 | pointer_to_line_numbers: u32, 260 | number_of_relocations: u16, 261 | number_of_line_numbers: u16, 262 | characteristics: u32, 263 | 264 | /// Set by checking the `characteristics` flags 265 | alignment: u32, 266 | 267 | pub const flags = struct { 268 | fn alignment(flag: u32) u32 { 269 | if (flag & flags.IMAGE_SCN_ALIGN_1BYTES != 0) return 1; 270 | if (flag & flags.IMAGE_SCN_ALIGN_2BYTES != 0) return 2; 271 | if (flag & flags.IMAGE_SCN_ALIGN_4BYTES != 0) return 4; 272 | if (flag & flags.IMAGE_SCN_ALIGN_8BYTES != 0) return 8; 273 | if (flag & flags.IMAGE_SCN_ALIGN_16BYTES != 0) return 16; 274 | if (flag & flags.IMAGE_SCN_ALIGN_32BYTES != 0) return 32; 275 | if (flag & flags.IMAGE_SCN_ALIGN_64BYTES != 0) return 64; 276 | if (flag & flags.IMAGE_SCN_ALIGN_128BYTES != 0) return 128; 277 | if (flag & flags.IMAGE_SCN_ALIGN_256BYTES != 0) return 256; 278 | if (flag & flags.IMAGE_SCN_ALIGN_512BYTES != 0) return 512; 279 | if (flag & flags.IMAGE_SCN_ALIGN_1024BYTES != 0) return 1024; 280 | if (flag & flags.IMAGE_SCN_ALIGN_2048BYTES != 0) return 2048; 281 | if (flag & flags.IMAGE_SCN_ALIGN_4096BYTES != 0) return 4096; 282 | if (flag & flags.IMAGE_SCN_ALIGN_8192BYTES != 0) return 8192; 283 | unreachable; 284 | } 285 | /// The section should not be padded to the next boundary. 286 | /// This flag is obsolete and is replaced by IMAGE_SCN_ALIGN_1BYTES. 287 | /// This is valid only for object files. 288 | pub const IMAGE_SCN_TYPE_NO_PAD = 0x00000008; 289 | /// The section contains executable code. 290 | pub const IMAGE_SCN_CNT_CODE = 0x00000020; 291 | /// The section contains initialized data. 
292 | pub const IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040; 293 | /// The section contains uninitialized data. 294 | pub const IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080; 295 | /// Reserved for future use. 296 | pub const IMAGE_SCN_LNK_OTHER = 0x00000100; 297 | /// The section contains comments or other information. 298 | /// The .drectve section has this type. 299 | /// This is valid for object files only. 300 | pub const IMAGE_SCN_LNK_INFO = 0x00000200; 301 | /// The section will not become part of the image. 302 | /// This is valid only for object files. 303 | pub const IMAGE_SCN_LNK_REMOVE = 0x00000800; 304 | /// The section contains COMDAT data. 305 | /// For more information, see COMDAT Sections (Object Only). 306 | /// This is valid only for object files. 307 | pub const IMAGE_SCN_LNK_COMDAT = 0x00001000; 308 | /// The section contains data referenced through the global pointer (GP). 309 | pub const IMAGE_SCN_GPREL = 0x00008000; 310 | /// Reserved for future use. 311 | pub const IMAGE_SCN_MEM_PURGEABLE = 0x00020000; 312 | /// Reserved for future use. 313 | pub const IMAGE_SCN_MEM_16BIT = 0x00020000; 314 | /// Reserved for future use. 315 | pub const IMAGE_SCN_MEM_LOCKED = 0x00040000; 316 | /// Reserved for future use. 317 | pub const IMAGE_SCN_MEM_PRELOAD = 0x00080000; 318 | /// Align data on a 1-byte boundary. Valid only for object files. 319 | pub const IMAGE_SCN_ALIGN_1BYTES = 0x00100000; 320 | /// Align data on a 2-byte boundary. Valid only for object files. 321 | pub const IMAGE_SCN_ALIGN_2BYTES = 0x00200000; 322 | /// Align data on a 4-byte boundary. Valid only for object files. 323 | pub const IMAGE_SCN_ALIGN_4BYTES = 0x00300000; 324 | /// Align data on an 8-byte boundary. Valid only for object files. 325 | pub const IMAGE_SCN_ALIGN_8BYTES = 0x00400000; 326 | /// Align data on a 16-byte boundary. Valid only for object files. 327 | pub const IMAGE_SCN_ALIGN_16BYTES = 0x00500000; 328 | /// Align data on a 32-byte boundary. 
Valid only for object files. 329 | pub const IMAGE_SCN_ALIGN_32BYTES = 0x00600000; 330 | /// Align data on a 64-byte boundary. Valid only for object files. 331 | pub const IMAGE_SCN_ALIGN_64BYTES = 0x00700000; 332 | /// Align data on a 128-byte boundary. Valid only for object files. 333 | pub const IMAGE_SCN_ALIGN_128BYTES = 0x00800000; 334 | /// Align data on a 256-byte boundary. Valid only for object files. 335 | pub const IMAGE_SCN_ALIGN_256BYTES = 0x00900000; 336 | /// Align data on a 512-byte boundary. Valid only for object files. 337 | pub const IMAGE_SCN_ALIGN_512BYTES = 0x00A00000; 338 | /// Align data on a 1024-byte boundary. Valid only for object files. 339 | pub const IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000; 340 | /// Align data on a 2048-byte boundary. Valid only for object files. 341 | pub const IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000; 342 | /// Align data on a 4096-byte boundary. Valid only for object files. 343 | pub const IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000; 344 | /// Align data on an 8192-byte boundary. Valid only for object files. 345 | pub const IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000; 346 | /// The section contains extended relocations. 347 | pub const IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000; 348 | /// The section can be discarded as needed. 349 | pub const IMAGE_SCN_MEM_DISCARDABLE = 0x02000000; 350 | /// The section cannot be cached. 351 | pub const IMAGE_SCN_MEM_NOT_CACHED = 0x04000000; 352 | /// The section is not pageable. 353 | pub const IMAGE_SCN_MEM_NOT_PAGED = 0x08000000; 354 | /// The section can be shared in memory. 355 | pub const IMAGE_SCN_MEM_SHARED = 0x10000000; 356 | /// The section can be executed as code. 357 | pub const IMAGE_SCN_MEM_EXECUTE = 0x20000000; 358 | /// The section can be read. 359 | pub const IMAGE_SCN_MEM_READ = 0x40000000; 360 | /// The section can be written to. 
361 | pub const IMAGE_SCN_MEM_WRITE = 0x80000000; 362 | }; 363 | 364 | /// When a section name contains the symbol `$`, it is considered 365 | /// a grouped section. e.g. a section named `.text$X` contributes 366 | /// to the `.text` section within the image. 367 | /// The character after the dollar sign, indicates the order when 368 | /// multiple (same prefix) sections were found. 369 | pub fn isGrouped(header: SectionHeader) bool { 370 | return std.mem.indexOfScalar(u8, &header.name, '$') != null; 371 | } 372 | 373 | /// For a given section, returns true when the section is a code section. 374 | pub fn isCodeSection(header: SectionHeader) bool { 375 | return header.characteristics & flags.IMAGE_SCN_CNT_CODE and 376 | header.characteristics & flags.IMAGE_SCN_MEM_READ and 377 | header.characteristics & flags.IMAGE_SCN_MEM_EXECUTE; 378 | } 379 | }; 380 | 381 | /// Initializes a new `Coff` instance. The file will not be parsed yet. 382 | pub fn init(allocator: Allocator, file: std.fs.File, path: []const u8) Coff { 383 | return .{ 384 | .allocator = allocator, 385 | .file = file, 386 | .name = path, 387 | .header = undefined, 388 | .string_table = undefined, 389 | }; 390 | } 391 | 392 | /// Frees all resources of the `Coff` file. This does not close the file handle. 393 | pub fn deinit(coff: *Coff) void { 394 | const gpa = coff.allocator; 395 | coff.section_table.deinit(gpa); 396 | for (coff.sections.items) |section, sec_index| { 397 | gpa.free(section.slice()); 398 | if (coff.relocations.get(@intCast(u16, sec_index))) |relocs| { 399 | gpa.free(relocs); 400 | } 401 | } 402 | coff.sections.deinit(gpa); 403 | coff.relocations.deinit(gpa); 404 | coff.symbols.deinit(gpa); 405 | gpa.free(coff.string_table); 406 | coff.* = undefined; 407 | } 408 | 409 | /// Parses the Coff file in its entirety and allocates any 410 | /// resources required. Memory is owned by the `coff` instance. 
/// Returns `false`, after logging an error, when the machine field of the
/// header is not a known `std.coff.MachineType`. I/O and allocation
/// failures are returned as errors.
pub fn parse(coff: *Coff) !bool {
    const reader = coff.file.reader();
    const machine = std.meta.intToEnum(std.coff.MachineType, try reader.readIntLittle(u16)) catch {
        log.err("Given file {s} is not a coff file or contains an unknown machine", .{coff.name});
        return false;
    };

    coff.header = .{
        .machine = machine,
        .number_of_sections = try reader.readIntLittle(u16),
        .timedate_stamp = try reader.readIntLittle(u32),
        .pointer_to_symbol_table = try reader.readIntLittle(u32),
        .number_of_symbols = try reader.readIntLittle(u32),
        .size_of_optional_header = try reader.readIntLittle(u16),
        .characteristics = try reader.readIntLittle(u16),
    };

    // When the object file contains an optional header, we simply
    // skip it as object files are not interested in this data.
    if (coff.header.size_of_optional_header != 0) {
        try coff.file.seekBy(@intCast(i64, coff.header.size_of_optional_header));
    }

    // The string table is read first; it restores the file position so the
    // section table can be read sequentially right after the headers.
    try parseStringTable(coff);
    try parseSectionTable(coff);
    try parseSectionData(coff);
    try parseRelocations(coff);
    try parseSymbolTable(coff);

    return true;
}

/// Reads the string table into `coff.string_table`, without its leading
/// 4-byte size field, and restores the file position afterwards.
/// The table is left empty when the file has no symbol table (the string
/// table directly follows it) or when the recorded size holds no strings.
fn parseStringTable(coff: *Coff) !void {
    // Always leave a valid (possibly empty) slice behind so `deinit` is safe.
    coff.string_table = &[_]u8{};
    if (coff.header.pointer_to_symbol_table == 0) return;

    const reader = coff.file.reader();
    const current_pos = try coff.file.getPos();
    try coff.file.seekTo(coff.stringTableOffset());

    // The size includes the size field itself; anything up to 4 means "empty".
    const size = try reader.readIntLittle(u32);
    if (size > 4) {
        const buffer = try coff.allocator.alloc(u8, size - 4);
        // Scoped to this block: once stored in `coff`, `deinit` owns the buffer.
        errdefer coff.allocator.free(buffer);
        try reader.readNoEof(buffer);
        coff.string_table = buffer;
    }
    try coff.file.seekTo(current_pos);
}

/// Resolves an 8-byte name field to its string.
/// Handles the three encodings read below: `/<decimal offset>` (long section
/// name), four zero bytes followed by a 32-bit offset (long symbol name),
/// and an inline, zero-padded name.
/// Returns an empty string when the offset is malformed or out of bounds.
///
/// NOTE(review): for inline names the result points into the by-value `buf`
/// parameter, so it may not outlive the call; fixing that needs a signature
/// change (e.g. `*const [8]u8`) and an audit of the callers.
pub fn getString(coff: Coff, buf: [8]u8) []const u8 {
    const offset = if (buf[0] == '/') blk: {
        const offset_len = std.mem.indexOfScalar(u8, buf[1..], 0) orelse 7;
        const offset = std.fmt.parseInt(u32, buf[1..][0..offset_len], 10) catch return "";
        break :blk offset;
    } else if (std.mem.eql(u8, buf[0..4], &.{ 0, 0, 0, 0 })) blk: {
        break :blk std.mem.readIntLittle(u32, buf[4..8]);
    } else return std.mem.sliceTo(&buf, 0);

    // Offsets are relative to the start of the on-disk string table, whose
    // first 4 bytes are the size field. `string_table` does not contain
    // that field, so the offset is rebased before indexing.
    if (offset < 4) return "";
    const start = offset - 4;
    if (start >= coff.string_table.len) return "";
    return std.mem.sliceTo(coff.string_table[start..], 0);
}

/// Reads all section headers from the current file position into
/// `coff.section_table` and decodes the alignment of each.
/// Returns `error.InvalidVirtualSize` for a header with a non-zero virtual size.
fn parseSectionTable(coff: *Coff) !void {
    if (coff.header.number_of_sections == 0) return;
    try coff.section_table.ensureUnusedCapacity(coff.allocator, coff.header.number_of_sections);
    const reader = coff.file.reader();

    var index: u16 = 0;
    while (index < coff.header.number_of_sections) : (index += 1) {
        const sec_header = coff.section_table.addOneAssumeCapacity();

        var name: [8]u8 = undefined;
        try reader.readNoEof(&name);
        sec_header.* = .{
            .name = name,
            .virtual_size = try reader.readIntLittle(u32),
            .virtual_address = try reader.readIntLittle(u32),
            .size_of_raw_data = try reader.readIntLittle(u32),
            .pointer_to_raw_data = try reader.readIntLittle(u32),
            .pointer_to_relocations = try reader.readIntLittle(u32),
            .pointer_to_line_numbers = try reader.readIntLittle(u32),
            .number_of_relocations = try reader.readIntLittle(u16),
            .number_of_line_numbers = try reader.readIntLittle(u16),
            .characteristics = try reader.readIntLittle(u32),
            .alignment = undefined,
        };
        sec_header.alignment = SectionHeader.flags.alignment(sec_header.characteristics);

        log.debug("Parsed section header: '{s}'", .{std.mem.sliceTo(&name, 0)});
        if (sec_header.virtual_size != 0) {
            log.err("Invalid object file. Expected virtual size '0' but found '{d}'", .{sec_header.virtual_size});
            return error.InvalidVirtualSize;
        }
    }
}

/// Returns the file offset of the string table, which directly follows
/// the symbol table (18 bytes per record).
fn stringTableOffset(coff: Coff) u32 {
    return coff.header.pointer_to_symbol_table + (coff.header.number_of_symbols * 18);
}

/// Parses a string from the string table found at given `offset`.
/// Populates the given `buffer` with the string and returns the length.
fn parseStringFromOffset(coff: *Coff, offset: u32, buf: []u8) !usize {
    std.debug.assert(buf.len != 0);

    const current_pos = try coff.file.getPos();
    try coff.file.seekTo(coff.stringTableOffset() + offset);
    const str = (try coff.file.reader().readUntilDelimiterOrEof(buf, 0)) orelse "";
    try coff.file.seekTo(current_pos);
    return str.len;
}

/// Parses all section data of the coff file.
/// Asserts section headers are known.
/// A section without a raw data pointer has no bytes in the file; its
/// buffer is zero-filled instead of being read from file offset 0.
fn parseSectionData(coff: *Coff) !void {
    if (coff.header.number_of_sections == 0) return;
    std.debug.assert(coff.section_table.items.len == coff.header.number_of_sections);
    try coff.sections.ensureUnusedCapacity(coff.allocator, coff.header.number_of_sections);
    const reader = coff.file.reader();
    for (coff.section_table.items) |sec_header| {
        const buf = try coff.allocator.alloc(u8, sec_header.size_of_raw_data);
        // Not yet owned by `coff.sections`, so release it when reading fails.
        errdefer coff.allocator.free(buf);

        if (sec_header.pointer_to_raw_data == 0) {
            std.mem.set(u8, buf, 0);
        } else {
            try coff.file.seekTo(sec_header.pointer_to_raw_data);
            try reader.readNoEof(buf);
        }
        coff.sections.appendAssumeCapacity(Section.fromSlice(buf));
    }
}

/// Reads the relocation records of every section that has any and stores
/// them in `coff.relocations`, keyed by the section index.
fn parseRelocations(coff: *Coff) !void {
    if (coff.header.number_of_sections == 0) return;
    const reader = coff.file.reader();
    for (coff.section_table.items) |sec_header, index| {
        if (sec_header.number_of_relocations == 0) continue;
        const sec_index = @intCast(u16, index);

        const relocations = try coff.allocator.alloc(Relocation, sec_header.number_of_relocations);
        errdefer coff.allocator.free(relocations);

        try coff.file.seekTo(sec_header.pointer_to_relocations);
        for (relocations) |*reloc| {
            reloc.* = .{
                .virtual_address = try reader.readIntLittle(u32),
                .symbol_table_index = try reader.readIntLittle(u32),
                .tag = try reader.readIntLittle(u16),
            };
        }

        try coff.relocations.putNoClobber(coff.allocator, sec_index, relocations);
    }
}

/// Reads every record of the symbol table into `coff.symbols`.
///
/// Auxiliary records take up regular slots so that symbol indices keep
/// matching the file. Their bytes are not real symbol fields: a byte in the
/// storage-class position that is not a known class is stored as
/// `IMAGE_SYM_CLASS_NULL`. For a regular symbol an unknown storage class is
/// reported as `error.InvalidStorageClass`.
fn parseSymbolTable(coff: *Coff) !void {
    if (coff.header.number_of_symbols == 0) return;

    try coff.symbols.ensureUnusedCapacity(coff.allocator, coff.header.number_of_symbols);
    try coff.file.seekTo(coff.header.pointer_to_symbol_table);
    const reader = coff.file.reader();

    // Number of auxiliary records still to come for the last regular symbol.
    var aux_remaining: u32 = 0;
    var index: u32 = 0;
    while (index < coff.header.number_of_symbols) : (index += 1) {
        var name: [8]u8 = undefined;
        try reader.readNoEof(&name);
        const value = try reader.readIntLittle(u32);
        const section_number = try reader.readIntLittle(i16);
        const sym_type = try reader.readIntLittle(u16);
        const class_byte = try reader.readByte();
        const aux_count = try reader.readByte();

        const is_aux = aux_remaining > 0;
        const storage_class = std.meta.intToEnum(Symbol.Class, class_byte) catch blk: {
            if (!is_aux) {
                log.err("Symbol {d} in '{s}' has unknown storage class '{d}'", .{ index, coff.name, class_byte });
                return error.InvalidStorageClass;
            }
            break :blk Symbol.Class.IMAGE_SYM_CLASS_NULL;
        };
        if (is_aux) {
            aux_remaining -= 1;
        } else {
            aux_remaining = aux_count;
        }

        coff.symbols.appendAssumeCapacity(.{
            .name = name,
            .value = value,
            .section_number = section_number,
            .sym_type = sym_type,
            .storage_class = storage_class,
            .number_aux_symbols = aux_count,
        });
    }
}

/// Creates one atom for every section of this object file that has symbols
/// and maps onto an output section of `cld`, then appends that atom to the
/// atom list of its output section and grows the output section's size.
/// Returns `error.MissingSectionSymbol` when a section has symbols but none
/// at offset 0 to represent the atom.
pub fn parseIntoAtoms(coff: Coff, cld: *Cld, object_index: u16) !void {
    log.debug("parsing into atoms for object file '{s}'", .{coff.name});
    const gpa = cld.gpa;
    var symbols_by_section = std.AutoHashMap(u16, std.ArrayList(u32)).init(gpa);
    defer {
        var it = symbols_by_section.valueIterator();
        while (it.next()) |syms| {
            syms.deinit();
        }
        symbols_by_section.deinit();
    }
    for (coff.section_table.items) |_, sec_index| {
        try symbols_by_section.putNoClobber(@intCast(u16, sec_index), std.ArrayList(u32).init(gpa));
    }

    {
        var sym_index: u32 = 0;
        while (sym_index < coff.header.number_of_symbols) {
            const symbol: Symbol = coff.symbols.items[sym_index];
            const current_index = sym_index;
            // Always step over the auxiliary records of this symbol, also
            // when the symbol itself is skipped below.
            sym_index += 1 + @as(u32, symbol.number_aux_symbols);

            // Section numbers are one-based; zero and negative values do
            // not refer to a section of this file.
            if (symbol.section_number <= 0) continue;
            const map = symbols_by_section.getPtr(@intCast(u16, symbol.section_number - 1)) orelse continue;
            try map.append(current_index);
        }
    }

    for (coff.section_table.items) |sec_header, sec_index| {
        const sec_name = coff.getString(sec_header.name);

        log.debug(" parsing section '{s}'", .{sec_name});

        const syms = symbols_by_section.get(@intCast(u16, sec_index)).?;
        if (syms.items.len == 0) {
            log.debug(" skipping section because no symbols", .{});
            continue;
        }

        const target_section_index = (try cld.getMatchingSection(object_index, @intCast(u16, sec_index))) orelse {
            log.info("ignored section '{s}'", .{sec_name});
            continue;
        };

        const atom = try Atom.create(gpa);
        {
            // Only destroy the atom while it is not yet stored in
            // `managed_atoms`; destroying it later would leave a dangling
            // pointer in that list.
            // NOTE(review): assumes `cld` releases `managed_atoms` itself — confirm in Cld.zig.
            errdefer atom.destroy(gpa);
            try cld.managed_atoms.append(gpa, atom);
        }
        atom.file = object_index;
        atom.size = sec_header.size_of_raw_data;
        atom.alignment = sec_header.alignment;

        for (syms.items) |sym_index| {
            const symbol: Symbol = coff.symbols.items[sym_index];
            if (symbol.value > 0) {
                try atom.contained.append(gpa, .{
                    .sym_index = sym_index,
                    .offset = symbol.value,
                });
            } else try atom.aliases.append(gpa, sym_index);
        }
        if (atom.aliases.items.len == 0) {
            log.err("Section '{s}' of '{s}' has no symbol at offset '0'", .{ sec_name, coff.name });
            return error.MissingSectionSymbol;
        }
        atom.sym_index = atom.aliases.swapRemove(0);
        try atom.code.appendSlice(gpa, coff.sections.items[sec_index].slice());

        if (sec_header.number_of_relocations > 0) {
            atom.relocations = coff.relocations.get(@intCast(u16, sec_index)).?;
        }

        const target_section: *SectionHeader = &cld.section_headers.items[target_section_index];
        target_section.alignment = @maximum(target_section.alignment, atom.alignment);
        target_section.size_of_raw_data = std.mem.alignForwardGeneric(u32, std.mem.alignForwardGeneric(
            u32,
            target_section.size_of_raw_data,
            atom.alignment,
        ) + atom.size, target_section.alignment);
        target_section.virtual_size = target_section.size_of_raw_data;

        if (cld.atoms.getPtr(target_section_index)) |last| {
            last.*.next = atom;
            atom.prev = last.*;
            last.* = atom;
        } else try cld.atoms.putNoClobber(gpa, target_section_index, atom);
    }
}

--------------------------------------------------------------------------------
/src/main.zig:
--------------------------------------------------------------------------------
const std = @import("std");
const Cld = @import("Cld.zig");
const mem = std.mem;

const io = std.io;

var gpa_allocator = std.heap.GeneralPurposeAllocator(.{ .stack_trace_frames = 8 }){};
const gpa = gpa_allocator.allocator();

/// Log handler for the whole program: forwards to the default logger only
/// when the binary was built with the `enable_logging` build flag.
pub fn log(
    comptime level: std.log.Level,
    comptime scope: @TypeOf(.EnumLiteral),
    comptime format: []const u8,
    args: anytype,
) void {
    if (@import("build_flags").enable_logging) {
        std.log.defaultLog(level, scope, format, args);
    }
}

const usage =
    \\Usage: cld [options] [files...] -o [path]
    \\
    \\Options:
    \\-h, --help Print this help and exit
    \\-o [path] Output path of the binary
;

/// Parses the command line, then links all given object files into the
/// binary at the requested output path.
pub fn main() !void {
    defer if (@import("builtin").mode == .Debug) {
        _ = gpa_allocator.deinit();
    };

    // we use arena for the arguments and its parsing
    var arena_allocator = std.heap.ArenaAllocator.init(gpa);
    defer arena_allocator.deinit();
    const arena = arena_allocator.allocator();

    const process_args = try std.process.argsAlloc(arena);
    defer std.process.argsFree(arena, process_args);

    const args = process_args[1..]; // exclude 'cld' binary
    if (args.len == 0) {
        printHelpAndExit();
    }

    var positionals = std.ArrayList([]const u8).init(arena);
    var output_path: ?[]const u8 = null;

    var i: usize = 0;
    while (i < args.len) : (i += 1) {
        const arg = args[i];
        if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) {
            printHelpAndExit();
        } else if (mem.eql(u8, arg, "-o")) {
            if (i + 1 >= args.len) printErrorAndExit("Missing output path", .{});
            output_path = args[i + 1];
            i += 1;
        } else if (mem.startsWith(u8, arg, "--")) {
            printErrorAndExit("Unknown argument '{s}'", .{arg});
        } else {
            try positionals.append(arg);
        }
    }

    if (positionals.items.len == 0) {
        printErrorAndExit("Expected one or more object files, none were given", .{});
    }

    const out_path = output_path orelse printErrorAndExit("Missing output path", .{});

    var cld = try Cld.openPath(gpa, out_path, .{});
    defer cld.deinit();

    try cld.addObjects(positionals.items);
    try cld.flush();
}

/// Prints the usage text to stdout and exits with status 0.
fn printHelpAndExit() noreturn {
    // Best effort: the process exits right after, so a failed write is ignored.
    io.getStdOut().writer().print("{s}\n", .{usage}) catch {};
    std.process.exit(0);
}

/// Prints the formatted message, followed by a newline, to stderr and
/// exits with status 1.
fn printErrorAndExit(comptime fmt: []const u8, args: anytype) noreturn {
    const writer = io.getStdErr().writer();
    // Best effort: the process exits right after, so failed writes are ignored.
    writer.print(fmt, args) catch {};
    writer.writeByte('\n') catch {};
    std.process.exit(1);
}

--------------------------------------------------------------------------------