├── .gitignore
├── LICENSE
├── README.md
├── build.zig
└── src
    ├── Atom.zig
    ├── Cld.zig
    ├── Coff.zig
    └── main.zig


/.gitignore:
--------------------------------------------------------------------------------
1 | zig-out
2 | zig-cache
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License Copyright (c) 2022 Luuk de Gram
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is furnished
 8 | to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice (including the next
11 | paragraph) shall be included in all copies or substantial portions of the
12 | Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
17 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 | OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Cld
 2 | 
 3 | Linker for the Coff object file format. This project is meant as the base for [zig](https://ziglang.org)'s self-hosted linker.
 4 | This repository will probably not contain a full-featured linker as the main work will move to the Zig repository at one point,
 5 | as well as upstreamed into [zld](https://github.com/kubkon/zld).
 6 | For those reasons, the code within this repository will be closely in line with the structure of the other linkers of the Zig project.
 7 | 
 8 | ## building
 9 | 
10 | Cld will always closely follow the master branch of the Zig programming language.
11 | Building `Cld` itself will be as simple as running:
12 | ```sh
13 | zig build
14 | ```
15 | This will create a `cld` binary in the `zig-out/bin` folder.
16 | 
17 | To enable debug logging, the CLI flag `-Denable-logging` can be supplied to the `zig build` command.
18 | This will enable logging for the built binary, meaning it must be re-compiled to disable logging once again.
19 | 


--------------------------------------------------------------------------------
/build.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | 
 3 | /// Build script entry point: declares the `cld` executable together with its
 4 | /// `enable-logging` option and a `run` step that forwards the CLI arguments.
 5 | pub fn build(b: *std.build.Builder) void {
 6 |     const target_opts = b.standardTargetOptions(.{});
 7 |     const build_mode = b.standardReleaseOptions();
 8 |     const logging = b.option(bool, "enable-logging", "Enables logging to stderr [default: false]") orelse false;
 9 | 
10 |     const flags = b.addOptions();
11 |     flags.addOption(bool, "enable_logging", logging);
12 | 
13 |     const cld_exe = b.addExecutable("cld", "src/main.zig");
14 |     cld_exe.setTarget(target_opts);
15 |     cld_exe.setBuildMode(build_mode);
16 |     cld_exe.addOptions("build_flags", flags);
17 |     cld_exe.install();
18 | 
19 |     const run_exe = cld_exe.run();
20 |     run_exe.step.dependOn(b.getInstallStep());
21 |     if (b.args) |cli_args| run_exe.addArgs(cli_args);
22 |     b.step("run", "Run the app").dependOn(&run_exe.step);
23 | }
24 | 


--------------------------------------------------------------------------------
/src/Atom.zig:
--------------------------------------------------------------------------------
 1 | //! Atom represents the smallest type of linkage.
 2 | //! It can represent a function section, or the data belonging to
 3 | //! a global integer.
 4 | const Atom = @This();
 5 | const Cld = @import("Cld.zig");
 6 | const Relocation = @import("Coff.zig").Relocation;
 7 | const std = @import("std");
 8 | 
 9 | /// The index of the Symbol within the
10 | /// object file that represents this Atom.
11 | sym_index: u32,
12 | /// Index of the object file this atom belongs to
13 | file: u16,
14 | /// Offset within the target section
15 | offset: u32,
16 | /// Alignment of this atom, this will always be equal
17 | /// to the highest alignment within the same section when compiling to
18 | /// a PE image file. In the case of a relocatable object file, the
19 | /// alignment will target the Atom itself.
20 | alignment: u32,
21 | /// Relocations that have to be performed within this Atom,
22 | /// meaning the `code` will be rewritten with values by the Relocation's target.
23 | relocations: []const Relocation,
24 | /// Code representing this atom.
25 | code: std.ArrayListUnmanaged(u8) = .{},
26 | /// The size of this atom, takes account for alignment
27 | /// and can therefore be larger than `code`.
28 | size: u32,
29 | 
30 | /// Next atom in relation to this atom.
31 | /// This is the last atom when `next` is 'null'.
32 | next: ?*Atom,
33 | /// The previous atom in relation to this atom.
34 | /// This is the first atom in the chain when `prev` is 'null'.
35 | prev: ?*Atom,
36 | 
37 | /// Symbols by this Atom
38 | contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
39 | /// Symbol indexes containing an alias to this Atom's symbol
40 | aliases: std.ArrayListUnmanaged(u32) = .{},
41 | 
42 | pub const SymbolAtOffset = struct { // a symbol located inside an atom
43 |     sym_index: u32, // index into the symbol list of the atom's object file
44 |     offset: u32, // added to the value of the atom's symbol when atoms are allocated
45 | };
46 | 
47 | /// Heap-allocates a new `Atom` using `gpa` and zero-initializes every field
48 | /// that has no default. The caller owns the result; free it with `destroy`.
49 | pub fn create(gpa: std.mem.Allocator) !*Atom {
50 |     const new_atom = try gpa.create(Atom);
51 |     new_atom.* = Atom{
52 |         .next = null,
53 |         .prev = null,
54 |         .relocations = &.{},
55 |         .file = 0,
56 |         .sym_index = 0,
57 |         .offset = 0,
58 |         .size = 0,
59 |         .alignment = 0,
60 |     };
61 |     return new_atom;
62 | }
63 | 
64 | /// Releases the buffers owned by this `Atom` and then the `Atom` itself.
65 | pub fn destroy(atom: *Atom, gpa: std.mem.Allocator) void {
66 |     atom.aliases.deinit(gpa);
67 |     atom.contained.deinit(gpa);
68 |     atom.code.deinit(gpa);
69 |     gpa.destroy(atom);
70 | }
71 | 
72 | /// Walks the `prev` links starting at `atom` and returns the head of the chain.
73 | pub fn getFirst(atom: *Atom) *Atom {
74 |     var current = atom;
75 |     while (current.prev != null) current = current.prev.?;
76 |     return current;
77 | }
78 | 
79 | /// Builds the `Cld.SymbolWithLoc` made of this atom's object file and symbol index.
80 | pub fn symLoc(atom: Atom) Cld.SymbolWithLoc {
81 |     return Cld.SymbolWithLoc{ .file = atom.file, .index = atom.sym_index };
82 | }
83 | 


--------------------------------------------------------------------------------
/src/Cld.zig:
--------------------------------------------------------------------------------
  1 | //! Cld links one or multiple Coff object files
  2 | //! into a single PE binary file. The order of supplying
  3 | //! the object files is important to the output.
  4 | const Cld = @This();
  5 | const std = @import("std");
  6 | const Coff = @import("Coff.zig");
  7 | const Allocator = std.mem.Allocator;
  8 | const Atom = @import("Atom.zig");
  9 | 
 10 | /// The Cld-scoped logger
 11 | const log = std.log.scoped(.cld);
 12 | 
 13 | gpa: Allocator,
 14 | /// Name of the final binary, also its output path
 15 | name: []const u8,
 16 | /// User-provided options which influence the final output
 17 | options: Options,
 18 | /// File descriptor of the output binary
 19 | file: std.fs.File,
 20 | /// Represents the coff file header, instructs the image file
 21 | /// the data layout of the coff sections
 22 | coff_header: Coff.Header,
 23 | /// The optional header provides information to the loader.
 24 | /// While named optional it's not optional for the final binary
 25 | /// when building an image file (PE).
 26 | optional_header: Coff.OptionalHeader,
 27 | /// A list of all Coff object files to be linked
 28 | objects: std.ArrayListUnmanaged(Coff) = .{},
 29 | /// List of synthetic symbols
 30 | synthetic_symbols: std.ArrayListUnmanaged(Coff.Symbol) = .{},
 31 | /// A mapping between a symbol's old location, with its replacement
 32 | /// location. i.e. when a weak symbol is overwritten by a stronger symbol.
 33 | discarded: std.AutoHashMapUnmanaged(SymbolWithLoc, SymbolWithLoc) = .{},
 34 | /// A mapping for all symbols which have been resolved
 35 | resolved_symbols: std.AutoHashMapUnmanaged(SymbolWithLoc, void) = .{},
 36 | /// Mapping between global's names and their symbol location
 37 | globals: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{},
 38 | 
 39 | /// Contains all section headers (unordered)
 40 | section_headers: std.ArrayListUnmanaged(Coff.SectionHeader) = .{},
 41 | /// Mapping from section name to their index
 42 | section_mapping: std.StringHashMapUnmanaged(u16) = .{},
 43 | /// The table with all strings occupying more than 8 bytes.
 44 | string_table: std.ArrayListUnmanaged(u8) = .{},
 45 | /// Maps section indexes to the last atom of that section.
 46 | atoms: std.AutoHashMapUnmanaged(u16, *Atom) = .{},
 47 | /// Tracks all atoms created from various object files,
 48 | /// used to clean up all resources.
 49 | managed_atoms: std.ArrayListUnmanaged(*Atom) = .{},
 50 | 
 51 | /// Possible user configuration options
 52 | const Options = struct {};
 53 | 
 54 | const number_of_data_directory = 16;
 55 | pub const dos_stub_size = @sizeOf(Coff.DosHeader) + @sizeOf(@TypeOf(dos_program));
 56 | comptime {
 57 |     std.debug.assert(@sizeOf(Coff.DosHeader) == 64);
 58 | }
 59 | 
 60 | /// Dos stub that prints "This program cannot be run in DOS mode."
 61 | /// This stub will be inserted at the start of the binary, before all other sections.
 62 | pub const dos_program = [_]u8{
 63 |     0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd,
 64 |     0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68,
 65 |     0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72,
 66 |     0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f,
 67 |     0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e,
 68 |     0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20,
 69 |     0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00,
 70 | };
 71 | 
 72 | pub const SymbolWithLoc = struct {
 73 |     /// Position of the symbol inside the symbol list selected by `file`.
 74 |     index: u32,
 75 |     /// Index of the owning object file, or 'null' for a synthetic symbol
 76 |     /// that lives in `Cld.synthetic_symbols`.
 77 |     file: ?u16,
 78 | 
 79 |     /// Follows the replacements recorded in `cld.discarded` until reaching a
 80 |     /// location without replacement and returns a pointer to the symbol
 81 |     /// stored at that final location.
 82 |     pub fn getSymbol(sym_loc: SymbolWithLoc, cld: *const Cld) *Coff.Symbol {
 83 |         var loc = sym_loc;
 84 |         while (cld.discarded.get(loc)) |replacement| loc = replacement;
 85 |         const object_index = loc.file orelse return &cld.synthetic_symbols.items[loc.index];
 86 |         return &cld.objects.items[object_index].symbols.items[loc.index];
 87 |     }
 88 | };
 89 | 
 90 | /// Creates a new binary file, overwriting any existing file with the corresponding name,
 91 | /// then initializes all default values. The file is closed again if this function fails.
 92 | ///
 93 | /// Cld has exclusive access to the output file, meaning it cannot be accessed by outside
 94 | /// processes until `deinit` is called and all resources are deallocated.
 95 | pub fn openPath(allocator: Allocator, path: []const u8, options: Options) !Cld {
 96 |     const file = try std.fs.cwd().createFile(path, .{ .lock = .Exclusive });
 97 |     errdefer file.close(); // don't leak the locked handle when `stat` fails
 98 |     const time_stamp = @divFloor((try file.stat()).ctime, std.time.ns_per_s);
 99 | 
100 |     return Cld{
101 |         .gpa = allocator,
102 |         .name = path,
103 |         .options = options,
104 |         .file = file,
105 |         .coff_header = .{
106 |             .machine = std.coff.MachineType.X64, // TODO: Make this dynamic, based on target
107 |             .number_of_sections = 0,
108 |             .timedate_stamp = @truncate(u32, @intCast(u64, time_stamp)),
109 |             .pointer_to_symbol_table = 0,
110 |             .number_of_symbols = 0,
111 |             .size_of_optional_header = 112 + @sizeOf(Coff.DataDirectory) * number_of_data_directory,
112 |             .characteristics = 0,
113 |         },
114 |         .optional_header = .{
115 |             .magic = 0x20b, // PE32+, TODO: Make this dynamic, based on target
116 |             .major_version = 14, // Output from VS2015. When this is '0' it fails to validate on Windows 7.
117 |             .minor_version = 0,
118 |             .size_of_code = 0,
119 |             .size_of_initialized_data = 0,
120 |             .size_of_uninitialized_data = 0,
121 |             .address_of_entry_point = 0,
122 |             .base_of_code = 0,
123 |             .image_base = 0x140000000,
124 |             .section_alignment = 4096,
125 |             .file_alignment = 512,
126 |             .major_os_version = 6,
127 |             .minor_os_version = 0,
128 |             .major_img_version = 0,
129 |             .minor_img_version = 0,
130 |             .major_sub_version = 6,
131 |             .minor_sub_version = 0,
132 |             .win32_version = 0,
133 |             .size_of_image = 0,
134 |             .size_of_headers = 0,
135 |             .checksum = 0,
136 |             .subsystem = 0,
137 |             .dll_characteristics = 0,
138 |             .size_of_stack_reserve = 1024 * 1024,
139 |             .size_of_stack_commit = 4096,
140 |             .size_of_heap_reserve = 1024 * 1024,
141 |             .size_of_heap_commit = 4096,
142 |             .loader_flags = 0,
143 |             .number_of_rva_and_sizes = number_of_data_directory, // matches the directories that are written and sized
144 |         },
145 |     };
146 | }
147 | 
148 | /// Closes the file handle to the PE binary file,
149 | /// deallocates all resources related to the linking process (including the
150 | /// `globals` map and the string table), and invalidates the passed `cld` instance.
151 | pub fn deinit(cld: *Cld) void {
152 |     cld.file.close();
153 |     for (cld.objects.items) |*obj| {
154 |         obj.file.close();
155 |         obj.deinit();
156 |     }
157 |     cld.objects.deinit(cld.gpa);
158 |     cld.synthetic_symbols.deinit(cld.gpa);
159 |     cld.discarded.deinit(cld.gpa);
160 |     cld.resolved_symbols.deinit(cld.gpa);
161 |     cld.globals.deinit(cld.gpa);
162 |     cld.section_headers.deinit(cld.gpa);
163 |     cld.string_table.deinit(cld.gpa);
164 |     // The keys of `section_mapping` are heap copies made by `getMatchingSection`.
165 |     var header_names_it = cld.section_mapping.keyIterator();
166 |     while (header_names_it.next()) |name| cld.gpa.free(name.*);
167 |     cld.section_mapping.deinit(cld.gpa);
168 |     cld.atoms.deinit(cld.gpa);
169 |     // `atoms` only references atoms; `managed_atoms` is the list used to free them.
170 |     for (cld.managed_atoms.items) |atom| atom.destroy(cld.gpa);
171 |     cld.managed_atoms.deinit(cld.gpa);
172 |     cld.* = undefined;
173 | }
174 | 
175 | /// Appends one or multiple Coff object files that will be linked into the final binary.
176 | /// Skips the file when the given path is not a Coff object file. The handle of a file
177 | /// that is skipped, fails to parse or cannot be appended is closed again.
178 | ///
179 | /// TODO: Make this work for archive files as well as dynamic libraries.
180 | pub fn addObjects(cld: *Cld, paths: []const []const u8) !void {
181 |     for (paths) |path| {
182 |         const file = try std.fs.cwd().openFile(path, .{});
183 |         errdefer file.close();
184 |         var coff = Coff.init(cld.gpa, file, path);
185 |         errdefer coff.deinit();
186 | 
187 |         if (!(try coff.parse())) {
188 |             // Not a Coff object: nothing keeps a reference, so release it right away.
189 |             coff.deinit();
190 |             file.close();
191 |             continue;
192 |         }
193 |         try cld.objects.append(cld.gpa, coff);
194 |         log.debug("Appended Coff object '{s}'", .{path});
195 |     }
196 | }
191 | 
192 | /// Runs the link: resolves the symbols of every object, parses the objects
193 | /// into atoms, then sorts the sections, allocates the atoms and emits the image.
194 | pub fn flush(cld: *Cld) !void {
195 |     for (cld.objects.items) |_, object_index| {
196 |         try cld.resolveSymbolsInObject(@intCast(u16, object_index));
197 |     }
198 |     // TODO: Emit unresolved symbols and error out
199 |     for (cld.objects.items) |object, object_index| {
200 |         try object.parseIntoAtoms(cld, @intCast(u16, object_index));
201 |     }
202 | 
203 |     try cld.sortSections();
204 |     try cld.allocateAtoms();
205 |     try cld.emitImageFile();
206 | }
207 | 
208 | /// Records every non-auxiliary symbol of the object file at `index` in
209 | /// `resolved_symbols`. TODO: Actually resolve symbols correctly.
210 | fn resolveSymbolsInObject(cld: *Cld, index: u16) !void {
211 |     const object: Coff = cld.objects.items[index];
212 |     var cursor: u32 = 0;
213 |     while (cursor < object.header.number_of_symbols) : (cursor += 1) {
214 |         const aux_count = object.symbols.items[cursor].number_aux_symbols;
215 |         const loc: SymbolWithLoc = .{ .file = index, .index = cursor };
216 |         try cld.resolved_symbols.putNoClobber(cld.gpa, loc, {});
217 |         // also step over the auxiliary records that trail this symbol
218 |         cursor += aux_count;
219 |     }
220 | }
221 | 
222 | /// Returns the index of the output section that section `section_index` of object
223 | /// `object_index` maps to, creating the output section header on first use.
224 | /// Returns 'null' for sections flagged `IMAGE_SCN_LNK_REMOVE` or `IMAGE_SCN_MEM_DISCARDABLE`.
225 | pub fn getMatchingSection(cld: *Cld, object_index: u16, section_index: u16) !?u16 {
226 |     const object: Coff = cld.objects.items[object_index];
227 |     const sec_header: Coff.SectionHeader = object.section_table.items[section_index];
228 |     const sec_name = object.getString(sec_header.name);
229 |     const flags = sec_header.characteristics;
230 |     const current_index = @intCast(u16, cld.section_headers.items.len);
231 | 
232 |     if (flags & Coff.SectionHeader.flags.IMAGE_SCN_LNK_REMOVE != 0) return null;
233 |     if (flags & Coff.SectionHeader.flags.IMAGE_SCN_MEM_DISCARDABLE != 0) return null;
234 | 
235 |     // Only duplicate the name for a new entry: a copy made for an already
236 |     // known section would never be stored in the map and would leak.
237 |     if (cld.section_mapping.get(sec_name)) |existing_index| return existing_index;
238 |     const owned_name = try cld.gpa.dupe(u8, sec_name);
239 |     errdefer cld.gpa.free(owned_name);
240 | 
241 |     // Do everything that can fail before touching the map and the header list,
242 |     // so an error never leaves a mapping without its header (or vice versa).
243 |     try cld.section_headers.ensureUnusedCapacity(cld.gpa, 1);
244 |     const header_name = try cld.makeString(owned_name, .header);
245 |     try cld.section_mapping.putNoClobber(cld.gpa, owned_name, current_index);
246 |     cld.section_headers.appendAssumeCapacity(.{
247 |         .name = header_name,
248 |         .virtual_size = 0,
249 |         .virtual_address = 0,
250 |         .size_of_raw_data = 0,
251 |         .pointer_to_raw_data = 0,
252 |         .pointer_to_relocations = 0,
253 |         .pointer_to_line_numbers = 0,
254 |         .number_of_relocations = 0,
255 |         .number_of_line_numbers = 0,
256 |         .characteristics = flags,
257 |         .alignment = 0,
258 |     });
259 |     return current_index;
260 | }
253 | 
254 | /// Makes a Coff-formatted string by storing it directly when smaller or equal to 8 bytes,
255 | /// or else stores it null-terminated in the string table and writes its offset into the
256 | /// 8 bytes of the returned array. The layout of this array is determined by `string_type`.
257 | /// Offsets count from the start of the emitted table, i.e. they include the 4-byte size
258 | /// field written by `writeStringtable`. Returns `error.Overflow` when the offset of a
259 | /// header name needs more than the 7 decimal digits that fit behind the '/'.
260 | fn makeString(cld: *Cld, string: []const u8, string_type: enum { symbol, header }) ![8]u8 {
261 |     var buf = [_]u8{0} ** 8;
262 |     if (string.len <= 8) {
263 |         std.mem.copy(u8, &buf, string);
264 |         return buf;
265 |     }
266 |     const size_field_len = 4;
267 |     const offset = @intCast(u32, cld.string_table.items.len) + size_field_len;
268 | 
269 |     // Encode the offset first so a failure leaves the string table untouched.
270 |     switch (string_type) {
271 |         .symbol => std.mem.writeIntLittle(u32, buf[4..8], offset),
272 |         .header => {
273 |             buf[0] = '/';
274 |             _ = std.fmt.bufPrint(buf[1..], "{d}", .{offset}) catch return error.Overflow;
275 |         },
276 |     }
277 | 
278 |     // `getString` reads up to the first 0 byte, so every entry must be terminated.
279 |     try cld.string_table.ensureUnusedCapacity(cld.gpa, string.len + 1);
280 |     cld.string_table.appendSliceAssumeCapacity(string);
281 |     cld.string_table.appendAssumeCapacity(0);
282 |     return buf;
283 | }
284 | 
285 | /// Returns the corresponding string from a given 8-byte buffer: either the inline name,
286 | /// or the string table entry at the offset encoded by `makeString`.
287 | /// Returns an empty string when that offset is malformed or lies outside the table.
288 | /// NOTE(review): an inline name is returned as a slice of the by-value `buf` parameter;
289 | /// confirm its lifetime is sufficient for all callers.
290 | pub fn getString(cld: Cld, buf: [8]u8) []const u8 {
291 |     const offset = if (buf[0] == '/') blk: {
292 |         const offset_len = std.mem.indexOfScalar(u8, buf[1..], 0) orelse 7;
293 |         const offset = std.fmt.parseInt(u32, buf[1..][0..offset_len], 10) catch return "";
294 |         break :blk offset;
295 |     } else if (std.mem.eql(u8, buf[0..4], &.{ 0, 0, 0, 0 })) blk: {
296 |         break :blk std.mem.readIntLittle(u32, buf[4..8]);
297 |     } else return std.mem.sliceTo(&buf, 0);
298 | 
299 |     // Offsets include the 4-byte size field that precedes the strings in the file.
300 |     const size_field_len = 4;
301 |     if (offset < size_field_len) return "";
302 |     const start = offset - size_field_len;
303 |     if (start >= cld.string_table.items.len) return "";
304 |     return std.mem.sliceTo(cld.string_table.items[start..], 0);
305 | }
288 | 
289 | /// Sorts `section_headers` using `SectionSortContext`, then updates the indexes stored in `section_mapping` and the keys of `atoms` to the new order.
290 | fn sortSections(cld: *Cld) !void {
291 |     log.debug("Sorting sections. Old order:", .{});
292 |     for (cld.section_headers.items) |hdr, index| {
293 |         log.debug("  {d: >2} {s: >9}", .{ index, cld.getString(hdr.name) });
294 |     }
295 | 
296 |     // Sort the section headers in place with `SectionSortContext.lessThan`.
297 |     // The context carries `cld` so the comparison can look up section names.
298 |     var ctx: SectionSortContext = .{ .cld = cld };
299 |     std.sort.sort(Coff.SectionHeader, cld.section_headers.items, ctx, SectionSortContext.lessThan);
300 | 
301 |     // replace old section mapping indexes with the new indexes, remembering old -> new
302 |     var old_mapping = std.AutoArrayHashMap(u16, u16).init(cld.gpa);
303 |     defer old_mapping.deinit();
304 |     try old_mapping.ensureUnusedCapacity(cld.section_headers.items.len);
305 |     for (cld.section_headers.items) |hdr, index| {
306 |         const value = cld.section_mapping.getPtr(cld.getString(hdr.name)).?; // still holds the index from before the sort
307 |         const new_index = @intCast(u16, index);
308 |         old_mapping.putAssumeCapacityNoClobber(value.*, new_index);
309 |         value.* = new_index;
310 |     }
311 | 
312 |     var new_atoms: std.AutoHashMapUnmanaged(u16, *Atom) = .{}; // same atoms, keyed by the new section indexes
313 |     try new_atoms.ensureUnusedCapacity(cld.gpa, cld.atoms.count());
314 | 
315 |     var it = cld.atoms.iterator();
316 |     while (it.next()) |entry| {
317 |         const old_index = entry.key_ptr.*;
318 |         const new_index = old_mapping.get(old_index).?;
319 |         new_atoms.putAssumeCapacityNoClobber(new_index, entry.value_ptr.*);
320 |     }
321 | 
322 |     cld.atoms.deinit(cld.gpa); // frees only the old table; `new_atoms` takes its place
323 |     cld.atoms = new_atoms;
324 | 
325 |     log.debug("Sorted sections. New order:", .{});
326 |     for (cld.section_headers.items) |hdr, index| {
327 |         log.debug("  {d: >2} {s: >9}", .{ index, cld.getString(hdr.name) });
328 |     }
329 | }
330 | 
331 | /// Sort context that orders section headers by section kind (see `value`) and,
332 | /// within the same kind, by their full name.
333 | const SectionSortContext = struct {
334 |     cld: *const Cld,
335 | 
336 |     /// Returns the rank of the section kind `header` belongs to, derived from the
337 |     /// prefix of its name. Lower ranks are placed first. Panics on unknown names.
338 |     fn value(ctx: SectionSortContext, header: Coff.SectionHeader) u16 {
339 |         const ranked_prefixes = [_][]const u8{
340 |             ".text", ".data", ".bss", ".xdata", ".rdata", ".tls", ".debug", ".pdata",
341 |         };
342 |         const name = ctx.cld.getString(header.name);
343 |         for (ranked_prefixes) |prefix, rank| {
344 |             if (std.mem.startsWith(u8, name, prefix)) return @intCast(u16, rank);
345 |         }
346 |         std.debug.panic("TODO: value of section named: '{s}'\n", .{name});
347 |     }
348 | 
349 |     /// Returns true when `lhs` must be placed before `rhs`, both being of the same kind.
350 |     /// Names are compared byte-wise over their full length, so a plain section (".text")
351 |     /// precedes its grouped parts (".text$a") and grouped parts are ordered by the whole
352 |     /// suffix behind the '$'. Neither section is required to be grouped and the names
353 |     /// may differ in length.
354 |     fn isGroupedFirst(ctx: SectionSortContext, lhs: Coff.SectionHeader, rhs: Coff.SectionHeader) bool {
355 |         const lhs_name = ctx.cld.getString(lhs.name);
356 |         const rhs_name = ctx.cld.getString(rhs.name);
357 |         return std.mem.lessThan(u8, lhs_name, rhs_name);
358 |     }
359 | 
360 |     /// Comparison function handed to `std.sort.sort`: kind first, then name.
361 |     fn lessThan(ctx: SectionSortContext, lhs: Coff.SectionHeader, rhs: Coff.SectionHeader) bool {
362 |         const lhs_val = ctx.value(lhs);
363 |         const rhs_val = ctx.value(rhs);
364 |         if (lhs_val != rhs_val) return lhs_val < rhs_val;
365 |         return ctx.isGroupedFirst(lhs, rhs);
366 |     }
367 | };
376 | 
377 | /// Maps a section name onto the short name of the section it belongs to,
378 | /// which identifies the parent of a grouped section.
379 | /// e.g. `.text$X` belongs to the `.text` section. Panics on unknown names.
380 | fn sectionShortName(name: []const u8) []const u8 {
381 |     const short_names = [_][]const u8{
382 |         ".text", ".data", ".bss", ".xdata", ".rdata", ".tls", ".debug", ".pdata",
383 |     };
384 |     for (short_names) |short_name| {
385 |         if (std.mem.startsWith(u8, name, short_name)) return short_name;
386 |     }
387 |     std.debug.panic("TODO: shortname of section named: '{s}'\n", .{name});
388 | }
401 | 
402 | /// Lays out the image: counts the section headers, then assigns a virtual address and
403 | /// file offset to every section in `atoms` and a value and section number to the symbol
404 | /// of each atom (plus its aliases and contained symbols). Also fills in the derived
405 | /// optional header fields (`size_of_headers`, `base_of_code`, `size_of_*`, `size_of_image`).
406 | fn allocateAtoms(cld: *Cld) !void {
407 |     // Alignments come from the optional header instead of repeating their defaults here.
408 |     const file_alignment = cld.optional_header.file_alignment;
409 |     const section_alignment = cld.optional_header.section_alignment;
410 | 
411 |     var offset: u32 = dos_stub_size +
412 |         @sizeOf(@TypeOf(Coff.pe_magic)) +
413 |         @sizeOf(Coff.Header) +
414 |         cld.coff_header.size_of_optional_header;
415 | 
416 |     for (cld.section_headers.items) |hdr| {
417 |         if (hdr.isGrouped()) {
418 |             continue;
419 |         }
420 |         offset += 40; // each header takes up 40 bytes
421 |         cld.coff_header.number_of_sections += 1;
422 |     }
423 | 
424 |     offset = std.mem.alignForwardGeneric(u32, offset, file_alignment);
425 |     cld.optional_header.size_of_headers = offset;
426 |     log.debug("allocating sections, starting at offset: 0x{x:0>8}", .{offset});
427 | 
428 |     var file_size = offset;
429 |     var rva = std.mem.alignForwardGeneric(u32, offset, section_alignment);
430 | 
431 |     var it = cld.atoms.iterator();
432 |     while (it.next()) |entry| {
433 |         const section_index = entry.key_ptr.*;
434 |         const hdr: *Coff.SectionHeader = &cld.section_headers.items[section_index];
435 |         hdr.virtual_address = rva;
436 | 
437 |         // The map stores the last atom of each section; start from the head of the chain.
438 |         var atom: *Atom = entry.value_ptr.*.getFirst();
439 |         var raw_size: u32 = 0;
440 |         var virtual_size: u32 = 0;
441 | 
442 |         log.debug("allocating atoms in section '{s}'", .{cld.getString(hdr.name)});
443 | 
444 |         while (true) {
445 |             virtual_size = std.mem.alignForwardGeneric(u32, virtual_size, atom.alignment);
446 | 
447 |             const symbol = atom.symLoc().getSymbol(cld);
448 |             symbol.value = rva + virtual_size;
449 |             virtual_size += atom.size;
450 |             raw_size = std.mem.alignForwardGeneric(u32, virtual_size, file_alignment);
451 | 
452 |             symbol.section_number = @intCast(i16, section_index + 1); // section numbers are 1-indexed.
453 | 
454 |             log.debug("  atom '{s}' allocated from 0x{x:0>8} to 0x{x:0>8}", .{
455 |                 cld.objects.items[atom.file].getString(symbol.name),
456 |                 symbol.value,
457 |                 symbol.value + atom.size,
458 |             });
459 | 
460 |             // Aliases share the address of the atom's own symbol.
461 |             const coff = &cld.objects.items[atom.file];
462 |             for (atom.aliases.items) |sym_index| {
463 |                 const alias = &coff.symbols.items[sym_index];
464 |                 alias.value = symbol.value;
465 |                 alias.section_number = symbol.section_number;
466 |             }
467 | 
468 |             // Contained symbols live at a fixed offset from the atom's symbol.
469 |             for (atom.contained.items) |sym_at_offset| {
470 |                 const contained_sym = &coff.symbols.items[sym_at_offset.sym_index];
471 |                 contained_sym.value = symbol.value + sym_at_offset.offset;
472 |                 contained_sym.section_number = symbol.section_number;
473 |             }
474 | 
475 |             atom = atom.next orelse break;
476 |         }
477 | 
478 |         hdr.virtual_size = virtual_size;
479 |         hdr.size_of_raw_data = raw_size;
480 |         if (raw_size != 0) {
481 |             hdr.pointer_to_raw_data = file_size;
482 |         }
483 |         rva += std.mem.alignForwardGeneric(u32, virtual_size, section_alignment);
484 |         file_size += std.mem.alignForwardGeneric(u32, raw_size, file_alignment);
485 | 
486 |         const hdr_name = cld.getString(hdr.name);
487 |         if (std.mem.eql(u8, hdr_name, ".text")) {
488 |             // `base_of_code` is an address relative to the image base, not a file offset.
489 |             cld.optional_header.base_of_code = hdr.virtual_address;
490 |         }
491 |         if (hdr.characteristics & Coff.SectionHeader.flags.IMAGE_SCN_CNT_CODE != 0) {
492 |             cld.optional_header.size_of_code += hdr.size_of_raw_data;
493 |         } else if (hdr.characteristics & Coff.SectionHeader.flags.IMAGE_SCN_CNT_INITIALIZED_DATA != 0) {
494 |             cld.optional_header.size_of_initialized_data += hdr.size_of_raw_data;
495 |         } else if (hdr.characteristics & Coff.SectionHeader.flags.IMAGE_SCN_CNT_UNINITIALIZED_DATA != 0) {
496 |             cld.optional_header.size_of_uninitialized_data += hdr.size_of_raw_data;
497 |         }
498 |     }
499 | 
500 |     cld.optional_header.size_of_image = std.mem.alignForwardGeneric(u32, rva, section_alignment);
501 | }
490 | 
491 | /// Serializes the headers, sections and string table and writes them to the output file.
492 | fn emitImageFile(cld: *Cld) !void {
493 |     var writer_list = std.ArrayList(u8).init(cld.gpa);
494 |     defer writer_list.deinit();
495 |     const writer = writer_list.writer();
496 | 
497 |     // no linker-errors, so update flags
498 |     cld.coff_header.characteristics |= std.coff.IMAGE_FILE_EXECUTABLE_IMAGE;
499 |     if (cld.optional_header.magic == 0x20b) { // PE32+, the magic set by `openPath`
500 |         cld.coff_header.characteristics |= std.coff.IMAGE_FILE_LARGE_ADDRESS_AWARE;
501 |     }
502 | 
503 |     try writeDosHeader(writer);
504 |     try writeFileHeader(cld.coff_header, writer);
505 |     try writeOptionalHeader(cld.*, writer);
506 |     try writeSections(cld.*, writer);
507 |     try writeStringtable(cld.*, writer);
508 | 
509 |     try cld.file.writevAll(&[_]std.os.iovec_const{
510 |         .{ .iov_base = writer_list.items.ptr, .iov_len = writer_list.items.len },
511 |     });
512 | }
513 | 
514 | /// Writes the DOS header followed by the DOS stub program to `writer`.
515 | fn writeDosHeader(writer: anytype) !void {
516 |     var dos_header = std.mem.zeroInit(Coff.DosHeader, .{});
517 |     dos_header.magic = .{ 'M', 'Z' };
518 |     dos_header.address_of_header = dos_stub_size;
519 |     dos_header.address_of_relocation_table = @sizeOf(Coff.DosHeader);
520 |     dos_header.header_size_paragraphs = @sizeOf(Coff.DosHeader) / 16;
521 |     dos_header.file_size_pages = try std.math.divCeil(u16, dos_stub_size, 512);
522 |     dos_header.used_bytes_last_page = dos_stub_size % 512;
523 |     // TODO: Byteswap the header when target compilation is big-endian
524 |     try writer.writeAll(std.mem.asBytes(&dos_header));
525 |     try writer.writeAll(&dos_program);
526 | }
527 | 
528 | /// Writes `Coff.pe_magic` followed by the raw bytes of `header` to `writer`.
529 | fn writeFileHeader(header: Coff.Header, writer: anytype) !void {
530 |     const header_bytes = std.mem.asBytes(&header);
531 |     try writer.writeAll(&Coff.pe_magic);
532 |     try writer.writeAll(header_bytes);
533 | }
532 | 
533 | /// Writes the raw bytes of `cld.optional_header` followed by
534 | /// `number_of_data_directory` zeroed data directories.
535 | fn writeOptionalHeader(cld: Cld, writer: anytype) !void {
536 |     try writer.writeAll(std.mem.asBytes(&cld.optional_header));
537 |     // TODO: Actually write to each directory when data is known
538 |     const directories_len = @sizeOf(Coff.DataDirectory) * number_of_data_directory;
539 |     try writer.writeByteNTimes(0, directories_len);
540 | }
539 | 
540 | fn writeSections(cld: Cld, writer: anytype) !void { // writes every section header, then the code of every atom
541 |     for (cld.section_headers.items) |hdr| { // NOTE(review): grouped headers are written too, while `allocateAtoms` leaves them out of `number_of_sections` - confirm
542 |         try writer.writeAll(&hdr.name);
543 |         try writer.writeIntLittle(u32, hdr.virtual_size);
544 |         try writer.writeIntLittle(u32, hdr.virtual_address);
545 |         try writer.writeIntLittle(u32, hdr.size_of_raw_data);
546 |         try writer.writeIntLittle(u32, hdr.pointer_to_raw_data);
547 |         try writer.writeIntLittle(u32, hdr.pointer_to_relocations);
548 |         try writer.writeIntLittle(u32, hdr.pointer_to_line_numbers);
549 |         try writer.writeIntLittle(u16, hdr.number_of_relocations);
550 |         try writer.writeIntLittle(u16, hdr.number_of_line_numbers);
551 |         try writer.writeIntLittle(u32, hdr.characteristics);
552 |     }
553 | 
554 |     var it = cld.atoms.valueIterator(); // each value is the last atom of a section
555 |     while (it.next()) |last_atom| {
556 |         var atom: *Atom = last_atom.*.getFirst(); // walk the chain from its first atom
557 |         while (true) {
558 |             const size = std.mem.alignForwardGeneric(u32, atom.size, atom.alignment); // atom size rounded up to its alignment
559 |             // TODO: Perform relocations before writing
560 |             try writer.writeAll(atom.code.items);
561 |             if (size > atom.size) {
562 |                 try writer.writeByteNTimes(0, size - atom.size); // zero-fill up to the aligned size
563 |             }
564 |             atom = atom.next orelse break;
565 |         }
566 |     }
567 | }
568 | 
/// Writes the string table of `cld` to `writer`: a little-endian `u32` holding
/// the total size (including the size field itself) followed by the table's bytes.
/// An empty table is written as the size field only.
fn writeStringtable(cld: Cld, writer: anytype) !void {
    const table = cld.string_table.items;
    const size_field_len = 4;
    const total_size = @intCast(u32, table.len) + size_field_len;

    try writer.writeIntLittle(u32, total_size);
    if (table.len != 0) {
        try writer.writeAll(table);
    }
}
575 | 


--------------------------------------------------------------------------------
/src/Coff.zig:
--------------------------------------------------------------------------------
  1 | //! Represents the object file format for Windows.
  2 | //! This contains the structure as well as the ability
  3 | //! to parse such file into this structure.
  4 | const Coff = @This();
  5 | 
  6 | const std = @import("std");
  7 | const Cld = @import("Cld.zig");
  8 | const Atom = @import("Atom.zig");
  9 | const Allocator = std.mem.Allocator;
 10 | const log = std.log.scoped(.cld);
 11 | 
/// Allocator used for all memory allocated while parsing; released in `deinit`.
allocator: Allocator,
/// Handle of the object file that is read from. Not closed by `deinit`.
file: std.fs.File,
/// Path given to `init`; used in log messages.
name: []const u8,

/// COFF file header, populated by `parse`.
header: Header,
/// Section headers in file order, populated by `parseSectionTable`.
section_table: std.ArrayListUnmanaged(SectionHeader) = .{},
/// Raw data of each section, stored at the same index as its header in `section_table`.
sections: std.ArrayListUnmanaged(Section) = .{},
/// Relocations keyed by section index. Sections without relocations have no entry.
relocations: std.AutoHashMapUnmanaged(u16, []const Relocation) = .{},
/// Records of the symbol table, populated by `parseSymbolTable`.
symbols: std.ArrayListUnmanaged(Symbol) = .{},
/// Contents of the string table, without its leading 4-byte size field.
string_table: []const u8,
 22 | 
/// The COFF file header. `parse` reads the fields in declaration order,
/// each as a little-endian integer.
pub const Header = extern struct {
    /// Machine type; `parse` rejects files whose value is not a known `std.coff.MachineType`.
    machine: std.coff.MachineType,
    /// Number of entries in the section table.
    number_of_sections: u16,
    timedate_stamp: u32,
    /// File offset of the symbol table.
    pointer_to_symbol_table: u32,
    /// Number of 18-byte records in the symbol table.
    number_of_symbols: u32,
    /// Size in bytes of the optional header; `parse` skips this many bytes.
    size_of_optional_header: u16,
    characteristics: u16,
};
 32 | 
/// The MS-DOS header found at the start of an image file.
/// The linker zero-initializes this struct and only fills in the magic,
/// the size fields, the relocation table address and `address_of_header`.
///
/// NOTE(review): `numberOfRelocationItems` and `minimum_extra_paragaphs` (sic)
/// deviate from the naming of the other fields; renaming would affect users of this type.
pub const DosHeader = extern struct {
    magic: [2]u8,
    used_bytes_last_page: u16,
    file_size_pages: u16,
    numberOfRelocationItems: u16,
    header_size_paragraphs: u16,
    minimum_extra_paragaphs: u16,
    maximum_extra_paragraphs: u16,
    initial_relative_ss: u16,
    initial_sp: u16,
    checksum: u16,
    initial_ip: u16,
    initial_relative_cs: u16,
    address_of_relocation_table: u16,
    overlay_number: u16,
    reserved: [4]u16,
    oem_id: u16,
    oem_info: u16,
    reserved2: [10]u16,
    /// File offset of the header that follows the DOS stub.
    address_of_header: u32,
};
 54 | 
 55 | pub const OptionalHeader = struct {
 56 |     magic: u16,
 57 |     major_version: u8,
 58 |     minor_version: u8,
 59 |     size_of_code: u32,
 60 |     size_of_initialized_data: u32,
 61 |     size_of_uninitialized_data: u32,
 62 |     address_of_entry_point: u32,
 63 |     base_of_code: u32,
 64 |     // Windows-Specific fields
 65 |     image_base: u64,
 66 |     section_alignment: u32,
 67 |     file_alignment: u32 = 512,
 68 |     major_os_version: u16,
 69 |     minor_os_version: u16,
 70 |     major_img_version: u16,
 71 |     minor_img_version: u16,
 72 |     major_sub_version: u16,
 73 |     minor_sub_version: u16,
 74 |     /// Reserved and must always be set to '0'
 75 |     win32_version: u32 = 0,
 76 |     size_of_image: u32,
 77 |     size_of_headers: u32,
 78 |     checksum: u32,
 79 |     subsystem: u16,
 80 |     dll_characteristics: u16,
 81 |     size_of_stack_reserve: u64,
 82 |     size_of_stack_commit: u64,
 83 |     size_of_heap_reserve: u64,
 84 |     size_of_heap_commit: u64,
 85 |     /// Reserved and must always be set to '0'
 86 |     loader_flags: u32 = 0,
 87 |     /// Number of data-directory entries in the remainder of the
 88 |     /// optional header, of which each describes a location and size.
 89 |     number_of_rva_and_sizes: u32,
 90 | };
 91 | 
/// A single data-directory entry of the optional header,
/// describing a location and a size.
pub const DataDirectory = extern struct {
    virtual_address: u32,
    size: u32,
};
 96 | 
/// The 4-byte signature ("PE\0\0") that is written directly before the COFF file header.
pub const pe_magic: [4]u8 = .{ 'P', 'E', 0, 0 };
 98 | 
 99 | pub const Section = struct {
100 |     ptr: [*]const u8,
101 |     size: u32,
102 | 
103 |     fn slice(section: Section) []const u8 {
104 |         return section.ptr[0..section.size];
105 |     }
106 | 
107 |     fn fromSlice(buf: []const u8) Section {
108 |         return .{ .ptr = buf.ptr, .size = @intCast(u32, buf.len) };
109 |     }
110 | };
111 | 
/// A single relocation record of a section. `parseRelocations` reads the
/// fields in declaration order as little-endian integers (10 bytes per record).
pub const Relocation = struct {
    virtual_address: u32,
    /// Index into the symbol table of the symbol this relocation refers to.
    symbol_table_index: u32,
    /// Relocation type.
    tag: u16,
};
117 | 
/// A single record of the COFF symbol table, as decoded by `parseSymbolTable`.
pub const Symbol = extern struct {
    /// Either the inline name of the symbol, or a reference
    /// into the string table. Resolve it using `Coff.getString`.
    name: [8]u8,
    value: u32,
    /// One-based index of the section the symbol belongs to.
    /// Zero means the symbol is undefined, negative values are special.
    section_number: i16,
    /// Encoded type: the base type lives in the lowest 4 bits,
    /// the derived (complex) type in the 2 bits following it.
    sym_type: u16,
    storage_class: Class,
    /// Number of auxiliary records that directly follow this record.
    number_aux_symbols: u8,

    /// Returns the derived type of the symbol, stored in bits 4..5 of `sym_type`.
    /// Only 2 bits are read so the result is always a valid `ComplexType`.
    pub fn complexType(symbol: Symbol) ComplexType {
        return @intToEnum(ComplexType, @as(u8, @truncate(u2, symbol.sym_type >> 4)));
    }

    /// Returns the base type of the symbol, stored in the lowest 4 bits of `sym_type`.
    /// All 16 possible values are valid `BaseType` tags.
    pub fn baseType(symbol: Symbol) BaseType {
        return @intToEnum(BaseType, @as(u8, @truncate(u4, symbol.sym_type)));
    }

    /// Returns true when the type of the symbol is exactly `0x20`,
    /// i.e. a function without any base type.
    pub fn isFunction(symbol: Symbol) bool {
        return symbol.sym_type == 0x20;
    }

    /// Returns true when the symbol does not belong to any section.
    pub fn isUndefined(symbol: Symbol) bool {
        return symbol.section_number == 0;
    }

    /// Returns true when the symbol has external storage class,
    /// no section and a value of zero.
    pub fn isWeak(symbol: Symbol) bool {
        return symbol.storage_class == .IMAGE_SYM_CLASS_EXTERNAL and
            symbol.section_number == 0 and
            symbol.value == 0;
    }

    pub const ComplexType = enum(u8) {
        /// No derived type; the symbol is a simple scalar variable.
        IMAGE_SYM_DTYPE_NULL = 0,
        /// The symbol is a pointer to base type.
        IMAGE_SYM_DTYPE_POINTER = 1,
        /// The symbol is a function that returns a base type.
        IMAGE_SYM_DTYPE_FUNCTION = 2,
        /// The symbol is an array of base type.
        IMAGE_SYM_DTYPE_ARRAY = 3,
    };

    pub const BaseType = enum(u8) {
        /// No type information or unknown base type. Microsoft tools use this setting
        IMAGE_SYM_TYPE_NULL = 0,
        /// No valid type; used with void pointers and functions
        IMAGE_SYM_TYPE_VOID = 1,
        /// A character (signed byte)
        IMAGE_SYM_TYPE_CHAR = 2,
        /// A 2-byte signed integer
        IMAGE_SYM_TYPE_SHORT = 3,
        /// A natural integer type (normally 4 bytes in Windows)
        IMAGE_SYM_TYPE_INT = 4,
        /// A 4-byte signed integer
        IMAGE_SYM_TYPE_LONG = 5,
        /// A 4-byte floating-point number
        IMAGE_SYM_TYPE_FLOAT = 6,
        /// An 8-byte floating-point number
        IMAGE_SYM_TYPE_DOUBLE = 7,
        /// A structure
        IMAGE_SYM_TYPE_STRUCT = 8,
        /// A union
        IMAGE_SYM_TYPE_UNION = 9,
        /// An enumerated type
        IMAGE_SYM_TYPE_ENUM = 10,
        /// A member of enumeration (a specific value)
        IMAGE_SYM_TYPE_MOE = 11,
        /// A byte; unsigned 1-byte integer
        IMAGE_SYM_TYPE_BYTE = 12,
        /// A word; unsigned 2-byte integer
        IMAGE_SYM_TYPE_WORD = 13,
        /// An unsigned integer of natural size (normally, 4 bytes)
        IMAGE_SYM_TYPE_UINT = 14,
        /// An unsigned 4-byte integer
        IMAGE_SYM_TYPE_DWORD = 15,
    };

    pub const Class = enum(u8) {
        /// No assigned storage class.
        IMAGE_SYM_CLASS_NULL = 0,
        /// The automatic (stack) variable. The Value field specifies the stack frame offset.
        IMAGE_SYM_CLASS_AUTOMATIC = 1,
        /// A value that Microsoft tools use for external symbols. The Value field indicates the size if the section number is IMAGE_SYM_UNDEFINED (0). If the section number is not zero, then the Value field specifies the offset within the section.
        IMAGE_SYM_CLASS_EXTERNAL = 2,
        /// The offset of the symbol within the section. If the Value field is zero, then the symbol represents a section name.
        IMAGE_SYM_CLASS_STATIC = 3,
        /// A register variable. The Value field specifies the register number.
        IMAGE_SYM_CLASS_REGISTER = 4,
        /// A symbol that is defined externally.
        IMAGE_SYM_CLASS_EXTERNAL_DEF = 5,
        /// A code label that is defined within the module. The Value field specifies the offset of the symbol within the section.
        IMAGE_SYM_CLASS_LABEL = 6,
        /// A reference to a code label that is not defined.
        IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7,
        /// The structure member. The Value field specifies the n th member.
        IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8,
        /// A formal argument (parameter) of a function. The Value field specifies the n th argument.
        IMAGE_SYM_CLASS_ARGUMENT = 9,
        /// The structure tag-name entry.
        IMAGE_SYM_CLASS_STRUCT_TAG = 10,
        /// A union member. The Value field specifies the n th member.
        IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11,
        /// The Union tag-name entry.
        IMAGE_SYM_CLASS_UNION_TAG = 12,
        /// A Typedef entry.
        IMAGE_SYM_CLASS_TYPE_DEFINITION = 13,
        /// A static data declaration.
        IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14,
        /// An enumerated type tagname entry.
        IMAGE_SYM_CLASS_ENUM_TAG = 15,
        /// A member of an enumeration. The Value field specifies the n th member.
        IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16,
        /// A register parameter.
        IMAGE_SYM_CLASS_REGISTER_PARAM = 17,
        /// A bit-field reference. The Value field specifies the n th bit in the bit field.
        IMAGE_SYM_CLASS_BIT_FIELD = 18,
        /// A .bb (beginning of block) or .eb (end of block) record. The Value field is the relocatable address of the code location.
        IMAGE_SYM_CLASS_BLOCK = 100,
        /// A value that Microsoft tools use for symbol records that define the extent of a function: begin function (.bf ), end function ( .ef ), and lines in function ( .lf ). For .lf records, the Value field gives the number of source lines in the function. For .ef records, the Value field gives the size of the function code.
        IMAGE_SYM_CLASS_FUNCTION = 101,
        /// An end-of-structure entry.
        IMAGE_SYM_CLASS_END_OF_STRUCT = 102,
        /// A value that Microsoft tools, as well as traditional COFF format, use for the source-file symbol record. The symbol is followed by auxiliary records that name the file.
        IMAGE_SYM_CLASS_FILE = 103,
        /// A definition of a section (Microsoft tools use STATIC storage class instead).
        IMAGE_SYM_CLASS_SECTION = 104,
        /// A weak external. For more information, see Auxiliary Format 3: Weak Externals.
        IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105,
        /// A CLR token symbol. The name is an ASCII string that consists of the hexadecimal value of the token. For more information, see CLR Token Definition (Object Only).
        IMAGE_SYM_CLASS_CLR_TOKEN = 107,
        /// A special symbol that represents the end of function, for debugging purposes.
        IMAGE_SYM_CLASS_END_OF_FUNCTION = 0xFF,
    };
};
251 | 
/// A single entry of the section table, extended with the
/// alignment that was decoded from its `characteristics`.
pub const SectionHeader = struct {
    /// Inline name, or a reference into the string table. Resolve it using `Coff.getString`.
    name: [8]u8,
    virtual_size: u32,
    virtual_address: u32,
    size_of_raw_data: u32,
    pointer_to_raw_data: u32,
    pointer_to_relocations: u32,
    pointer_to_line_numbers: u32,
    number_of_relocations: u16,
    number_of_line_numbers: u16,
    /// Bitset of the values found in `flags`.
    characteristics: u32,

    /// Set by checking the `characteristics` flags
    alignment: u32,

    pub const flags = struct {
        /// Decodes the alignment in bytes from the given section characteristics.
        ///
        /// The `IMAGE_SCN_ALIGN_*` values are not independent bits: they form a
        /// 4-bit field (bits 20..23) that holds `log2(alignment) + 1`. Testing them
        /// as individual flags would for instance decode a 16-byte alignment
        /// (`0x00500000`) as 1 byte. A field value of zero means no alignment was
        /// specified, in which case the default of 16 bytes is used. The obsolete
        /// `IMAGE_SCN_TYPE_NO_PAD` flag is treated as an alignment of 1 byte.
        pub fn alignment(flag: u32) u32 {
            if (flag & flags.IMAGE_SCN_TYPE_NO_PAD != 0) return 1;
            const encoded = (flag >> 20) & 0xF;
            if (encoded == 0) return 16;
            return std.math.shl(u32, 1, encoded - 1);
        }
        /// The section should not be padded to the next boundary.
        /// This flag is obsolete and is replaced by IMAGE_SCN_ALIGN_1BYTES.
        /// This is valid only for object files.
        pub const IMAGE_SCN_TYPE_NO_PAD = 0x00000008;
        /// The section contains executable code.
        pub const IMAGE_SCN_CNT_CODE = 0x00000020;
        /// The section contains initialized data.
        pub const IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040;
        /// The section contains uninitialized data.
        pub const IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080;
        /// Reserved for future use.
        pub const IMAGE_SCN_LNK_OTHER = 0x00000100;
        /// The section contains comments or other information.
        /// The .drectve section has this type.
        /// This is valid for object files only.
        pub const IMAGE_SCN_LNK_INFO = 0x00000200;
        /// The section will not become part of the image.
        /// This is valid only for object files.
        pub const IMAGE_SCN_LNK_REMOVE = 0x00000800;
        /// The section contains COMDAT data.
        /// For more information, see COMDAT Sections (Object Only).
        /// This is valid only for object files.
        pub const IMAGE_SCN_LNK_COMDAT = 0x00001000;
        /// The section contains data referenced through the global pointer (GP).
        pub const IMAGE_SCN_GPREL = 0x00008000;
        /// Reserved for future use.
        pub const IMAGE_SCN_MEM_PURGEABLE = 0x00020000;
        /// Reserved for future use.
        pub const IMAGE_SCN_MEM_16BIT = 0x00020000;
        /// Reserved for future use.
        pub const IMAGE_SCN_MEM_LOCKED = 0x00040000;
        /// Reserved for future use.
        pub const IMAGE_SCN_MEM_PRELOAD = 0x00080000;
        /// Align data on a 1-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_1BYTES = 0x00100000;
        /// Align data on a 2-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_2BYTES = 0x00200000;
        /// Align data on a 4-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_4BYTES = 0x00300000;
        /// Align data on an 8-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_8BYTES = 0x00400000;
        /// Align data on a 16-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_16BYTES = 0x00500000;
        /// Align data on a 32-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_32BYTES = 0x00600000;
        /// Align data on a 64-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_64BYTES = 0x00700000;
        /// Align data on a 128-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_128BYTES = 0x00800000;
        /// Align data on a 256-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_256BYTES = 0x00900000;
        /// Align data on a 512-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_512BYTES = 0x00A00000;
        /// Align data on a 1024-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000;
        /// Align data on a 2048-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000;
        /// Align data on a 4096-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000;
        /// Align data on an 8192-byte boundary. Valid only for object files.
        pub const IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000;
        /// The section contains extended relocations.
        pub const IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000;
        /// The section can be discarded as needed.
        pub const IMAGE_SCN_MEM_DISCARDABLE = 0x02000000;
        /// The section cannot be cached.
        pub const IMAGE_SCN_MEM_NOT_CACHED = 0x04000000;
        /// The section is not pageable.
        pub const IMAGE_SCN_MEM_NOT_PAGED = 0x08000000;
        /// The section can be shared in memory.
        pub const IMAGE_SCN_MEM_SHARED = 0x10000000;
        /// The section can be executed as code.
        pub const IMAGE_SCN_MEM_EXECUTE = 0x20000000;
        /// The section can be read.
        pub const IMAGE_SCN_MEM_READ = 0x40000000;
        /// The section can be written to.
        pub const IMAGE_SCN_MEM_WRITE = 0x80000000;
    };

    /// When a section name contains the symbol `$`, it is considered
    /// a grouped section. e.g. a section named `.text$X` contributes
    /// to the `.text` section within the image.
    /// The character after the dollar sign, indicates the order when
    /// multiple (same prefix) sections were found.
    pub fn isGrouped(header: SectionHeader) bool {
        return std.mem.indexOfScalar(u8, &header.name, '$') != null;
    }

    /// For a given section, returns true when the section is a code section,
    /// meaning it contains code and is both readable and executable.
    pub fn isCodeSection(header: SectionHeader) bool {
        const required = flags.IMAGE_SCN_CNT_CODE |
            flags.IMAGE_SCN_MEM_READ |
            flags.IMAGE_SCN_MEM_EXECUTE;
        return header.characteristics & required == required;
    }
};
380 | 
/// Initializes a new `Coff` instance. The file will not be parsed yet.
/// `path` is stored as the name of the object file and is not copied.
/// It is always valid to call `deinit` on the result, also when `parse`
/// was never called or did not succeed.
pub fn init(allocator: Allocator, file: std.fs.File, path: []const u8) Coff {
    return .{
        .allocator = allocator,
        .file = file,
        .name = path,
        .header = undefined,
        // `deinit` unconditionally frees the string table, so it must never be
        // left undefined. Freeing an empty slice is a no-op.
        .string_table = &[_]u8{},
    };
}
391 | 
/// Releases all memory owned by the `Coff` instance: the section data, the
/// relocations of each section, the section and symbol tables and the string table.
/// The file handle is left open. The instance is undefined afterwards.
pub fn deinit(coff: *Coff) void {
    const allocator = coff.allocator;

    var sec_index: usize = 0;
    while (sec_index < coff.sections.items.len) : (sec_index += 1) {
        allocator.free(coff.sections.items[sec_index].slice());
        // Relocations are stored by section index and only exist
        // for sections that contain at least one relocation.
        const key = @intCast(u16, sec_index);
        if (coff.relocations.get(key)) |relocs| allocator.free(relocs);
    }

    coff.section_table.deinit(allocator);
    coff.sections.deinit(allocator);
    coff.relocations.deinit(allocator);
    coff.symbols.deinit(allocator);
    allocator.free(coff.string_table);
    coff.* = undefined;
}
408 | 
/// Parses the complete Coff file: the file header, string table, section
/// table, section data, relocations and symbol table, in that order.
/// All allocated memory is owned by the `coff` instance.
/// Returns false, after logging an error, when the machine type is unknown.
pub fn parse(coff: *Coff) !bool {
    const reader = coff.file.reader();

    const raw_machine = try reader.readIntLittle(u16);
    const machine = std.meta.intToEnum(std.coff.MachineType, raw_machine) catch {
        log.err("Given file {s} is not a coff file or contains an unknown machine", .{coff.name});
        return false;
    };

    // The remaining header fields directly follow the machine type.
    const number_of_sections = try reader.readIntLittle(u16);
    const timedate_stamp = try reader.readIntLittle(u32);
    const pointer_to_symbol_table = try reader.readIntLittle(u32);
    const number_of_symbols = try reader.readIntLittle(u32);
    const size_of_optional_header = try reader.readIntLittle(u16);
    const characteristics = try reader.readIntLittle(u16);

    coff.header = Header{
        .machine = machine,
        .number_of_sections = number_of_sections,
        .timedate_stamp = timedate_stamp,
        .pointer_to_symbol_table = pointer_to_symbol_table,
        .number_of_symbols = number_of_symbols,
        .size_of_optional_header = size_of_optional_header,
        .characteristics = characteristics,
    };

    // When the object file contains an optional header, we simply
    // skip it as object files are not interested in this data.
    if (size_of_optional_header != 0) {
        try coff.file.seekBy(@intCast(i64, size_of_optional_header));
    }

    try coff.parseStringTable();
    try coff.parseSectionTable();
    try coff.parseSectionData();
    try coff.parseRelocations();
    try coff.parseSymbolTable();

    return true;
}
442 | 
/// Reads the string table, which directly follows the symbol table, and stores
/// its contents (without the leading 4-byte size field) in `coff.string_table`.
/// The position of the file is restored before returning successfully.
///
/// When the file has no symbol table, or the size field is zero, the string
/// table is left empty. Returns `error.InvalidStringTable` when the size field
/// is smaller than the size of the field itself.
fn parseStringTable(coff: *Coff) !void {
    // Ensure `deinit` always finds a slice it is allowed to free.
    coff.string_table = &[_]u8{};
    // Without a symbol table there is no string table to be found either.
    if (coff.header.pointer_to_symbol_table == 0) return;

    const reader = coff.file.reader();
    const current_pos = try coff.file.getPos();
    try coff.file.seekTo(coff.stringTableOffset());
    const size = try reader.readIntLittle(u32);
    if (size != 0) {
        if (size < 4) {
            log.err("Invalid string table size '{d}' in file {s}", .{ size, coff.name });
            return error.InvalidStringTable;
        }
        const buffer = try coff.allocator.alloc(u8, size - 4); // account for 4 bytes of size field itself
        // Only active until ownership is handed to `coff` at the end of this block.
        errdefer coff.allocator.free(buffer);
        try reader.readNoEof(buffer);
        coff.string_table = buffer;
    }
    // The section table is read from the position we started at.
    try coff.file.seekTo(current_pos);
}
455 | 
/// Resolves the name stored in the 8-byte name field `buf`.
///
/// Three encodings are supported:
/// - `/` followed by a decimal offset into the string table,
/// - four zero bytes followed by a little-endian `u32` offset into the string table,
/// - an inline name of at most 8 bytes, padded with zeroes.
///
/// String table offsets count from the start of the table *including* its
/// 4-byte size field, whereas `coff.string_table` only holds the bytes after
/// that field. An empty string is returned when the offset cannot be parsed or
/// does not point inside the string table.
///
/// NOTE(review): for inline names the returned slice points into the `buf`
/// parameter; confirm callers do not use it beyond the lifetime of the
/// value they passed in.
pub fn getString(coff: Coff, buf: [8]u8) []const u8 {
    const offset = if (buf[0] == '/') blk: {
        const offset_len = std.mem.indexOfScalar(u8, buf[1..], 0) orelse 7;
        const parsed = std.fmt.parseInt(u32, buf[1..][0..offset_len], 10) catch return "";
        break :blk parsed;
    } else if (std.mem.eql(u8, buf[0..4], &[_]u8{ 0, 0, 0, 0 })) blk: {
        break :blk std.mem.readIntLittle(u32, buf[4..8]);
    } else return std.mem.sliceTo(&buf, 0);

    const size_field_len = 4;
    if (offset < size_field_len) return "";
    const start = offset - size_field_len;
    if (start >= coff.string_table.len) return "";
    // Bounded by the table itself, even when the final string lacks a terminator.
    return std.mem.sliceTo(coff.string_table[start..], 0);
}
468 | 
/// Reads `number_of_sections` section headers from the current position of the
/// file and appends them to `coff.section_table`.
/// Returns `error.InvalidVirtualSize` when a header has a non-zero virtual size.
fn parseSectionTable(coff: *Coff) !void {
    const section_count = coff.header.number_of_sections;
    if (section_count == 0) return;
    try coff.section_table.ensureUnusedCapacity(coff.allocator, section_count);
    const reader = coff.file.reader();

    var remaining = section_count;
    while (remaining != 0) : (remaining -= 1) {
        const entry = coff.section_table.addOneAssumeCapacity();

        var raw_name: [8]u8 = undefined;
        try reader.readNoEof(&raw_name);
        const virtual_size = try reader.readIntLittle(u32);
        const virtual_address = try reader.readIntLittle(u32);
        const size_of_raw_data = try reader.readIntLittle(u32);
        const pointer_to_raw_data = try reader.readIntLittle(u32);
        const pointer_to_relocations = try reader.readIntLittle(u32);
        const pointer_to_line_numbers = try reader.readIntLittle(u32);
        const number_of_relocations = try reader.readIntLittle(u16);
        const number_of_line_numbers = try reader.readIntLittle(u16);
        const characteristics = try reader.readIntLittle(u32);

        entry.* = SectionHeader{
            .name = raw_name,
            .virtual_size = virtual_size,
            .virtual_address = virtual_address,
            .size_of_raw_data = size_of_raw_data,
            .pointer_to_raw_data = pointer_to_raw_data,
            .pointer_to_relocations = pointer_to_relocations,
            .pointer_to_line_numbers = pointer_to_line_numbers,
            .number_of_relocations = number_of_relocations,
            .number_of_line_numbers = number_of_line_numbers,
            .characteristics = characteristics,
            // The alignment is encoded within the characteristics.
            .alignment = SectionHeader.flags.alignment(characteristics),
        };

        log.debug("Parsed section header: '{s}'", .{std.mem.sliceTo(&raw_name, 0)});
        if (entry.virtual_size != 0) {
            log.err("Invalid object file. Expected virtual size '0' but found '{d}'", .{entry.virtual_size});
            return error.InvalidVirtualSize;
        }
    }
}
502 | 
/// Returns the file offset of the string table, which
/// starts directly after the last record of the symbol table.
fn stringTableOffset(coff: Coff) u32 {
    // Each record within the symbol table is 18 bytes.
    const symbol_record_size = 18;
    const symbol_table_size = coff.header.number_of_symbols * symbol_record_size;
    return coff.header.pointer_to_symbol_table + symbol_table_size;
}
506 | 
/// Reads the zero-terminated string found at `offset` within the string table
/// directly from the file into `buf`, and returns its length.
/// The position of the file is restored afterwards.
/// Asserts `buf` is not empty.
fn parseStringFromOffset(coff: *Coff, offset: u32, buf: []u8) !usize {
    std.debug.assert(buf.len != 0);

    const saved_pos = try coff.file.getPos();
    const string_pos = coff.stringTableOffset() + offset;
    try coff.file.seekTo(string_pos);
    const maybe_str = try coff.file.reader().readUntilDelimiterOrEof(buf, 0);
    try coff.file.seekTo(saved_pos);

    // Nothing is read when the end of the file was reached immediately.
    return if (maybe_str) |str| str.len else 0;
}
518 | 
/// Parses all section data of the coff file and stores it in `coff.sections`,
/// in the same order as the section headers.
/// Sections that have no raw data within the file (`pointer_to_raw_data` is
/// zero, as is the case for uninitialized data) are zero-filled rather than
/// read from the start of the file.
/// Asserts section headers are known.
fn parseSectionData(coff: *Coff) !void {
    if (coff.header.number_of_sections == 0) return;
    std.debug.assert(coff.section_table.items.len == coff.header.number_of_sections);
    try coff.sections.ensureUnusedCapacity(coff.allocator, coff.header.number_of_sections);
    const reader = coff.file.reader();
    for (coff.section_table.items) |sec_header| {
        const buf = try coff.allocator.alloc(u8, sec_header.size_of_raw_data);
        // The buffer is only owned by `coff.sections` once appended below.
        errdefer coff.allocator.free(buf);

        if (sec_header.pointer_to_raw_data == 0) {
            std.mem.set(u8, buf, 0);
        } else {
            try coff.file.seekTo(sec_header.pointer_to_raw_data);
            try reader.readNoEof(buf);
        }
        coff.sections.appendAssumeCapacity(Section.fromSlice(buf));
    }
}
533 | 
/// Reads the relocation records of every section that contains relocations
/// and stores them in `coff.relocations`, keyed by the index of the section.
/// Asserts section headers are known.
fn parseRelocations(coff: *Coff) !void {
    if (coff.header.number_of_sections == 0) return;
    const reader = coff.file.reader();
    for (coff.section_table.items) |sec_header, index| {
        const count = sec_header.number_of_relocations;
        if (count == 0) continue;
        const sec_index = @intCast(u16, index);

        const relocs = try coff.allocator.alloc(Relocation, count);
        errdefer coff.allocator.free(relocs);

        try coff.file.seekTo(sec_header.pointer_to_relocations);
        var i: usize = 0;
        while (i < relocs.len) : (i += 1) {
            const virtual_address = try reader.readIntLittle(u32);
            const symbol_table_index = try reader.readIntLittle(u32);
            const tag = try reader.readIntLittle(u16);
            relocs[i] = Relocation{
                .virtual_address = virtual_address,
                .symbol_table_index = symbol_table_index,
                .tag = tag,
            };
        }

        try coff.relocations.putNoClobber(coff.allocator, sec_index, relocs);
    }
}
556 | 
/// Reads all records of the symbol table and appends them to `coff.symbols`,
/// so that the index of each record matches its index within the file.
///
/// Auxiliary records are stored as regular records to keep the indices intact,
/// but their bytes do not describe a symbol. The byte at the position of the
/// storage class may hold any value for such records, so an unknown value is
/// stored as `IMAGE_SYM_CLASS_NULL` there. For a regular symbol an unknown
/// storage class results in `error.InvalidStorageClass`.
fn parseSymbolTable(coff: *Coff) !void {
    if (coff.header.number_of_symbols == 0) return;

    try coff.symbols.ensureUnusedCapacity(coff.allocator, coff.header.number_of_symbols);
    try coff.file.seekTo(coff.header.pointer_to_symbol_table);
    const reader = coff.file.reader();

    // Number of auxiliary records still to follow the last regular symbol.
    var aux_remaining: u32 = 0;
    var index: u32 = 0;
    while (index < coff.header.number_of_symbols) : (index += 1) {
        var name: [8]u8 = undefined;
        try reader.readNoEof(&name);
        const value = try reader.readIntLittle(u32);
        const section_number = try reader.readIntLittle(i16);
        const sym_type = try reader.readIntLittle(u16);
        const raw_class = try reader.readByte();
        const number_aux_symbols = try reader.readByte();

        const is_aux = aux_remaining != 0;
        const storage_class = std.meta.intToEnum(Symbol.Class, raw_class) catch blk: {
            if (!is_aux) {
                log.err("Symbol {d} of file {s} has unknown storage class '{d}'", .{ index, coff.name, raw_class });
                return error.InvalidStorageClass;
            }
            break :blk Symbol.Class.IMAGE_SYM_CLASS_NULL;
        };
        if (is_aux) {
            aux_remaining -= 1;
        } else {
            aux_remaining = number_aux_symbols;
        }

        coff.symbols.appendAssumeCapacity(.{
            .name = name,
            .value = value,
            .section_number = section_number,
            .sym_type = sym_type,
            .storage_class = storage_class,
            .number_aux_symbols = number_aux_symbols,
        });
    }
}
579 | 
/// Creates an atom for each section of the object file that contains at
/// least one symbol and maps to a section of the output, and appends it to
/// the atom chain of that output section.
///
/// The symbols are first grouped by the section they are defined in. Symbols
/// located at offset zero of the section become the atom's symbol and aliases,
/// all others are stored as contained symbols together with their offset.
/// The size and alignment of the output section are updated for each atom.
///
/// Returns `error.MissingSectionSymbol` when a section has symbols, but none
/// at offset zero that can represent the atom.
///
/// NOTE(review): the atom is destroyed on error after it was appended to
/// `cld.managed_atoms`; confirm `managed_atoms` cannot observe it afterwards.
pub fn parseIntoAtoms(coff: Coff, cld: *Cld, object_index: u16) !void {
    log.debug("parsing into atoms for object file '{s}'", .{coff.name});
    const gpa = cld.gpa;
    var symbols_by_section = std.AutoHashMap(u16, std.ArrayList(u32)).init(gpa);
    defer {
        var it = symbols_by_section.valueIterator();
        while (it.next()) |syms| {
            syms.deinit();
        }
        symbols_by_section.deinit();
    }
    for (coff.section_table.items) |_, sec_index| {
        try symbols_by_section.putNoClobber(@intCast(u16, sec_index), std.ArrayList(u32).init(gpa));
    }

    {
        var sym_index: u32 = 0;
        while (sym_index < coff.header.number_of_symbols) : (sym_index += 1) {
            const symbol: Symbol = coff.symbols.items[sym_index];
            // Auxiliary records do not describe a symbol, so always step over them,
            // also when the symbol they belong to is not of interest.
            defer sym_index += symbol.number_aux_symbols;
            if (symbol.isUndefined()) continue;
            if (symbol.section_number <= 0) continue;
            // Section numbers are one-based.
            const map = symbols_by_section.getPtr(@intCast(u16, symbol.section_number - 1)) orelse continue;
            try map.append(sym_index);
        }
    }

    for (coff.section_table.items) |sec_header, sec_index| {
        const sec_name = coff.getString(sec_header.name);

        log.debug("  parsing section '{s}'", .{sec_name});

        const syms = symbols_by_section.get(@intCast(u16, sec_index)).?;
        if (syms.items.len == 0) {
            log.debug("  skipping section because no symbols", .{});
            continue;
        }

        const target_section_index = (try cld.getMatchingSection(object_index, @intCast(u16, sec_index))) orelse {
            log.info("ignored section '{s}'", .{sec_name});
            continue;
        };

        // The atom is represented by a symbol at offset zero. Verify one
        // exists before any state is created for this section.
        const has_alias = for (syms.items) |sym_index| {
            if (coff.symbols.items[sym_index].value == 0) break true;
        } else false;
        if (!has_alias) {
            log.err("section '{s}' has no symbol at offset zero", .{sec_name});
            return error.MissingSectionSymbol;
        }

        const atom = try Atom.create(gpa);
        errdefer atom.destroy(gpa);
        try cld.managed_atoms.append(gpa, atom);
        atom.file = object_index;
        atom.size = sec_header.size_of_raw_data;
        atom.alignment = sec_header.alignment;

        for (syms.items) |sym_index| {
            const symbol: Symbol = coff.symbols.items[sym_index];
            if (symbol.value > 0) {
                try atom.contained.append(gpa, .{
                    .sym_index = sym_index,
                    .offset = symbol.value,
                });
            } else try atom.aliases.append(gpa, sym_index);
        }
        atom.sym_index = atom.aliases.swapRemove(0);
        try atom.code.appendSlice(gpa, coff.sections.items[sec_index].slice());

        if (sec_header.number_of_relocations > 0) {
            atom.relocations = coff.relocations.get(@intCast(u16, sec_index)).?;
        }

        const target_section: *SectionHeader = &cld.section_headers.items[target_section_index];
        target_section.alignment = @maximum(target_section.alignment, atom.alignment);
        // Place the atom at its own alignment behind the existing data, and
        // round the result up to the alignment of the section.
        target_section.size_of_raw_data = std.mem.alignForwardGeneric(u32, std.mem.alignForwardGeneric(
            u32,
            target_section.size_of_raw_data,
            atom.alignment,
        ) + atom.size, target_section.alignment);
        target_section.virtual_size = target_section.size_of_raw_data;

        // `cld.atoms` stores the last atom of each section; append to its chain.
        if (cld.atoms.getPtr(target_section_index)) |last| {
            last.*.next = atom;
            atom.prev = last.*;
            last.* = atom;
        } else try cld.atoms.putNoClobber(gpa, target_section_index, atom);
    }
}
662 | 


--------------------------------------------------------------------------------
/src/main.zig:
--------------------------------------------------------------------------------
 1 | const std = @import("std");
 2 | const Cld = @import("Cld.zig");
 3 | const mem = std.mem;
 4 | 
 5 | const io = std.io;
 6 | 
/// General purpose allocator instance shared by the whole program,
/// configured with `stack_trace_frames = 8`.
var gpa_allocator = std.heap.GeneralPurposeAllocator(.{ .stack_trace_frames = 8 }){};
/// Allocator interface obtained from `gpa_allocator`.
const gpa = gpa_allocator.allocator();
 9 | 
/// Log handler that forwards every message to `std.log.defaultLog`, but only
/// when the `enable_logging` option of the `build_flags` module is set.
/// When the option is off, the message is dropped.
pub fn log(
    comptime level: std.log.Level,
    comptime scope: @TypeOf(.EnumLiteral),
    comptime format: []const u8,
    args: anytype,
) void {
    const logging_enabled = @import("build_flags").enable_logging;
    if (logging_enabled) std.log.defaultLog(level, scope, format, args);
}
20 | 
/// Help text describing the command-line syntax and the supported options.
/// Printed to stdout by `printHelpAndExit`.
const usage =
    \\Usage: coff [options] [files...] -o [path]
    \\
    \\Options:
    \\-h, --help                         Print this help and exit
    \\-o [path]                          Output path of the binary
;
28 | 
/// Program entry point.
///
/// Parses the command line into a list of positional arguments (the object
/// files) and an output path given with `-o`, then opens the output through
/// `Cld.openPath`, adds the object files and flushes the result.
///
/// Exits early (without returning) when help is requested, when an unknown
/// `--` option is given, when no object files are given or when the output
/// path is missing. Errors from argument allocation and from `Cld` are
/// returned to the caller.
pub fn main() !void {
    // The general purpose allocator is only torn down in Debug builds.
    defer if (@import("builtin").mode == .Debug) {
        _ = gpa_allocator.deinit();
    };

    // Arguments and all parsing state live in an arena released on return.
    var arg_arena = std.heap.ArenaAllocator.init(gpa);
    defer arg_arena.deinit();
    const arena = arg_arena.allocator();

    const argv = try std.process.argsAlloc(arena);
    defer std.process.argsFree(arena, argv);

    // Skip the first entry, which is the 'coff' binary itself.
    const args = argv[1..];
    if (args.len == 0) printHelpAndExit();

    var object_paths = std.ArrayList([]const u8).init(arena);
    var output_path: ?[]const u8 = null;

    var index: usize = 0;
    while (index < args.len) : (index += 1) {
        const current = args[index];

        const wants_help = mem.eql(u8, current, "-h") or mem.eql(u8, current, "--help");
        if (wants_help) printHelpAndExit();

        if (mem.eql(u8, current, "-o")) {
            // The output path is the argument right after '-o'; consume it here
            // so the loop's own increment moves past it.
            index += 1;
            if (index >= args.len) printErrorAndExit("Missing output path", .{});
            output_path = args[index];
            continue;
        }

        if (mem.startsWith(u8, current, "--")) {
            printErrorAndExit("Unknown argument '{s}'", .{current});
        }

        // Anything else is treated as a positional argument.
        try object_paths.append(current);
    }

    if (object_paths.items.len == 0) {
        printErrorAndExit("Expected one or more object files, none were given", .{});
    }

    const out_path = output_path orelse printErrorAndExit("Missing output path", .{});

    var cld = try Cld.openPath(gpa, out_path, .{});
    defer cld.deinit();

    try cld.addObjects(object_paths.items);
    try cld.flush();
}
80 | 
/// Prints the `usage` text followed by a newline to stdout, then terminates
/// the process with exit code 0. A failed write is ignored.
fn printHelpAndExit() noreturn {
    const stdout = io.getStdOut().writer();
    stdout.print("{s}\n", .{usage}) catch {};
    std.process.exit(0);
}
85 | 
/// Formats `fmt` with `args` onto stderr, appends a newline, then terminates
/// the process with exit code 1. Failed writes are ignored; the newline is
/// still attempted when writing the message fails.
fn printErrorAndExit(comptime fmt: []const u8, args: anytype) noreturn {
    const stderr = io.getStdErr().writer();
    stderr.print(fmt, args) catch {};
    stderr.writeByte('\n') catch {};
    std.process.exit(1);
}
92 | 


--------------------------------------------------------------------------------