├── .gitignore ├── README.md └── src ├── main.zig ├── zipfile.zig ├── Hexdumper.zig ├── streaming.zig └── hexdump-zip.zig /.gitignore: -------------------------------------------------------------------------------- 1 | /.zig-cache/ 2 | /zig-out/ 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hexdump-zip 2 | produce an annotated hexdump of a zipfile 3 | 4 | ## Build 5 | 6 | Download or install [zig](http://ziglang.org/). 7 | (Check the commit log of this repo to see which version was used recently.) 8 | 9 | ``` 10 | zig build 11 | ``` 12 | 13 | Executable binary is at `./zig-out/bin/hexdump-zip`. 14 | 15 | ## Run 16 | 17 | ``` 18 | hexdump-zip INPUT.zip OUTPUT.hex 19 | ``` 20 | 21 | To print to stdout, you can give `/dev/stdout` as the output path. 22 | -------------------------------------------------------------------------------- /src/main.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | const ZipfileDumper = @import("./hexdump-zip.zig").ZipfileDumper; 4 | const StreamingDumper = @import("./streaming.zig").StreamingDumper; 5 | 6 | fn usage() !void { 7 | std.log.err( 8 | \\usage: [options] INPUT.zip OUTPUT.hex 9 | \\ 10 | \\options: 11 | \\ --streaming 12 | \\ Enable streaming read mode. 
13 | , .{}); 14 | return error.Usage; 15 | } 16 | 17 | pub fn main() !void { 18 | var gpa_instance: std.heap.GeneralPurposeAllocator(.{}) = .{}; 19 | defer _ = gpa_instance.deinit(); 20 | const gpa = gpa_instance.allocator(); 21 | 22 | var args = try std.process.argsWithAllocator(gpa); 23 | defer args.deinit(); 24 | _ = args.next() orelse return usage(); 25 | 26 | var is_streaming = false; 27 | var input_path_str = args.next() orelse return usage(); 28 | if (std.mem.eql(u8, input_path_str, "--streaming")) { 29 | is_streaming = true; 30 | input_path_str = args.next() orelse return usage(); 31 | } 32 | const output_path_str = args.next() orelse return usage(); 33 | if (args.next() != null) return usage(); 34 | 35 | var input_file = try std.fs.cwd().openFile(input_path_str, .{}); 36 | defer input_file.close(); 37 | 38 | var output_file = try std.fs.cwd().createFile(output_path_str, .{}); 39 | defer output_file.close(); 40 | 41 | if (is_streaming) { 42 | var dumper: StreamingDumper = .{ 43 | .input_file = input_file, 44 | .output_file = output_file, 45 | }; 46 | try dumper.doIt(); 47 | } else { 48 | var zipfile_dumper: ZipfileDumper = undefined; 49 | try zipfile_dumper.init(input_file, output_file, gpa); 50 | defer zipfile_dumper.deinit(); 51 | try zipfile_dumper.doIt(); 52 | } 53 | 54 | return std.process.cleanExit(); 55 | } 56 | -------------------------------------------------------------------------------- /src/zipfile.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | 3 | const Hexdumper = @import("./Hexdumper.zig"); 4 | 5 | pub const zip64_eocdr_size = 56; 6 | pub const zip64_eocdl_size = 20; 7 | pub const eocdr_size = 22; 8 | pub const eocdr_search_size: u64 = zip64_eocdl_size + 0xffff + eocdr_size; 9 | 10 | /// local file header signature 11 | pub const lfh_signature = 0x04034b50; 12 | 13 | /// optional data descriptor optional signature 14 | pub const oddo_signature = 0x08074b50; 15 | pub 
const oddo_signature_bytes = [4]u8{ 0x50, 0x4b, 0x07, 0x08 }; 16 | 17 | /// central file header signature 18 | pub const cfh_signature = 0x02014b50; 19 | 20 | /// zip64 end of central dir signature 21 | pub const zip64_eocdr_signature = 0x06064b50; 22 | 23 | /// zip64 end of central dir locator signature 24 | pub const zip64_eocdl_signature = 0x07064b50; 25 | 26 | /// end of central dir signature 27 | pub const eocdr_signature = 0x06054b50; 28 | 29 | pub const ExtraFieldIterator = struct { 30 | extra_fields: []const u8, 31 | cursor: u16 = 0, 32 | pub fn next(self: *@This()) !?ExtraField { 33 | if (self.cursor >= self.extra_fields.len -| 3) return null; 34 | const tag = readInt16(self.extra_fields, self.cursor); 35 | const size = readInt16(self.extra_fields, self.cursor + 2); 36 | if (self.cursor + 4 > self.extra_fields.len -| size) return error.ExtraFieldSizeExceedsExtraFieldsBuffer; 37 | const entire_buffer = self.extra_fields[self.cursor .. self.cursor + 4 + size]; 38 | self.cursor += 4 + size; 39 | return .{ 40 | .tag = tag, 41 | .entire_buffer = entire_buffer, 42 | }; 43 | } 44 | pub fn trailingPadding(self: @This()) []const u8 { 45 | return self.extra_fields[self.cursor..]; 46 | } 47 | }; 48 | pub const ExtraField = struct { 49 | tag: u16, 50 | entire_buffer: []const u8, 51 | }; 52 | 53 | fn dumpExtraFieldHeader(dumper: *Hexdumper, entire_buffer: []const u8, cursor: *usize) !void { 54 | dumper.indent(); // defer outdent after this. 
55 | try dumper.readStructField(entire_buffer, 2, cursor, 2, "Tag"); 56 | try dumper.readStructField(entire_buffer, 2, cursor, 2, "Size"); 57 | } 58 | 59 | pub fn dumpExtraFields( 60 | dumper: *Hexdumper, 61 | offset: u64, 62 | buffer: []const u8, 63 | out_is_zip64: ?*bool, 64 | compressed_size: *u64, 65 | uncompressed_size: *u64, 66 | local_file_header_offset: ?*u64, 67 | disk_number: ?*u32, 68 | ) !void { 69 | var it = ExtraFieldIterator{ .extra_fields = buffer }; 70 | 71 | while (try it.next()) |extra_field| { 72 | const field_buffer = extra_field.entire_buffer; 73 | const section_offset = offset + @as(u64, @intCast(field_buffer.ptr - buffer.ptr)); 74 | var cursor: usize = 0; 75 | defer dumper.outdent(); // indented in dumpExtraFieldHeader 76 | switch (extra_field.tag) { 77 | 0x0001 => { 78 | try dumper.writeSectionHeader(section_offset, "ZIP64 Extended Information Extra Field (0x{x:0>4})", .{extra_field.tag}); 79 | try dumpExtraFieldHeader(dumper, field_buffer, &cursor); 80 | // APPNOTE 4.5.3 stores the original (uncompressed) size before the compressed size, so it must be consumed first. 81 | if (out_is_zip64) |is_zip64| is_zip64.* = true; 82 | const max_size = 8; 83 | if (uncompressed_size.* == 0xffffffff) { 84 | if (cursor + 8 > field_buffer.len) return error.InternalBufferOverflow; 85 | uncompressed_size.* = readInt64(field_buffer, cursor); 86 | try dumper.readStructField(field_buffer, max_size, &cursor, 8, "Uncompressed Size"); 87 | } 88 | if (compressed_size.* == 0xffffffff) { 89 | if (cursor + 8 > field_buffer.len) return error.InternalBufferOverflow; 90 | compressed_size.* = readInt64(field_buffer, cursor); 91 | try dumper.readStructField(field_buffer, max_size, &cursor, 8, "Compressed Size"); 92 | } 93 | if (local_file_header_offset != null and local_file_header_offset.?.* == 0xffffffff) { 94 | if (cursor + 8 > field_buffer.len) return error.InternalBufferOverflow; 95 | local_file_header_offset.?.* = readInt64(field_buffer, cursor); 96 | try dumper.readStructField(field_buffer, max_size, &cursor, 8, "Local File Header Offset"); 97 | } 98 | if (disk_number != null
and disk_number.?.* == 0xffffffff) { 99 | if (cursor + 4 > field_buffer.len) return error.InternalBufferOverflow; 100 | disk_number.?.* = readInt32(field_buffer, cursor); 101 | try dumper.readStructField(field_buffer, max_size, &cursor, 4, "Disk Number"); 102 | } 103 | }, 104 | 0x5455 => { 105 | try dumper.writeSectionHeader(section_offset, "Info-ZIP Universal Time (0x{x:0>4})", .{extra_field.tag}); 106 | try dumpExtraFieldHeader(dumper, field_buffer, &cursor); 107 | 108 | // See the Info-ZIP source code proginfo/extrafld.txt 109 | const has_mtime = 1; 110 | if (field_buffer[cursor..].len >= 5 and field_buffer[cursor] & has_mtime != 0) { 111 | const max_size = 4; 112 | try dumper.readStructField(field_buffer, max_size, &cursor, 1, "flags"); 113 | try dumper.readStructField(field_buffer, max_size, &cursor, 4, "mtime"); 114 | } 115 | }, 116 | 0x7875 => { 117 | try dumper.writeSectionHeader(section_offset, "Info-ZIP Unix 32-bit uid/gid (0x{x:0>4})", .{extra_field.tag}); 118 | try dumpExtraFieldHeader(dumper, field_buffer, &cursor); 119 | 120 | // See the Info-ZIP source code proginfo/extrafld.txt 121 | if (field_buffer[cursor..].len >= 11 and 122 | field_buffer[cursor] == 1 and // version 123 | field_buffer[cursor + 1] == 4 and // UIDSize 124 | field_buffer[cursor + 6] == 4 and // GIDSize 125 | true) 126 | { 127 | const max_size = 4; 128 | try dumper.readStructField(field_buffer, max_size, &cursor, 1, "version (always 1)"); 129 | try dumper.readStructField(field_buffer, max_size, &cursor, 1, "UIDSize (always 4)"); 130 | try dumper.readStructField(field_buffer, max_size, &cursor, 4, "UID"); 131 | try dumper.readStructField(field_buffer, max_size, &cursor, 1, "GIDSize (always 4)"); 132 | try dumper.readStructField(field_buffer, max_size, &cursor, 4, "GID"); 133 | } 134 | }, 135 | 0x7075 => { 136 | try dumper.writeSectionHeader(section_offset, "Info-ZIP Unicode Path (0x{x:0>4})", .{extra_field.tag}); 137 | try dumpExtraFieldHeader(dumper, field_buffer, &cursor); 138 | 
139 | // See the Info-ZIP source code proginfo/extrafld.txt 140 | if (field_buffer[cursor..].len >= 5 and field_buffer[cursor] == 1) { 141 | const max_size = 4; 142 | try dumper.readStructField(field_buffer, max_size, &cursor, 1, "version (always 1)"); 143 | try dumper.readStructField(field_buffer, max_size, &cursor, 4, "Old Name CRC32"); 144 | try dumper.writeBlob(field_buffer[cursor..], .{ .encoding = .utf8 }); 145 | cursor = field_buffer.len; 146 | } 147 | }, 148 | 0x000a => { 149 | try dumper.writeSectionHeader(section_offset, "NTFS (0x{x:0>4})", .{extra_field.tag}); 150 | try dumpExtraFieldHeader(dumper, field_buffer, &cursor); 151 | 152 | // This is documented in APPNOTE since version 4.5. 153 | if (field_buffer[cursor..].len >= 32 and 154 | readInt32(field_buffer, cursor) == 0 and // Reserved 155 | readInt16(field_buffer, cursor + 4) == 1 and // Tag for attribute #1 156 | readInt16(field_buffer, cursor + 6) == 24 and // Size of attribute #1, in bytes (24) 157 | true) 158 | { 159 | const max_size = 8; 160 | try dumper.readStructField(field_buffer, max_size, &cursor, 4, "Reserved (always 0)"); 161 | try dumper.readStructField(field_buffer, max_size, &cursor, 2, "Tag (always 1)"); 162 | try dumper.readStructField(field_buffer, max_size, &cursor, 2, "Size (always 24)"); 163 | try dumper.readStructField(field_buffer, max_size, &cursor, 8, "Mtime"); 164 | try dumper.readStructField(field_buffer, max_size, &cursor, 8, "Atime"); 165 | try dumper.readStructField(field_buffer, max_size, &cursor, 8, "Ctime"); 166 | } 167 | }, 168 | else => { 169 | try dumper.writeSectionHeader(section_offset, "Unknown Extra Field (0x{x:0>4})", .{extra_field.tag}); 170 | try dumpExtraFieldHeader(dumper, field_buffer, &cursor); 171 | }, 172 | } 173 | 174 | const extra = field_buffer[cursor..]; 175 | if (extra.len > 0) { 176 | try dumper.writeBlob(extra, .{}); 177 | } 178 | } 179 | 180 | const padding = it.trailingPadding(); 181 | if (padding.len > 0) { 182 | const section_offset = offset 
+ @as(u64, @intCast(padding.ptr - buffer.ptr)); 183 | try dumper.writeSectionHeader(section_offset, "(unused space)", .{}); 184 | dumper.indent(); 185 | defer dumper.outdent(); 186 | try dumper.writeBlob(padding, .{}); 187 | } 188 | } 189 | 190 | fn readInt16(buffer: []const u8, offset: usize) u16 { 191 | return std.mem.readInt(u16, buffer[offset..][0..2], .little); 192 | } 193 | fn readInt32(buffer: []const u8, offset: usize) u32 { 194 | return std.mem.readInt(u32, buffer[offset..][0..4], .little); 195 | } 196 | fn readInt64(buffer: []const u8, offset: usize) u64 { 197 | return std.mem.readInt(u64, buffer[offset..][0..8], .little); 198 | } 199 | -------------------------------------------------------------------------------- /src/Hexdumper.zig: -------------------------------------------------------------------------------- 1 | output: std.io.AnyWriter, 2 | indentation: u8 = 0, 3 | 4 | const std = @import("std"); 5 | const assert = std.debug.assert; 6 | const Hexdumper = @This(); 7 | 8 | pub fn writeSectionHeader(self: *Hexdumper, offset: u64, comptime fmt: []const u8, args: anytype) !void { 9 | try self.printIndentation(); 10 | try self.printf(":0x{x} ; ", .{offset}); 11 | try self.printf(fmt, args); 12 | try self.write("\n"); 13 | } 14 | 15 | pub fn readStructField( 16 | self: *Hexdumper, 17 | buffer: []const u8, 18 | comptime max_size: usize, 19 | cursor: *usize, 20 | comptime size: usize, 21 | name: []const u8, 22 | ) !void { 23 | comptime assert(size <= max_size); 24 | const decimal_width_str = comptime switch (max_size) { 25 | 1 => "3", 26 | 2 => "5", 27 | 4 => "10", 28 | 8 => "20", 29 | else => unreachable, 30 | }; 31 | 32 | try self.printIndentation(); 33 | switch (size) { 34 | 1 => { 35 | const value = buffer[cursor.*]; 36 | try self.printf( // 37 | "" ++ 38 | "{x:0>2}" ++ (" " ** (max_size - size)) ++ 39 | " ; \"{s}\"" ++ (" " ** (max_size - size)) ++ 40 | " ; {d:0>" ++ decimal_width_str ++ "}" ++ 41 | " ; 0x{x:0>2}" ++ (" " ** (max_size - size)) ++ 42 | 
" ; {s}" ++ 43 | "\n", .{ 44 | buffer[cursor.* + 0], 45 | cp437[buffer[cursor.* + 0]], 46 | value, 47 | value, 48 | name, 49 | }); 50 | }, 51 | 2 => { 52 | const value = readInt16(buffer, cursor.*); 53 | try self.printf( // 54 | "" ++ 55 | "{x:0>2} {x:0>2}" ++ (" " ** (max_size - size)) ++ 56 | " ; \"{s}{s}\"" ++ (" " ** (max_size - size)) ++ 57 | " ; {d:0>" ++ decimal_width_str ++ "}" ++ 58 | " ; 0x{x:0>4}" ++ (" " ** (max_size - size)) ++ 59 | " ; {s}" ++ 60 | "\n", .{ 61 | buffer[cursor.* + 0], 62 | buffer[cursor.* + 1], 63 | cp437[buffer[cursor.* + 0]], 64 | cp437[buffer[cursor.* + 1]], 65 | value, 66 | value, 67 | name, 68 | }); 69 | }, 70 | 4 => { 71 | const value = readInt32(buffer, cursor.*); 72 | try self.printf( // 73 | "" ++ 74 | "{x:0>2} {x:0>2} {x:0>2} {x:0>2}" ++ (" " ** (max_size - size)) ++ 75 | " ; \"{s}{s}{s}{s}\"" ++ (" " ** (max_size - size)) ++ 76 | " ; {d:0>" ++ decimal_width_str ++ "}" ++ 77 | " ; 0x{x:0>8}" ++ (" " ** (max_size - size)) ++ 78 | " ; {s}" ++ 79 | "\n", .{ 80 | buffer[cursor.* + 0], 81 | buffer[cursor.* + 1], 82 | buffer[cursor.* + 2], 83 | buffer[cursor.* + 3], 84 | cp437[buffer[cursor.* + 0]], 85 | cp437[buffer[cursor.* + 1]], 86 | cp437[buffer[cursor.* + 2]], 87 | cp437[buffer[cursor.* + 3]], 88 | value, 89 | value, 90 | name, 91 | }); 92 | }, 93 | 8 => { 94 | const value = readInt64(buffer, cursor.*); 95 | try self.printf( // 96 | "" ++ 97 | "{x:0>2} {x:0>2} {x:0>2} {x:0>2} {x:0>2} {x:0>2} {x:0>2} {x:0>2}" ++ (" " ** (max_size - size)) ++ 98 | " ; \"{s}{s}{s}{s}{s}{s}{s}{s}\"" ++ (" " ** (max_size - size)) ++ 99 | " ; {d:0>" ++ decimal_width_str ++ "}" ++ 100 | " ; 0x{x:0>16}" ++ (" " ** (max_size - size)) ++ 101 | " ; {s}" ++ 102 | "\n", .{ 103 | buffer[cursor.* + 0], 104 | buffer[cursor.* + 1], 105 | buffer[cursor.* + 2], 106 | buffer[cursor.* + 3], 107 | buffer[cursor.* + 4], 108 | buffer[cursor.* + 5], 109 | buffer[cursor.* + 6], 110 | buffer[cursor.* + 7], 111 | cp437[buffer[cursor.* + 0]], 112 | cp437[buffer[cursor.* 
+ 1]], 113 | cp437[buffer[cursor.* + 2]], 114 | cp437[buffer[cursor.* + 3]], 115 | cp437[buffer[cursor.* + 4]], 116 | cp437[buffer[cursor.* + 5]], 117 | cp437[buffer[cursor.* + 6]], 118 | cp437[buffer[cursor.* + 7]], 119 | value, 120 | value, 121 | name, 122 | }); 123 | }, 124 | else => unreachable, 125 | } 126 | cursor.* += size; 127 | } 128 | 129 | pub fn indent(self: *Hexdumper) void { 130 | self.indentation += 1; 131 | } 132 | pub fn outdent(self: *Hexdumper) void { 133 | self.indentation -= 1; 134 | } 135 | pub fn printIndentation(self: *Hexdumper) !void { 136 | var i: u8 = 0; 137 | while (i < self.indentation) : (i += 1) { 138 | try self.write(" "); 139 | } 140 | } 141 | pub fn write(self: *Hexdumper, str: []const u8) !void { 142 | try self.output.writeAll(str); 143 | } 144 | pub fn printf(self: *Hexdumper, comptime fmt: []const u8, args: anytype) !void { 145 | try self.output.print(fmt, args); 146 | } 147 | 148 | pub const PartialUtf8State = struct { 149 | codepoint: [4]u8 = undefined, 150 | bytes_saved: u2 = 0, 151 | bytes_remaining: u2 = 0, 152 | }; 153 | pub const BlobConfig = struct { 154 | row_length: u16 = 16, 155 | spaces: bool = true, 156 | encoding: enum { 157 | none, 158 | cp437, 159 | utf8, 160 | } = .none, 161 | }; 162 | 163 | pub fn writeBlob(self: *Hexdumper, buffer: []const u8, config: BlobConfig) !void { 164 | var partial_utf8_state = PartialUtf8State{}; 165 | try self.writeBlobPart(buffer, config, true, true, &partial_utf8_state); 166 | } 167 | pub fn writeBlobPart(self: *Hexdumper, buffer: []const u8, config: BlobConfig, is_beginning: bool, is_end: bool, partial_utf8_state: *PartialUtf8State) !void { 168 | var cursor: usize = 0; 169 | while (cursor < buffer.len) : (cursor += config.row_length) { 170 | const row_end = @min(cursor + config.row_length, buffer.len); 171 | try self.writeBlobRow( 172 | buffer[cursor..row_end], 173 | config, 174 | is_beginning and cursor == 0, 175 | is_end and row_end == buffer.len, 176 | partial_utf8_state, 177 | 
); 178 | } 179 | } 180 | 181 | fn writeBlobRow(self: *Hexdumper, row: []const u8, config: BlobConfig, is_beginning: bool, is_end: bool, partial_utf8_state: *PartialUtf8State) !void { 182 | assert(row.len > 0); 183 | 184 | try self.printIndentation(); 185 | 186 | // Hex representation. 187 | for (row, 0..) |b, i| { 188 | if (config.spaces and i > 0) try self.write(" "); 189 | try self.printf("{x:0>2}", .{b}); 190 | } 191 | 192 | if (!is_beginning and config.encoding != .none) { 193 | // Fill out the end of the last row with spaces. 194 | var i: usize = row.len; 195 | while (i < config.row_length) : (i += 1) { 196 | assert(is_end); 197 | try self.write(" "); 198 | } 199 | } 200 | switch (config.encoding) { 201 | .none => {}, 202 | .cp437 => { 203 | try self.write(" ; cp437\""); 204 | for (row) |c| { 205 | switch (c) { 206 | '"', '\\' => { 207 | const content = [2]u8{ '\\', c }; 208 | try self.write(&content); 209 | }, 210 | else => { 211 | try self.write(cp437[c]); 212 | }, 213 | } 214 | } 215 | try self.write("\""); 216 | }, 217 | .utf8 => { 218 | try self.write(" ; utf8\""); 219 | 220 | // Input is utf8; output is utf8. 221 | var i: usize = 0; 222 | if (partial_utf8_state.bytes_remaining > 0) { 223 | // Finish writing partial codepoint. Widen to usize first: saved + remaining is 4 for a 4-byte sequence, which overflows u2. 224 | const saved: usize = partial_utf8_state.bytes_saved; 225 | const available: usize = @min(partial_utf8_state.bytes_remaining, row.len); 226 | while (i < available) : (i += 1) partial_utf8_state.codepoint[saved + i] = row[i]; 227 | // A short row can end before the codepoint does; that is a truncated sequence, so emit the error character instead of decoding it. 228 | if (available < partial_utf8_state.bytes_remaining) try self.write(error_character) else try self.writeEscapedCodepoint(partial_utf8_state.codepoint[0 .. saved + available]); 229 | partial_utf8_state.* = .{}; 230 | } 231 | 232 | while (i < row.len) : (i += 1) { 233 | const utf8_length = std.unicode.utf8ByteSequenceLength(row[i]) catch { 234 | // Invalid utf8 start byte. 235 | try self.write(error_character); 236 | continue; 237 | }; 238 | 239 | if (i + utf8_length > row.len) { 240 | // Save partial codepoint for next row.
241 | if (is_end) { 242 | // There is no next row. 243 | try self.write(error_character); 244 | break; 245 | } 246 | var j: usize = 0; 247 | while (j < row.len - i) : (j += 1) { 248 | partial_utf8_state.codepoint[j] = row[i + j]; 249 | } 250 | partial_utf8_state.bytes_saved = @intCast(j); 251 | partial_utf8_state.bytes_remaining = @intCast(utf8_length - j); 252 | break; 253 | } 254 | 255 | // We have a complete codepoint on this row. 256 | try self.writeEscapedCodepoint(row[i .. i + utf8_length]); 257 | i += utf8_length - 1; 258 | } 259 | try self.write("\""); 260 | }, 261 | } 262 | try self.write("\n"); 263 | } 264 | 265 | fn writeEscapedCodepoint(self: *Hexdumper, byte_sequence: []const u8) !void { 266 | const codepoint = std.unicode.utf8Decode(byte_sequence) catch { 267 | // invalid utf8 sequence becomes a single error character. 268 | return self.write(error_character); 269 | }; 270 | // some special escapes 271 | switch (codepoint) { 272 | '\n' => return self.write("\\n"), 273 | '\r' => return self.write("\\r"), 274 | '\t' => return self.write("\\t"), 275 | '"' => return self.write("\\\""), 276 | '\\' => return self.write("\\\\"), 277 | else => {}, 278 | } 279 | // numeric escapes 280 | switch (codepoint) { 281 | // ascii control codes 282 | 0...0x1f, 0x7f => return self.printf("\\x{x:0>2}", .{codepoint}), 283 | // unicode newline characters: NEL (U+0085), LINE SEPARATOR (U+2028), PARAGRAPH SEPARATOR (U+2029) 284 | 0x85, 0x2028, 0x2029 => return self.printf("\\u{x:0>4}", .{codepoint}), 285 | else => {}, 286 | } 287 | // literal character 288 | return self.write(byte_sequence); 289 | } 290 | 291 | fn readInt16(buffer: []const u8, offset: usize) u16 { 292 | return std.mem.readInt(u16, buffer[offset..][0..2], .little); 293 | } 294 | fn readInt32(buffer: []const u8, offset: usize) u32 { 295 | return std.mem.readInt(u32, buffer[offset..][0..4], .little); 296 | } 297 | fn readInt64(buffer: []const u8, offset: usize) u64 { 298 | return std.mem.readInt(u64, buffer[offset..][0..8], .little); 299 | } 300 | 301 | const error_character =
"\xef\xbf\xbd"; 302 | 303 | const cp437 = [_][]const u8{ 304 | "�", "☺", "☻", "♥", "♦", "♣", "♠", "•", "◘", "○", "◙", "♂", "♀", "♪", "♫", "☼", 305 | "►", "◄", "↕", "‼", "¶", "§", "▬", "↨", "↑", "↓", "→", "←", "∟", "↔", "▲", "▼", 306 | " ", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", 307 | "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", 308 | "@", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", 309 | "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_", 310 | "`", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", 311 | "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "⌂", 312 | "Ç", "ü", "é", "â", "ä", "à", "å", "ç", "ê", "ë", "è", "ï", "î", "ì", "Ä", "Å", 313 | "É", "æ", "Æ", "ô", "ö", "ò", "û", "ù", "ÿ", "Ö", "Ü", "¢", "£", "¥", "₧", "ƒ", 314 | "á", "í", "ó", "ú", "ñ", "Ñ", "ª", "º", "¿", "⌐", "¬", "½", "¼", "¡", "«", "»", 315 | "░", "▒", "▓", "│", "┤", "╡", "╢", "╖", "╕", "╣", "║", "╗", "╝", "╜", "╛", "┐", 316 | "└", "┴", "┬", "├", "─", "┼", "╞", "╟", "╚", "╔", "╩", "╦", "╠", "═", "╬", "╧", 317 | "╨", "╤", "╥", "╙", "╘", "╒", "╓", "╫", "╪", "┘", "┌", "█", "▄", "▌", "▐", "▀", 318 | "α", "ß", "Γ", "π", "Σ", "σ", "µ", "τ", "Φ", "Θ", "Ω", "δ", "∞", "φ", "ε", "∩", 319 | "≡", "±", "≥", "≤", "⌠", "⌡", "÷", "≈", "°", "∙", "·", "√", "ⁿ", "²", "■", " ", 320 | }; 321 | -------------------------------------------------------------------------------- /src/streaming.zig: -------------------------------------------------------------------------------- 1 | const std = @import("std"); 2 | const assert = std.debug.assert; 3 | 4 | const Hexdumper = @import("./Hexdumper.zig"); 5 | const z = @import("./zipfile.zig"); 6 | 7 | pub const StreamingDumper = struct { 8 | input_file: std.fs.File, 9 | input: @TypeOf(std.io.bufferedReader(@as(std.fs.File.Reader, undefined))) = undefined, 10 | output_file: std.fs.File, 11 | 
output: @TypeOf(std.io.bufferedWriter(@as(std.fs.File.Writer, undefined))) = undefined, 12 | // have to store this in the struct, because .any() take a pointer to the writer. 13 | output_writer: @TypeOf(std.io.bufferedWriter(@as(std.fs.File.Writer, undefined))).Writer = undefined, 14 | dumper: Hexdumper = undefined, 15 | put_back_signature: ?[4]u8 = null, 16 | offset: u64 = 0, 17 | indentation: u2 = 0, 18 | 19 | const Self = @This(); 20 | 21 | pub fn doIt(self: *Self) !void { 22 | self.input = std.io.bufferedReader(self.input_file.reader()); 23 | self.output = std.io.bufferedWriter(self.output_file.writer()); 24 | self.output_writer = self.output.writer(); 25 | self.dumper = .{ .output = self.output_writer.any() }; 26 | 27 | // Not sure how to make this an enum. 28 | var position: enum { 29 | start, 30 | local_stuff, 31 | central_directory, 32 | } = .start; 33 | 34 | while (true) { 35 | const signature = try self.peekSignature(); 36 | switch (signature) { 37 | z.lfh_signature => { 38 | if (!(position == .start or position == .local_stuff)) return error.WrongSignature; 39 | position = .local_stuff; 40 | try self.consumeLocalFile(); 41 | }, 42 | z.cfh_signature => { 43 | if (position == .local_stuff) { 44 | position = .central_directory; 45 | } else if (position != .central_directory) return error.WrongSignature; 46 | try self.consumeCentralFileHeader(); 47 | }, 48 | z.zip64_eocdr_signature => { 49 | if (!(position == .start or position == .central_directory)) return error.WrongSignature; 50 | try self.consumeZip64End(); 51 | break; 52 | }, 53 | z.eocdr_signature => { 54 | if (!(position == .start or position == .central_directory)) return error.WrongSignature; 55 | try self.consumeEnd(); 56 | break; 57 | }, 58 | else => return error.WrongSignature, 59 | } 60 | } 61 | 62 | // Assert EOF. 
63 | if (self.input.reader().readByte()) |_| return error.ExpectedEof else |err| if (err != error.EndOfStream) return err; 64 | 65 | try self.output.flush(); 66 | } 67 | 68 | fn consumeLocalFile(self: *Self) !void { 69 | const offset = self.offset; 70 | var lfh_buffer: [30]u8 = undefined; 71 | try self.readNoEof(&lfh_buffer); 72 | 73 | // Dump the struct. 74 | { 75 | if (offset != 0) try self.dumper.write("\n"); 76 | try self.dumper.writeSectionHeader(offset, "Local File Header", .{}); 77 | var lfh_cursor: usize = 0; 78 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "Local file header signature"); 79 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "Version needed to extract (minimum)"); 80 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "General purpose bit flag"); 81 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "Compression method"); 82 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "File last modification time"); 83 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "File last modification date"); 84 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "CRC-32"); 85 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "Compressed size"); 86 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "Uncompressed size"); 87 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "File name length (n)"); 88 | try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "Extra field length (m)"); 89 | } 90 | 91 | // Extract meaningful information from the header. 
92 | const general_purpose_bit_flag = readInt16(&lfh_buffer, 6); 93 | const is_utf8 = general_purpose_bit_flag & 0x800 != 0; 94 | const is_known_size = general_purpose_bit_flag & 0x8 == 0; 95 | var compressed_size: u64 = readInt32(&lfh_buffer, 18); 96 | var uncompressed_size: u64 = readInt32(&lfh_buffer, 22); 97 | var is_zip64 = false; 98 | const file_name_length = readInt16(&lfh_buffer, 26); 99 | const extra_fields_length = readInt16(&lfh_buffer, 28); 100 | 101 | // Variable-sized header components. 102 | if (file_name_length > 0) { 103 | self.dumper.indent(); 104 | defer self.dumper.outdent(); 105 | try self.dumper.write("\n"); 106 | try self.dumper.writeSectionHeader(self.offset, "File Name", .{}); 107 | try self.dumpBlob(file_name_length, .{ .encoding = if (is_utf8) .utf8 else .cp437 }); 108 | } 109 | if (extra_fields_length > 0) { 110 | self.dumper.indent(); 111 | defer self.dumper.outdent(); 112 | try self.dumper.write("\n"); 113 | try self.dumper.writeSectionHeader(self.offset, "Extra Fields", .{}); 114 | try self.consumeExtraFields(extra_fields_length, &is_zip64, &compressed_size, &uncompressed_size, null, null); 115 | } 116 | 117 | // File contents. 118 | if (is_known_size) { 119 | // Known size is easy. 120 | if (compressed_size > 0) { 121 | try self.dumper.write("\n"); 122 | try self.dumper.writeSectionHeader(self.offset, "File Contents", .{}); 123 | try self.dumpBlob(compressed_size, compact); 124 | } 125 | 126 | // Optional data descriptor is optional 127 | if (z.oddo_signature == try self.peekSignature()) { 128 | try self.consumeDataDescriptor(is_zip64); 129 | } 130 | } else { 131 | // Search for data descriptor to terminate the file contents. 
132 | try self.dumper.write("\n"); 133 | try self.dumper.writeSectionHeader(self.offset, "File Contents With Unknown Length", .{}); 134 | 135 | const row_length = compact.row_length; 136 | var row_cursor: usize = 0; 137 | var oddo_signature_cursor: usize = 0; 138 | while (true) { 139 | assert(self.put_back_signature == null); 140 | const b = try self.input.reader().readByte(); 141 | self.offset += 1; 142 | if (b == z.oddo_signature_bytes[oddo_signature_cursor]) { 143 | // Maybe? 144 | oddo_signature_cursor += 1; 145 | if (oddo_signature_cursor == 4) { 146 | // Done. 147 | self.put_back_signature = z.oddo_signature_bytes; 148 | self.offset -= 4; 149 | try self.dumper.write("\n"); 150 | 151 | try self.consumeDataDescriptor(is_zip64); 152 | break; 153 | } 154 | } else { 155 | // Nope 156 | if (oddo_signature_cursor > 0) { 157 | // Flush what we've optimistically found so far. 158 | const mid_buffer_row_wrap = @min(oddo_signature_cursor, row_length - row_cursor); 159 | row_cursor += oddo_signature_cursor; 160 | for (z.oddo_signature_bytes[0..mid_buffer_row_wrap]) |b_| { 161 | try self.dumper.printf("{x:0>2}", .{b_}); 162 | } 163 | if (row_cursor >= row_length) { 164 | row_cursor -= row_length; 165 | try self.dumper.printf("\n", .{}); 166 | } 167 | for (z.oddo_signature_bytes[mid_buffer_row_wrap..oddo_signature_cursor]) |b_| { 168 | try self.dumper.printf("{x:0>2}", .{b_}); 169 | } 170 | oddo_signature_cursor = @intFromBool(b == z.oddo_signature_bytes[0]); 171 | } 172 | if (oddo_signature_cursor > 0) continue; // b itself restarts the match (e.g. 50 50 4b 07 08), so hold it back; otherwise write the byte. 173 | if (row_cursor >= row_length) { 174 | row_cursor -= row_length; 175 | try self.dumper.printf("\n", .{}); 176 | } 177 | try self.dumper.printf("{x:0>2}", .{b}); 178 | row_cursor += 1; 179 | } 180 | } 181 | } 182 | 183 | // Done with Local file header, file contents, and optional data descriptor.
184 | } 185 | 186 | fn consumeDataDescriptor(self: *Self, is_zip64: bool) !void { 187 | try self.dumper.write("\n"); 188 | try self.dumper.writeSectionHeader(self.offset, "Optional Data Descriptor", .{}); 189 | 190 | var data_descriptor_buffer: [24]u8 = undefined; 191 | const data_descriptor_len: usize = if (is_zip64) 24 else 16; 192 | try self.readNoEof(data_descriptor_buffer[0..data_descriptor_len]); 193 | var data_descriptor_cursor: usize = 0; 194 | if (is_zip64) { 195 | try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 4, "optional data descriptor optional signature"); 196 | try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 4, "crc-32"); 197 | try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 8, "compressed size"); 198 | try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 8, "uncompressed size"); 199 | } else { 200 | try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "optional data descriptor optional signature"); 201 | try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "crc-32"); 202 | try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "compressed size"); 203 | try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "uncompressed size"); 204 | } 205 | } 206 | 207 | fn consumeCentralFileHeader(self: *Self) !void { 208 | try self.dumper.write("\n"); 209 | try self.dumper.writeSectionHeader(self.offset, "Central Directory Entry", .{}); 210 | 211 | var cdr_buffer: [46]u8 = undefined; 212 | try self.readNoEof(cdr_buffer[0..]); 213 | var cdr_cursor: usize = 0; 214 | 215 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Central directory file header signature"); 216 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Version made by"); 217 | try 
self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Version needed to extract (minimum)"); 218 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "General purpose bit flag"); 219 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Compression method"); 220 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File last modification time"); 221 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File last modification date"); 222 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "CRC-32"); 223 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Compressed size"); 224 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Uncompressed size"); 225 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File name length (n)"); 226 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Extra field length (m)"); 227 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File comment length (k)"); 228 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Disk number where file starts"); 229 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Internal file attributes"); 230 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "External file attributes"); 231 | try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Relative offset of local file header"); 232 | 233 | const general_purpose_bit_flag = readInt16(&cdr_buffer, 8); 234 | const is_utf8 = general_purpose_bit_flag & 0x800 != 0; 235 | var compressed_size: u64 = readInt32(&cdr_buffer, 20); 236 | var uncompressed_size: u64 = readInt32(&cdr_buffer, 24); // uncompressed size follows the 4-byte compressed size at offset 20 237 | var local_file_header_offset: u64 = readInt32(&cdr_buffer, 42); 238 | var disk_number: u32 = switch (readInt16(&cdr_buffer, 34)) { 0xffff => 0xffffffff, else => |n| n }; // widen the 16-bit "see zip64" sentinel to the 0xffffffff that dumpExtraFields in zipfile.zig tests for 239 | const file_name_length = readInt16(&cdr_buffer, 28); 240 | const extra_fields_length = readInt16(&cdr_buffer, 30); 241 | const file_comment_length
= readInt16(&cdr_buffer, 32); 242 | 243 | if (file_name_length > 0) { 244 | self.dumper.indent(); 245 | defer self.dumper.outdent(); 246 | try self.dumper.writeSectionHeader(self.offset, "File name", .{}); 247 | try self.dumpBlob(file_name_length, .{ .encoding = if (is_utf8) .utf8 else .cp437 }); 248 | } 249 | if (extra_fields_length > 0) { 250 | self.dumper.indent(); 251 | defer self.dumper.outdent(); 252 | try self.dumper.writeSectionHeader(self.offset, "Extra Fields", .{}); 253 | try self.consumeExtraFields( 254 | extra_fields_length, 255 | null, 256 | &compressed_size, 257 | &uncompressed_size, 258 | &local_file_header_offset, 259 | &disk_number, 260 | ); 261 | } 262 | if (file_comment_length > 0) { 263 | self.dumper.indent(); 264 | defer self.dumper.outdent(); 265 | try self.dumper.writeSectionHeader(self.offset, "File Comment", .{}); 266 | try self.dumpBlob(file_comment_length, .{ .encoding = if (is_utf8) .utf8 else .cp437 }); 267 | } 268 | } 269 | 270 | fn consumeZip64End(self: *Self) !void { 271 | try self.dumper.write("\n"); 272 | try self.dumper.writeSectionHeader(self.offset, "zip64 end of central directory record", .{}); 273 | { 274 | var buffer: [56]u8 = undefined; 275 | try self.readNoEof(buffer[0..]); 276 | 277 | var cursor: usize = 0; 278 | const max_size = 8; 279 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "zip64 end of central directory record signature"); 280 | try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "size of zip64 end of central directory record"); 281 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "version made by"); 282 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "version needed to extract"); 283 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "number of this disk"); 284 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "number of the disk with the start of the central directory"); 285 | try self.dumper.readStructField(&buffer, 
max_size, &cursor, 8, "total number of entries in the central directory on this disk"); 286 | try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "total number of entries in the central directory"); 287 | try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "size of the central directory"); 288 | try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "offset of start of central directory with respect to the starting disk number"); 289 | assert(cursor == buffer.len); 290 | const zip64_extensible_data_sector_size = readInt64(&buffer, 4) -| 44; 291 | if (zip64_extensible_data_sector_size > 0) { 292 | self.dumper.indent(); 293 | defer self.dumper.outdent(); 294 | try self.dumper.writeSectionHeader(self.offset, "zip64 extensible data sector", .{}); 295 | try self.dumpBlob(zip64_extensible_data_sector_size, compact); 296 | } 297 | } 298 | 299 | if (z.zip64_eocdl_signature != try self.peekSignature()) return error.ExpectedZip64EndOfCentralDirectoryLocator; 300 | try self.dumper.write("\n"); 301 | try self.dumper.writeSectionHeader(self.offset, "zip64 end of central directory locator", .{}); 302 | { 303 | var buffer: [20]u8 = undefined; 304 | try self.readNoEof(buffer[0..]); 305 | var cursor: usize = 0; 306 | 307 | const max_size = 8; 308 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "zip64 end of central dir locator signature"); 309 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "number of the disk with the start of the zip64 end of central directory"); 310 | try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "relative offset of the zip64 end of central directory record"); 311 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "total number of disks"); 312 | assert(cursor == buffer.len); 313 | } 314 | 315 | if (z.eocdr_signature != try self.peekSignature()) return error.ExpectedEndOfCentralDirectoryRecord; 316 | try self.consumeEnd(); 317 | } 318 | 319 | fn consumeEnd(self: *Self) 
!void { 320 | try self.dumper.write("\n"); 321 | try self.dumper.writeSectionHeader(self.offset, "End of central directory record", .{}); 322 | 323 | var buffer: [22]u8 = undefined; 324 | try self.readNoEof(buffer[0..]); 325 | var cursor: usize = 0; 326 | 327 | const max_size = 4; 328 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "End of central directory signature"); 329 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Number of this disk"); 330 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Disk where central directory starts"); 331 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Number of central directory records on this disk"); 332 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Total number of central directory records"); 333 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "Size of central directory (bytes)"); 334 | try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "Offset of start of central directory, relative to start of archive"); 335 | try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Comment Length"); 336 | assert(cursor == buffer.len); 337 | 338 | const comment_length = readInt16(&buffer, 20); 339 | if (comment_length > 0) { 340 | self.dumper.indent(); 341 | defer self.dumper.outdent(); 342 | try self.dumper.writeSectionHeader(self.offset, ".ZIP file comment", .{}); 343 | try self.dumpBlob(comment_length, .{ .encoding = .cp437 }); 344 | } 345 | } 346 | 347 | const compact = Hexdumper.BlobConfig{ 348 | .row_length = 512, 349 | .spaces = false, 350 | }; 351 | 352 | fn dumpBlob(self: *Self, length: u64, config: Hexdumper.BlobConfig) !void { 353 | var partial_utf8_state = Hexdumper.PartialUtf8State{}; 354 | var cursor: u64 = 0; 355 | while (cursor < length) { 356 | var buffer: [0x1000]u8 = undefined; 357 | const buffer_len = @min(buffer.len, length - cursor); 358 | try self.readNoEof(buffer[0..buffer_len]); 359 | const 
is_end = cursor + buffer_len == length; 360 | 361 | try self.dumper.writeBlobPart(buffer[0..buffer_len], config, cursor == 0, is_end, &partial_utf8_state); 362 | 363 | cursor += buffer_len; 364 | } 365 | } 366 | 367 | fn consumeExtraFields( 368 | self: *Self, 369 | extra_fields_length: u16, 370 | out_is_zip64: ?*bool, 371 | compressed_size: *u64, 372 | uncompressed_size: *u64, 373 | local_file_header_offset: ?*u64, 374 | disk_number: ?*u32, 375 | ) !void { 376 | const offset = self.offset; 377 | var buf: [0xffff]u8 = undefined; 378 | const buffer = buf[0..extra_fields_length]; 379 | try self.readNoEof(buffer); 380 | 381 | return z.dumpExtraFields(&self.dumper, offset, buffer, out_is_zip64, compressed_size, uncompressed_size, local_file_header_offset, disk_number); 382 | } 383 | 384 | fn peekSignature(self: *Self) !u32 { 385 | var sig_buf: [4]u8 = undefined; 386 | try self.readNoEof(&sig_buf); 387 | const signature = std.mem.readInt(u32, &sig_buf, .little); 388 | self.put_back_signature = sig_buf; 389 | self.offset -= 4; 390 | return signature; 391 | } 392 | 393 | fn readNoEof(self: *Self, buffer: []u8) !void { 394 | if (self.put_back_signature) |sig_buf| { 395 | @memcpy(buffer[0..4], &sig_buf); 396 | self.put_back_signature = null; 397 | try self.input.reader().readNoEof(buffer[4..]); 398 | } else { 399 | try self.input.reader().readNoEof(buffer); 400 | } 401 | self.offset += buffer.len; 402 | } 403 | }; 404 | 405 | fn readInt16(buffer: []const u8, offset: usize) u16 { 406 | return std.mem.readInt(u16, buffer[offset..][0..2], .little); 407 | } 408 | fn readInt32(buffer: []const u8, offset: usize) u32 { 409 | return std.mem.readInt(u32, buffer[offset..][0..4], .little); 410 | } 411 | fn readInt64(buffer: []const u8, offset: usize) u64 { 412 | return std.mem.readInt(u64, buffer[offset..][0..8], .little); 413 | } 414 | -------------------------------------------------------------------------------- /src/hexdump-zip.zig: 
const std = @import("std");
const assert = std.debug.assert;

const Hexdumper = @import("./Hexdumper.zig");
const z = @import("./zipfile.zig");

const SegmentList = std.ArrayList(Segment);

/// What kind of zipfile structure a `Segment` points at.
const SegmentKind = union(enum) {
    local_file: LocalFileInfo,
    central_directory_entries: CentralDirectoryEntriesInfo,
    zip64_eocdr,
    zip64_eocdl,
    eocdr,
};

/// A region of the input file, identified by its absolute start offset.
const Segment = struct {
    offset: u64,
    kind: SegmentKind,
};

/// Facts about a local file that were learned from its central directory entry.
const LocalFileInfo = struct {
    entry_index: u32,
    compressed_size: u64,
    is_zip64: bool,
};

/// Entry count and byte size of the central directory, as declared by the
/// end of central directory record (or its zip64 counterpart).
const CentralDirectoryEntriesInfo = struct {
    entry_count: u32,
    central_directory_size: u64,
};

/// Sort predicate: orders segments by ascending file offset.
fn segmentLessThan(_: void, a: Segment, b: Segment) bool {
    return a.offset < b.offset;
}

/// Dumps a seekable zipfile as an annotated hexdump.
///
/// The end of central directory record is located first, the central directory
/// is walked to collect every segment, and the segments are then dumped in
/// ascending offset order.
pub const ZipfileDumper = struct {
    input_file: std.fs.File,
    file_size: u64,
    output_file: std.fs.File,
    output: @TypeOf(std.io.bufferedWriter(@as(std.fs.File.Writer, undefined))),
    // have to store this in the struct, because .any() takes a pointer to the writer.
    output_writer: @TypeOf(std.io.bufferedWriter(@as(std.fs.File.Writer, undefined))).Writer,
    dumper: Hexdumper,
    segments: SegmentList,

    const Self = @This();

    /// Blob layout used for bulk data (file contents, unused space):
    /// long rows without spaces between bytes.
    const compact = Hexdumper.BlobConfig{
        .row_length = 512,
        .spaces = false,
    };

    /// Initializes `self` in place.
    ///
    /// In-place initialization is required because `dumper` holds a writer
    /// obtained from `self.output_writer.any()`, which points into `self`.
    /// Returns `error.FileTooBig` when the input is larger than 0x7fffffffffffffff bytes.
    /// Pair with `deinit`.
    pub fn init(self: *Self, input_file: std.fs.File, output_file: std.fs.File, allocator: std.mem.Allocator) !void {
        self.input_file = input_file;
        self.file_size = try self.input_file.getEndPos();
        // this limit eliminates most silly overflow checks on the file offset.
        if (self.file_size > 0x7fffffffffffffff) return error.FileTooBig;

        self.output_file = output_file;
        self.output = std.io.bufferedWriter(self.output_file.writer());
        self.output_writer = self.output.writer();
        self.dumper = .{ .output = self.output_writer.any() };

        self.segments = SegmentList.init(allocator);
    }

    /// Frees the segment list and invalidates `self`.
    pub fn deinit(self: *Self) void {
        self.segments.deinit();
        self.* = undefined;
    }

    /// Finds all segments, dumps them, and flushes the buffered output.
    pub fn doIt(self: *Self) !void {
        try self.findSegments();
        try self.dumpSegments();
        try self.output.flush();
    }

    /// Populates `self.segments` by locating the end of central directory record,
    /// the optional zip64 structures, and one local file segment per central
    /// directory entry.
    fn findSegments(self: *Self) !void {
        // find the eocdr
        if (self.file_size < z.eocdr_size) return error.NotAZipFile;
        // This buffer can contain:
        // * the zip64 end of central dir locator,
        // * the end of central directory record,
        // * and a 0xffff size zip file comment.
        var eocdr_search_buffer: [z.eocdr_search_size]u8 = undefined;
        const eocdr_search_slice = eocdr_search_buffer[0..@min(self.file_size, z.eocdr_search_size)];
        const eocdr_search_slice_offset = self.file_size - eocdr_search_slice.len;
        try self.readNoEof(eocdr_search_slice_offset, eocdr_search_slice);
        // seek backward over the comment looking for the signature
        var eocdr_offset: u64 = undefined;
        var comment_length: u16 = 0;
        while (true) : (comment_length += 1) {
            eocdr_offset = self.file_size - (z.eocdr_size + comment_length);
            if (readInt32(eocdr_search_slice, eocdr_offset - eocdr_search_slice_offset) == z.eocdr_signature) {
                // found it
                break;
            }
            // Stop before the next iteration would step past the start of the
            // file or past the longest possible comment.
            if (eocdr_offset == 0 or comment_length == 0xffff) return error.NotAZipFile;
        }
        const eocdr = eocdr_search_slice[eocdr_offset - eocdr_search_slice_offset .. eocdr_offset - eocdr_search_slice_offset + z.eocdr_size];

        var disk_number: u32 = readInt16(eocdr, 4);
        var entry_count: u32 = readInt16(eocdr, 10);
        var central_directory_size: u64 = readInt32(eocdr, 12);
        var central_directory_offset: u64 = readInt32(eocdr, 16);

        // ZIP64
        const is_zip64 = eocdr_offset >= z.zip64_eocdl_size and readInt32(eocdr_search_slice, eocdr_offset - z.zip64_eocdl_size - eocdr_search_slice_offset) == z.zip64_eocdl_signature;
        if (is_zip64) {
            const zip64_eocdl_offset = eocdr_offset - z.zip64_eocdl_size;
            const zip64_eocdl = eocdr_search_slice[zip64_eocdl_offset - eocdr_search_slice_offset .. zip64_eocdl_offset + z.zip64_eocdl_size - eocdr_search_slice_offset];
            const total_number_of_disks = readInt32(zip64_eocdl, 16);
            if (total_number_of_disks != 1) return error.MultiDiskZipfileNotSupported;
            const zip64_eocdr_offset = readInt64(zip64_eocdl, 8);

            var zip64_eocdr_buffer: [z.zip64_eocdr_size]u8 = undefined;
            try self.readNoEof(zip64_eocdr_offset, zip64_eocdr_buffer[0..]);
            const zip64_eocdr = zip64_eocdr_buffer[0..];

            // The zip64 record overrides the values from the classic record.
            // Only the low 32 bits of the 8-byte entry count are read here.
            disk_number = readInt32(zip64_eocdr, 16);
            entry_count = readInt32(zip64_eocdr, 32);
            central_directory_size = readInt64(zip64_eocdr, 40);
            central_directory_offset = readInt64(zip64_eocdr, 48);

            try self.segments.append(Segment{
                .offset = zip64_eocdr_offset,
                .kind = .zip64_eocdr,
            });
            try self.segments.append(Segment{
                .offset = zip64_eocdl_offset,
                .kind = .zip64_eocdl,
            });
        }

        if (disk_number != 0) return error.MultiDiskZipfileNotSupported;
        const central_directory_end = central_directory_offset +| central_directory_size;
        if (central_directory_end > self.file_size) return error.CentralDirectorySizeExceedsFileBounds;

        var central_directory_cursor: u64 = central_directory_offset;
        {
            var entry_index: u32 = 0;
            while (entry_index < entry_count and central_directory_cursor + 46 <= central_directory_end) : (entry_index += 1) {
                // TODO: generalize not exceeding the central_directory_size
                var cfh_buffer: [46]u8 = undefined;
                try self.readNoEof(central_directory_cursor, cfh_buffer[0..]);

                var compressed_size: u64 = readInt32(&cfh_buffer, 20);
                var uncompressed_size: u64 = readInt32(&cfh_buffer, 24);
                const file_name_length = readInt16(&cfh_buffer, 28);
                const extra_fields_length = readInt16(&cfh_buffer, 30);
                const file_comment_length = readInt16(&cfh_buffer, 32);
                var local_header_offset: u64 = readInt32(&cfh_buffer, 42);

                central_directory_cursor += 46;
                central_directory_cursor += file_name_length;

                // ZIP64
                var found_zip64_extended_information = false;
                var extra_fields_buffer: [0xffff]u8 = undefined;
                const extra_fields = extra_fields_buffer[0..extra_fields_length];
                try self.readNoEof(central_directory_cursor, extra_fields);
                var extra_fields_cursor: u32 = 0;
                while (extra_fields_cursor + 3 < extra_fields_length) {
                    const tag = readInt16(extra_fields, extra_fields_cursor);
                    extra_fields_cursor += 2;
                    const size = readInt16(extra_fields, extra_fields_cursor);
                    extra_fields_cursor += 2;
                    if (extra_fields_cursor + size > extra_fields_length) return error.ExtraFieldSizeExceedsExtraFieldsBuffer;
                    const extra_field = extra_fields[extra_fields_cursor .. extra_fields_cursor + size];
                    extra_fields_cursor += size;

                    switch (tag) {
                        0x0001 => {
                            // ZIP64
                            if (found_zip64_extended_information) return error.DuplicateZip64ExtendedInformation;
                            found_zip64_extended_information = true;
                            // Each 8-byte value is present only when the matching
                            // 32-bit header field is saturated at 0xffffffff.
                            var cursor: u16 = 0;
                            if (uncompressed_size == 0xffffffff) {
                                if (cursor + 8 > extra_field.len) return error.Zip64ExtendedInformationTruncated;
                                uncompressed_size = readInt64(extra_field, cursor);
                                cursor += 8;
                            }
                            if (compressed_size == 0xffffffff) {
                                if (cursor + 8 > extra_field.len) return error.Zip64ExtendedInformationTruncated;
                                compressed_size = readInt64(extra_field, cursor);
                                cursor += 8;
                            }
                            if (local_header_offset == 0xffffffff) {
                                if (cursor + 8 > extra_field.len) return error.Zip64ExtendedInformationTruncated;
                                local_header_offset = readInt64(extra_field, cursor);
                                cursor += 8;
                            }
                            // ignore the disk number
                        },
                        else => {},
                    }
                }

                central_directory_cursor += extra_fields_length;
                central_directory_cursor += file_comment_length;

                try self.segments.append(Segment{
                    .offset = local_header_offset,
                    .kind = .{ .local_file = .{
                        .entry_index = entry_index,
                        .is_zip64 = found_zip64_extended_information,
                        .compressed_size = compressed_size,
                    } },
                });
            }
        }

        if (entry_count > 0) {
            try self.segments.append(Segment{
                .offset = central_directory_offset,
                .kind = .{ .central_directory_entries = .{
                    .entry_count = entry_count,
                    .central_directory_size = central_directory_size,
                } },
            });
        }

        try self.segments.append(Segment{
            .offset = eocdr_offset,
            .kind = .eocdr,
        });
    }

    /// Sorts the collected segments by offset and dumps them in order.
    ///
    /// Bytes between the end of one segment and the start of the next are dumped
    /// as "(unused space)". A segment that starts before the current position is
    /// announced with a "#seek" line.
    fn dumpSegments(self: *Self) !void {
        std.sort.insertion(Segment, self.segments.items, {}, segmentLessThan);

        var cursor: u64 = 0;
        for (self.segments.items, 0..) |segment, i| {
            if (i != 0) {
                try self.dumper.write("\n");
            }

            if (segment.offset > cursor) {
                try self.dumper.writeSectionHeader(cursor, "(unused space)", .{});
                try self.dumpBlob(cursor, segment.offset - cursor, compact);
                try self.dumper.write("\n");
                cursor = segment.offset;
            } else if (segment.offset < cursor) {
                try self.dumper.printf("#seek -0x{x}\n\n", .{cursor - segment.offset});
                cursor = segment.offset;
            }

            const length = switch (segment.kind) {
                .local_file => |info| try self.dumpLocalFile(segment.offset, info),
                .central_directory_entries => |info| try self.dumpCentralDirectoryEntries(segment.offset, info),
                .zip64_eocdr => try self.dumpZip64EndOfCentralDirectoryRecord(segment.offset),
                .zip64_eocdl => try self.dumpZip64EndOfCentralDirectoryLocator(segment.offset),
                .eocdr => try self.dumpEndOfCentralDirectory(segment.offset),
            };
            cursor += length;
        }
    }

    /// Dumps the local file header at `offset`, its name, extra fields, contents,
    /// and the optional data descriptor when its signature is present.
    ///
    /// Returns the number of bytes consumed, or 0 when the local file header
    /// signature does not match.
    fn dumpLocalFile(self: *Self, offset: u64, info: LocalFileInfo) !u64 {
        var cursor = offset;
        var lfh_buffer: [30]u8 = undefined;
        try self.readNoEof(cursor, lfh_buffer[0..]);
        if (readInt32(&lfh_buffer, 0) != z.lfh_signature) {
            try self.dumper.writeSectionHeader(offset, "WARNING: invalid local file header signature", .{});
            try self.dumper.write("\n");
            // if this isn't a local file, idk what it is.
            // call it unknown
            return 0;
        }

        try self.dumper.writeSectionHeader(offset, "Local File Header (#{})", .{info.entry_index});
        var lfh_cursor: usize = 0;
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "Local file header signature");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "Version needed to extract (minimum)");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "General purpose bit flag");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "Compression method");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "File last modification time");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "File last modification date");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "CRC-32");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "Compressed size");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 4, "Uncompressed size");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "File name length (n)");
        try self.dumper.readStructField(&lfh_buffer, 4, &lfh_cursor, 2, "Extra field length (m)");
        cursor += lfh_cursor;

        const file_name_length = readInt16(&lfh_buffer, 26);
        const general_purpose_bit_flag = readInt16(&lfh_buffer, 6);
        const is_utf8 = general_purpose_bit_flag & 0x800 != 0;
        const extra_fields_length = readInt16(&lfh_buffer, 28);

        var compressed_size: u64 = readInt32(&lfh_buffer, 18);
        var uncompressed_size: u64 = readInt32(&lfh_buffer, 22);

        if (file_name_length > 0) {
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.dumper.writeSectionHeader(cursor, "File Name", .{});
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.dumpBlob(cursor, file_name_length, .{ .encoding = if (is_utf8) .utf8 else .cp437 });
            cursor += file_name_length;
        }
        if (extra_fields_length > 0) {
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.dumper.writeSectionHeader(cursor, "Extra Fields", .{});
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.readExtraFields(cursor, extra_fields_length, null, &compressed_size, &uncompressed_size, null, null);
            cursor += extra_fields_length;
        }

        // The contents length comes from the central directory entry (`info`),
        // not from the local header.
        if (info.compressed_size > 0) {
            try self.dumper.writeSectionHeader(cursor, "File Contents", .{});
            try self.dumpBlob(cursor, info.compressed_size, compact);
            cursor += info.compressed_size;
        }

        // check for the optional data descriptor
        var data_descriptor_buffer: [24]u8 = undefined;
        const data_descriptor_len: usize = if (info.is_zip64) 24 else 16;
        if (self.readNoEof(cursor, data_descriptor_buffer[0..data_descriptor_len])) {
            if (readInt32(&data_descriptor_buffer, 0) == z.oddo_signature) {
                // this is a data descriptor
                try self.dumper.write("\n");
                try self.dumper.writeSectionHeader(cursor, "Optional Data Descriptor", .{});
                var data_descriptor_cursor: usize = 0;
                if (info.is_zip64) {
                    try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 4, "optional data descriptor optional signature");
                    try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 4, "crc-32");
                    try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 8, "compressed size");
                    try self.dumper.readStructField(&data_descriptor_buffer, 8, &data_descriptor_cursor, 8, "uncompressed size");
                } else {
                    try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "optional data descriptor optional signature");
                    try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "crc-32");
                    try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "compressed size");
                    try self.dumper.readStructField(&data_descriptor_buffer, 4, &data_descriptor_cursor, 4, "uncompressed size");
                }
                cursor += data_descriptor_cursor;
            }
        } else |_| {
            // ok, so there's no optional data descriptor here
        }

        return cursor - offset;
    }

    /// Dumps up to `info.entry_count` central directory entries starting at
    /// `offset`, stopping early when fewer than 46 bytes remain before the
    /// declared end of the central directory.
    ///
    /// Returns the number of bytes consumed, or 0 when an entry's signature
    /// does not match.
    fn dumpCentralDirectoryEntries(self: *Self, offset: u64, info: CentralDirectoryEntriesInfo) !u64 {
        const central_directory_end = offset + info.central_directory_size;
        var cursor = offset;
        {
            var i: u32 = 0;
            while (i < info.entry_count and cursor + 46 <= central_directory_end) : (i += 1) {
                // TODO: generalize not exceeding the central_directory_size
                if (i > 0) try self.dumper.write("\n");

                var cdr_buffer: [46]u8 = undefined;
                try self.readNoEof(cursor, cdr_buffer[0..]);
                if (readInt32(&cdr_buffer, 0) != z.cfh_signature) {
                    try self.dumper.writeSectionHeader(cursor, "WARNING: invalid central file header signature", .{});
                    try self.dumper.write("\n");
                    return 0;
                }

                try self.dumper.writeSectionHeader(cursor, "Central Directory Entry (#{})", .{i});
                var cdr_cursor: usize = 0;
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Central directory file header signature");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Version made by");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Version needed to extract (minimum)");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "General purpose bit flag");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Compression method");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File last modification time");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File last modification date");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "CRC-32");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Compressed size");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Uncompressed size");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File name length (n)");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Extra field length (m)");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "File comment length (k)");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Disk number where file starts");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 2, "Internal file attributes");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "External file attributes");
                try self.dumper.readStructField(&cdr_buffer, 4, &cdr_cursor, 4, "Relative offset of local file header");
                cursor += cdr_cursor;

                const general_purpose_bit_flag = readInt16(&cdr_buffer, 8);
                const is_utf8 = general_purpose_bit_flag & 0x800 != 0;
                const file_name_length = readInt16(&cdr_buffer, 28);
                const extra_fields_length = readInt16(&cdr_buffer, 30);
                const file_comment_length = readInt16(&cdr_buffer, 32);

                var is_zip64 = false;
                var compressed_size: u64 = readInt32(&cdr_buffer, 20);
                // Uncompressed size is the 4-byte field at offset 24;
                // offset 20 is the compressed size.
                var uncompressed_size: u64 = readInt32(&cdr_buffer, 24);
                var local_file_header_offset: u64 = readInt32(&cdr_buffer, 42);
                var disk_number: u32 = readInt16(&cdr_buffer, 34);

                if (file_name_length > 0) {
                    self.dumper.indent();
                    defer self.dumper.outdent();
                    try self.dumper.writeSectionHeader(cursor, "File name", .{});
                    self.dumper.indent();
                    defer self.dumper.outdent();
                    try self.dumpBlob(cursor, file_name_length, .{ .encoding = if (is_utf8) .utf8 else .cp437 });
                    cursor += file_name_length;
                }
                if (extra_fields_length > 0) {
                    self.dumper.indent();
                    defer self.dumper.outdent();
                    try self.dumper.writeSectionHeader(cursor, "Extra Fields", .{});
                    self.dumper.indent();
                    defer self.dumper.outdent();
                    try self.readExtraFields(cursor, extra_fields_length, &is_zip64, &compressed_size, &uncompressed_size, &local_file_header_offset, &disk_number);
                    cursor += extra_fields_length;
                }
                if (file_comment_length > 0) {
                    self.dumper.indent();
                    defer self.dumper.outdent();
                    try self.dumper.writeSectionHeader(cursor, "File Comment", .{});
                    self.dumper.indent();
                    defer self.dumper.outdent();
                    try self.dumpBlob(cursor, file_comment_length, .{ .encoding = if (is_utf8) .utf8 else .cp437 });
                    cursor += file_comment_length;
                }
            }
        }

        return cursor - offset;
    }

    /// Dumps the 56-byte zip64 end of central directory record at `offset`,
    /// followed by its extensible data sector when the declared record size
    /// exceeds 44. Returns the number of bytes consumed.
    fn dumpZip64EndOfCentralDirectoryRecord(self: *Self, offset: u64) !u64 {
        var buffer: [56]u8 = undefined;
        try self.readNoEof(offset, buffer[0..]);
        var cursor: usize = 0;

        const max_size = 8;
        try self.dumper.writeSectionHeader(offset, "zip64 end of central directory record", .{});
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "zip64 end of central directory record signature");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "size of zip64 end of central directory record");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "version made by");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "version needed to extract");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "number of this disk");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "number of the disk with the start of the central directory");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "total number of entries in the central directory on this disk");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "total number of entries in the central directory");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "size of the central directory");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "offset of start of central directory with respect to the starting disk number");
        assert(cursor == buffer.len);

        // Saturating subtraction: a declared size below 44 yields no data sector.
        const zip64_extensible_data_sector_size = readInt64(&buffer, 4) -| 44;
        if (zip64_extensible_data_sector_size > 0) {
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.dumper.writeSectionHeader(offset + cursor, "zip64 extensible data sector", .{});
            try self.dumpBlob(offset + cursor, zip64_extensible_data_sector_size, .{});
            cursor += zip64_extensible_data_sector_size;
        }

        return cursor;
    }

    /// Dumps the 20-byte zip64 end of central directory locator at `offset`.
    /// Returns the number of bytes consumed.
    fn dumpZip64EndOfCentralDirectoryLocator(self: *Self, offset: u64) !u64 {
        var buffer: [20]u8 = undefined;
        try self.readNoEof(offset, buffer[0..]);
        var cursor: usize = 0;

        const max_size = 8;
        try self.dumper.writeSectionHeader(offset, "zip64 end of central directory locator", .{});
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "zip64 end of central dir locator signature");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "number of the disk with the start of the zip64 end of central directory");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 8, "relative offset of the zip64 end of central directory record");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "total number of disks");
        assert(cursor == buffer.len);

        return cursor;
    }

    /// Dumps the 22-byte end of central directory record at `offset` and the
    /// zipfile comment that follows it. Returns the number of bytes consumed.
    fn dumpEndOfCentralDirectory(self: *Self, offset: u64) !u64 {
        var buffer: [22]u8 = undefined;
        try self.readNoEof(offset, buffer[0..]);
        var cursor: usize = 0;

        const max_size = 4;
        try self.dumper.writeSectionHeader(offset, "End of central directory record", .{});
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "End of central directory signature");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Number of this disk");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Disk where central directory starts");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Number of central directory records on this disk");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Total number of central directory records");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "Size of central directory (bytes)");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 4, "Offset of start of central directory, relative to start of archive");
        try self.dumper.readStructField(&buffer, max_size, &cursor, 2, "Comment Length");
        assert(cursor == buffer.len);

        const comment_length = readInt16(&buffer, 20);
        if (comment_length > 0) {
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.dumper.writeSectionHeader(offset + cursor, ".ZIP file comment", .{});
            self.dumper.indent();
            defer self.dumper.outdent();
            try self.dumpBlob(offset + cursor, comment_length, .{ .encoding = .cp437 });
            cursor += comment_length;
        }

        return cursor;
    }

    /// Dumps `length` bytes starting at `offset`, reading the input in chunks of
    /// at most 0x1000 bytes and telling the hexdumper which chunk is first and last.
    fn dumpBlob(self: *Self, offset: u64, length: u64, config: Hexdumper.BlobConfig) !void {
        var partial_utf8_state = Hexdumper.PartialUtf8State{};
        var cursor: u64 = 0;
        while (cursor < length) {
            var buffer: [0x1000]u8 = undefined;
            const buffer_offset = offset + cursor;
            const buffer_len = @min(buffer.len, length - cursor);
            try self.readNoEof(buffer_offset, buffer[0..buffer_len]);
            const is_end = cursor + buffer_len == length;

            try self.dumper.writeBlobPart(buffer[0..buffer_len], config, cursor == 0, is_end, &partial_utf8_state);

            cursor += buffer_len;
        }
    }

    /// Reads `extra_fields_length` bytes at `offset` and hands them to
    /// `z.dumpExtraFields` together with the caller's in/out size, offset and
    /// disk number values.
    fn readExtraFields(
        self: *Self,
        offset: u64,
        extra_fields_length: u16,
        out_is_zip64: ?*bool,
        compressed_size: *u64,
        uncompressed_size: *u64,
        local_file_header_offset: ?*u64,
        disk_number: ?*u32,
    ) !void {
        var buf: [0xffff]u8 = undefined;
        const buffer = buf[0..extra_fields_length];
        try self.readNoEof(offset, buffer);

        return z.dumpExtraFields(&self.dumper, offset, buffer, out_is_zip64, compressed_size, uncompressed_size, local_file_header_offset, disk_number);
    }

    /// Seeks the input file to `offset` and fills `buffer` completely,
    /// failing if the file ends first.
    fn readNoEof(self: *Self, offset: u64, buffer: []u8) !void {
        try self.input_file.seekTo(offset);
        try self.input_file.reader().readNoEof(buffer);
    }
};

/// Reads a little-endian u16 from `buffer` at `offset`.
fn readInt16(buffer: []const u8, offset: usize) u16 {
    return std.mem.readInt(u16, buffer[offset..][0..2], .little);
}

/// Reads a little-endian u32 from `buffer` at `offset`.
fn readInt32(buffer: []const u8, offset: usize) u32 {
    return std.mem.readInt(u32, buffer[offset..][0..4], .little);
}

/// Reads a little-endian u64 from `buffer` at `offset`.
fn readInt64(buffer: []const u8, offset: usize) u64 {
    return std.mem.readInt(u64, buffer[offset..][0..8], .little);
}