├── .github └── workflows │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── docs └── winexe.txt ├── expand_ms_compress.py ├── extract_diskettes.py ├── extract_images.py ├── pyproject.toml ├── requirements.in ├── requirements.txt └── res_extract ├── __init__.py ├── errors.py ├── icons.py ├── ne_resources.py └── resources.py /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | lint: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v3 16 | - uses: pre-commit/action@v3.0.0 17 | # test: 18 | # runs-on: ubuntu-latest 19 | # steps: 20 | # - uses: actions/checkout@v3 21 | # - uses: actions/setup-python@v4 22 | # with: 23 | # python-version: "3.11" 24 | # - name: Install dependencies 25 | # run: python -m pip install pytest-cov -e . 26 | # - run: pytest --cov --cov-report=term-missing . 
27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.ico 2 | *.png 3 | *.py[cod] 4 | /test_data 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/charliermarsh/ruff-pre-commit 3 | rev: v0.0.247 4 | hooks: 5 | - id: ruff 6 | args: 7 | - --fix 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v4.4.0 10 | hooks: 11 | - id: end-of-file-fixer 12 | - id: trailing-whitespace 13 | - repo: https://github.com/psf/black 14 | rev: 22.12.0 15 | hooks: 16 | - id: black 17 | args: 18 | - --quiet 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2020 Aarni Koskela 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Resource extraction tools 2 | ========================= 3 | 4 | All instructions assume you have successfully installed the requirements. 5 | 6 | Requires Python 3. 7 | 8 | Extract images and icons from a PE or NE file (.exe/.dll/...) 9 | ------------------------------------------------------------- 10 | 11 | ``` 12 | python extract_images.py /Volumes/OFFPRO_Z/EXCEL/EXCEL.EXE --png --ico --dir=./excel 13 | ``` 14 | 15 | will extract reconstituted ICO files as well as PNG files into `./excel`. 16 | 17 | Extract (multiple) diskette images into a directory 18 | --------------------------------------------------- 19 | 20 | ``` 21 | python3 extract_diskettes.py excel_5_diskettes/*.img -d excel_5_diskette_contents/ 22 | ``` 23 | 24 | will extract all files off the diskette images into `excel_5_diskette_contents`. 25 | 26 | 27 | 28 | Expand Microsoft compressed data 29 | -------------------------------- 30 | 31 | Requires `msexpand` from [`libmspack`](https://github.com/kyz/libmspack/blob/master/libmspack/examples/msexpand.c) 32 | to be on your path. (On macOS, that tool compiles without any fuss if you have `automake` and `autoconf` installed.) 33 | 34 | ``` 35 | python3 expand_ms_compress.py --in-dir excel_5_diskette_contents/ --legacy-inf=excel_5_diskette_contents/EXCEL5.INF --out-dir=excel_5_expanded 36 | ``` 37 | 38 | will expand all underscorey files from your (previously extracted) Excel 5 diskettes into `excel_5_expanded`. 
39 | -------------------------------------------------------------------------------- /docs/winexe.txt: -------------------------------------------------------------------------------- 1 | 2 | Executable-File Header Format (3.1) 3 | 4 | An executable (.EXE) file for the Windows operating system 5 | contains a combination of code and data or a combination of 6 | code, data, and resources. The executable file also contains 7 | two headers: an MS-DOS header and a Windows header. The next 8 | two sections describe these headers; the third section 9 | describes the code and data contained in a Windows executable 10 | file. 11 | 12 | MS-DOS Header 13 | 14 | The MS-DOS (old-style) executable-file header contains four 15 | distinct parts: a collection of header information (such as 16 | the signature word, the file size, and so on), a reserved 17 | section, a pointer to a Windows header (if one exists), and a 18 | stub program. The following illustration shows the MS-DOS 19 | executable-file header: 20 | If the word value at offset 18h is 40h or greater, the word 21 | value at 3Ch is typically an offset to a Windows header. 22 | Applications must verify this for each executable-file header 23 | being tested, because a few applications have a different 24 | header style. 25 | MS-DOS uses the stub program to display a message if Windows 26 | has not been loaded when the user attempts to run a program. 27 | 28 | Windows Header 29 | 30 | The Windows (new-style) executable-file header contains 31 | information that the loader requires for segmented executable 32 | files. This information includes the linker version number, 33 | data specified by the linker, data specified by the resource 34 | compiler, tables of segment data, tables of resource data, 35 | and so on. The following illustration shows the Windows 36 | executable-file header: 37 | The following sections describe the entries in the Windows 38 | executable-file header. 
39 | 40 | Information Block 41 | 42 | The information block in the Windows header contains the 43 | linker version number, the lengths of various tables that 44 | further describe the executable file, the offsets from the 45 | beginning of the header to the beginning of these tables, the 46 | heap and stack sizes, and so on. The following list 47 | summarizes the contents of the header information block (the 48 | locations are relative to the beginning of the block): 49 | 50 | Location Description 51 | 00h Specifies the signature word. The low byte contains 52 | "N" (4Eh) and the high byte contains "E" (45h). 53 | 02h Specifies the linker version number. 54 | 03h Specifies the linker revision number. 55 | 04h Specifies the offset to the entry table (relative to 56 | the beginning of the header). 57 | 06h Specifies the length of the entry table, in bytes. 58 | 08h Reserved. 59 | 0Ch Specifies flags that describe the contents of the 60 | executable file. This value can be one or more of the 61 | following bits: 62 | 63 | Bit Meaning 64 | 0 The linker sets this bit if the executable-file 65 | format is SINGLEDATA. An executable file with 66 | this format contains one data segment. This bit 67 | is set if the file is a dynamic-link library 68 | (DLL). 69 | 1 The linker sets this bit if the executable-file 70 | format is MULTIPLEDATA. An executable file with 71 | this format contains multiple data segments. This 72 | bit is set if the file is a Windows application. 73 | If neither bit 0 nor bit 1 is set, the 74 | executable-file format is NOAUTODATA. An 75 | executable file with this format does not contain 76 | an automatic data segment. 77 | 2 Reserved. 78 | 3 Reserved. 79 | 8 Reserved. 80 | 9 Reserved. 81 | 11 If this bit is set, the first segment in the 82 | executable file contains code that loads the 83 | application. 84 | 13 If this bit is set, the linker detects errors at 85 | link time but still creates an executable file. 86 | 14 Reserved. 
87 | 15 If this bit is set, the executable file is a 88 | library module. 89 | If bit 15 is set, the CS:IP registers point to an 90 | initialization procedure called with the value in 91 | the AX register equal to the module handle. The 92 | initialization procedure must execute a far 93 | return to the caller. If the procedure is 94 | successful, the value in AX is nonzero. 95 | Otherwise, the value in AX is zero. 96 | The value in the DS register is set to the 97 | library's data segment if SINGLEDATA is set. 98 | Otherwise, DS is set to the data segment of the 99 | application that loads the library. 100 | 0Eh Specifies the automatic data segment number. (0Eh is 101 | zero if the SINGLEDATA and MULTIPLEDATA bits are 102 | cleared.) 103 | 10h Specifies the initial size, in bytes, of the local 104 | heap. This value is zero if there is no local 105 | allocation. 106 | 12h Specifies the initial size, in bytes, of the stack. 107 | This value is zero if the SS register value does not 108 | equal the DS register value. 109 | 14h Specifies the segment:offset value of CS:IP. 110 | 18h Specifies the segment:offset value of SS:SP. 111 | The value specified in SS is an index to the module's 112 | segment table. The first entry in the segment table 113 | corresponds to segment number 1. 114 | If SS addresses the automatic data segment and SP is 115 | zero, SP is set to the address obtained by adding the 116 | size of the automatic data segment to the size of the 117 | stack. 118 | 1Ch Specifies the number of entries in the segment table. 119 | 1Eh Specifies the number of entries in the 120 | module-reference table. 121 | 20h Specifies the number of bytes in the nonresident-name 122 | table. 123 | 22h Specifies a relative offset from the beginning of the 124 | Windows header to the beginning of the segment table. 125 | 24h Specifies a relative offset from the beginning of the 126 | Windows header to the beginning of the resource 127 | table. 
128 | 26h Specifies a relative offset from the beginning of the 129 | Windows header to the beginning of the resident-name 130 | table. 131 | 28h Specifies a relative offset from the beginning of the 132 | Windows header to the beginning of the 133 | module-reference table. 134 | 2Ah Specifies a relative offset from the beginning of the 135 | Windows header to the beginning of the imported-name 136 | table. 137 | 2Ch Specifies a relative offset from the beginning of the 138 | file to the beginning of the nonresident-name table. 139 | 30h Specifies the number of movable entry points. 140 | 32h Specifies a shift count that is used to align the 141 | logical sector. This count is log2 of the segment 142 | sector size. It is typically 4, although the default 143 | count is 9. (This value corresponds to the /alignment 144 | [/a] linker switch. When the linker command line 145 | contains /a:16, the shift count is 4. When the linker 146 | command line contains /a:512, the shift count is 9.) 147 | 34h Specifies the number of resource segments. 148 | 36h Specifies the target operating system, depending on 149 | which bits are set: 150 | 151 | Bit Meaning 152 | 153 | 0 Operating system format is unknown. 154 | 1 Reserved. 155 | 2 Operating system is Microsoft Windows. 156 | 3 Reserved. 157 | 4 Reserved. 158 | 37h Specifies additional information about the executable 159 | file. It can be one or more of the following values: 160 | 161 | Bit Meaning 162 | 163 | 1 If this bit is set, the executable file contains 164 | a Windows 2.x application that runs in version 3.x 165 | protected mode. 166 | 2 If this bit is set, the executable file contains 167 | a Windows 2.x application that supports 168 | proportional fonts. 169 | 3 If this bit is set, the executable file contains 170 | a fast-load area. 171 | 38h Specifies the offset, in sectors, to the beginning of 172 | the fast-load area. (Only Windows uses this value.) 
173 | 3Ah Specifies the length, in sectors, of the fast-load 174 | area. (Only Windows uses this value.) 175 | 3Ch Reserved. 176 | 3Eh Specifies the expected version number for Windows. 177 | (Only Windows uses this value.) 178 | 179 | Segment Table 180 | 181 | The segment table contains information that describes each 182 | segment in an executable file. This information includes the 183 | segment length, segment type, and segment-relocation data. 184 | The following list summarizes the values found in the segment 185 | table (the locations are relative to the beginning of each 186 | entry): 187 | 188 | Location Description 189 | 190 | 00h Specifies the offset, in sectors, to the segment data 191 | (relative to the beginning of the file). A value of 192 | zero means no data exists. 193 | 02h Specifies the length, in bytes, of the segment, in 194 | the file. A value of zero indicates that the segment 195 | length is 64K, unless the selector offset is also 196 | zero. 197 | 04h Specifies flags that describe the contents of the 198 | executable file. This value can be one or more of the 199 | following: 200 | 201 | Bit Meaning 202 | 203 | 0 If this bit is set, the segment is a data 204 | segment. Otherwise, the segment is a code 205 | segment. 206 | 1 If this bit is set, the loader has allocated 207 | memory for the segment. 208 | 2 If this bit is set, the segment is loaded. 209 | 3 Reserved. 210 | 4 If this bit is set, the segment type is MOVABLE. 211 | Otherwise, the segment type is FIXED. 212 | 5 If this bit is set, the segment type is PURE or 213 | SHAREABLE. Otherwise, the segment type is IMPURE 214 | or NONSHAREABLE. 215 | 6 If this bit is set, the segment type is PRELOAD. 216 | Otherwise, the segment type is LOADONCALL. 217 | 7 If this bit is set and the segment is a code 218 | segment, the segment type is EXECUTEONLY. If this 219 | bit is set and the segment is a data segment, the 220 | segment type is READONLY. 
221 | 8 If this bit is set, the segment contains 222 | relocation data. 223 | 9 Reserved. 224 | 10 Reserved. 225 | 11 Reserved. 226 | 12 If this bit is set, the segment is discardable. 227 | 13 Reserved. 228 | 14 Reserved. 229 | 15 Reserved. 230 | 06h Specifies the minimum allocation size of the segment, 231 | in bytes. A value of zero indicates that the minimum 232 | allocation size is 64K. 233 | 234 | Resource Table 235 | 236 | The resource table describes and identifies the location of 237 | each resource in the executable file. The table has the 238 | following form: 239 | WORD rscAlignShift; 240 | TYPEINFO rscTypes[]; 241 | WORD rscEndTypes; 242 | BYTE rscResourceNames[]; 243 | BYTE rscEndNames; 244 | Following are the members in the resource table: 245 | rscAlignShift Specifies the alignment shift count for 246 | resource data. When the shift count is used 247 | as an exponent of 2, the resulting value 248 | specifies the factor, in bytes, for computing 249 | the location of a resource in the executable 250 | file. 251 | rscTypes Specifies an array of TYPEINFO structures 252 | containing information about resource types. 253 | There must be one TYPEINFO structure for each 254 | type of resource in the executable file. 255 | rscEndTypes Specifies the end of the resource type 256 | definitions. This member must be zero. 257 | rscResourceNames Specifies the names (if any) associated with 258 | the resources in this table. Each name is 259 | stored as consecutive bytes; the first byte 260 | specifies the number of characters in the 261 | name. 262 | rscEndNames Specifies the end of the resource names and 263 | the end of the resource table. This member 264 | must be zero. 
265 | 266 | Type Information 267 | 268 | The TYPEINFO structure has the following form: 269 | typedef struct _TYPEINFO { 270 | WORD rtTypeID; 271 | WORD rtResourceCount; 272 | DWORD rtReserved; 273 | NAMEINFO rtNameInfo[]; 274 | } TYPEINFO; 275 | Following are the members in the TYPEINFO structure: 276 | rtTypeID Specifies the type identifier of the resource. 277 | This integer value is either a resource-type 278 | value or an offset to a resource-type name. If 279 | the high bit in this member is set (0x8000), 280 | the value is one of the following resource-type 281 | values: 282 | 283 | Value Resource type 284 | 285 | RT_ACCELERATOR Accelerator table 286 | RT_BITMAP Bitmap 287 | RT_CURSOR Cursor 288 | RT_DIALOG Dialog box 289 | RT_FONT Font component 290 | RT_FONTDIR Font directory 291 | RT_GROUP_CURSOR Cursor directory 292 | RT_GROUP_ICON Icon directory 293 | RT_ICON Icon 294 | RT_MENU Menu 295 | RT_RCDATA Resource data 296 | RT_STRING String table 297 | If the high bit of the value in this member is 298 | not set, the value represents an offset, in 299 | bytes relative to the beginning of the resource 300 | table, to a name in the rscResourceNames 301 | member. 302 | rtResourceCount Specifies the number of resources of this 303 | type in the executable file. 304 | rtReserved Reserved. 305 | rtNameInfo Specifies an array of NAMEINFO structures 306 | containing information about individual 307 | resources. The rtResourceCount member specifies 308 | the number of structures in the array. 309 | 310 | Name Information 311 | 312 | The NAMEINFO structure has the following form: 313 | typedef struct _NAMEINFO { 314 | WORD rnOffset; 315 | WORD rnLength; 316 | WORD rnFlags; 317 | WORD rnID; 318 | WORD rnHandle; 319 | WORD rnUsage; 320 | } NAMEINFO; 321 | Following are the members in the NAMEINFO structure: 322 | rnOffset Specifies an offset to the contents of the resource 323 | data (relative to the beginning of the file). 
The 324 | offset is in terms of alignment units specified by 325 | the rscAlignShift member at the beginning of the 326 | resource table. 327 | rnLength Specifies the resource length, in bytes. 328 | rnFlags Specifies whether the resource is fixed, preloaded, 329 | or shareable. This member can be one or more of the 330 | following values: 331 | 332 | Value Meaning 333 | 334 | 0x0010 Resource is movable (MOVEABLE). Otherwise, it 335 | is fixed. 336 | 0x0020 Resource can be shared (PURE). 337 | 0x0040 Resource is preloaded (PRELOAD). Otherwise, it 338 | is loaded on demand. 339 | rnID Specifies or points to the resource identifier. If 340 | the identifier is an integer, the high bit is set 341 | (8000h). Otherwise, it is an offset to a resource 342 | string, relative to the beginning of the resource 343 | table. 344 | rnHandle Reserved. 345 | rnUsage Reserved. 346 | 347 | Resident-Name Table 348 | 349 | The resident-name table contains strings that identify 350 | exported functions in the executable file. As the name 351 | implies, these strings are resident in system memory and are 352 | never discarded. The resident-name strings are case-sensitive 353 | and are not null-terminated. The following list summarizes 354 | the values found in the resident-name table (the locations 355 | are relative to the beginning of each entry): 356 | 357 | Location Description 358 | 359 | 00h Specifies the length of a string. If there are no 360 | more strings in the table, this value is zero. 361 | 01h - xxh Specifies the resident-name text. This string is 362 | case-sensitive and is not null-terminated. 363 | xxh + 01h Specifies an ordinal number that identifies the 364 | string. This number is an index into the entry table. 365 | The first string in the resident-name table is the module 366 | name. 367 | 368 | Module-Reference Table 369 | 370 | The module-reference table contains offsets for module names 371 | stored in the imported-name table. 
Each entry in this table 372 | is 2 bytes long. 373 | 374 | Imported-Name Table 375 | 376 | The imported-name table contains the names of modules that 377 | the executable file imports. Each entry contains two parts: a 378 | single byte that specifies the length of the string and the 379 | string itself. The strings in this table are not 380 | null-terminated. 381 | 382 | Entry Table 383 | 384 | The entry table contains bundles of entry points from the 385 | executable file (the linker generates each bundle). The 386 | numbering system for these ordinal values is 1-based--that 387 | is, the ordinal value corresponding to the first entry point 388 | is 1. 389 | The linker generates the densest possible bundles under the 390 | restriction that it cannot reorder the entry points. This 391 | restriction is necessary because other executable files may 392 | refer to entry points within a given bundle by their ordinal 393 | values. 394 | The entry-table data is organized by bundle, each of which 395 | begins with a 2-byte header. The first byte of the header 396 | specifies the number of entries in the bundle (a value of 00h 397 | designates the end of the table). The second byte specifies 398 | whether the corresponding segment is movable or fixed. If the 399 | value in this byte is 0FFh, the segment is movable. If the 400 | value in this byte is 0FEh, the entry does not refer to a 401 | segment but refers, instead, to a constant defined within the 402 | module. If the value in this byte is neither 0FFh nor 0FEh, 403 | it is a segment index. 404 | 405 | For movable segments, each entry consists of 6 bytes and has 406 | the following form: 407 | 408 | Location Description 409 | 410 | 00h Specifies a byte value. This value can be a 411 | combination of the following bits: 412 | 413 | Bit(s) Meaning 414 | 415 | 0 If this bit is set, the entry is exported. 416 | 1 If this bit is set, the segment uses a global 417 | (shared) data segment. 
418 | 3-7 If the executable file contains code that 419 | performs ring transitions, these bits specify 420 | the number of words that compose the stack. At 421 | the time of the ring transition, these words 422 | must be copied from one ring to the other. 423 | 01h Specifies an int 3fh instruction. 424 | 03h Specifies the segment number. 425 | 04h Specifies the segment offset. 426 | 427 | For fixed segments, each entry consists of 3 bytes and has the 428 | following form: 429 | 430 | Location Description 431 | 432 | 00h Specifies a byte value. This value can be a 433 | combination of the following bits: 434 | 435 | Bit(s) Meaning 436 | 437 | 0 If this bit is set, the entry is exported. 438 | 1 If this bit is set, the entry uses a global 439 | (shared) data segment. (This may be set only 440 | for SINGLEDATA library modules.) 441 | 3-7 If the executable file contains code that 442 | performs ring transitions, these bits specify 443 | the number of words that compose the stack. At 444 | the time of the ring transition, these words 445 | must be copied from one ring to the other. 446 | 01h Specifies an offset. 447 | 448 | Nonresident-Name Table 449 | 450 | The nonresident-name table contains strings that identify 451 | exported functions in the executable file. As the name 452 | implies, these strings are not always resident in system 453 | memory and are discardable. The nonresident-name strings are 454 | case-sensitive; they are not null-terminated. The following 455 | list summarizes the values found in the nonresident-name 456 | table (the specified locations are relative to the beginning 457 | of each entry): 458 | 459 | Location Description 460 | 461 | 00h Specifies the length, in bytes, of a string. If this 462 | byte is 00h, there are no more strings in the table. 463 | 01h - xxh Specifies the nonresident-name text. This string is 464 | case-sensitive and is not null-terminated. 
465 | xx + 01h Specifies an ordinal number that is an index to the 466 | entry table. 467 | The first name that appears in the nonresident-name table is 468 | the module description string (which was specified in the 469 | module-definition file). 470 | 471 | Code Segments and Relocation Data 472 | 473 | Code and data segments follow the Windows header. Some of the 474 | code segments may contain calls to functions in other 475 | segments and may, therefore, require relocation data to 476 | resolve those references. This relocation data is stored in a 477 | relocation table that appears immediately after the code or 478 | data in the segment. The first 2 bytes in this table specify 479 | the number of relocation items the table contains. A 480 | relocation item is a collection of bytes specifying the 481 | following information: 482 | Address type (segment only, offset only, segment and 483 | offset) 484 | Relocation type (internal reference, imported ordinal, 485 | imported name) 486 | Segment number or ordinal identifier (for internal 487 | references) 488 | Reference-table index or function ordinal number (for 489 | imported ordinals) 490 | Reference-table index or name-table offset (for imported 491 | names) 492 | Each relocation item contains 8 bytes of data, the first byte 493 | of which specifies one of the following relocation-address 494 | types: 495 | 496 | Value Meaning 497 | 498 | 0 Low byte at the specified offset 499 | 2 16-bit selector 500 | 3 32-bit pointer 501 | 5 16-bit offset 502 | 11 48-bit pointer 503 | 13 32-bit offset 504 | The second byte specifies one of the following relocation 505 | types: 506 | 507 | Value Meaning 508 | 509 | 0 Internal reference 510 | 1 Imported ordinal 511 | 2 Imported name 512 | 3 OSFIXUP 513 | The third and fourth bytes specify the offset of the 514 | relocation item within the segment. 
515 | If the relocation type is imported ordinal, the fifth and 516 | sixth bytes specify an index to a module's reference table and 517 | the seventh and eighth bytes specify a function ordinal value. 518 | If the relocation type is imported name, the fifth and sixth 519 | bytes specify an index to a module's reference table and the 520 | seventh and eighth bytes specify an offset to an imported-name 521 | table. 522 | If the relocation type is internal reference and the segment 523 | is fixed, the fifth byte specifies the segment number, the 524 | sixth byte is zero, and the seventh and eighth bytes specify 525 | an offset to the segment. If the relocation type is internal 526 | reference and the segment is movable, the fifth byte specifies 527 | 0FFh, the sixth byte is zero; and the seventh and eighth bytes 528 | specify an ordinal value found in the segment's entry table. 529 | -------------------------------------------------------------------------------- /expand_ms_compress.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import collections 3 | import io 4 | import multiprocessing 5 | import os 6 | import re 7 | import shutil 8 | import subprocess 9 | import tempfile 10 | 11 | MSEXPAND = shutil.which("msexpand") 12 | 13 | if not MSEXPAND: 14 | raise ValueError("msexpand not found in PATH") 15 | 16 | 17 | def main(): 18 | ap = argparse.ArgumentParser( 19 | description="extract Microsoft legacy compressed files (using msexpand)", 20 | ) 21 | ap.add_argument("--in-dir", required=True, help="input directory") 22 | ap.add_argument( 23 | "--legacy-inf", 24 | help="(try to) read a legacy setup.inf file (e.g. 
excel 5, windows 3.11) to guess true file extensions", 25 | ) 26 | ap.add_argument("--out-dir", required=False, help="output directory") 27 | args = ap.parse_args() 28 | if not args.out_dir: 29 | args.out_dir = args.in_dir.rstrip(os.sep) + "_expanded" 30 | os.makedirs(args.out_dir, exist_ok=True) 31 | input_files = [ 32 | sde 33 | for sde in os.scandir(args.in_dir) 34 | if sde.is_file() and sde.name.endswith("_") 35 | ] 36 | if not input_files: 37 | raise ValueError(f"No files found in {args.in_dir}") 38 | 39 | filename_map: dict[str, list[os.DirEntry]] = {} 40 | input_filenames = {sde.name.lower(): sde for sde in input_files} 41 | if args.legacy_inf: 42 | with open(args.legacy_inf) as f: 43 | parse_legacy_inf(filename_map, input_filenames, f.read()) 44 | 45 | # TODO: add support for no filename_map (i.e. guess from extensions) 46 | 47 | if not filename_map: 48 | raise NotImplementedError( 49 | "No filename map was created. " 50 | "If you did pass --legacy-inf, it may not have been parsed correctly.", 51 | ) 52 | 53 | jobs = [] 54 | for dest_filename, source_sdes in sorted(filename_map.items()): 55 | dest_path = os.path.join(args.out_dir, dest_filename) 56 | src_paths = [sde.path for sde in source_sdes] 57 | jobs.append((src_paths, dest_path)) 58 | 59 | with multiprocessing.Pool() as pool: 60 | pool.starmap(msexpand, jobs) 61 | 62 | 63 | def msexpand(src_paths: list[str], dest_path: str) -> None: 64 | print(dest_path, "<-", src_paths) 65 | buf = io.BytesIO() 66 | # Expand and concatenate all source files into a single buffer... 67 | for src_path in src_paths: 68 | with tempfile.NamedTemporaryFile(prefix="ms_compress_") as tf: 69 | subprocess.check_call( 70 | [ 71 | MSEXPAND, 72 | src_path, 73 | tf.name, 74 | ], 75 | ) 76 | tf.seek(0) 77 | shutil.copyfileobj(tf, buf) 78 | # ... then write the buffer to the destination file. 
79 | with open(dest_path, "wb") as outf: 80 | buf.seek(0) 81 | shutil.copyfileobj(buf, outf) 82 | 83 | 84 | def parse_legacy_inf( 85 | filename_map: dict[str, list[os.DirEntry]], 86 | input_filenames: dict[str, os.DirEntry], 87 | data: str, 88 | ): 89 | if data.startswith("[Source Media Descriptions]"): 90 | parse_excel5_style_inf(filename_map, input_filenames, data) 91 | elif ";; SETUP.INF" in data[:512]: 92 | parse_windows3_style_inf(filename_map, input_filenames, data) 93 | else: 94 | raise NotImplementedError("Unknown legacy INF format") 95 | 96 | 97 | def parse_excel5_style_inf( 98 | filename_map: dict[str, list[os.DirEntry]], 99 | input_filenames: dict[str, os.DirEntry], 100 | data: str, 101 | ): 102 | fp = io.StringIO(data) 103 | artifact_info = collections.defaultdict(list) 104 | group_name = None 105 | for line in fp: 106 | line = line.strip() 107 | if line.startswith("["): 108 | group_name = line.strip("[]") 109 | continue 110 | if not line.startswith('"'): 111 | continue 112 | if " = " not in line: 113 | continue 114 | artifact_name, bits = line.split(" = ", 1) 115 | bits = [(bit.strip() or None) for bit in bits.split(",")] 116 | if len(bits) == 1: 117 | continue 118 | artifact_name = artifact_name.strip('"') 119 | src_or_dest = bits[1] 120 | dest_or_none = bits[2] 121 | artifact_info[(group_name, artifact_name)].append((src_or_dest, dest_or_none)) 122 | for key, infos in artifact_info.items(): 123 | if len(infos) == 1: 124 | src_or_dest, dest_or_none = infos[0] 125 | source_file_guess = src_or_dest[:-1].lower() + "_" 126 | if source_file_guess in input_filenames: 127 | filename_map[src_or_dest] = [input_filenames[source_file_guess]] 128 | else: 129 | print("Legacy INF: unable to map source file for", key, src_or_dest) 130 | else: 131 | source_files = [s[0] for s in infos] 132 | dest_file = next((s[1] for s in infos if s[1]), None) 133 | if dest_file and all(sf in input_filenames for sf in source_files): 134 | filename_map[dest_file] = 
def main():
    """
    Extract every file from one or more FAT diskette images into a directory.

    Command line: ``image [image ...] -d/--dir OUTPUT``. Each image is opened
    through the ``fat://`` filesystem opener and every file found by walking
    it is copied below the output directory, keeping its path relative to the
    image root. One progress line per file is written to stderr.

    Restoring the modification time is best-effort: a failure there is
    reported on stderr but does not abort the extraction.
    """
    ap = argparse.ArgumentParser(
        description="extract diskette images into a directory using pyfatfs",
    )
    ap.add_argument("image", nargs="+")
    ap.add_argument("-d", "--dir", required=True, help="output directory")
    args = ap.parse_args()
    os.makedirs(args.dir, exist_ok=True)
    for image_filename in args.image:
        with open_fs(f"fat://{image_filename}") as fs:
            for file in fs.walk.files():
                dest_path = os.path.join(args.dir, file.removeprefix("/"))
                # Files may live in subdirectories of the image; the output
                # tree has to exist before the destination can be opened.
                os.makedirs(os.path.dirname(dest_path), exist_ok=True)
                with fs.open(file, "rb") as inf, open(dest_path, "wb") as outf:
                    shutil.copyfileobj(inf, outf)
                    print(
                        f"{image_filename}#{file} => {dest_path}, {outf.tell()} bytes",
                        file=sys.stderr,
                    )
                try:
                    # `modified` is only populated when the "details"
                    # namespace is requested, and it is a datetime (or None),
                    # whereas os.utime wants POSIX timestamps.
                    fi = fs.getinfo(file, namespaces=["details"])
                    if fi.modified is not None:
                        mtime = fi.modified.timestamp()
                        os.utime(dest_path, (mtime, mtime))
                except Exception as exc:
                    # Best-effort only, but say so instead of hiding it.
                    print(
                        f"{image_filename}#{file}: unable to set modification time: {exc}",
                        file=sys.stderr,
                    )
def _write_ico_image(
    *,
    ico_data: bytes,
    dest_dir: str,
    extract_ico: bool,
    extract_png: bool,
    ico_extension: str = ".ico",
    name: str,
    log_prefix: str,
):
    """
    Write an ICO/CUR file and/or PNG renditions of the images it contains.

    :param ico_data: Complete ICO/CUR file contents.
    :param dest_dir: Directory the output files are written to.
    :param extract_ico: Write the raw data as ``{name}{ico_extension}``.
    :param extract_png: Decode the data with Pillow and write PNG files,
        one per entry of the image's "sizes" info (``{name}_{w}x{h}.png``),
        or a single ``{name}.png`` when there is no such info.
    :param ico_extension: Extension used for the raw file.
    :param name: Output file name without extension.
    :param log_prefix: Printed in front of every "=> path" progress line.
    """
    if extract_ico:
        ico_path = os.path.join(dest_dir, f"{name}{ico_extension}")
        with open(ico_path, "wb") as outf:
            outf.write(ico_data)
        print(log_prefix, "=>", ico_path)
    if extract_png:
        img = Image.open(io.BytesIO(ico_data))
        # Diagnostic detail belongs in the debug log, not in the
        # "source => destination" listing printed on stdout.
        log.debug("%s: %s %s", log_prefix, img, img.info)
        for size in img.info.get("sizes") or (None,):  # CURs don't have a "sizes" key
            if size:
                w, h = size
                # Select which of the contained images gets loaded.
                img.size = size
                suffix = f"_{w}x{h}"
            else:
                suffix = ""
            img.load()
            png_path = os.path.join(dest_dir, f"{name}{suffix}.png")
            img.save(png_path)
            print(log_prefix, "=>", png_path)
if len(args.file) > 1 136 | else "" 137 | ), 138 | log_prefix=source_file, 139 | ) 140 | success = True 141 | except ParseError as exc: 142 | log.warning("%s: %s", source_file, exc) 143 | except Exception: 144 | if args.continue_on_errors: 145 | log.exception(f"Failed extracting from {source_file}", exc_info=True) 146 | else: 147 | print("Error while extracting", source_file, file=sys.stderr) 148 | raise 149 | if not success and args.process_images: 150 | try: 151 | im = Image.open(source_file) 152 | im.load() 153 | if args.png: 154 | dest_file = os.path.join( 155 | dest_dir, 156 | os.path.basename(source_file) + ".png", 157 | ) 158 | im.save(dest_file) 159 | print( 160 | f"Image {source_file} ({im.size} {im.format}) converted to {dest_file}", 161 | ) 162 | except Exception as exc: 163 | log.warning("%s: not an image either: %s", source_file, exc) 164 | 165 | 166 | if __name__ == "__main__": 167 | main() 168 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | target-version = "py39" 3 | select = [ 4 | "B", 5 | "C", 6 | "COM", 7 | "E", 8 | "F", 9 | "I", 10 | "UP", 11 | "TID252", 12 | ] 13 | ignore = [ 14 | "B007", 15 | "B905", 16 | "C901", # Complexity 17 | "E501", # Line length 18 | ] 19 | unfixable = [ 20 | "F841", 21 | ] 22 | 23 | [tool.ruff.flake8-tidy-imports] 24 | ban-relative-imports = "all" 25 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | Pillow 2 | pe-tools 3 | pyfatfs 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.11 3 | # by the following command: 4 | # 5 | # 
class ParseError(ValueError):
    """Base class for errors raised while parsing an executable."""


class NotNEFile(ParseError):
    """The input does not look like an NE executable."""


class BadResourceTable(ParseError):
    """The NE resource table is malformed or implausible."""
def reassemble_ico(dents_and_datas, idType: int, height_divisor: int = 1) -> bytes:
    """
    Build a standalone ICO/CUR file from group-directory entries and image data.

    The output is a header, then one ICONDIRENTRY per image, then the image
    payloads in the same order.

    :param dents_and_datas: Sequence of ``(group_entry, data)`` pairs. The
        group entry supplies the directory fields (its ``nId`` is dropped);
        ``data`` is the payload written for that image.
    :param idType: Value stored in the header's ``idType`` field.
    :param height_divisor: ``bHeight`` of each entry is floor-divided by this
        (for cursors; the actual data may have a trailing 1-bit mask).
    :return: The assembled file contents.
    """
    stream = io.BytesIO()
    header = IconOrCursorHeader(
        idReserved=0,
        idType=idType,
        idCount=len(dents_and_datas),
    )
    stream.write(header.pack())
    offsets = []
    # Payloads start right after the header and the whole directory.
    offset = stream.tell() + len(dents_and_datas) * ICONDIRENTRY.calcsize()
    for gdent, data in dents_and_datas:
        vs = vars(gdent).copy()
        vs.pop("nId")
        vs["dwImageOffset"] = offset
        vs["bHeight"] //= height_divisor
        # Size and offsets must describe the bytes actually written, which
        # may be fewer than the group entry claims (the cursor path strips a
        # 4-byte header from every payload before calling us).
        vs["dwBytesInRes"] = len(data)
        offsets.append(offset)
        offset += len(data)
        fdent = ICONDIRENTRY(**vs)
        stream.write(fdent.pack())
    for offset, (_, data) in zip(offsets, dents_and_datas):
        assert stream.tell() == offset  # sanity check
        stream.write(data)
    return stream.getvalue()
def _reassemble_cur_from_group_resource(
    group_resource: ResourceEntry,
    cur_datas: dict,
) -> bytes:
    """
    Assemble a CUR file for one cursor group resource.

    :param group_resource: The group resource; its data holds a 6-byte
        header followed by ``idCount`` cursor directory entries.
    :param cur_datas: Cursor payloads keyed by ``(resource id, language id)``.
    :return: The assembled file contents.
    """
    raw = group_resource.data
    header = IconOrCursorHeader.unpack_from(raw)
    entry_size = ResourceCursorDirEntry.calcsize()
    pairs = []
    for index in range(header.idCount):
        start = 6 + index * entry_size
        dirent = ResourceCursorDirEntry.unpack_from(raw[start:])
        payload = cur_datas[(dirent.nId, group_resource.lang_id)]
        assert len(payload) >= dirent.dwBytesInRes, (len(payload),)
        # Keep at most dwBytesInRes bytes, minus the leading LOCALHEADER
        # (4 bytes, hotspot x/y).
        pairs.append((dirent, payload[4 : dirent.dwBytesInRes]))
    return reassemble_ico(pairs, idType=header.idType, height_divisor=2)
resources, 145 | assembler=_reassemble_cur_from_group_resource, 146 | data_type=KnownResourceTypes.RT_CURSOR, 147 | group_type=KnownResourceTypes.RT_GROUP_CURSOR, 148 | ) 149 | -------------------------------------------------------------------------------- /res_extract/ne_resources.py: -------------------------------------------------------------------------------- 1 | """ 2 | Read resource entries from NE binaries. 3 | """ 4 | from __future__ import annotations 5 | 6 | import logging 7 | import struct 8 | from dataclasses import dataclass 9 | 10 | from pe_tools import KnownResourceTypes 11 | 12 | from res_extract.errors import BadResourceTable, NotNEFile 13 | from res_extract.resources import ResourceEntry 14 | 15 | log = logging.getLogger(__name__) 16 | 17 | 18 | def read_u8(s) -> int: 19 | c = s.read(1) 20 | return struct.unpack(" int: 24 | c = s.read(2) 25 | return struct.unpack(" int: 29 | c = s.read(4) 30 | return struct.unpack(" 31: 121 | raise BadResourceTable( 122 | f"NE resource table align_shift {align_shift} is suspiciously large", 123 | ) 124 | resources_to_rename = [] 125 | while True: 126 | type_id = read_u16(res_table_stream) 127 | if type_id == 0: 128 | break 129 | count = read_u16(res_table_stream) 130 | _reserved = read_u32(res_table_stream) 131 | for i in range(count): 132 | res_offset = read_u16(res_table_stream) * (1 << align_shift) 133 | res_length = read_u16(res_table_stream) * (1 << align_shift) 134 | _res_flags = read_u16(res_table_stream) 135 | res_id = read_u16(res_table_stream) 136 | _res_handle = read_u16(res_table_stream) 137 | _res_usage = read_u16(res_table_stream) 138 | 139 | re = NEResourceEntry( 140 | type_id=(type_id & 0x7FFF), 141 | res_id=(res_id & 0x7FFF), 142 | res_name=None, 143 | res_offset=res_offset, 144 | res_length=res_length, 145 | ) 146 | 147 | # Do these skips here in the loop so we read the table correctly 148 | # without needing to seek 149 | if not type_id & 0x8000: 150 | log.debug( 151 | f"%s: skipping resource 
def read_ne_resources(exe):
    """
    Yield a ResourceEntry for every resource listed in an NE executable.

    :param exe: Seekable binary stream positioned at the start of the file.
        Its ``name`` attribute, when present, is used in messages.
    :raises NotNEFile: If the MZ signature or the NE magic is missing.
    :raises BadResourceTable: If a resource's data extends past the end of
        the file.

    This is a generator, so errors surface while iterating, not at call time.
    """
    name = str(getattr(exe, "name", exe))
    signature = exe.read(2)
    if signature != b"MZ":
        raise NotNEFile(
            f"{name} doesn't look like a NE file (initial MZ signature is {signature!r})",
        )
    # If the word value at offset 18h is 40h or greater, the word
    # value at 3Ch is typically an offset to a Windows header.
    exe.seek(0x18)
    word_18 = read_u16(exe)
    if word_18 >= 0x40:
        exe.seek(0x3C)
        ne_header_offset = read_u16(exe)
    else:
        ne_header_offset = 0x480  # Just a guess!
    exe.seek(ne_header_offset)
    header = NEHeader.from_stream(exe)
    if header.ne_magic != b"NE":
        raise NotNEFile(
            f"{name} doesn't look like a NE file (magic {header.ne_magic!r} at offset {hex(ne_header_offset)} not 'NE')",
        )
    exe.seek(ne_header_offset + header.resource_table_offset)
    # Read the whole table before touching any resource data: the table
    # reader consumes the stream sequentially, and the loop below seeks.
    resource_entries = list(read_ne_resource_table(exe, log_prefix=name))
    for re in resource_entries:
        exe.seek(re.res_offset)
        data = exe.read(re.res_length)
        if len(data) != re.res_length:
            # Input validation must not rely on assert (stripped under -O);
            # raising a ParseError subclass lets callers treat this like
            # any other malformed file.
            raise BadResourceTable(
                f"{name}: resource {re.res_id} is truncated "
                f"(expected {re.res_length} bytes at offset {re.res_offset}, got {len(data)})",
            )
        yield ResourceEntry(
            data=data,
            lang_id=0,
            name=re.res_name,
            res_id=re.res_id,
            type_id=re.type_id,
        )
def get_resources_from_file(exe_fp) -> Iterable[ResourceEntry]:
    """
    Yield the resources of an executable, trying PE first and NE second.

    :param exe_fp: Seekable binary stream of the executable.
    :return: Generator of ResourceEntry objects.

    A RuntimeError from the PE parser is re-raised unless its message
    contains "Not a PE file"; in that case the stream is rewound and handed
    to the NE reader.
    """
    try:
        pe = parse_pe(grope.wrap_io(exe_fp))
    except RuntimeError as rte:
        if "Not a PE file" not in str(rte):
            raise
        pe = None

    if not pe:
        # Assume NE then...
        # Imported here rather than at module level: ne_resources itself
        # imports ResourceEntry from this module.
        from res_extract.ne_resources import read_ne_resources

        exe_fp.seek(0)
        yield from read_ne_resources(exe_fp)
        return

    for type_id, resources_by_id in pe.parse_resources().items():
        for res_id, resources_by_lang in resources_by_id.items():
            for lang_id, blob in resources_by_lang.items():
                yield ResourceEntry(
                    type_id=type_id,
                    res_id=res_id,
                    lang_id=lang_id,
                    data=bytes(blob),
                )