├── .github
    └── workflows
    │   └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docs
    └── winexe.txt
├── expand_ms_compress.py
├── extract_diskettes.py
├── extract_images.py
├── pyproject.toml
├── requirements.in
├── requirements.txt
└── res_extract
    ├── __init__.py
    ├── errors.py
    ├── icons.py
    ├── ne_resources.py
    └── resources.py


/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 |     branches:
 9 |       - master
10 | 
11 | jobs:
12 |   lint:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v3
16 |       - uses: pre-commit/action@v3.0.0
17 | #  test:
18 | #    runs-on: ubuntu-latest
19 | #    steps:
20 | #      - uses: actions/checkout@v3
21 | #      - uses: actions/setup-python@v4
22 | #        with:
23 | #          python-version: "3.11"
24 | #      - name: Install dependencies
25 | #        run: python -m pip install pytest-cov -e .
26 | #      - run: pytest --cov --cov-report=term-missing .
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.ico
2 | *.png
3 | *.py[cod]
4 | /test_data
5 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: https://github.com/charliermarsh/ruff-pre-commit
 3 |     rev: v0.0.247
 4 |     hooks:
 5 |       - id: ruff
 6 |         args:
 7 |           - --fix
 8 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 9 |     rev: v4.4.0
10 |     hooks:
11 |       - id: end-of-file-fixer
12 |       - id: trailing-whitespace
13 |   - repo: https://github.com/psf/black
14 |     rev: 22.12.0
15 |     hooks:
16 |       - id: black
17 |         args:
18 |           - --quiet
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | The MIT License (MIT)
 3 | 
 4 | Copyright (c) 2020 Aarni Koskela
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Resource extraction tools
 2 | =========================
 3 | 
 4 | All instructions assume you have successfully installed the requirements.
 5 | 
 6 | Requires Python 3.
 7 | 
 8 | Extract images and icons from a PE or NE file (.exe/.dll/...)
 9 | -------------------------------------------------------------
10 | 
11 | ```
12 | python extract_images.py /Volumes/OFFPRO_Z/EXCEL/EXCEL.EXE --png --ico --dir=./excel
13 | ```
14 | 
15 | will extract reconstituted ICO files as well as PNG files into `./excel`.
16 | 
17 | Extract (multiple) diskette images into a directory
18 | ---------------------------------------------------
19 | 
20 | ```
21 | python3 extract_diskettes.py excel_5_diskettes/*.img -d excel_5_diskette_contents/
22 | ```
23 | 
24 | will extract all files off the diskette images into `excel_5_diskette_contents`.
25 | 
26 | 
27 | 
28 | Expand Microsoft compressed data
29 | --------------------------------
30 | 
31 | Requires `msexpand` from [`libmspack`](https://github.com/kyz/libmspack/blob/master/libmspack/examples/msexpand.c)
32 | to be on your path. (On macOS, that tool compiles without any fuss if you have `automake` and `autoconf` installed.)
33 | 
34 | ```
35 | python3 expand_ms_compress.py --in-dir excel_5_diskette_contents/ --legacy-inf=excel_5_diskette_contents/EXCEL5.INF --out-dir=excel_5_expanded
36 | ```
37 | 
38 | will expand all underscorey files from your (previously extracted) Excel 5 diskettes into `excel_5_expanded`.
39 | 


--------------------------------------------------------------------------------
/docs/winexe.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | Executable-File Header Format (3.1)
  3 | 
  4 | An executable (.EXE) file for the Windows operating system
  5 | contains a combination of code and data or a combination of
  6 | code, data, and resources. The executable file also contains
  7 | two headers: an MS-DOS header and a Windows header. The next
  8 | two sections describe these headers; the third section
  9 | describes the code and data contained in a Windows executable
 10 | file.
 11 | 
 12 | MS-DOS Header
 13 | 
 14 | The MS-DOS (old-style) executable-file header contains four
 15 | distinct parts: a collection of header information (such as
 16 | the signature word, the file size, and so on), a reserved
 17 | section, a pointer to a Windows header (if one exists), and a
 18 | stub program. The following illustration shows the MS-DOS
 19 | executable-file header:
 20 | If the word value at offset 18h is 40h or greater, the word
 21 | value at 3Ch is typically an offset to a Windows header.
 22 | Applications must verify this for each executable-file header
 23 | being tested, because a few applications have a different
 24 | header style.
 25 | MS-DOS uses the stub program to display a message if Windows
 26 | has not been loaded when the user attempts to run a program.
 27 | 
 28 | Windows Header
 29 | 
 30 | The Windows (new-style) executable-file header contains
 31 | information that the loader requires for segmented executable
 32 | files. This information includes the linker version number,
 33 | data specified by the linker, data specified by the resource
 34 | compiler, tables of segment data, tables of resource data,
 35 | and so on. The following illustration shows the Windows
 36 | executable-file header:
 37 | The following sections describe the entries in the Windows
 38 | executable-file header.
 39 | 
 40 | Information Block
 41 | 
 42 | The information block in the Windows header contains the
 43 | linker version number, the lengths of various tables that
 44 | further describe the executable file, the offsets from the
 45 | beginning of the header to the beginning of these tables, the
 46 | heap and stack sizes, and so on. The following list
 47 | summarizes the contents of the header information block (the
 48 | locations are relative to the beginning of the block):
 49 | 
 50 | Location Description
 51 | 00h      Specifies the signature word. The low byte contains
 52 |          "N" (4Eh) and the high byte contains "E" (45h).
 53 | 02h      Specifies the linker version number.
 54 | 03h      Specifies the linker revision number.
 55 | 04h      Specifies the offset to the entry table (relative to
 56 |          the beginning of the header).
 57 | 06h      Specifies the length of the entry table, in bytes.
 58 | 08h      Reserved.
 59 | 0Ch      Specifies flags that describe the contents of the
 60 |          executable file. This value can be one or more of the
 61 |          following bits:
 62 | 
 63 |          Bit Meaning
 64 |          0   The linker sets this bit if the executable-file
 65 |              format is SINGLEDATA. An executable file with
 66 |              this format contains one data segment. This bit
 67 |              is set if the file is a dynamic-link library
 68 |              (DLL).
 69 |          1   The linker sets this bit if the executable-file
 70 |              format is MULTIPLEDATA. An executable file with
 71 |              this format contains multiple data segments. This
 72 |              bit is set if the file is a Windows application.
 73 |              If neither bit 0 nor bit 1 is set, the
 74 |              executable-file format is NOAUTODATA. An
 75 |              executable file with this format does not contain
 76 |              an automatic data segment.
 77 |          2   Reserved.
 78 |          3   Reserved.
 79 |          8   Reserved.
 80 |          9   Reserved.
 81 |          11  If this bit is set, the first segment in the
 82 |              executable file contains code that loads the
 83 |              application.
 84 |          13  If this bit is set, the linker detects errors at
 85 |              link time but still creates an executable file.
 86 |          14  Reserved.
 87 |          15  If this bit is set, the executable file is a
 88 |              library module.
 89 |              If bit 15 is set, the CS:IP registers point to an
 90 |              initialization procedure called with the value in
 91 |              the AX register equal to the module handle. The
 92 |              initialization procedure must execute a far
 93 |              return to the caller. If the procedure is
 94 |              successful, the value in AX is nonzero.
 95 |              Otherwise, the value in AX is zero.
 96 |              The value in the DS register is set to the
 97 |              library's data segment if SINGLEDATA is set.
 98 |              Otherwise, DS is set to the data segment of the
 99 |              application that loads the library.
100 | 0Eh      Specifies the automatic data segment number. (0Eh is
101 |          zero if the SINGLEDATA and MULTIPLEDATA bits are
102 |          cleared.)
103 | 10h      Specifies the initial size, in bytes, of the local
104 |          heap. This value is zero if there is no local
105 |          allocation.
106 | 12h      Specifies the initial size, in bytes, of the stack.
107 |          This value is zero if the SS register value does not
108 |          equal the DS register value.
109 | 14h      Specifies the segment:offset value of CS:IP.
110 | 18h      Specifies the segment:offset value of SS:SP.
111 |          The value specified in SS is an index to the module's
112 |          segment table. The first entry in the segment table
113 |          corresponds to segment number 1.
114 |          If SS addresses the automatic data segment and SP is
115 |          zero, SP is set to the address obtained by adding the
116 |          size of the automatic data segment to the size of the
117 |          stack.
118 | 1Ch      Specifies the number of entries in the segment table.
119 | 1Eh      Specifies the number of entries in the
120 |          module-reference table.
121 | 20h      Specifies the number of bytes in the nonresident-name
122 |          table.
123 | 22h      Specifies a relative offset from the beginning of the
124 |          Windows header to the beginning of the segment table.
125 | 24h      Specifies a relative offset from the beginning of the
126 |          Windows header to the beginning of the resource
127 |          table.
128 | 26h      Specifies a relative offset from the beginning of the
129 |          Windows header to the beginning of the resident-name
130 |          table.
131 | 28h      Specifies a relative offset from the beginning of the
132 |          Windows header to the beginning of the
133 |          module-reference table.
134 | 2Ah      Specifies a relative offset from the beginning of the
135 |          Windows header to the beginning of the imported-name
136 |          table.
137 | 2Ch      Specifies a relative offset from the beginning of the
138 |          file to the beginning of the nonresident-name table.
139 | 30h      Specifies the number of movable entry points.
140 | 32h      Specifies a shift count that is used to align the
141 |          logical sector. This count is log2 of the segment
142 |          sector size. It is typically 4, although the default
143 |          count is 9. (This value corresponds to the /alignment
144 |          [/a] linker switch. When the linker command line
145 |          contains /a:16, the shift count is 4. When the linker
146 |          command line contains /a:512, the shift count is 9.)
147 | 34h      Specifies the number of resource segments.
148 | 36h      Specifies the target operating system, depending on
149 |          which bits are set:
150 | 
151 |          Bit Meaning
152 | 
153 |          0   Operating system format is unknown.
154 |          1   Reserved.
155 |          2   Operating system is Microsoft Windows.
156 |          3   Reserved.
157 |          4   Reserved.
158 | 37h      Specifies additional information about the executable
159 |          file. It can be one or more of the following values:
160 | 
161 |          Bit Meaning
162 | 
163 |          1   If this bit is set, the executable file contains
164 |              a Windows 2.x application that runs in version 3.x
165 |              protected mode.
166 |          2   If this bit is set, the executable file contains
167 |              a Windows 2.x application that supports
168 |              proportional fonts.
169 |          3   If this bit is set, the executable file contains
170 |              a fast-load area.
171 | 38h      Specifies the offset, in sectors, to the beginning of
172 |          the fast-load area. (Only Windows uses this value.)
173 | 3Ah      Specifies the length, in sectors, of the fast-load
174 |          area. (Only Windows uses this value.)
175 | 3Ch      Reserved.
176 | 3Eh      Specifies the expected version number for Windows.
177 |          (Only Windows uses this value.)
178 | 
179 | Segment Table
180 | 
181 | The segment table contains information that describes each
182 | segment in an executable file. This information includes the
183 | segment length, segment type, and segment-relocation data.
184 | The following list summarizes the values found in the segment
185 | table (the locations are relative to the beginning of each
186 | entry):
187 | 
188 | Location Description
189 | 
190 | 00h      Specifies the offset, in sectors, to the segment data
191 |          (relative to the beginning of the file). A value of
192 |          zero means no data exists.
193 | 02h      Specifies the length, in bytes, of the segment, in
194 |          the file. A value of zero indicates that the segment
195 |          length is 64K, unless the selector offset is also
196 |          zero.
197 | 04h      Specifies flags that describe the contents of the
198 |          executable file. This value can be one or more of the
199 |          following:
200 | 
201 |          Bit Meaning
202 | 
203 |          0   If this bit is set, the segment is a data
204 |              segment. Otherwise, the segment is a code
205 |              segment.
206 |          1   If this bit is set, the loader has allocated
207 |              memory for the segment.
208 |          2   If this bit is set, the segment is loaded.
209 |          3   Reserved.
210 |          4   If this bit is set, the segment type is MOVABLE.
211 |              Otherwise, the segment type is FIXED.
212 |          5   If this bit is set, the segment type is PURE or
213 |              SHAREABLE. Otherwise, the segment type is IMPURE
214 |              or NONSHAREABLE.
215 |          6   If this bit is set, the segment type is PRELOAD.
216 |              Otherwise, the segment type is LOADONCALL.
217 |          7   If this bit is set and the segment is a code
218 |              segment, the segment type is EXECUTEONLY. If this
219 |              bit is set and the segment is a data segment, the
220 |              segment type is READONLY.
221 |          8   If this bit is set, the segment contains
222 |              relocation data.
223 |          9   Reserved.
224 |          10  Reserved.
225 |          11  Reserved.
226 |          12  If this bit is set, the segment is discardable.
227 |          13  Reserved.
228 |          14  Reserved.
229 |          15  Reserved.
230 | 06h      Specifies the minimum allocation size of the segment,
231 |          in bytes. A value of zero indicates that the minimum
232 |          allocation size is 64K.
233 | 
234 | Resource Table
235 | 
236 | The resource table describes and identifies the location of
237 | each resource in the executable file. The table has the
238 | following form:
239 | WORD     rscAlignShift;
240 | TYPEINFO rscTypes[];
241 | WORD     rscEndTypes;
242 | BYTE     rscResourceNames[];
243 | BYTE     rscEndNames;
244 | Following are the members in the resource table:
245 | rscAlignShift    Specifies the alignment shift count for
246 |                  resource data. When the shift count is used
247 |                  as an exponent of 2, the resulting value
248 |                  specifies the factor, in bytes, for computing
249 |                  the location of a resource in the executable
250 |                  file.
251 | rscTypes         Specifies an array of TYPEINFO structures
252 |                  containing information about resource types.
253 |                  There must be one TYPEINFO structure for each
254 |                  type of resource in the executable file.
255 | rscEndTypes      Specifies the end of the resource type
256 |                  definitions. This member must be zero.
257 | rscResourceNames Specifies the names (if any) associated with
258 |                  the resources in this table. Each name is
259 |                  stored as consecutive bytes; the first byte
260 |                  specifies the number of characters in the
261 |                  name.
262 | rscEndNames      Specifies the end of the resource names and
263 |                  the end of the resource table. This member
264 |                  must be zero.
265 | 
266 | Type Information
267 | 
268 | The TYPEINFO structure has the following form:
269 | typedef struct _TYPEINFO {
270 |     WORD        rtTypeID;
271 |     WORD        rtResourceCount;
272 |     DWORD       rtReserved;
273 |     NAMEINFO    rtNameInfo[];
274 | } TYPEINFO;
275 | Following are the members in the TYPEINFO structure:
276 | rtTypeID       Specifies the type identifier of the resource.
277 |                This integer value is either a resource-type
278 |                value or an offset to a resource-type name. If
279 |                the high bit in this member is set (0x8000),
280 |                the value is one of the following resource-type
281 |                values:
282 | 
283 |                Value            Resource type
284 | 
285 |                RT_ACCELERATOR   Accelerator table
286 |                RT_BITMAP        Bitmap
287 |                RT_CURSOR        Cursor
288 |                RT_DIALOG        Dialog box
289 |                RT_FONT          Font component
290 |                RT_FONTDIR       Font directory
291 |                RT_GROUP_CURSOR  Cursor directory
292 |                RT_GROUP_ICON    Icon directory
293 |                RT_ICON          Icon
294 |                RT_MENU          Menu
295 |                RT_RCDATA        Resource data
296 |                RT_STRING        String table
297 |                If the high bit of the value in this member is
298 |                not set, the value represents an offset, in
299 |                bytes relative to the beginning of the resource
300 |                table, to a name in the rscResourceNames
301 |                member.
302 | rtResourceCount    Specifies the number of resources of this
303 |                type in the executable file.
304 | rtReserved     Reserved.
305 | rtNameInfo     Specifies an array of NAMEINFO structures
306 |                containing information about individual
307 |                resources. The rtResourceCount member specifies
308 |                the number of structures in the array.
309 | 
310 | Name Information
311 | 
312 | The NAMEINFO structure has the following form:
313 | typedef struct _NAMEINFO {
314 |     WORD rnOffset;
315 |     WORD rnLength;
316 |     WORD rnFlags;
317 |     WORD rnID;
318 |     WORD rnHandle;
319 |     WORD rnUsage;
320 | } NAMEINFO;
321 | Following are the members in the NAMEINFO structure:
322 | rnOffset Specifies an offset to the contents of the resource
323 |          data (relative to the beginning of the file). The
324 |          offset is in terms of alignment units specified by
325 |          the rscAlignShift member at the beginning of the
326 |          resource table.
327 | rnLength Specifies the resource length, in bytes.
328 | rnFlags  Specifies whether the resource is fixed, preloaded,
329 |          or shareable. This member can be one or more of the
330 |          following values:
331 | 
332 |          Value  Meaning
333 | 
334 |          0x0010 Resource is movable (MOVEABLE). Otherwise, it
335 |                 is fixed.
336 |          0x0020 Resource can be shared (PURE).
337 |          0x0040 Resource is preloaded (PRELOAD). Otherwise, it
338 |                 is loaded on demand.
339 | rnID     Specifies or points to the resource identifier. If
340 |          the identifier is an integer, the high bit is set
341 |          (8000h). Otherwise, it is an offset to a resource
342 |          string, relative to the beginning of the resource
343 |          table.
344 | rnHandle Reserved.
345 | rnUsage  Reserved.
346 | 
347 | Resident-Name Table
348 | 
349 | The resident-name table contains strings that identify
350 | exported functions in the executable file. As the name
351 | implies, these strings are resident in system memory and are
352 | never discarded. The resident-name strings are case-sensitive
353 | and are not null-terminated. The following list summarizes
354 | the values found in the resident-name table (the locations
355 | are relative to the beginning of each entry):
356 | 
357 | Location Description
358 | 
359 | 00h      Specifies the length of a string. If there are no
360 |          more strings in the table, this value is zero.
361 | 01h - xxh Specifies the resident-name text. This string is
362 |          case-sensitive and is not null-terminated.
363 | xxh + 01h Specifies an ordinal number that identifies the
364 |          string. This number is an index into the entry table.
365 | The first string in the resident-name table is the module
366 | name.
367 | 
368 | Module-Reference Table
369 | 
370 | The module-reference table contains offsets for module names
371 | stored in the imported-name table. Each entry in this table
372 | is 2 bytes long.
373 | 
374 | Imported-Name Table
375 | 
376 | The imported-name table contains the names of modules that
377 | the executable file imports. Each entry contains two parts: a
378 | single byte that specifies the length of the string and the
379 | string itself. The strings in this table are not
380 | null-terminated.
381 | 
382 | Entry Table
383 | 
384 | The entry table contains bundles of entry points from the
385 | executable file (the linker generates each bundle). The
386 | numbering system for these ordinal values is 1-based--that
387 | is, the ordinal value corresponding to the first entry point
388 | is 1.
389 | The linker generates the densest possible bundles under the
390 | restriction that it cannot reorder the entry points. This
391 | restriction is necessary because other executable files may
392 | refer to entry points within a given bundle by their ordinal
393 | values.
394 | The entry-table data is organized by bundle, each of which
395 | begins with a 2-byte header. The first byte of the header
396 | specifies the number of entries in the bundle (a value of 00h
397 | designates the end of the table). The second byte specifies
398 | whether the corresponding segment is movable or fixed. If the
399 | value in this byte is 0FFh, the segment is movable. If the
400 | value in this byte is 0FEh, the entry does not refer to a
401 | segment but refers, instead, to a constant defined within the
402 | module. If the value in this byte is neither 0FFh nor 0FEh,
403 | it is a segment index.
404 | 
405 | For movable segments, each entry consists of 6 bytes and has
406 | the following form:
407 | 
408 | Location Description
409 | 
410 | 00h      Specifies a byte value. This value can be a
411 |          combination of the following bits:
412 | 
413 |          Bit(s)    Meaning
414 | 
415 |          0     If this bit is set, the entry is exported.
416 |          1     If this bit is set, the segment uses a global
417 |                (shared) data segment.
418 |          3-7   If the executable file contains code that
419 |                performs ring transitions, these bits specify
420 |                the number of words that compose the stack. At
421 |                the time of the ring transition, these words
422 |                must be copied from one ring to the other.
423 | 01h      Specifies an int 3fh instruction.
424 | 03h      Specifies the segment number.
425 | 04h      Specifies the segment offset.
426 | 
427 | For fixed segments, each entry consists of 3 bytes and has the
428 | following form:
429 | 
430 | Location Description
431 | 
432 | 00h      Specifies a byte value. This value can be a
433 |          combination of the following bits:
434 | 
435 |          Bit(s)    Meaning
436 | 
437 |          0     If this bit is set, the entry is exported.
438 |          1     If this bit is set, the entry uses a global
439 |                (shared) data segment. (This may be set only
440 |                for SINGLEDATA library modules.)
441 |          3-7   If the executable file contains code that
442 |                performs ring transitions, these bits specify
443 |                the number of words that compose the stack. At
444 |                the time of the ring transition, these words
445 |                must be copied from one ring to the other.
446 | 01h      Specifies an offset.
447 | 
448 | Nonresident-Name Table
449 | 
450 | The nonresident-name table contains strings that identify
451 | exported functions in the executable file. As the name
452 | implies, these strings are not always resident in system
453 | memory and are discardable. The nonresident-name strings are
454 | case-sensitive; they are not null-terminated. The following
455 | list summarizes the values found in the nonresident-name
456 | table (the specified locations are relative to the beginning
457 | of each entry):
458 | 
459 | Location Description
460 | 
461 | 00h      Specifies the length, in bytes, of a string. If this
462 |          byte is 00h, there are no more strings in the table.
463 | 01h - xxh Specifies the nonresident-name text. This string is
464 |          case-sensitive and is not null-terminated.
465 | xx + 01h Specifies an ordinal number that is an index to the
466 |          entry table.
467 | The first name that appears in the nonresident-name table is
468 | the module description string (which was specified in the
469 | module-definition file).
470 | 
471 | Code Segments and Relocation Data
472 | 
473 | Code and data segments follow the Windows header. Some of the
474 | code segments may contain calls to functions in other
475 | segments and may, therefore, require relocation data to
476 | resolve those references. This relocation data is stored in a
477 | relocation table that appears immediately after the code or
478 | data in the segment. The first 2 bytes in this table specify
479 | the number of relocation items the table contains. A
480 | relocation item is a collection of bytes specifying the
481 | following information:
482 |   Address type (segment only, offset only, segment and
483 |   offset)
484 |   Relocation type (internal reference, imported ordinal,
485 |   imported name)
486 |   Segment number or ordinal identifier (for internal
487 |   references)
488 |   Reference-table index or function ordinal number (for
489 |   imported ordinals)
490 |   Reference-table index or name-table offset (for imported
491 |   names)
492 | Each relocation item contains 8 bytes of data, the first byte
493 | of which specifies one of the following relocation-address
494 | types:
495 | 
496 | Value  Meaning
497 | 
498 | 0      Low byte at the specified offset
499 | 2      16-bit selector
500 | 3      32-bit pointer
501 | 5      16-bit offset
502 | 11     48-bit pointer
503 | 13     32-bit offset
504 | The second byte specifies one of the following relocation
505 | types:
506 | 
507 | Value  Meaning
508 | 
509 | 0      Internal reference
510 | 1      Imported ordinal
511 | 2      Imported name
512 | 3      OSFIXUP
513 | The third and fourth bytes specify the offset of the
514 | relocation item within the segment.
515 | If the relocation type is imported ordinal, the fifth and
516 | sixth bytes specify an index to a module's reference table and
517 | the seventh and eighth bytes specify a function ordinal value.
518 | If the relocation type is imported name, the fifth and sixth
519 | bytes specify an index to a module's reference table and the
520 | seventh and eighth bytes specify an offset to an imported-name
521 | table.
522 | If the relocation type is internal reference and the segment
523 | is fixed, the fifth byte specifies the segment number, the
524 | sixth byte is zero, and the seventh and eighth bytes specify
525 | an offset to the segment. If the relocation type is internal
526 | reference and the segment is movable, the fifth byte specifies
527 | 0FFh, the sixth byte is zero; and the seventh and eighth bytes
528 | specify an ordinal value found in the segment's entry table.
529 | 


--------------------------------------------------------------------------------
/expand_ms_compress.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import collections
  3 | import io
  4 | import multiprocessing
  5 | import os
  6 | import re
  7 | import shutil
  8 | import subprocess
  9 | import tempfile
 10 | 
# Path of the external `msexpand` executable as found on PATH, or None when
# it is not installed. Looked up once, when this module is imported.
MSEXPAND = shutil.which("msexpand")

# Fail fast: every expansion shells out to this tool, so refuse to load at all
# when it is missing.
if not MSEXPAND:
    raise ValueError("msexpand not found in PATH")
 15 | 
 16 | 
 17 | def main():
 18 |     ap = argparse.ArgumentParser(
 19 |         description="extract Microsoft legacy compressed files (using msexpand)",
 20 |     )
 21 |     ap.add_argument("--in-dir", required=True, help="input directory")
 22 |     ap.add_argument(
 23 |         "--legacy-inf",
 24 |         help="(try to) read a legacy setup.inf file (e.g. excel 5, windows 3.11) to guess true file extensions",
 25 |     )
 26 |     ap.add_argument("--out-dir", required=False, help="output directory")
 27 |     args = ap.parse_args()
 28 |     if not args.out_dir:
 29 |         args.out_dir = args.in_dir.rstrip(os.sep) + "_expanded"
 30 |     os.makedirs(args.out_dir, exist_ok=True)
 31 |     input_files = [
 32 |         sde
 33 |         for sde in os.scandir(args.in_dir)
 34 |         if sde.is_file() and sde.name.endswith("_")
 35 |     ]
 36 |     if not input_files:
 37 |         raise ValueError(f"No files found in {args.in_dir}")
 38 | 
 39 |     filename_map: dict[str, list[os.DirEntry]] = {}
 40 |     input_filenames = {sde.name.lower(): sde for sde in input_files}
 41 |     if args.legacy_inf:
 42 |         with open(args.legacy_inf) as f:
 43 |             parse_legacy_inf(filename_map, input_filenames, f.read())
 44 | 
 45 |     # TODO: add support for no filename_map (i.e. guess from extensions)
 46 | 
 47 |     if not filename_map:
 48 |         raise NotImplementedError(
 49 |             "No filename map was created. "
 50 |             "If you did pass --legacy-inf, it may not have been parsed correctly.",
 51 |         )
 52 | 
 53 |     jobs = []
 54 |     for dest_filename, source_sdes in sorted(filename_map.items()):
 55 |         dest_path = os.path.join(args.out_dir, dest_filename)
 56 |         src_paths = [sde.path for sde in source_sdes]
 57 |         jobs.append((src_paths, dest_path))
 58 | 
 59 |     with multiprocessing.Pool() as pool:
 60 |         pool.starmap(msexpand, jobs)
 61 | 
 62 | 
 63 | def msexpand(src_paths: list[str], dest_path: str) -> None:
 64 |     print(dest_path, "<-", src_paths)
 65 |     buf = io.BytesIO()
 66 |     # Expand and concatenate all source files into a single buffer...
 67 |     for src_path in src_paths:
 68 |         with tempfile.NamedTemporaryFile(prefix="ms_compress_") as tf:
 69 |             subprocess.check_call(
 70 |                 [
 71 |                     MSEXPAND,
 72 |                     src_path,
 73 |                     tf.name,
 74 |                 ],
 75 |             )
 76 |             tf.seek(0)
 77 |             shutil.copyfileobj(tf, buf)
 78 |     # ... then write the buffer to the destination file.
 79 |     with open(dest_path, "wb") as outf:
 80 |         buf.seek(0)
 81 |         shutil.copyfileobj(buf, outf)
 82 | 
 83 | 
 84 | def parse_legacy_inf(
 85 |     filename_map: dict[str, list[os.DirEntry]],
 86 |     input_filenames: dict[str, os.DirEntry],
 87 |     data: str,
 88 | ):
 89 |     if data.startswith("[Source Media Descriptions]"):
 90 |         parse_excel5_style_inf(filename_map, input_filenames, data)
 91 |     elif ";; SETUP.INF" in data[:512]:
 92 |         parse_windows3_style_inf(filename_map, input_filenames, data)
 93 |     else:
 94 |         raise NotImplementedError("Unknown legacy INF format")
 95 | 
 96 | 
 97 | def parse_excel5_style_inf(
 98 |     filename_map: dict[str, list[os.DirEntry]],
 99 |     input_filenames: dict[str, os.DirEntry],
100 |     data: str,
101 | ):
102 |     fp = io.StringIO(data)
103 |     artifact_info = collections.defaultdict(list)
104 |     group_name = None
105 |     for line in fp:
106 |         line = line.strip()
107 |         if line.startswith("["):
108 |             group_name = line.strip("[]")
109 |             continue
110 |         if not line.startswith('"'):
111 |             continue
112 |         if " = " not in line:
113 |             continue
114 |         artifact_name, bits = line.split(" = ", 1)
115 |         bits = [(bit.strip() or None) for bit in bits.split(",")]
116 |         if len(bits) == 1:
117 |             continue
118 |         artifact_name = artifact_name.strip('"')
119 |         src_or_dest = bits[1]
120 |         dest_or_none = bits[2]
121 |         artifact_info[(group_name, artifact_name)].append((src_or_dest, dest_or_none))
122 |     for key, infos in artifact_info.items():
123 |         if len(infos) == 1:
124 |             src_or_dest, dest_or_none = infos[0]
125 |             source_file_guess = src_or_dest[:-1].lower() + "_"
126 |             if source_file_guess in input_filenames:
127 |                 filename_map[src_or_dest] = [input_filenames[source_file_guess]]
128 |             else:
129 |                 print("Legacy INF: unable to map source file for", key, src_or_dest)
130 |         else:
131 |             source_files = [s[0] for s in infos]
132 |             dest_file = next((s[1] for s in infos if s[1]), None)
133 |             if dest_file and all(sf in input_filenames for sf in source_files):
134 |                 filename_map[dest_file] = [input_filenames[sf] for sf in source_files]
135 |             else:
136 |                 print(
137 |                     "Legacy INF: unable to map source file for concatenation",
138 |                     key,
139 |                     infos,
140 |                 )
141 | 
142 | 
def parse_windows3_style_inf(
    filename_map: dict[str, list[os.DirEntry]],
    input_filenames: dict[str, os.DirEntry],
    data: str,
):
    """
    Fill *filename_map* from a Windows-3-style SETUP.INF.

    The format is too ad-hoc to parse properly, so every token that looks
    like an 8.3 file name is taken as a candidate; it is mapped when the
    input directory has a file with the same lower-cased name whose last
    character is ``_``. Candidates without a match are reported on stdout.
    """
    unmapped = set()
    candidates = (m.group(1) for m in re.finditer(r"(\w{1,8}\.\w{1,3})", data))
    for filename in candidates:
        entry = input_filenames.get(filename.lower()[:-1] + "_")
        if not entry:
            unmapped.add(filename)
            continue
        filename_map[filename] = [entry]
    if unmapped:
        print("Legacy INF: unable to map source file for", unmapped)
161 | 
162 | 
163 | if __name__ == "__main__":
164 |     main()
165 | 


--------------------------------------------------------------------------------
/extract_diskettes.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import shutil
 4 | import sys
 5 | 
 6 | from fs import open_fs
 7 | 
 8 | 
 9 | def main():
10 |     ap = argparse.ArgumentParser(
11 |         description="extract diskette images into a directory using pyfatfs",
12 |     )
13 |     ap.add_argument("image", nargs="+")
14 |     ap.add_argument("-d", "--dir", required=True, help="output directory")
15 |     args = ap.parse_args()
16 |     os.makedirs(args.dir, exist_ok=True)
17 |     for image_filename in args.image:
18 |         with open_fs(f"fat://{image_filename}") as fs:
19 |             for file in fs.walk.files():
20 |                 dest_path = os.path.join(args.dir, file.removeprefix("/"))
21 |                 with fs.open(file, "rb") as inf:
22 |                     with open(dest_path, "wb") as outf:
23 |                         shutil.copyfileobj(inf, outf)
24 |                         print(
25 |                             f"{image_filename}#{file} => {dest_path}, {outf.tell()} bytes",
26 |                             file=sys.stderr,
27 |                         )
28 |                     try:
29 |                         fi = fs.getinfo(file)
30 |                         os.utime(dest_path, (fi.modified, fi.modified))
31 |                     except Exception:
32 |                         pass
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     main()
37 | 


--------------------------------------------------------------------------------
/extract_images.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import io
  3 | import logging
  4 | import os
  5 | import sys
  6 | 
  7 | from pe_tools import KnownResourceTypes
  8 | from PIL import Image
  9 | 
 10 | from res_extract import icons as libicons
 11 | from res_extract.errors import ParseError
 12 | from res_extract.resources import get_resources_from_file
 13 | 
 14 | log = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | def extract_images(
 18 |     *,
 19 |     dest_dir: str,
 20 |     source_file,
 21 |     extract_ico: bool,
 22 |     extract_png: bool,
 23 |     name_prefix: str = "",
 24 |     log_prefix: str,
 25 | ):
 26 |     resources = list(get_resources_from_file(source_file))
 27 |     for r in resources:
 28 |         if r.type_id == KnownResourceTypes.RT_BITMAP:
 29 |             # Here's hoping DibImageFile can handle this!
 30 |             img = Image.open(io.BytesIO(r.data))
 31 |             img.load()
 32 |             if extract_png:
 33 |                 png_path = os.path.join(
 34 |                     dest_dir, f"{name_prefix}bmp_{r.filename_part}.png"
 35 |                 )
 36 |                 img.save(png_path)
 37 |                 print(log_prefix, "=>", png_path)
 38 | 
 39 |     for r, ico_data in libicons.extract_icons(resources):
 40 |         _write_ico_image(
 41 |             ico_data=ico_data,
 42 |             dest_dir=dest_dir,
 43 |             extract_ico=extract_ico,
 44 |             extract_png=extract_png,
 45 |             name=f"{name_prefix}ico_{r.filename_part}",
 46 |             log_prefix=log_prefix,
 47 |         )
 48 | 
 49 |     for r, cur_data in libicons.extract_cursors(resources):
 50 |         _write_ico_image(
 51 |             ico_data=cur_data,
 52 |             dest_dir=dest_dir,
 53 |             extract_ico=extract_ico,
 54 |             extract_png=extract_png,
 55 |             name=f"{name_prefix}cur_{r.filename_part}",
 56 |             log_prefix=log_prefix,
 57 |             ico_extension=".cur",
 58 |         )
 59 | 
 60 | 
 61 | def _write_ico_image(
 62 |     *,
 63 |     ico_data: bytes,
 64 |     dest_dir: str,
 65 |     extract_ico: bool,
 66 |     extract_png: bool,
 67 |     ico_extension: str = ".ico",
 68 |     name: str,
 69 |     log_prefix: str,
 70 | ):
 71 |     """
 72 |     Write an ICO/CUR file.
 73 |     """
 74 |     if extract_ico:
 75 |         ico_path = os.path.join(dest_dir, f"{name}{ico_extension}")
 76 |         with open(ico_path, "wb") as outf:
 77 |             outf.write(ico_data)
 78 |             print(log_prefix, "=>", outf.name)
 79 |     if extract_png:
 80 |         img = Image.open(io.BytesIO(ico_data))
 81 |         print(img, img.info)
 82 |         for size in img.info.get("sizes") or (None,):  # CURs don't have a "sizes" key
 83 |             if size:
 84 |                 w, h = size
 85 |                 img.size = size
 86 |                 suffix = f"_{w}x{h}"
 87 |             else:
 88 |                 suffix = ""
 89 |             img.load()
 90 |             png_path = os.path.join(dest_dir, f"{name}{suffix}.png")
 91 |             img.save(png_path)
 92 |             print(log_prefix, "=>", png_path)
 93 | 
 94 | 
 95 | def main():
 96 |     ap = argparse.ArgumentParser()
 97 |     ap.add_argument("file", nargs="+")
 98 |     ap.add_argument("-d", "--dir", required=True)
 99 |     ap.add_argument("--continue-on-errors", default=False, action="store_true")
100 |     ap.add_argument(
101 |         "--ico",
102 |         default=False,
103 |         action="store_true",
104 |         help="extract icon/cursor resources as ico/cur",
105 |     )
106 |     ap.add_argument(
107 |         "--png",
108 |         default=False,
109 |         action="store_true",
110 |         help="extract image-like resources as png",
111 |     )
112 |     ap.add_argument("--process-images", default=False, action="store_true")
113 |     ap.add_argument("--debug", default=False, action="store_true")
114 |     args = ap.parse_args()
115 |     if args.debug:
116 |         logging.basicConfig(level=logging.DEBUG)
117 |     dest_dir = args.dir
118 |     os.makedirs(dest_dir, exist_ok=True)
119 |     if not (args.ico or args.png):
120 |         print("Warning: neither --ico nor --png specified, nothing will be extracted")
121 |     for source_file in args.file:
122 |         success = False
123 |         if os.path.getsize(source_file) == 0:
124 |             log.warning("%s: empty file", source_file)
125 |             continue
126 |         try:
127 |             with open(source_file, "rb") as fin:
128 |                 extract_images(
129 |                     dest_dir=dest_dir,
130 |                     source_file=fin,
131 |                     extract_ico=args.ico,
132 |                     extract_png=args.png,
133 |                     name_prefix=(
134 |                         f"{os.path.basename(source_file)}_"
135 |                         if len(args.file) > 1
136 |                         else ""
137 |                     ),
138 |                     log_prefix=source_file,
139 |                 )
140 |                 success = True
141 |         except ParseError as exc:
142 |             log.warning("%s: %s", source_file, exc)
143 |         except Exception:
144 |             if args.continue_on_errors:
145 |                 log.exception(f"Failed extracting from {source_file}", exc_info=True)
146 |             else:
147 |                 print("Error while extracting", source_file, file=sys.stderr)
148 |                 raise
149 |         if not success and args.process_images:
150 |             try:
151 |                 im = Image.open(source_file)
152 |                 im.load()
153 |                 if args.png:
154 |                     dest_file = os.path.join(
155 |                         dest_dir,
156 |                         os.path.basename(source_file) + ".png",
157 |                     )
158 |                     im.save(dest_file)
159 |                     print(
160 |                         f"Image {source_file} ({im.size} {im.format}) converted to {dest_file}",
161 |                     )
162 |             except Exception as exc:
163 |                 log.warning("%s: not an image either: %s", source_file, exc)
164 | 
165 | 
166 | if __name__ == "__main__":
167 |     main()
168 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.ruff]
 2 | target-version = "py39"
 3 | select = [
 4 |     "B",
 5 |     "C",
 6 |     "COM",
 7 |     "E",
 8 |     "F",
 9 |     "I",
10 |     "UP",
11 |     "TID252",
12 | ]
13 | ignore = [
14 |     "B007",
15 |     "B905",
16 |     "C901", # Complexity
17 |     "E501", # Line length
18 | ]
19 | unfixable = [
20 |     "F841",
21 | ]
22 | 
23 | [tool.ruff.flake8-tidy-imports]
24 | ban-relative-imports = "all"
25 | 


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
1 | Pillow
2 | pe-tools
3 | pyfatfs
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This file is autogenerated by pip-compile with Python 3.11
 3 | # by the following command:
 4 | #
 5 | #    pip-compile requirements.in
 6 | #
 7 | appdirs==1.4.4
 8 |     # via fs
 9 | fs==2.4.16
10 |     # via pyfatfs
11 | grope==2.0.1
12 |     # via pe-tools
13 | pe-tools==0.3.10
14 |     # via -r requirements.in
15 | pillow==9.4.0
16 |     # via -r requirements.in
17 | pyfatfs==1.0.5
18 |     # via -r requirements.in
19 | six==1.16.0
20 |     # via fs
21 | 
22 | # The following packages are considered to be unsafe in a requirements file:
23 | # setuptools
24 | 


--------------------------------------------------------------------------------
/res_extract/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akx/res-extract/b261795a4a520c5d0e51813c2acf069e3cd1702d/res_extract/__init__.py


--------------------------------------------------------------------------------
/res_extract/errors.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | 
 4 | class ParseError(ValueError):
 5 |     pass
 6 | 
 7 | 
 8 | class NotNEFile(ParseError):
 9 |     pass
10 | 
11 | 
class BadResourceTable(ParseError):
    """The resource table of an executable contains implausible values."""
14 | 


--------------------------------------------------------------------------------
/res_extract/icons.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | import logging
  3 | from collections.abc import Iterable
  4 | 
  5 | from pe_tools import Struct3, u8, u16, u32
  6 | from pe_tools.rsrc import KnownResourceTypes
  7 | 
  8 | from res_extract.resources import ResourceEntry
  9 | 
 10 | log = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | # H/T https://docs.microsoft.com/en-us/previous-versions/ms997538(v=msdn.10)?redirectedfrom=MSDN
 14 | # H/T https://devblogs.microsoft.com/oldnewthing/20101019-00/?p=12503
 15 | # H/T https://devblogs.microsoft.com/oldnewthing/20120720-00/?p=7083
 16 | # H/T https://github.com/katahiromz/RisohEditor/blob/master/src/IconRes.cpp
 17 | 
 18 | 
class IconOrCursorHeader(Struct3):  # née GRPICONDIR
    """
    Three 16-bit fields that open an icon/cursor directory.

    The same layout is unpacked from the start of a group resource and packed
    at the start of a reassembled ICO/CUR file.
    """

    # Written as 0 when a file is reassembled.
    idReserved: u16
    # Copied from the group resource into the reassembled file unchanged.
    idType: u16
    # Number of directory entries that follow the header.
    idCount: u16
 23 | 
 24 | 
class ResourceIconDirEntry(Struct3):  # née GRPICONDIRENTRY
    """
    One directory entry inside an icon group resource.

    Same fields as ICONDIRENTRY except that the last one is a resource ID
    rather than a file offset.
    """

    bWidth: u8
    bHeight: u8
    bColorCount: u8
    bReserved: u8
    wPlanes: u16
    wBitCount: u16
    # Number of bytes of the referenced icon resource that belong to this image.
    dwBytesInRes: u32
    # Resource ID of the icon resource holding the image data.
    nId: u16
 34 | 
 35 | 
class ResourceCursorDirEntry(Struct3):  # née GRPCURSORDIRENTRY
    """
    One directory entry inside a cursor group resource.

    Unlike the icon variant, width and height are 16-bit and there are no
    colour-count/reserved bytes.
    """

    bWidth: u16
    # Halved when a .cur file is reassembled from this entry.
    bHeight: u16
    wPlanes: u16
    wBitCount: u16
    # Number of bytes of the referenced cursor resource that belong to this image.
    dwBytesInRes: u32
    # Resource ID of the cursor resource holding the image data.
    nId: u16
 43 | 
 44 | 
class ICONDIRENTRY(Struct3):
    """
    One directory entry as written into a reassembled ICO/CUR file.

    Built from a group-resource entry by dropping ``nId`` and adding the
    offset of the image data within the file.
    """

    bWidth: u8
    bHeight: u8
    bColorCount: u8
    bReserved: u8
    wPlanes: u16
    wBitCount: u16
    dwBytesInRes: u32
    # Offset of this image's data, counted from the start of the file.
    dwImageOffset: u32
 54 | 
 55 | 
 56 | def reassemble_ico(dents_and_datas, idType: int, height_divisor: int = 1) -> bytes:
 57 |     stream = io.BytesIO()
 58 |     header = IconOrCursorHeader(
 59 |         idReserved=0, idType=idType, idCount=len(dents_and_datas)
 60 |     )
 61 |     stream.write(header.pack())
 62 |     offsets = []
 63 |     offset = stream.tell() + len(dents_and_datas) * ICONDIRENTRY.calcsize()
 64 |     for gdent, _ in dents_and_datas:
 65 |         vs = vars(gdent).copy()
 66 |         vs.pop("nId")
 67 |         vs["dwImageOffset"] = offset
 68 |         vs["bHeight"] //= height_divisor  # For cursors; the actual data may have a trailing 1-bit mask
 69 |         offsets.append(offset)
 70 |         offset += vs["dwBytesInRes"]
 71 |         fdent = ICONDIRENTRY(**vs)
 72 |         stream.write(fdent.pack())
 73 |     for offset, (_, data) in zip(offsets, dents_and_datas):
 74 |         assert stream.tell() == offset  # sanity check
 75 |         stream.write(data)
 76 |     stream.flush()
 77 |     return stream.getvalue()
 78 | 
 79 | 
 80 | def _assemble_group_resources(resources, assembler, data_type, group_type):
 81 |     group_resources = []
 82 |     icon_resources = []
 83 |     for re in resources:
 84 |         if re.type_id == group_type:
 85 |             group_resources.append(re)
 86 |         elif re.type_id == data_type:
 87 |             icon_resources.append(re)
 88 |     icon_datas = {(r.res_id, r.lang_id): r.data for r in icon_resources}
 89 |     for r in group_resources:
 90 |         yield (r, assembler(r, icon_datas))
 91 | 
 92 | 
 93 | def _reassemble_ico_from_group_resource(
 94 |     group_resource: ResourceEntry,
 95 |     icon_datas: dict,
 96 | ) -> bytes:
 97 |     header = IconOrCursorHeader.unpack_from(group_resource.data)
 98 |     dents_and_datas = []
 99 |     for i in range(header.idCount):
100 |         offset = 6 + i * ResourceIconDirEntry.calcsize()
101 |         entry = ResourceIconDirEntry.unpack_from(group_resource.data[offset:])
102 |         log.debug(
103 |             "%s: header %s, %d/%d: %s",
104 |             group_resource,
105 |             header,
106 |             i + 1,
107 |             header.idCount,
108 |             entry,
109 |         )
110 |         idata = icon_datas[(entry.nId, group_resource.lang_id)]
111 |         assert len(idata) >= entry.dwBytesInRes, (len(idata),)
112 |         dents_and_datas.append((entry, idata[: entry.dwBytesInRes]))
113 |     return reassemble_ico(dents_and_datas, idType=header.idType)
114 | 
115 | 
def _reassemble_cur_from_group_resource(
    group_resource: ResourceEntry,
    cur_datas: dict,
) -> bytes:
    """
    Turn a cursor group resource into the bytes of a CUR file.

    *cur_datas* maps ``(resource id, language id)`` to raw cursor data. Each
    image is cut to the size its directory entry declares and then loses its
    first four bytes before being handed on; entry heights are halved.
    """
    header = IconOrCursorHeader.unpack_from(group_resource.data)
    pairs = []
    for index in range(header.idCount):
        # Entries are packed back to back after the 6-byte header.
        start = 6 + index * ResourceCursorDirEntry.calcsize()
        dent = ResourceCursorDirEntry.unpack_from(group_resource.data[start:])
        raw = cur_datas[(dent.nId, group_resource.lang_id)]
        assert len(raw) >= dent.dwBytesInRes, (len(raw),)
        # Drop LOCALHEADER (4 bytes, hotspot x/y)
        payload = raw[: dent.dwBytesInRes][4:]
        pairs.append((dent, payload))
    return reassemble_ico(pairs, idType=header.idType, height_divisor=2)
131 | 
132 | 
def extract_icons(resources: Iterable[ResourceEntry]):
    """Yield ``(group_resource, ico_file_bytes)`` for each icon group in *resources*."""
    icon_groups = _assemble_group_resources(
        resources,
        group_type=KnownResourceTypes.RT_GROUP_ICON,
        data_type=KnownResourceTypes.RT_ICON,
        assembler=_reassemble_ico_from_group_resource,
    )
    return icon_groups
140 | 
141 | 
def extract_cursors(resources: Iterable[ResourceEntry]):
    """Yield ``(group_resource, cur_file_bytes)`` for each cursor group in *resources*."""
    cursor_groups = _assemble_group_resources(
        resources,
        group_type=KnownResourceTypes.RT_GROUP_CURSOR,
        data_type=KnownResourceTypes.RT_CURSOR,
        assembler=_reassemble_cur_from_group_resource,
    )
    return cursor_groups
149 | 


--------------------------------------------------------------------------------
/res_extract/ne_resources.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Read resource entries from NE binaries.
  3 | """
  4 | from __future__ import annotations
  5 | 
  6 | import logging
  7 | import struct
  8 | from dataclasses import dataclass
  9 | 
 10 | from pe_tools import KnownResourceTypes
 11 | 
 12 | from res_extract.errors import BadResourceTable, NotNEFile
 13 | from res_extract.resources import ResourceEntry
 14 | 
 15 | log = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def read_u8(s) -> int:
 19 |     c = s.read(1)
 20 |     return struct.unpack("<B", c)[0]
 21 | 
 22 | 
 23 | def read_u16(s) -> int:
 24 |     c = s.read(2)
 25 |     return struct.unpack("<H", c)[0]
 26 | 
 27 | 
 28 | def read_u32(s) -> int:
 29 |     c = s.read(4)
 30 |     return struct.unpack("<I", c)[0]
 31 | 
 32 | 
@dataclass
class NEHeader:
    """
    Header of an NE executable, one attribute per on-disk field.

    The attributes are declared in the order the fields are read by
    `from_stream`; `ne_magic` holds the two raw signature bytes, every other
    attribute is an unsigned little-endian integer.
    """

    ne_magic: bytes
    linker_version: int
    linker_revision: int
    entry_table_offset: int
    entry_table_length: int
    file_checksum: int
    prog_flags: int
    appl_flags: int
    auto_data_segment: int
    initial_heap_size: int
    initial_stack_size: int
    entry_point: int
    initial_stack_pointer: int
    segment_count: int
    module_reference_count: int
    non_resident_name_table_size: int
    segment_table_offset: int
    resource_table_offset: int
    resident_name_table_offset: int
    module_reference_table_offset: int
    imported_names_table_offset: int
    non_resident_name_table_offset: int
    movable_entry_point_count: int
    file_alignment_shift_count: int
    resource_table_entries: int
    target_os: int
    other_flags: int
    ret_thunk_offset: int
    seg_ref_thunk_offset: int
    min_code_swap_size: int
    expected_win_version: int

    @classmethod
    def from_stream(cls, s):
        """
        Read a header from the current position of binary stream *s*.

        Consumes 64 bytes. The signature is not validated here. A stream
        that ends early makes one of the integer reads raise `struct.error`.
        """
        # Keyword arguments are evaluated left to right, so the order below
        # IS the on-disk field order; do not reorder.
        return cls(
            ne_magic=s.read(2),
            linker_version=read_u8(s),
            linker_revision=read_u8(s),
            entry_table_offset=read_u16(s),
            entry_table_length=read_u16(s),
            file_checksum=read_u32(s),
            prog_flags=read_u8(s),
            appl_flags=read_u8(s),
            auto_data_segment=read_u16(s),
            initial_heap_size=read_u16(s),
            initial_stack_size=read_u16(s),
            entry_point=read_u32(s),
            initial_stack_pointer=read_u32(s),
            segment_count=read_u16(s),
            module_reference_count=read_u16(s),
            non_resident_name_table_size=read_u16(s),
            segment_table_offset=read_u16(s),
            resource_table_offset=read_u16(s),
            resident_name_table_offset=read_u16(s),
            module_reference_table_offset=read_u16(s),
            imported_names_table_offset=read_u16(s),
            non_resident_name_table_offset=read_u32(s),
            movable_entry_point_count=read_u16(s),
            file_alignment_shift_count=read_u16(s),
            resource_table_entries=read_u16(s),
            target_os=read_u8(s),
            other_flags=read_u8(s),
            ret_thunk_offset=read_u16(s),
            seg_ref_thunk_offset=read_u16(s),
            min_code_swap_size=read_u16(s),
            expected_win_version=read_u16(s),
        )
102 | 
103 | 
@dataclass
class NEResourceEntry:
    """Location and identity of one resource listed in an NE resource table."""

    # Resource type ID with the top bit (0x8000) masked off.
    type_id: int
    # Resource ID with the top bit (0x8000) masked off.
    res_id: int
    # Name taken from the resource name table, when the resource has one.
    res_name: str | None
    # Byte offset of the resource data, already scaled by the alignment shift.
    res_offset: int
    # Byte length of the resource data, already scaled by the alignment shift.
    res_length: int

    @property
    def type(self):
        """Type name for `type_id` as reported by `KnownResourceTypes`."""
        return KnownResourceTypes.get_type_name(self.type_id)
115 | 
116 | 
def read_ne_resource_table(res_table_stream, *, log_prefix=""):
    """
    Parse an NE resource table and yield an `NEResourceEntry` per resource.

    *res_table_stream* must be positioned at the start of the table. The
    stream is read strictly forwards: first the alignment shift, then the
    type blocks (terminated by a zero type ID), then - only if needed - the
    length-prefixed name table that follows.

    Resources whose type ID lacks the 0x8000 bit are skipped. Resources whose
    own ID has the 0x8000 bit are yielded immediately; the others are named
    resources, yielded last, once their name has been looked up. A named
    resource without a matching name is logged and dropped.

    *log_prefix* is put in front of log messages.

    Raises `BadResourceTable` if the alignment shift is larger than 31. As
    this is a generator, nothing is read or raised until iteration starts.
    """
    # Remember where the table starts: name-table entries are keyed by
    # their offset relative to this position.
    res_table_offset = res_table_stream.tell()
    align_shift = read_u16(res_table_stream)
    if align_shift > 31:
        raise BadResourceTable(
            f"NE resource table align_shift {align_shift} is suspiciously large",
        )
    resources_to_rename = []
    while True:
        type_id = read_u16(res_table_stream)
        if type_id == 0:
            # A zero type ID terminates the list of type blocks.
            break
        count = read_u16(res_table_stream)
        _reserved = read_u32(res_table_stream)
        for i in range(count):
            # Offset and length are stored in units of (1 << align_shift) bytes.
            res_offset = read_u16(res_table_stream) * (1 << align_shift)
            res_length = read_u16(res_table_stream) * (1 << align_shift)
            _res_flags = read_u16(res_table_stream)
            res_id = read_u16(res_table_stream)
            _res_handle = read_u16(res_table_stream)
            _res_usage = read_u16(res_table_stream)

            re = NEResourceEntry(
                type_id=(type_id & 0x7FFF),
                res_id=(res_id & 0x7FFF),
                res_name=None,
                res_offset=res_offset,
                res_length=res_length,
            )

            # Do these skips here in the loop so we read the table correctly
            # without needing to seek
            if not type_id & 0x8000:
                log.debug(
                    f"%s: skipping resource with string-offset type ID {type_id}",
                    log_prefix,
                )
                continue
            if not res_id & 0x8000:  # We'll deal with these later
                resources_to_rename.append(re)
                continue
            yield re

    if not resources_to_rename:
        # No need to read the name table either
        return

    # Read name table...
    # Each entry is a length byte followed by that many name bytes; a zero
    # length ends the table. Entries are indexed by where they start.
    resource_names = {}
    while True:
        offset = res_table_stream.tell() - res_table_offset
        name_len = read_u8(res_table_stream)
        if name_len == 0:
            break
        name = res_table_stream.read(name_len).decode("ascii", errors="replace")
        resource_names[offset] = name

    for resource in resources_to_rename:
        # For named resources the ID is the table-relative offset of the name.
        rid = resource.res_id
        if rid not in resource_names:
            log.warning(
                f"%s: resource with ID {rid} has no name in resource name table",
                log_prefix,
            )
            continue
        resource.res_name = resource_names[rid]
        yield resource
184 | 
185 | 
def read_ne_resources(exe):
    """
    Yield a `ResourceEntry` for every resource found in an NE executable.

    :param exe: seekable binary stream positioned at the start of the file.
        Its ``name`` attribute, when present, is used in messages.
    :raises NotNEFile: the file lacks the ``MZ`` signature, or no ``NE``
        header is found where one is expected.
    :raises BadResourceTable: the resource table is implausible, or a
        resource extends past the end of the file.

    This is a generator: nothing is read until iteration starts. All
    resources are reported with ``lang_id`` 0.
    """
    name = str(getattr(exe, "name", exe))
    signature = exe.read(2)
    if signature != b"MZ":
        raise NotNEFile(
            f"{name} doesn't look like a NE file (initial MZ signature is {signature!r})",
        )
    # If the word value at offset 18h is 40h or greater, the value at 3Ch
    # is typically an offset to a Windows header.
    exe.seek(0x18)
    word_18 = read_u16(exe)
    if word_18 >= 0x40:
        exe.seek(0x3C)
        # The offset field is four bytes wide; reading only the low word
        # would misplace headers that start beyond 64 KiB.
        ne_header_offset = read_u32(exe)
    else:
        ne_header_offset = 0x480  # Just a guess!
    exe.seek(ne_header_offset)
    header = NEHeader.from_stream(exe)
    if header.ne_magic != b"NE":
        raise NotNEFile(
            f"{name} doesn't look like a NE file (magic {header.ne_magic!r} at offset {hex(ne_header_offset)} not 'NE')",
        )
    # The resource table offset is relative to the start of the NE header.
    exe.seek(ne_header_offset + header.resource_table_offset)
    # Read the whole table first: fetching resource data below moves the
    # stream position the table parser depends on.
    resource_entries = list(read_ne_resource_table(exe, log_prefix=name))
    for entry in resource_entries:
        exe.seek(entry.res_offset)
        data = exe.read(entry.res_length)
        if len(data) != entry.res_length:
            # Raise a ParseError subclass rather than assert, so a truncated
            # file is reported like other malformed input (and still is
            # under `python -O`).
            raise BadResourceTable(
                f"{name}: resource {entry.res_id} at offset {entry.res_offset} is truncated "
                f"(expected {entry.res_length} bytes, got {len(data)})",
            )
        yield ResourceEntry(
            data=data,
            lang_id=0,
            name=entry.res_name,
            res_id=entry.res_id,
            type_id=entry.type_id,
        )
222 | 
223 | 
def main():
    """Debugging aid: print each resource found in ``./excel5.exe``."""
    with open("./excel5.exe", "rb") as exe_file:
        for resource in read_ne_resources(exe_file):
            print(resource)
228 | 
229 | 
230 | if __name__ == "__main__":
231 |     main()
232 | 


--------------------------------------------------------------------------------
/res_extract/resources.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | 
 3 | from collections.abc import Iterable
 4 | from dataclasses import dataclass
 5 | 
 6 | import grope
 7 | from pe_tools import KnownResourceTypes, parse_pe
 8 | 
 9 | 
@dataclass
class ResourceEntry:
    """
    One resource extracted from an executable.

    ``name`` is optional; when it is missing, the numeric ``res_id`` is used
    wherever a label for the resource is needed.
    """

    type_id: int
    res_id: int
    lang_id: int
    data: bytes
    name: str | None = None

    @property
    def type(self):
        """Type name for ``type_id`` as reported by ``KnownResourceTypes``."""
        return KnownResourceTypes.get_type_name(self.type_id)

    @property
    def filename_part(self) -> str:
        """
        Label for use in output file names: the name (or else the ID),
        followed by ``_<lang_id>`` when the language ID is non-zero.
        """
        parts = [self.name or str(self.res_id)]
        if self.lang_id:
            parts += [str(self.lang_id)]
        return "_".join(parts)

    def __repr__(self):
        size = len(self.data)
        return f"{self.type}({self.res_id} @ {self.lang_id}, {size} bytes)"
36 | 
37 | 
def get_resources_from_file(exe_fp) -> Iterable[ResourceEntry]:
    """
    Yield every resource in the executable open as *exe_fp*.

    The file is first tried as a PE image. If the PE parser rejects it with
    a "Not a PE file" error, the stream is rewound and read as an NE
    executable instead. Any other RuntimeError from the PE parser propagates.
    """
    pe = None
    try:
        pe = parse_pe(grope.wrap_io(exe_fp))
    except RuntimeError as rte:
        if "Not a PE file" not in str(rte):
            raise

    if not pe:
        # Assume NE then...
        exe_fp.seek(0)
        from res_extract.ne_resources import read_ne_resources

        yield from read_ne_resources(exe_fp)
        return

    # PE resources come as nested mappings: type -> id -> language -> data.
    for type_id, by_res_id in pe.parse_resources().items():
        for res_id, by_lang in by_res_id.items():
            for lang, blob in by_lang.items():
                yield ResourceEntry(
                    type_id=type_id,
                    res_id=res_id,
                    lang_id=lang,
                    data=bytes(blob),
                )
63 | 


--------------------------------------------------------------------------------