├── src └── debloat │ ├── __init__.py │ ├── tests │ ├── __init__.py │ └── debloat_test.py │ ├── utilities │ ├── __init__.py │ ├── rsrc.py │ ├── readers.py │ ├── pyflate.py │ └── nsisParser.py │ ├── debloat.icns │ ├── debloat.ico │ ├── auxiliary.py │ ├── gui.spec │ ├── hook │ └── hook-tkinterdnd2.py │ ├── processor.pyi │ ├── main.py │ ├── performanceTest.py │ ├── gui.py │ └── processor.py ├── requirements.txt ├── .gitattributes ├── setup.py ├── setup.cfg ├── .gitignore ├── pyproject.toml ├── LICENSE ├── .github └── workflows │ └── python-publish.yml ├── README.md └── changelog.txt /src/debloat/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/debloat/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/debloat/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tkinterdnd2 2 | pefile -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | if __name__ == "__main__": 4 | setup() -------------------------------------------------------------------------------- /src/debloat/debloat.icns: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Squiblydoo/debloat/HEAD/src/debloat/debloat.icns -------------------------------------------------------------------------------- /src/debloat/debloat.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Squiblydoo/debloat/HEAD/src/debloat/debloat.ico -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = debloat 3 | 4 | [options] 5 | package_dir= 6 | =src 7 | packages=find: 8 | 9 | [options.packages.find] 10 | where=src -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | build/ 3 | dist/ 4 | virt/ 5 | src/debloat.egg-info/ 6 | src/debloat/dist 7 | src/debloat/.vscode 8 | src/debloat/samples 9 | src/debloat/UnSorted_samples 10 | src/debloat/Old_Sample_Set 11 | src/debloat/TODO.md 12 | src/debloat/unsolved 13 | src/debloat/temp 14 | -------------------------------------------------------------------------------- /src/debloat/auxiliary.py: -------------------------------------------------------------------------------- 1 | """This file contains auxillary commands for removing bloat. 
2 | 3 | The commands in this file are not included in the automated processor 4 | and can be used by other scripts.""" 5 | import pefile 6 | 7 | def trim_null_bytes(out_path: str,\ 8 | pe: pefile.PE) -> None: 9 | '''Remove nullbytes from end of file 10 | 11 | Key Arguments: 12 | out_path -- new file to write 13 | pe -- a pe file opject''' 14 | trimmed_pe = pe.trim() 15 | with open(out_path, "wb") as output_file: 16 | output_file.write(trimmed_pe) -------------------------------------------------------------------------------- /src/debloat/tests/debloat_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import debloat.processor as processor 3 | import pefile 4 | 5 | 6 | # Can we print sizes? 7 | def test_readable_size(): 8 | assert processor.readable_size(10) == "10 bytes" 9 | 10 | 11 | 12 | def test_signture_abnormality(): 13 | # Is there information after the signature? 14 | # Signature is at 10 with a size of 5, total file is 15 15 | assert processor.handle_signature_abnormality(10, 5, 15) == False 16 | # Is there information after the signature? 
17 | # Signature is at 10 with a size of 5, total file is 20 18 | assert processor.handle_signature_abnormality(10, 5, 20) == True 19 | -------------------------------------------------------------------------------- /src/debloat/gui.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | 4 | a = Analysis( 5 | ['gui.py'], 6 | pathex=[], 7 | binaries=[], 8 | datas=[], 9 | hiddenimports=[], 10 | hookspath=['./hook'], 11 | hooksconfig={}, 12 | runtime_hooks=[], 13 | excludes=[], 14 | noarchive=False, 15 | ) 16 | pyz = PYZ(a.pure) 17 | 18 | exe = EXE( 19 | pyz, 20 | a.scripts, 21 | a.binaries, 22 | a.datas, 23 | [], 24 | name='gui', 25 | debug=False, 26 | bootloader_ignore_signals=False, 27 | strip=False, 28 | upx=True, 29 | upx_exclude=[], 30 | runtime_tmpdir=None, 31 | console=False, 32 | disable_windowed_traceback=False, 33 | argv_emulation=False, 34 | target_arch=None, 35 | codesign_identity=None, 36 | entitlements_file=None, 37 | ) 38 | -------------------------------------------------------------------------------- /src/debloat/utilities/rsrc.py: -------------------------------------------------------------------------------- 1 | import enum 2 | class RSRC(enum.IntEnum): 3 | CURSOR = 0x01 # noqa 4 | BITMAP = 0x02 # noqa 5 | ICON = 0x03 # noqa 6 | MENU = 0x04 # noqa 7 | DIALOG = 0x05 # noqa 8 | STRING = 0x06 # noqa 9 | FONTDIR = 0x07 # noqa 10 | FONT = 0x08 # noqa 11 | ACCELERATOR = 0x09 # noqa 12 | RCDATA = 0x0A # noqa 13 | MESSAGETABLE = 0x0B # noqa 14 | ICON_GROUP = 0x0E # noqa 15 | VERSION = 0x10 # noqa 16 | DLGINCLUDE = 0x11 # noqa 17 | PLUGPLAY = 0x13 # noqa 18 | VXD = 0x14 # noqa 19 | ANICURSOR = 0x15 # noqa 20 | ANIICON = 0x16 # noqa 21 | HTML = 0x17 # noqa 22 | MANIFEST = 0x18 # noqa 23 | 24 | def __str__(self): 25 | return self.name -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "debloat" 7 | version = "1.6.5" 8 | authors = [ 9 | { name="Squiblydoo", email="Squiblydoo@pm.me" }, 10 | ] 11 | description = "Debloat is an tool to remove excess garbage from bloated executables." 12 | readme = "README.md" 13 | requires-python = ">=3.6" 14 | dependencies = [ 15 | "tkinterdnd2>=0.3.0", 16 | "pefile>=2023.2.0" 17 | ] 18 | classifiers = [ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: BSD License", 21 | "Operating System :: OS Independent", 22 | ] 23 | 24 | [project.urls] 25 | "Homepage" = "https://github.com/Squiblydoo/debloat" 26 | "Bug Tracker" = "https://github.com/Squiblydoo/debloat/issues" 27 | 28 | 29 | [project.scripts] 30 | debloat = "debloat.main:main" 31 | debloat-gui = "debloat.gui:main" 32 | -------------------------------------------------------------------------------- /src/debloat/hook/hook-tkinterdnd2.py: -------------------------------------------------------------------------------- 1 | """pyinstaller hook file. 2 | 3 | You need to use this hook-file if you are packaging a project using tkinterdnd2. 4 | Just put hook-tkinterdnd2.py in the same directory where you call pyinstaller and type: 5 | 6 | pyinstaller myproject/myproject.py --additional-hooks-dir=. 
7 | """ 8 | 9 | import os 10 | import platform 11 | from PyInstaller.utils.hooks import collect_data_files, collect_dynamic_libs 12 | 13 | 14 | s = platform.system() 15 | p = { 16 | 'Windows': ({'win-arm64', 'win-x86', 'win-x64' },{'tkdnd_unix.tcl', 'tkdnd_macosx.tcl'}), 17 | 'Linux': ({'linux-x64', 'linux-arm64'}, {'tkdnd_windows.tcl', 'tkdnd_macosx.tcl'}), 18 | 'Darwin': ({'osx-x64', 'osx-arm64'}, {'tkdnd_windows.tcl', 'tkdnd_unix.tcl'}), 19 | } 20 | if s in p: 21 | datas = set([ 22 | x for x in ( 23 | *collect_data_files('tkinterdnd2'), 24 | *collect_dynamic_libs('tkinterdnd2'), 25 | ) 26 | if os.path.split(x[1])[1] in p[s][0] and os.path.split(x[0])[1] not in p[s][1] 27 | ]) 28 | else: 29 | raise RuntimeError(f'TkinterDnD2 is not supported on platform "{s}".') -------------------------------------------------------------------------------- /src/debloat/processor.pyi: -------------------------------------------------------------------------------- 1 | import pefile 2 | from _typeshed import Incomplete 3 | from pefile import Structure as Structure 4 | from typing import Callable, Optional, Tuple 5 | 6 | PACKER: Incomplete 7 | 8 | def readable_size(value: int) -> str: ... 9 | def write_multiple_files(out_path: str, files: list, log_message: Callable[[str], None]) -> None: ... 10 | def write_patched_file(out_path: str, pe: pefile.PE) -> Tuple[int, str]: ... 11 | def handle_signature_abnormality(signature_address: int, signature_size: int, beginning_file_size: int) -> bool: ... 12 | def check_and_extract_NSIS(possible_header: bytearray, data: bytearray) -> list: ... 13 | def check_for_packer(possible_header: bytearray) -> int: ... 14 | def find_last_section(pe: pefile.PE) -> Optional[pefile.SectionStructure]: ... 15 | def get_signature_info(pe: pefile.PE) -> Tuple[int, int]: ... 16 | def adjust_offsets(pe: pefile.PE, gap_offset: int, gap_size: int): ... 17 | def refinery_strip(pe: pefile.PE, data: memoryview, block_size=...) -> int: ... 
18 | def refinery_trim_resources(pe: pefile.PE, pe_data: bytearray) -> int: ... 19 | def remove_resources(pe: pefile.PE, pe_data: bytearray) -> Tuple[bytearray, int]: ... 20 | def check_section_compression(pe: pefile.PE, pe_data: bytearray, end_of_real_data, log_message: Callable[[str], None]) -> Tuple[pefile.PE, int, str]: ... 21 | def trim_junk(pe: pefile.PE, bloated_content: bytes, original_size_with_junk: int) -> int: ... 22 | def process_pe(pe: pefile.PE, out_path: str, last_ditch_processing: bool, log_message: Callable[[str], None]) -> None: ... 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Squiblydoo 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /src/debloat/main.py: -------------------------------------------------------------------------------- 1 | """This file handles passing the CLI arguments into the processor""" 2 | import os 3 | import sys 4 | from pathlib import Path 5 | import argparse 6 | import pefile 7 | import debloat.processor 8 | from debloat.processor import DEBLOAT_VERSION 9 | from debloat.processor import RESULT_CODES 10 | 11 | 12 | def main() -> int: 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument("executable", 15 | help="Path to the executable to be debloated", 16 | type=Path) 17 | parser.add_argument("--output", 18 | help="Output location", 19 | type=Path, 20 | required=False) 21 | parser.add_argument("-yolo", "--last-ditch", dest="last_ditch_processing", 22 | help=""" 23 | Run last-ditch processing. In this mode Debloat may remove the 24 | whole PE Overlay as a last resort if no smarter method works. 25 | """, 26 | action='store_true', default=False) 27 | parser.add_argument("-c", "--cert", dest="cert_preservation", 28 | help=""" 29 | Preserve the certificate on the end of the file if there is a certificate. 
30 | The certificate will no longer be valid.""", 31 | action='store_true', 32 | required=False, 33 | default=False) 34 | parser.add_argument("-v", "--version", action='version', version='debloat version ' + DEBLOAT_VERSION, help="Prints program version") 35 | args = parser.parse_args() 36 | 37 | file_path = args.executable 38 | out_path = args.output 39 | file_size = os.path.getsize(file_path) 40 | 41 | if not out_path: 42 | out_path = file_path.parent \ 43 | / f"{file_path.stem}_patched{file_path.suffix}" 44 | 45 | try: 46 | with open(file_path, "rb") as bloated_file: 47 | pe_data = bloated_file.read() 48 | pe = pefile.PE(data=pe_data, fast_load=True) 49 | except Exception: 50 | print(''' 51 | Provided file is not an executable! Please try again with an executable. 52 | Maybe it needs unzipped?''' 53 | ) 54 | return 1 55 | 56 | result_code = debloat.processor.process_pe(pe, 57 | out_path=str(out_path), 58 | last_ditch_processing=args.last_ditch_processing, 59 | cert_preservation=args.cert_preservation, 60 | log_message=print, 61 | beginning_file_size=file_size 62 | ) 63 | print("Tactic identifed:", RESULT_CODES.get(result_code)) 64 | return 0 65 | 66 | if __name__ == "__main__": 67 | sys.exit(main()) 68 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | name: Build Executables 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-latest, macos-13, macos-14, windows-latest] 17 | 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v2 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: '3.x' 26 | 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | pip install 
pyinstaller tkinterdnd2 pefile 31 | working-directory: src/debloat 32 | 33 | - name: Build executable on Linux 34 | if: matrix.os == 'ubuntu-latest' 35 | run: | 36 | pyinstaller --onefile --noconsole --icon=debloat.ico --collect-all tkinterdnd2 --name debloat gui.py 37 | working-directory: src/debloat 38 | 39 | - name: Build executable on macOS (Intel and ARM) 40 | if: startsWith(matrix.os, 'macos') 41 | run: | 42 | pyinstaller --onefile --noconsole --additional-hooks-dir=./hook --icon=debloat.icns --name debloat gui.py 43 | working-directory: src/debloat 44 | 45 | - name: Build executable on Windows 46 | if: matrix.os == 'windows-latest' 47 | run: | 48 | pyinstaller --onefile --noconsole --additional-hooks-dir=./hook --icon=debloat.ico --name debloat gui.py 49 | working-directory: src/debloat 50 | 51 | - name: Set output name 52 | id: set-tar-name 53 | if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos') 54 | run: echo "TAR_NAME=debloat.tar" >> $GITHUB_ENV 55 | 56 | - name: 'Tar files' 57 | if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'macos') 58 | run: | 59 | cd src/debloat/dist/ && 60 | tar -cf ${{ env.TAR_NAME}} * 61 | 62 | - name: Upload Linux build artifacts 63 | uses: actions/upload-artifact@v4 64 | if: startsWith(matrix.os, 'ubuntu') 65 | with: 66 | name: Linux_debloat 67 | path: src/debloat/dist/${{ env.TAR_NAME }} 68 | 69 | - name: Upload Intel Mac build artifacts 70 | uses: actions/upload-artifact@v4 71 | if: matrix.os == 'macos-13' 72 | with: 73 | name: macOS_x86_intel_debloat 74 | path: src/debloat/dist/${{ env.TAR_NAME }} 75 | 76 | - name: Upload ARM Mac build artifacts 77 | uses: actions/upload-artifact@v4 78 | if: matrix.os == 'macos-14' 79 | with: 80 | name: macOS_ARM_debloat 81 | path: src/debloat/dist/${{ env.TAR_NAME }} 82 | 83 | - name: Upload Windows build artifacts 84 | uses: actions/upload-artifact@v4 85 | if: startsWith(matrix.os, 'windows') 86 | with: 87 | name: ${{ matrix.os }}_debloat 88 | path: 
src/debloat/dist/ 89 | -------------------------------------------------------------------------------- /src/debloat/performanceTest.py: -------------------------------------------------------------------------------- 1 | ## This script is for batch processing of samples and can be used for 2 | ## measuring memory usage. 3 | 4 | import os 5 | import hashlib 6 | from memray import commands, FileReader 7 | from memray._memray import size_fmt 8 | import debloat.processor 9 | import timeit 10 | import argparse 11 | import cProfile 12 | import pstats 13 | import tempfile 14 | 15 | argparser = argparse.ArgumentParser( 16 | prog = "Debloat Performance test", 17 | description = "This program takes a test type (--mem or --cpu) and performs tests using one or more samples. If no sample or directory is specified, it defaults to a 'samples' directory in the current working directory." 18 | ) 19 | argparser.add_argument("--cpu", help="Run the CPU profiler", action="store_true") 20 | argparser.add_argument("--mem", help="Run the memory profiler", action="store_true") 21 | argparser.add_argument("--sample", help="Run the debloat processor on a single sample") 22 | argparser.add_argument("--directory", help="Specify sample directory", default="samples") 23 | argparser.add_argument("--keep", help="Keeps patched copies.", action="store_true") 24 | args = argparser.parse_args() 25 | 26 | def process_samples(sample, directory): 27 | file_size=os.path.getsize(args.directory +"/"+ sample) 28 | setup = f"import pefile; import debloat; filename = '{args.directory}/{sample}'; " 29 | code = f"binary = pefile.PE(filename, fast_load=True); result= debloat.processor.process_pe(binary, filename + '.patched', last_ditch_processing=False, cert_preservation=False, log_message=lambda *args, **kwargs: None, beginning_file_size={file_size}); print(result, end=' ')" 30 | 31 | if args.mem: 32 | mem_profiler(setup, code, file_size, sample, directory) 33 | if args.cpu: 34 | cpu_profiler() 35 | if not 
args.keep: 36 | try: 37 | os.remove(args.directory + "/" + sample + ".patched") 38 | except: 39 | pass 40 | 41 | 42 | def mem_profiler(setup, code, file_size, sample, directory): 43 | with tempfile.NamedTemporaryFile() as f: 44 | commands.main(["run", "-f", "-q", "-o", f.name, "-c", setup+code]) 45 | reader = FileReader(os.fspath(f.name), report_progress=False) 46 | # Uncomment to hash outputed samples. 47 | #with open(directory +"/"+ sample + ".patched", "rb") as g: 48 | # out = g.read() 49 | # out_hash = hashlib.sha256(out).hexdigest() 50 | times = timeit.repeat(stmt=code, setup=setup, number=1, repeat=3) 51 | print(sample, size_fmt(file_size), size_fmt(reader.metadata.peak_memory), [round(x,2) for x in times]) 52 | 53 | def cpu_profiler(): 54 | cProfile.run(setup+code, "tmp.prof") 55 | p = pstats.Stats("tmp.prof") 56 | p.sort_stats('tot').print_stats(10) 57 | p.sort_stats('cumulative').print_stats(10) 58 | 59 | if args.sample: 60 | process_samples(args.sample, args.directory) 61 | 62 | else: 63 | print("Debloat Method/ Original Filename / Disk Size / Mem Usage / Time to process x 3") 64 | for sample in os.listdir(args.directory): 65 | process_samples(sample, args.directory) 66 | 67 | -------------------------------------------------------------------------------- /src/debloat/gui.py: -------------------------------------------------------------------------------- 1 | """This file handles all GUI components.""" 2 | import os 3 | import time 4 | from pathlib import Path 5 | from tkinter import * 6 | import tkinter.scrolledtext as st 7 | from typing import Tuple, Optional, Any 8 | from tkinterdnd2 import DND_FILES, TkinterDnD 9 | import pefile 10 | import debloat.processor 11 | from debloat.processor import DEBLOAT_VERSION 12 | from debloat.processor import RESULT_CODES 13 | 14 | class MainWindow(TkinterDnD.Tk): 15 | def __init__(self) -> None: 16 | '''Define main GUI window.''' 17 | TkinterDnD.Tk.__init__(self) 18 | self.title("Debloat " + DEBLOAT_VERSION) 19 | # 
I removed the Tkinter Icon since it didn't work on most 20 | # platforms and just caused more problems than necessary. 21 | self.geometry("600x600") 22 | 23 | # Label and PathBox for the main function of program. 24 | self.pathbox_Label = Label(self, \ 25 | text="Drag and drop file onto text bar.") 26 | self.pathbox_Label.pack() 27 | self.pathbox = Entry(self, width=150) 28 | self.pathbox.pack(padx=20) 29 | self.pathbox.drop_target_register(DND_FILES) 30 | self.pathbox.dnd_bind("<>", self.process_entry) 31 | 32 | # Define button that will be used to the process file. 33 | self.process_button = Button(self, \ 34 | text="Process file", \ 35 | command=self.process) 36 | self.process_button.pack(pady=10) 37 | 38 | # Safe processing value and checkbox: Maybe not even needed? 39 | self.unsafe_processing = BooleanVar(value=False) 40 | self.unsafe_checkbox = Checkbutton(self, 41 | text="Check to run last ditch effort processing", 42 | variable=self.unsafe_processing) 43 | self.unsafe_checkbox.pack() 44 | 45 | self.cert_preservation = BooleanVar(value=False) 46 | self.cert_checkbox = Checkbutton(self, 47 | text="Preserve Cert. Cert will be invalid but informational.", 48 | variable=self.cert_preservation) 49 | self.cert_checkbox.pack() 50 | 51 | 52 | 53 | # Define Scrollbox for output of program. 
54 | self.output_scrollbox = st.ScrolledText(self, 55 | width=100, 56 | height=100, 57 | wrap=WORD) 58 | self.output_scrollbox.pack(padx=20, pady=20) 59 | 60 | def clear_pathbox(self) -> None: 61 | '''Clear any text in the pathbox.''' 62 | self.pathbox.delete(0,"end") 63 | 64 | def output_scrollbox_handler(self, message: str, end = "\n", flush=True) -> None: 65 | '''Insert messages in the textbox.''' 66 | self.output_scrollbox.insert(INSERT, message + end) 67 | self.update() 68 | 69 | def process_entry(self, event: Any) -> None: 70 | '''Check and update user provided file path.''' 71 | self.pathbox.insert("end", event.data) 72 | file_path = self.pathbox.get() 73 | if file_path[0] == '{' and file_path[-1] == '}': 74 | file_path = file_path[1:-1] 75 | self.pathbox.delete(0,"end") 76 | self.pathbox.insert(0, file_path) 77 | 78 | def process(self) -> None: 79 | '''Process the file at the user provided path.''' 80 | start_time = time.time() 81 | file_path = Path(self.pathbox.get()) 82 | self.output_scrollbox_handler("Processing. Please wait.") 83 | try: 84 | with open(file_path, "rb") as bloated_file: 85 | pe_data = bloated_file.read() 86 | pe = pefile.PE(data=pe_data, fast_load=True) 87 | except Exception: 88 | self.output_scrollbox_handler(''' 89 | Provided file is not an executable! Please try again 90 | with an executable. 
Maybe it needs unzipped?''') 91 | self.clear_pathbox() 92 | return 93 | file_size = os.path.getsize(file_path) 94 | out_path = file_path.parent \ 95 | / f"{file_path.stem}_patched{file_path.suffix}" 96 | 97 | result_code = debloat.processor.process_pe(pe, out_path, 98 | self.unsafe_processing.get(), 99 | self.cert_preservation.get(), 100 | log_message=self.output_scrollbox_handler, 101 | beginning_file_size=file_size) 102 | self.output_scrollbox_handler("Tactic identified: " , RESULT_CODES.get(result_code) +"\n") 103 | self.output_scrollbox_handler("-----Processing took %s seconds ---\n" \ 104 | % round((time.time() - start_time), 2)) 105 | self.clear_pathbox() 106 | 107 | def main() -> None: 108 | root = MainWindow() 109 | root.mainloop() 110 | 111 | if __name__== "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![debloat](https://user-images.githubusercontent.com/77356206/215351855-9f89c298-36b4-4234-89b5-dc3f26d1f8b0.png) 2 | 3 | # Debloat 4 | Debloat is a GUI and CLI tool to remove excess garbage from bloated executables. 5 | 6 | By excess garbage, I mean 100 - 800 MB of junk bytes added to a binary to keep it from going into a sandbox. This method of adding junk is called "inflating" or "pumping" a binary. Debloat currently handles the 10 most common inflation tactics. 7 | 8 | Being built with Python, the application can easily be leveraged in other workflows. Currently, debloat is used by [CCCS's AssemblyLine](https://www.cyber.gc.ca/en/tools-services/assemblyline) and [CERT Polska's MWDB](https://github.com/CERT-Polska/karton-archive-extractor). 9 | 10 | The program can be compiled for Windows, MacOS, Linux. The GUI and CLI have minimal options: it is intended to be as simple as possible and the logic within the program handles the different use cases automatically. 
11 | 12 | Compiled binaries have already been included in the [Releases](https://github.com/Squiblydoo/debloat/releases/). 13 | 14 | The debloat can installed using `pip install debloat`. Use `debloat` to launch the CLI and `debloat-gui` to launch the GUI. 15 | 16 | For advanced users, Debloat can also be imported into other scripts and the processing functions can be called individually. 17 | 18 | ## How to use the GUI? 19 | The GUI of Debloat intends to be as intuitive as possible. 20 | When launched, you can drag and drop bloated file onto the text bar and press the "Process file" button. 21 | Some technical information will be printed to the scrolling textbox and the file without bloat will be written to the directory the file was pulled from. 22 | Sound easy? It is! 23 | 24 | Processing files will take a few seconds.
25 | ![image](https://github.com/Squiblydoo/debloat/assets/77356206/3d2756cd-bc83-44e8-b223-edd8ed464369) 26 | 27 | 28 | ## How to use the CLI? 29 | After installing using `pip install debloat` use the command `debloat`.
30 | `debloat` can take two arguments. The first argument is required: the file to debloat. The second argument is optional: the output location. When no output is provided, it will be written to the same directory as the original file. 31 | 32 | The gui can also be launched from the CLI using the command `debloat-gui`. 33 | 34 | ## Does it always work? 35 | Not yet. 36 | Based on my recent analysis, debloat is able to [remove junk from bloated files 97.8% of the time](https://x.com/SquiblydooBlog/status/1795419380991291424). 37 | 38 | In previous versions, `debloat` could accidentally remove too much of the binary. That is no longer the case unless you use the "--last-ditch" switch. If you ever need this switch, consider sharing the sample for additional analysis. This option has now been added to the GUI. Functionally, what the function does is it will remove the whole overlay, if there is one. In some cases this is necessary as no pattern for the junk was found---this is most commonly the case in samples that do not compress well. 39 | 40 | ## Use Cases (Images from [Malcat](https://malcat.fr/)) 41 | ### Full support 42 | - [x] Bloat appended to the end of a Signed PE.
43 | In the image below, the bloat has been appended to the end of the executable.
44 | ![Screenshot 2023-02-11 at 3 32 36 PM](https://user-images.githubusercontent.com/77356206/218279963-00780b59-8227-47dd-a0af-41096f6ae17b.png) 45 | 46 | - [X] Signed or Unsigned Packed executable.
47 | In the image below, the bloat has been appended to the executable after packing.
48 | ![Screenshot 2023-02-11 at 3 44 10 PM](https://user-images.githubusercontent.com/77356206/218280433-6dbcf51a-68c8-48e1-a89a-ad0b818a0afc.png) 49 | 50 | - [X] Signed executable includes bloat in the .rsrc section of the PE.
51 | In the image below, the bloat is identified as in the .rsrc section and is removed from the PE.
52 | ![Screenshot 2023-02-11 at 3 35 21 PM](https://user-images.githubusercontent.com/77356206/218280086-7cd548f8-e16b-4290-9283-a8a848de1419.png) 53 | 54 | - [X] Cases where bloat is added inside a PE Section.
55 | In the image below, the bloat has been included in a PE section named [0].
56 | ![Screenshot 2023-02-11 at 3 26 52 PM](https://user-images.githubusercontent.com/77356206/218279753-ed2c9102-482a-4639-aeb1-df8efc9c4e2e.png) 57 | 58 | - [X] Cases where the executable is a Nullsoft Scriptable Installer System executable (NSIS aka Nullsoft) 59 | These exe are installers that may contain one or more files. The files contained may or may not be malicious. (Sometimes actors will add files simply for increasing the file size.) All files within the installer are extracted to a new directory. The directory also contains the script for the installer which can be consulted to determine which files may be malicious. 60 | In the image below, Malcat has identified the executable as a NSIS installer. 61 | ![image](https://github.com/Squiblydoo/debloat/assets/77356206/86780abc-da4b-4808-bccb-733d97fa80d8) 62 | 63 | # Partial Support 64 | 65 | - [X] Cases where the junk is too random and the entropy is too high. In these cases, a switch/option called "--last-ditch" 66 | 67 | ### Other use cases 68 | There are use cases where the tool does not work. However, I plan to solve for them before publishing too much about them. 69 | 70 | ## Why? 71 | There appear to be a limited number of tools to easily process bloated executables. The two tools I have seen the most are “foremost” which is intended for recovering binaries from a disk image and “pecheck”. 72 | 73 | [Foremost](https://www.kali.org/tools/foremost/) works best in instances where the junk bytes are null (0x00) and it struggles when the binary has a fake or real signature. Its use in removing bloat from files is not its original purpose. 74 | 75 | [Pecheck](https://github.com/DidierStevens/DidierStevensSuite/blob/master/pecheck.py) has been developed over 14+ years and has some confusing commandline options. The option to remove bloated content is not the primary function of the script. 
Pecheck has to be combined with another tool ([disitool](https://blog.didierstevens.com/programs/disitool/)) in order to handle signed executables. In my experience, there are other times where pecheck can get confused and return an executable twice the size of the original bloated executable. All these factors seem OK if you are handling a small number of binaries, but as the number of binaries and methods increase, a tool specific to removing bloat is needed. 76 | 77 | [Binary Refinery](https://github.com/binref/refinery) is an amazing tool. It was written with the intention of being a [CyberChef](https://github.com/gchq/CyberChef) of the commandline. While both tools are amazing, they both have a shortcoming that requires the user to know what formulas should be applied. 78 | 79 | There are good solid manual methods to remove bloat from binaries, but these methods can be tedious and not all analysts have the skills to do this. This tool removes the burden of needing to know how to manually remove bloat. Additionally, it allows for better scale. The principles used in the script allow allow for better scale if automation is desired. 80 | 81 | 82 | ## How to build? 83 | Follow the build commands appropriate to your platform. The main difference between build commands is the format of the icon. 84 |
85 | MacOS
86 | `pyinstaller --onefile --noconsole --additional-hooks-dir=./hook --icon=debloat.icns gui.py` 87 | 88 | Windows
89 | `pyinstaller --onefile --noconsole --additional-hooks-dir=./hook --icon=debloat.ico gui.py` 90 | 91 | Linux
92 | `pyinstaller --onefile --noconsole --icon=debloat.ico --collect-all tkinterdnd2 gui.py` 93 | 94 | ## Want to discuss? 95 | Consider joining the [debloat Discord](discord.gg/dvGXKaY5qr). 96 | 97 | ## Credits 98 | Big shoutout to Jesko Hüttenhain creator of [Binary Refinery](https://github.com/binref/refinery). The NSIS extraction is based on his reverse engineering of the NSIS file format. Check out Binary Refinery if you have not. 99 | 100 | ## Where is this project going next? 101 | Batch processing: process all files in a directory and produce a report. 102 | 103 | Better support for using processing methods outside of debloat. 104 | 105 | Support for debloating without unzipping. 106 | -------------------------------------------------------------------------------- /changelog.txt: -------------------------------------------------------------------------------- 1 | 1.6.5 2 | - Fixed bug introduced in 1.6.1 which consistently resulted in failure to parse NSIS installers. 3 | - Updated NSIS extraction script to include new functionality from BinaryRefinery 4 | - Removed use of ByteString which was removed in python3.14 5 | 6 | 1.6.4 7 | - Added an additional check to identify the Code-signing signature anomaly. This check previously exited if the anomaly was found but it did not check to determine if enough of the file was removed. Now a size check has been added in order to determine if additional processing is required. 8 | 9 | 1.6.3 10 | - Fixes bug where debloat failed to handle malformed files. 11 | 12 | 1.6.2 13 | - Fixes bug in adjust_offsets method that impacted tactic-7. Bug was introduced in 1.5.6.4 as a bad attempt at error handling. 14 | - When adjusting offsets, it was possible for an error to be thrown because adjusting the offset would set it to an invalid value. However, this would happen because the value was invalid to begin with. The incorrect value was being improperly handled. 
I'm not 100% sure that I have it correct, but the new change works as expected. 15 | 16 | 1.6.1 17 | - Fixes legacy bug that could result in failure to identify NSIS installers. 18 | - In previous builds, we only checked a small window for the NSIS header. That window has been increased. 19 | - Updates the tkinterdnd hook file to only collect binaries associated with the operating system it is being built for. 20 | - Add placeholders for 2 new use cases to solve for. 21 | - Updates buildCLI.txt to specify output filename. 22 | - Add file for GitHub build automation. 23 | 24 | 1.6.0 25 | - Improves NSIS Parser to handle an irregular NSIS format 26 | - Adds solution for Use Case 17 27 | - Attackers can include junk marked as the code signing signature. In previous versions, the certificate preservation would preserve the junk. Without certificate preservation, the junk would be removed but return a Result Code of "0 - No Solution Found" even though the file was deflated. 28 | - Bug Fix 29 | - Adds error handling to escape non-unicode PE section names 30 | 31 | 1.5.6.6 32 | - Bug Fix 33 | - Patches bug in Result-Code 4 where an excess could be removed. 34 | - This was due to a miscalculation. In these instances, the "dynamic trim" and "refinery trim" methods were essentially being applied to the same data, then calculating an excess of junk. 35 | - The check for duplicate items in an NSIS Installer has been improved. 36 | - Previous check looked for item at the same offset; this version checks to see that all features are the same. 37 | 38 | 1.5.6.5 39 | - Bug Fix 40 | - Inadvertently changed "sample_compression" limit, thought it'd be OK, but it actually causes this check's main purpose to fail (that is, failing quickly when needed). Got some new ideas out of it though. 41 | 42 | 1.5.6.4 43 | - Bug Fixes 44 | - Fixed logic that could incorrectly flag .text sections as suspicious. 45 | - Handled rare error that could occur in updating offsets. 
46 | - Certificate preservation now works reliably for all use-cases. 47 | 48 | 1.5.6.3 49 | - Bug Fixes 50 | - Modified NSIS Parser to address issue identified in the implementation. More details here: https://github.com/binref/refinery/issues/49 51 | - TLDR, NSIS Installers with the properly of uncompressed data was not previously accounted for due to lack of examples. They now are accounted for. 52 | - Modified compression check in bloated overlay analysis 53 | - previous compression check was erroneous and worked only based on miracles. 54 | - Improvements 55 | - Modified trimming threshold: 0.05 -> 0.15 56 | - New trimming threshold allows for lower compressed junk. 57 | - New trimming threshold removes more junk without being too aggressive. 58 | - Known issue 59 | - The certificate preservation option does not preserve the certificate in all use-cases, particularly cases where junk is in the overlay. 60 | 61 | 1.5.6.2 62 | - Bug Fix 63 | - Not all possible paths returned a result code. An additional result code was added. 64 | 65 | 1.5.6.1 66 | - Bug Fix 67 | - Added the result code for real this time. 68 | 69 | 1.5.6 70 | - Cert Support 71 | - Added support in both CLI and GUI to preserve the authenticode certificate. 72 | - Authenticode certificate is removed by default because the certificate becomes invalid. When it becomes invalid it becomes unclear whether the certificate was always invalid or not. 73 | - Bug Fix 74 | - A result code was missing which could cause problems in processing that looked for a result code. 75 | 76 | 1.5.5 77 | - General Improvements 78 | - Added functionality to print debloat version/ added to GUI UI 79 | - Deduped results_codes into processor file 80 | - New Use Case 81 | - Identified a use case that wasn't being solved, improved program logic to solve. 82 | - Packed files with a bloated section. 
83 | 84 | 1.5.4 85 | - General Improvements 86 | - This version prints report codes indicating which inflation tactic is identified. 87 | - This version can now handle instances where no pattern exists within the junk data, or the pattern is disrupted by a few characters. This version uses the trimming method from binary refinery in two cases that were found to be more efficient. 88 | - A performance testing script has been included. 89 | 90 | The new updates hand a few edge use-cases that were not solvable before and fixes one bug. 91 | 92 | Bugfix: If debloat was unable to trim a inflated section, it would tell you it could and then exit telling you that it could not. 93 | 94 | New use-case solved: This solves the use-case where there a pattern exists in the overlay, but additional bytes have been added to disrupt the pattern. As much as 1 byte is enough to disrupt the pattern. This is not a problem anymore. 95 | 96 | 97 | 1.5.3.4 98 | - NSIS Parser improvements 99 | - Additional use cases for NSIS were identified and tested. These identified additional bugs which are fixed in this version. These use cases were added and tested: 100 | - bzip2_liquid 101 | - bzip2_solid 102 | - lzma_liquid 103 | - lzma_solid 104 | - zlib_liquid 105 | - zlib_solid 106 | 107 | 1.5.3.3 108 | - Modified NSIS Parser significantly. 109 | - Two use cases were identified where the parser were not working adequately. This resulted in identifying two logic bugs which resulted in fixing one and a large rewrite of some portions of the NSIS Parser. Rewrite was done by Huettenhain (https://github.com/huettenhain) for the original project of the NSIS Parser (https://github.com/binref/refinery) and then was incorporated into Debloat by me (Squiblydoo). 110 | - Removed some code that was unused. 
111 | 112 | 1.5.3.2 113 | - Fixed a bug with the RSRC trimming 114 | - These were some long standing issues: 115 | - The default threshold and default size_limit were brought into conformance with Refinery Trim 116 | - With the previously high threshold, it could result in problems from removing the entire resource. 117 | - I also reverted the compression method in this section. The one used elsewhere was found not to be compatible with this part of the processing. 118 | 119 | 1.5.3.1 120 | - Fixed NSIS extractor bug. 121 | - Bug was caused due to the failure of adding some bytes when iterating through NSIS entries. 122 | - Bug was caused by a missing variable. 123 | - Updated the imports for nsisParser and readers 124 | - (Somehow?) It was working without these needing to be explicitly mentioned, but it has been updated for completeness. 125 | 126 | 1.5.3 127 | - Fixed alignment bug 128 | - There was a bug where I was subtracting instead of adding bytes to fix alignment. It now adds instead of subtracts. 129 | - Polished the trim 130 | - The "find_chunk_start" method had some unclear logic, that has been improved. 131 | - Instead of trying to remove all junk, the method now returns all bytes if the full regex was unable to match. 132 | - So, if the step is 1000 or 2000 bytes and not all of them are junk, it will leave all 1000 133 | - The logic is that they aren't really hurting anything by being here, and it is better to leave them than accidentally remove them. 134 | 135 | 1.5.2 136 | - Merged Optimization changes 137 | - Changes primarily related to the trim_junk function 138 | - Primary changes reduced the active memory cost 139 | - No changes in the functionality were made in this release. 140 | 141 | 1.5.1 142 | - Made modifications recommended by gdesmar for memory improvements. 
143 | - Added the ability to pass the size of the file to the process_pe method 144 | - This reduces memory usage to calculate the length 145 | - Bug fixes suggested by gdesmar such as passing the correct object type 146 | - New compression algorithim implemented 147 | - See https://github.com/Squiblydoo/debloat/pull/18 to learn more about performance enhancements. 148 | - Implemented the optional "beginning_file_size" parameter for "process_pe" in both main.py and gui.py 149 | - Fixed typecasting bug introduced in 1.5.0 in relation to the "write_multiple_files" method 150 | 151 | 1.5.0 152 | - Added capability to handle Nullsoft Scriptable Install System (NSIS, aka Nullsoft) executables. 153 | - Setup instructions and binaries are extracted from the Nullsoft installer to a separate directory. 154 | - At this time, the user needs to resubmit files if they are bloated. Currently, debloat has no way of determining which files are malicious. 155 | 156 | - Fully renamed "Unsafe" Processing to "last_ditch_processing" 157 | - Last ditch better represents its purpose. 158 | - "Unsafe" is a name that is often used in the context of untrusted code. 159 | - Fixed inconsistency in naming of "last ditch processing" 160 | 161 | - Adjusted how debloat determines if junk was removed or not: 162 | - Previously, it could think junk removed if 1 or more bytes were removed or if only the signature was removed. 163 | - Now debloat checks for a 10% removal at the least 164 | 165 | - Updated documentation regarding Linux build command. 166 | - This had been updated elsewhere, but the update had not made it to the README 167 | 168 | 1.4.3 169 | - Fixed a logic bug where debloating a section did not debloat the proper section. 170 | - This worked previously when the bloated section was the last section 171 | - Finished a TODO item: namely, change all the offsets in the sections when the bloated section wasn't the last section of the binary. 
172 | 173 | 1.4.2 174 | - Added checkbox for unsafe processing in GUI 175 | - Moved RSRC class out of processor into utilities 176 | - Fixed bug where chunk_start could fail to be given a value with the result that the program would stop functioning but not inform the user. Better error handling in this case to come. 177 | 178 | 1.4.1 179 | - Fixed loading PE in GUI 180 | 181 | 1.4.0 182 | - Fixed headers in a few use cases where I had missed them before. 183 | - Fixed removing resource method. Works properly now. 184 | - Fixed instance where the dynamic trim regex could pick up illegal characters 185 | - Now last_loads PE for better loading time. 186 | - Now manipuates PE data in the buffer. 187 | 188 | 1.3.2.2 189 | - Fixed a bug where the Delta_last_non_junk value could fail to be set in one use case. 190 | 191 | 1.3.2.1 192 | - Temporary fix for release version. 193 | 194 | 1.3.2 195 | - Added Dynamic Trim for trimming bytes from both the Overlay and bloated sections 196 | - Dynamic trim identifies the junk and creates a targeted regex to remove it. 197 | 198 | - Improved output. 199 | - Output wasn't being updated as the program ran. I now clear the buffer and update the UI after each output message. 
200 | 201 | 1.3.1 202 | - Fixed required versions in pyproject.toml 203 | 204 | 1.3.0 205 | - Merged refactoring changes per nazywam's recommendation 206 | - Updated text length per PEP8 207 | - Started docstrings and other documentation for methods 208 | - Updated variable names for PEP8 consistency 209 | -------------------------------------------------------------------------------- /src/debloat/utilities/readers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Most of this code was repurposed from Binary Refinery (https://github.com/binref/refinery), used under the 3-Clause BSD License 4 | 5 | from __future__ import annotations 6 | import io 7 | import itertools 8 | import enum 9 | import struct 10 | import re 11 | import functools 12 | from types import TracebackType 13 | from typing import List, Union, Tuple, Optional, Iterable, TypeVar, Generic, Any 14 | 15 | T = TypeVar('T', bound=Union[bytearray, bytes, memoryview]) 16 | UnpackType = Union[int, bool, float, bytes] 17 | 18 | def signed(k: int, bits: int): 19 | M = 1 << bits 20 | k = k & (M - 1) 21 | return k - M if k >> (bits - 1) else k 22 | 23 | def exception_to_string(exception: BaseException, default=None) -> str: 24 | """ 25 | Attempts to convert a given exception to a good description that can be exposed to the user. 
26 | """ 27 | if not exception.args: 28 | return exception.__class__.__name__ 29 | it = (a for a in exception.args if isinstance(a, str)) 30 | if default is None: 31 | default = str(exception) 32 | return max(it, key=len, default=default).strip() 33 | 34 | class StreamDetour: 35 | def __init__(self, stream: io.IOBase, 36 | offset=None, whence=io.SEEK_SET) -> None: 37 | self.stream = stream 38 | self.offset = offset 39 | self.whence = whence 40 | 41 | def __enter__(self) -> io.IOBase: 42 | self.cursor = self.stream.tell() 43 | if self.offset is not None: 44 | self.stream.seek(self.offset, self.whence) 45 | return self.stream 46 | 47 | def __exit__(self, *args) -> None: 48 | self.stream.seek(self.cursor, io.SEEK_SET) 49 | 50 | class MemoryFile(Generic[T], io.IOBase): 51 | 52 | closed: bool 53 | read_as_bytes: bool 54 | 55 | _data: T 56 | _cursor: int # Defines where in the file we are currently reading from 57 | _closed: bool 58 | 59 | class SEEK(int, enum.Enum): 60 | CUR = io.SEEK_CUR 61 | END = io.SEEK_END 62 | SET = io.SEEK_SET 63 | 64 | def __init__(self, data: T, read_as_bytes: bool = False, 65 | file_number: Optional[int] = None) -> None: 66 | self._data = data 67 | self._cursor = 0 68 | self._closed = False 69 | self.read_as_bytes = read_as_bytes 70 | self.file_number = file_number 71 | 72 | def close(self) -> None: 73 | self._closed = True 74 | 75 | @property 76 | def closed(self) -> bool: 77 | return self._closed 78 | 79 | # Enter and exit methods for context manager 80 | def __enter__(self) -> 'MemoryFile': 81 | return self 82 | 83 | def __exit__(self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: TracebackType | None) -> None: 84 | return super().__exit__(exc_type, exc_val, exc_tb) 85 | 86 | def flush(self) -> None: 87 | return super().flush() 88 | 89 | def isatty(self) -> bool: 90 | return super().isatty() 91 | 92 | def __iter__(self) -> Iterable[bytes]: 93 | return self 94 | 95 | def __len__(self) -> int: 96 | return 
len(self._data) 97 | 98 | def __next__(self) -> bytes: 99 | if self._cursor >= len(self._data): 100 | raise StopIteration 101 | else: 102 | return self.readline() 103 | 104 | def file_number(self) -> Optional[int]: 105 | if self.file_number is None: 106 | return None 107 | return self.file_number 108 | 109 | def readable(self) -> bool: 110 | return super().readable() 111 | 112 | def seekable(self) -> bool: 113 | return super().seekable() 114 | 115 | @property 116 | def is_eof(self) -> bool: 117 | return self._closed or self._cursor >= len(self._data) 118 | 119 | @property 120 | def remaining_bytes(self) -> int: 121 | return len(self._data) - self.tell() 122 | 123 | def writable(self) -> bool: 124 | if self._closed: 125 | return False 126 | # Readonly for memoryview? 127 | return isinstance(self._data, bytearray) 128 | 129 | def read(self, size: int = -1, peek: bool = False) -> T: 130 | beginning = self._cursor 131 | if size is None or size < 0: 132 | end = len(self._data) 133 | else: 134 | end = min(self._cursor + size, len(self._data)) 135 | result = self._data[beginning:end] 136 | if self.read_as_bytes and not isinstance(result, bytes): 137 | result = bytes(result) 138 | if not peek: 139 | self._cursor = end 140 | return result 141 | 142 | def peek(self, size: int = -1) -> memoryview: 143 | cursor = self._cursor 144 | mv = memoryview(self._data) 145 | if size is None or size < 0: 146 | return mv[cursor:] 147 | return mv[cursor:cursor + size] 148 | 149 | def read1(self, size: int = -1, peek: bool = False) -> T: 150 | return self.read(size, peek) 151 | 152 | def _find_linebreak(self, beginning: int, end: int) -> int: 153 | if not isinstance(self._data, memoryview): 154 | return self._data.find(b'\n', beginning, end) 155 | for k in range(beginning, end): 156 | if self._data[k] == 0xA: return k 157 | return -1 158 | 159 | def readline(self, size: int = -1) -> T: 160 | beginning, end = self._cursor, len(self._data) 161 | if size is not None and size >= 0: 162 | end = 
beginning + size 163 | p = self._find_linebreak(beginning, end) 164 | self._cursor = end if p < 0 else p + 1 165 | result = self._data[beginning:self._cursor] 166 | if self.read_as_bytes and not isinstance(result, bytes): 167 | result = bytes(result) 168 | return result 169 | 170 | def readlines(self, size: int = -1) -> Iterable[T]: 171 | if size is None or size < 0: 172 | yield from self 173 | else: 174 | total = 0 175 | while total < size: 176 | line = next(self) 177 | total += len(line) 178 | yield line 179 | 180 | def readinto1(self, buffer: Any) -> int: 181 | data = self.read(len(buffer)) 182 | size = len(data) 183 | buffer[:size] = data 184 | return size 185 | 186 | def readinto(self, buffer: Any) -> int: 187 | return self.readinto1(buffer) 188 | 189 | def tell(self) -> int: 190 | return self._cursor 191 | 192 | def seek_relative(self, offset: int) -> int: 193 | return self.seek(self._cursor + offset) 194 | 195 | def seek_set(self, offset: int) -> int: 196 | if offset < 0: 197 | return self.seek(offset, self.SEEK.END) 198 | else: 199 | return self.seek(offset, self.SEEK.SET) 200 | 201 | def get_buffer(self) -> T: 202 | return self._data 203 | 204 | def get_value(self) -> T: 205 | return self._data 206 | 207 | def seek(self, offset: int, whence=io.SEEK_SET) -> int: 208 | if whence == io.SEEK_SET: 209 | if offset < 0: 210 | raise ValueError('Negative seek position {}'.format(offset)) 211 | self._cursor = offset 212 | elif whence == io.SEEK_CUR: 213 | self._cursor += offset 214 | elif whence == io.SEEK_END: 215 | self._cursor = len(self._data) + offset 216 | self._cursor = max(0, min(self._cursor, len(self._data))) 217 | return self._cursor 218 | 219 | def write_lines(self, lines: Iterable[Union[bytes, bytearray, memoryview]]) -> None: 220 | for line in lines: 221 | self.append(line) 222 | 223 | def truncate(self, size=None) -> None: 224 | if size is not None: 225 | if not (0 <= size <= len(self._data)): 226 | raise ValueError('Invalid size {}'.format(size)) 227 
| self._cursor = size 228 | del self._data[self._cursor:] 229 | 230 | def append_byte(self, byte: int) -> None: 231 | try: 232 | cursor = self._cursor 233 | if cursor < len(self._data): 234 | self._data[cursor] = byte 235 | else: 236 | self._data.append(byte) 237 | except Exception as T: 238 | raise io.UnsupportedOperation('append_byte') from T 239 | else: 240 | self._cursor += 1 241 | 242 | def append(self, data: Iterable[int]) -> int: 243 | output_data = self._data 244 | end = len(output_data) 245 | beginning = self._cursor 246 | if beginning == end: 247 | output_data.extend(data) 248 | self._cursor = end = len(output_data) 249 | return end - beginning 250 | try: 251 | size = len(data) 252 | except Exception as T: 253 | it = iter(data) 254 | for cursor, byte in enumerate(it, end - beginning): 255 | output_data[cursor] = byte 256 | if cursor >= end - 1: 257 | break 258 | else: 259 | cursor += 1 260 | self._cursor = cursor 261 | return cursor - beginning 262 | output_data.extend(it) 263 | else: 264 | self._cursor += size 265 | try: 266 | self._data[beginning:self._cursor] = data 267 | except Exception as T: 268 | self._cursor = beginning 269 | raise io.UnsupportedOperation('append') from T 270 | return size 271 | self._cursor = end = len(output_data) 272 | return end - beginning 273 | 274 | def __getitem__(self, slice: Any) -> T: 275 | result = self._data[slice] 276 | if isinstance(result, bytes) and not self.read_as_bytes: 277 | result = bytes(result) 278 | return result 279 | 280 | def replay(self, offset: int, length: int) -> None: 281 | if offset not in range(self._cursor + 1): 282 | raise ValueError('Invalid offset {}'.format(offset)) 283 | rep, r = divmod(length, offset) 284 | offset = -offset - len(self) + self._cursor 285 | replay = self._data[offset:offset + r] 286 | if rep > 0: 287 | replay = bytes(self._data[offset:self._cursor]) * rep + replay 288 | self.append(replay) 289 | 290 | 291 | class order(str, enum.Enum): 292 | big = 'big' 293 | little = 
'little' 294 | 295 | class StructReader(MemoryFile[T]): 296 | 297 | class Unaligned(RuntimeError): 298 | pass 299 | 300 | def __init__(self, data: T, bigendian: bool = False): 301 | super().__init__(data) 302 | self._number_of_bits = 0 303 | self._buffer_bits = 0 304 | self._bigendian = bigendian 305 | 306 | def __enter__(self) -> 'StructReader': 307 | return self 308 | 309 | def __exit__(self) -> None: 310 | return super().__exit__() 311 | 312 | @property 313 | def bigendian(self): 314 | self.bigendian = True 315 | try: 316 | yield self 317 | finally: 318 | self.bigendian = False 319 | 320 | @property 321 | def byteorder_format(self) -> str: 322 | return '>' if self.bigendian else '<' 323 | 324 | @property 325 | def byteorder_name(self) -> str: 326 | return 'big' if self._bigendian else 'little' 327 | 328 | def readinto(self, buffer: Any) -> int: 329 | return super().readinto(buffer) 330 | 331 | def seek(self, offset: int, whence: int = io.SEEK_SET) -> int: 332 | return super().seek(offset, whence) 333 | 334 | def read_exactly(self, size: Optional[int] = None, 335 | peek: bool = False) -> T: 336 | if not self.byte_aligned: 337 | raise StructReader.Unaligned('Buffer is not byte aligned') 338 | data = self.read1(size, peek) 339 | if size and len(data) < size: 340 | raise EOF(data) 341 | return data 342 | 343 | @property 344 | def byte_aligned(self) -> bool: 345 | return not self._number_of_bits 346 | 347 | def byte_align(self, blocksize: int = 1) -> Tuple[int, int]: 348 | if self.byte_aligned: 349 | return 0, 0 350 | number_of_bits = self._number_of_bits 351 | buffer_bits = self._buffer_bits 352 | self._number_of_bits = 0 353 | self._buffer_bits = 0 354 | mod = self._cursor % blocksize 355 | self.seek_relative(mod and blocksize - mod) 356 | return number_of_bits, buffer_bits 357 | 358 | # TODO: Review Read Integer if needed 359 | def read_integer(self, length: int, peek: bool = False) -> int: 360 | """ 361 | Read `length` many bits from the underlying stream as an 
integer. 362 | """ 363 | if length < self._number_of_bits: 364 | new_count = self._number_of_bits - length 365 | if self.bigendian: 366 | result = self._buffer_bist >> new_count 367 | if not peek: 368 | self._buffer_bist ^= result << new_count 369 | else: 370 | result = self._buffer_bist & 2 ** length - 1 371 | if not peek: 372 | self._buffer_bist >>= length 373 | if not peek: 374 | self._number_of_bits = new_count 375 | return result 376 | 377 | 378 | number_of_bits, buffer_bits = self.byte_align() 379 | number_of_missing_bits = length - number_of_bits 380 | bytecount, rest = divmod(number_of_missing_bits, 8) 381 | if rest: 382 | bytecount += 1 383 | rest = 8 - rest 384 | if bytecount == 1: 385 | result, = self.read_exactly(1, peek) 386 | else: 387 | result = int.from_bytes(self.read_exactly(bytecount, peek), self.byteorder_name) 388 | if not number_of_bits and not rest: 389 | return result 390 | if self.bigendian: 391 | rbmask = 2 ** rest - 1 # noqa 392 | excess = result & rbmask # noqa 393 | result >>= rest # noqa 394 | result ^= buffer_bits << number_of_missing_bits # noqa 395 | else: 396 | excess = result >> number_of_missing_bits # noqa 397 | result ^= excess << number_of_missing_bits # noqa 398 | result <<= number_of_bits # noqa 399 | result |= buffer_bits # noqa 400 | assert excess.bit_length() <= rest 401 | if not peek: 402 | self._number_of_bits = rest 403 | self._buffer_bist = excess 404 | return result 405 | 406 | def read_bytes(self, size: int, peek: bool = False) -> bytes: 407 | if self.byte_aligned: 408 | data = self.read_exactly(size, peek) 409 | if not isinstance(data, bytes): 410 | data = bytes(data) 411 | return data 412 | else: 413 | return self.read_bits(size * 8, peek).tobytes() 414 | 415 | def read_bit(self) -> int: 416 | return self.read_integer(1) 417 | 418 | def read_bits(self, number_of_bits: int) -> Iterable[int]: 419 | chunk = self.read_integrer(number_of_bits) 420 | for k in range(number_of_bits -1, -1, -1): 421 | yield chunk >> k & 1 
422 | 423 | def read_flags(self, number_of_bits: int, reverse=False) -> Iterable[bool]: 424 | bits = list(self.read_bits(number_of_bits)) 425 | if reverse: 426 | bits.reverse() 427 | for bit in bits: 428 | yield bool(bit) 429 | 430 | def read_struct(self, spec: str, unwrap=False, 431 | peek=False) -> Union[List[UnpackType], UnpackType]: 432 | if not spec: 433 | raise ValueError('No format specified') 434 | byte_order = spec[:1] 435 | if byte_order in '': 436 | spec = spec[1:] 437 | else: 438 | byte_order = self.byteorder_format 439 | data = [] 440 | current_cursor = self.tell() 441 | 442 | for k, part in enumerate(re.split('(\\d*[auwE])', spec)): 443 | if k % 2 == 1: 444 | count = 1 if len(part) == 1 else int(part[:~0]) 445 | part = part[~0] 446 | for _ in range(count): 447 | if part == 'a': 448 | size = self.read_integer(8) 449 | data.append(self.read_bytes(size)) 450 | elif part == 'u': 451 | data.append(self.read_integer(8)) 452 | elif part == 'w': 453 | data.append(self.read_integer(16)) 454 | elif part == 'E': 455 | data.append(self.read_integer(32)) 456 | else: 457 | raise ValueError('Invalid format {}'.format(part)) 458 | continue 459 | else: 460 | part = F'{byte_order}{part}' 461 | data.extend(struct.unpack(part, self.read_exactly(struct.calcsize(part)))) 462 | if unwrap and len(data) == 1: 463 | return data[0] 464 | if peek: 465 | self.seek_set(current_cursor) 466 | return data 467 | 468 | def read_nibble(self, peek: bool = False) -> int: 469 | return self.read_integer(4, peek) 470 | 471 | 472 | def u8(self, peek: bool = False) -> int: 473 | return self.read_integer(8, peek) 474 | def i8(self, peek: bool = False) -> int: 475 | return signed(self.read_integer(8, peek), 8) 476 | def u16(self, peek: bool = False) -> int: 477 | return self.read_integer(16, peek) 478 | def u32(self, peek: bool = False) -> int: 479 | return self.read_integer(32, peek) 480 | def u64(self, peek: bool = False) -> int: 481 | return self.read_integer(64, peek) 482 | def i16(self, 
    # --- fixed-width readers ------------------------------------------------
    # Each helper reads a fixed number of bits via read_integer / read_struct;
    # peek=True leaves the cursor untouched.  `signed` (defined elsewhere in
    # this file) sign-extends the raw unsigned value.

    def i32(self, peek: bool = False) -> int:
        return signed(self.read_integer(32, peek), 32)

    def i64(self, peek: bool = False) -> int:
        return signed(self.read_integer(64, peek), 64)

    def f32(self, peek: bool = False) -> float:
        # IEEE-754 single precision.
        return self.read_struct('f', unwrap=True, peek=peek)

    def f64(self, peek: bool = False) -> float:
        # IEEE-754 double precision.
        return self.read_struct('d', unwrap=True, peek=peek)

    def read_byte(self, peek: bool = False) -> int:
        return self.read_integer(8, peek)

    def read_char(self, peek: bool = False) -> int:
        # Signed 8-bit value.
        return signed(self.read_integer(8, peek), 8)

    def read_terminated_array(self, terminator: bytes,
                              alignment: int = 1) -> bytearray:
        """Read bytes up to (but not including) `terminator`.

        The terminator is only accepted when it lies a multiple of
        `alignment` bytes from the current position (needed for UTF-16
        double-NUL terminators).  Raises EOF, with the cursor restored,
        when no terminator can be found.
        """
        pos = self.tell()
        buffer = self.get_buffer()
        try:
            end = pos - 1
            while True:
                end = buffer.find(terminator, end + 1)
                # Stop on an aligned hit, or on -1 (not found).
                # NOTE(review): when end is -1, read_exactly below receives a
                # negative length - presumably it raises / yields EOF; confirm.
                if end < 0 or not (end - pos) % alignment:
                    break
        except AttributeError:
            # The backing object has no .find (not a flat buffer): fall back
            # to scanning one alignment unit at a time.
            result = bytearray()
            while not self.is_eof:
                result.extend(self.read_bytes(alignment))
                if result.endswith(terminator):
                    return result[:-len(terminator)]
            self.seek(pos)
            raise EOF
        else:
            data = self.read_exactly(end - pos)
            # Skip past the terminator itself.
            self.seek_relative(len(terminator))
            return bytearray(data)

    def read_c_string(self, encoding=None) -> Union[str, bytearray]:
        # NUL-terminated byte string; decoded only when an encoding is given.
        data = self.read_terminated_array(b'\0')
        if encoding is not None:
            data = data.decode(encoding)
        return data

    def read_w_string(self, encoding=None) -> Union[str, bytearray]:
        # Wide string: double-NUL terminator on a 2-byte boundary.
        data = self.read_terminated_array(b'\0\0', 2)
        if encoding is not None:
            data = data.decode(encoding)
        return data

    def read_length_prefixed(self,
                             prefix_size: int = 32,
                             encoding: Optional[str] = None,
                             block_size: int = 1) -> Union[T, str]:
        # The integer prefix counts blocks of `block_size` bytes.
        prefix = self.read_integer(prefix_size) * block_size
        data = self.read(prefix)
        if encoding is not None:
            data = data.decode(encoding)
        return data

    def read_length_prefixed_ascii(self,
                                   prefix_size: int = 32) -> Union[T, str]:
        return self.read_length_prefixed(prefix_size, 'ascii')

    def read_length_prefixed_utf8(self,
                                  prefix_size: int = 32) -> Union[T, str]:
        return self.read_length_prefixed(prefix_size, 'utf-8')

    def read_length_prefixed_utf16(self,
                                   prefix_size: int = 32,
                                   bytecount: bool = False) -> Union[T, str]:
        # bytecount=True: the prefix counts bytes; otherwise it counts
        # UTF-16 code units (2 bytes each).
        block_size = 1 if bytecount else 2
        return self.read_length_prefixed(prefix_size, 'utf-16le', block_size)

    # TODO: Review function if needed
    def read_7bit_encoded_int(self, max_bits: int = 0) -> int:
        """Decode a .NET-style 7-bit variable length integer.

        Groups are little-endian, high bit = continuation.
        NOTE(review): with the default max_bits=0 the very first iteration
        hits k == max_bits and raises ValueError immediately - callers
        presumably always pass a positive limit; confirm.
        """
        result = 0
        for k in itertools.count():
            if k == max_bits:
                raise ValueError('Invalid 7-bit encoded integer')
            byte = self.read_byte()
            result |= (byte & 0x7F) << (7 * k)
            if not byte & 0x80:
                break
        return result


class StructMeta(type):
    """Metaclass that wraps Struct.__init__ so that, after construction,
    the instance remembers the exact slice of the input buffer it consumed.
    """

    def __new__(mcls, name, bases, nmspc, parser=StructReader):
        return type.__new__(mcls, name, bases, nmspc)

    def __init__(cls, name, bases, nmspc, parser=StructReader):
        super(StructMeta, cls).__init__(name, bases, nmspc)
        original__init__ = cls.__init__

        @functools.wraps(original__init__)
        def wrapped__init__(self: Struct, reader, *args, **kwargs):
            # Coerce the argument into the required reader type; refuse to
            # wrap when the caller's reader is already a SUPERTYPE of the
            # required parser (cannot safely downcast).
            if not isinstance(reader, parser):
                if issubclass(parser, reader.__class__):
                    raise ValueError(
                        F'A reader of type {reader.__class__.__name__} was passed to {cls.__name__}, '
                        F'but a {parser.__name__} is required.')
                reader = parser(reader)
            start = reader.tell()
            view = memoryview(reader.get_buffer())
            original__init__(self, reader, *args, **kwargs)
            # Remember the raw bytes spanned by this struct.
            # NOTE(review): name mangling makes this `_StructMeta__data`,
            # while Struct's accessors read `self._data` - the two names do
            # not line up; confirm which attribute is intended.
            self.__data = view[start:reader.tell()]

        cls.__init__ = wrapped__init__


class Struct(metaclass=StructMeta):
    """Base class for structures parsed from a StructReader; the metaclass
    records the byte range consumed during __init__."""

    # View into the reader's buffer covering this struct's bytes.
    _data: Union[memoryview, bytearray]

    def __len__(self):
        return len(self._data)

    def __bytes__(self):
        return bytes(self._data)

    def get_data(self, decouple=False):
        # decouple=True detaches the struct from the underlying buffer by
        # copying the view into an independent bytearray.
        if decouple and isinstance(self._data, memoryview):
            self._data = bytearray(self._data)
        return self._data

    def __init__(self, reader: StructReader):
        # Subclasses parse their fields here; the StructMeta wrapper records
        # how many bytes were consumed.
        pass


class EOF(EOFError):
    """Raised on unexpected end of input; `rest` carries leftover bytes."""

    def __init__(self, rest: Union[bytes, bytearray, memoryview] = b''):
        super().__init__('End of File')
        self.rest = rest

# ---------------------------------------------------------------------------
# (file boundary: src/debloat/utilities/pyflate.py begins here)
# ---------------------------------------------------------------------------
#!/usr/bin/env python
# ============================ MODIFICATION NOTE ============================
# The content of this file has been modified for use in binary refinery; it
# has been ported from Python2 to Python3 and the BZip2 implementation was
# rewritten to support NSIS-specific BZip stream and block headers, which are
# different from the official standard values. The original code was taken
# from the following location:
# https://github.com/pfalcon/pyflate/blob/master/pyflate.py

# ============================ ORIGINAL LICENSING ============================
# Copyright 2006--2007-01-21 Paul Sladen
# http://www.paul.sladen.org/projects/compression/
#
# You may use and distribute this code under any DFSG-compatible license (eg.
# BSD, GNU GPLv2).
#
# Stand-alone pure-Python DEFLATE (gzip) and bzip2 decoder/decompressor. This
# is probably most useful for research purposes/index building; there is
# certainly some room for improvement in the Huffman bit-matcher.
#
# With the as-written implementation, there was a known bug in BWT decoding
# to do with repeated strings. This has been worked around; see bwt_reverse().
# Correct output is produced in all test cases but ideally the problem would
# be found...
# ============================================================================
from __future__ import annotations
from typing import List, Tuple, Iterable, Optional, BinaryIO

import itertools
import abc


class BitfieldBase(abc.ABC):
    """Base class for bit-oriented readers over a binary stream.

    Tracks the number of raw bytes consumed (`count`), the number of bits
    currently buffered (`bits`), and the bit buffer itself (`bitfield`).
    Subclasses define the bit order (LSB-first for DEFLATE, MSB-first for
    BZip2).  Constructing from another BitfieldBase clones its state.
    """

    def __init__(self, x):
        if isinstance(x, BitfieldBase):
            # Copy constructor: clone the other reader's state.
            self.f = x.f
            self.bits = x.bits
            self.bitfield = x.bitfield
            # BUGFIX: this previously copied x.bitfield into count, which
            # corrupted tell()/tellbits() on any copied reader.
            self.count = x.count
        else:
            self.f = x
            self.bits = 0
            self.bitfield = 0x0
            self.count = 0

    def _read(self, n):
        """Read n raw bytes from the stream, updating the byte counter."""
        s = self.f.read(n)
        if not s:
            raise RuntimeError('length error')
        self.count += len(s)
        return s

    def needbits(self, n):
        """Ensure at least n bits are buffered."""
        while self.bits < n:
            self._more()

    def _mask(self, n):
        return (1 << n) - 1

    def toskip(self):
        # Number of bits to skip to reach the next byte boundary.
        return self.bits & 0x7

    def align(self):
        """Advance to the next byte boundary."""
        self.readbits(self.toskip())

    def dropbits(self, n=8):
        """Discard n bits, skipping whole bytes directly on the stream."""
        while n >= self.bits and n > 7:
            n -= self.bits
            self.bits = 0
            # BUGFIX: was `self.f._read(...)` - the underlying stream object
            # has no `_read` method, and going around self._read would also
            # fail to update the byte counter used by tell().
            n -= len(self._read(n >> 3)) << 3
        if n:
            self.readbits(n)

    def dropbytes(self, n=1):
        self.dropbits(n << 3)

    def tell(self):
        """Return (byte offset, bit offset within that byte) of the cursor."""
        return self.count - ((self.bits + 7) >> 3), 7 - ((self.bits - 1) & 0x7)

    def tellbits(self):
        """Return the absolute cursor position in bits."""
        bytes, bits = self.tell()
        return (bytes << 3) + bits

    @abc.abstractmethod
    def _more(self):
        """Pull at least one more byte into the bit buffer."""

    @abc.abstractmethod
    def snoopbits(self, n=8):
        """Peek n bits without consuming them."""

    @abc.abstractmethod
    def readbits(self, n=8):
        """Consume and return n bits."""


class LBitfield(BitfieldBase):
    """Little-endian (LSB-first) bit reader, as used by DEFLATE."""

    def _more(self):
        c = self._read(1)
        # New byte enters above the currently buffered bits.
        self.bitfield += c[0] << self.bits
        self.bits += 8

    def snoopbits(self, n=8):
        if n > self.bits:
            self.needbits(n)
        return self.bitfield & self._mask(n)

    def readbits(self, n=8):
        if n > self.bits:
            self.needbits(n)
        r = self.bitfield & self._mask(n)
        self.bits -= n
        self.bitfield >>= n
        return r


class RBitfield(BitfieldBase):
    """Big-endian (MSB-first) bit reader, as used by BZip2."""

    def _more(self):
        c = self._read(1)
        # New byte enters below the currently buffered bits.
        self.bitfield <<= 8
        self.bitfield += c[0]
        self.bits += 8

    def snoopbits(self, n=8):
        if n > self.bits:
            self.needbits(n)
        return (self.bitfield >> (self.bits - n)) & self._mask(n)

    def readbits(self, n=8):
        if n > self.bits:
            self.needbits(n)
        r = (self.bitfield >> (self.bits - n)) & self._mask(n)
        self.bits -= n
        # Clear the consumed (top) bits from the buffer.
        self.bitfield &= ~(self._mask(n) << self.bits)
        return r


class HuffmanLength:
    """A (code, bit-length) pair, ordered by length then by code value
    (canonical Huffman order)."""

    code: int
    bits: int
    symbol: Optional[int]
    reverse_symbol: Optional[int]

    def __init__(self, code, bits=0):
        self.code = code
        self.bits = bits
        self.symbol = None
        self.reverse_symbol = None

    def __lt__(self, other):
        return self.__cmp(other) < 0

    def __gt__(self, other):
        return self.__cmp(other) > 0

    def __eq__(self, other):
        return self.__cmp(other) == 0

    def __le__(self, other):
        return self.__cmp(other) <= 0

    def __ge__(self, other):
        return self.__cmp(other) >= 0

    def __ne__(self, other):
        return self.__cmp(other) != 0

    def __cmp(self, other):
        # Shorter codes sort first; ties broken by code value.
        a, b = self.bits, other.bits
        if a == b:
            a, b = self.code, other.code
        return (a > b) - (a < b)


def reverse_bits(v: int, n: int):
    """Return the n-bit value v with its bit order reversed."""
    a = 1 << 0
    b = 1 << (n - 1)
    z = 0
    # Swap bit i with bit (n-1-i), working both ends toward the middle.
    for i in range(n - 1, -1, -2):
        z |= (v >> i) & a
        z |= (v << i) & b
        a <<= 1
        b >>= 1
    return z
def reverse_bytes(v, n):
    """Return the n-bit value v with its byte order reversed."""
    lo_mask = 0xff
    hi_mask = 0xff << (n - 8)
    out = 0
    # Swap the byte at shift `s` with its mirror, moving inward.
    for shift in range(n - 8, -8, -16):
        out |= (v >> shift) & lo_mask
        out |= (v << shift) & hi_mask
        lo_mask <<= 8
        hi_mask >>= 8
    return out


class HuffmanTable:
    """A canonical Huffman table built from a (start, bits) bootstrap list."""

    table: List[HuffmanLength]

    def __init__(self, bootstrap):
        entries = []
        start, bits = bootstrap[0]
        for finish, endbits in bootstrap[1:]:
            # Zero-length runs produce no entries; -1 terminates the list.
            if bits:
                entries.extend(
                    HuffmanLength(code, bits) for code in range(start, finish))
            start, bits = finish, endbits
            if endbits == -1:
                break
        entries.sort()
        self.table = entries

    def populate_huffman_symbols(self):
        """Assign canonical code words and their bit-reversed variants."""
        width = -1
        symbol = -1
        for entry in self.table:
            symbol += 1
            if entry.bits != width:
                # Code length grew: shift the running symbol accordingly.
                symbol <<= (entry.bits - width)
                width = entry.bits
            entry.symbol = symbol
            entry.reverse_symbol = reverse_bits(symbol, width)

    def min_max_bits(self):
        """Cache the shortest and longest code lengths present."""
        self.min_bits = min([16, *(entry.bits for entry in self.table)])
        self.max_bits = max([-1, *(entry.bits for entry in self.table)])

    def _find_symbol(self, bits: int, symbol: int, table: Iterable[HuffmanLength]) -> int:
        # Linear scan for an exact (length, reversed code) match.
        for entry in table:
            if entry.bits == bits and entry.reverse_symbol == symbol:
                return entry.code
        return -1

    def find_next_symbol(self, field: LBitfield, reversed=True):
        """Consume one Huffman code word from `field`, returning its symbol.

        The table is sorted by code length, so bits are peeked once per
        distinct length and only consumed on a match.
        """
        snooped_width = -1
        snooped = None
        for entry in self.table:
            if snooped_width != entry.bits:
                snooped = field.snoopbits(entry.bits)
                snooped_width = entry.bits
            candidate = entry.reverse_symbol if reversed else entry.symbol
            if candidate == snooped:
                field.readbits(entry.bits)
                return entry.code
        raise RuntimeError(F'symbol not found even after end of table at {field.tell()}')


class OrderedHuffmanTable(HuffmanTable):
    """Huffman table built from a per-symbol list of code lengths."""

    def __init__(self, lengths):
        # Each symbol gets its own (index, length) pair; the sentinel with
        # bits == -1 terminates the bootstrap list.
        bootstrap = [*enumerate(lengths), (len(lengths), -1)]
        super().__init__(bootstrap)


# Order in which DEFLATE transmits code lengths for the code-length alphabet.
CODE_LENGTH_ORDERS = (
    0x10, 0x11, 0x12, 0x00, 0x08, 0x07, 0x09, 0x06, 0x0A, 0x05,
    0x0B, 0x04, 0x0C, 0x03, 0x0D, 0x02, 0x0E, 0x01, 0x0F)

# Base distance for each DEFLATE distance code (extra bits are added on top).
DISTANCE_BASE = (
    0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000D, 0x0011, 0x0019,
    0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00C1, 0x0101, 0x0181, 0x0201, 0x0301,
    0x0401, 0x0601, 0x0801, 0x0C01, 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001)

# Base match length for each DEFLATE length code 257..285.
LENGTH_BASE = (
    0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000D,
    0x000F, 0x0011, 0x0013, 0x0017, 0x001B, 0x001F, 0x0023, 0x002B, 0x0033, 0x003B,
    0x0043, 0x0053, 0x0063, 0x0073, 0x0083, 0x00A3, 0x00C3, 0x00E3, 0x0102)


def extra_distance_bits(n):
    """Number of extra bits following DEFLATE distance code n."""
    if n < 0 or n > 29:
        raise RuntimeError('illegal distance code')
    return 0 if n <= 1 else (n >> 1) - 1


def extra_length_bits(n):
    """Number of extra bits following DEFLATE length code n."""
    if 257 <= n <= 260 or n == 285:
        return 0
    if 261 <= n <= 284:
        return ((n - 257) >> 2) - 1
    raise RuntimeError('illegal length code')


def move_to_front(array: list, index):
    """Rotate array[index] to the front of the list, in place."""
    array.insert(0, array.pop(index))


def bwt_transform(data):
    """Compute the inverse-BWT transform vector for `data`."""
    ordered = bytearray(sorted(data))
    # First slot in the sorted column for each byte value.
    next_slot = [ordered.find(value) for value in range(256)]
    transform = [-1] * len(data)
    for position, byte in enumerate(data):
        transform[next_slot[byte]] = position
        next_slot[byte] += 1
    return transform


def bwt_reverse(data, end):
    """Invert the Burrows-Wheeler transform of `data`, starting at row `end`.

    The loop is bounded by len(data) rather than following the cycle until it
    closes: the original cycle-following construct looped forever when the
    output resolved to exact multiples of a repeated string (see the note in
    the upstream pyflate source).
    """
    out = bytearray(len(data))
    transform = bwt_transform(data)
    for i in range(len(data)):
        end = transform[end]
        out[i] = data[end]
    return out
class _DecompressionFile(abc.ABC):
    """File-like wrapper over a decompression state machine.

    Subclasses implement _readblock() to append one decompressed block to
    `current_block`; read() drains that buffer on demand.  Only reading is
    supported.
    """

    def readable(self) -> bool:
        return True

    def seekable(self) -> bool:
        return False

    def writable(self) -> bool:
        return False

    def write(self, __b):
        raise NotImplementedError

    # Underlying compressed stream.
    data: BinaryIO
    # Bit-level reader over `data` (set by subclasses).
    bits: BitfieldBase
    # True when parsing the NSIS variants of the stream/block headers.
    nsis: bool
    # Set once the final block has been consumed.
    done: bool
    # Decompressed bytes not yet handed out by read().
    current_block: bytearray

    def __init__(self, data: BinaryIO, nsis: bool = True):
        self.data = data
        self.nsis = nsis
        self.done = False
        self.current_block = bytearray()

    def readall(self) -> bytes:
        return self.read()

    def readinto(self, __buffer):
        data = self.read(len(__buffer))
        size = len(data)
        __buffer[:size] = data
        return size

    def read(self, size=-1):
        # Keep decoding blocks until the buffer can satisfy `size`.
        # size=-1 (or any size >= len(buffer)) is never "in range", so this
        # drains the whole stream and returns everything.
        while size not in range(len(self.current_block)):
            if not self._readblock():
                break
        block = self.current_block
        if size < 0 or size >= len(block):
            self.current_block = bytearray()
            return block
        else:
            out = block[:size]
            del block[:size]
            return out

    @abc.abstractmethod
    def _readblock(self) -> bool:
        """Decode one block into current_block; False when the stream ends."""


class BZip2File(_DecompressionFile):
    """BZip2 decompressor supporting both standard streams and the NSIS
    variant (short 8-bit block headers, no stream magic, no CRCs)."""

    blocksize: int
    block_header_size: int
    # (data-block marker, end-of-stream marker).
    block_header_type: Tuple[int, int]
    current_block: bytearray

    def __init__(self, data: BinaryIO, nsis: bool = True):
        super().__init__(data, nsis)
        # BZip2 is MSB-first.
        self.bits = RBitfield(data)

        if nsis:
            # NSIS hard-codes the maximum block size and truncates the
            # block markers to a single byte.
            self.blocksize = 9
            self.block_header_size = 8
            self.block_header_type = (0x31, 0x17)
        else:
            if data.read(2) != b'BZ':
                raise RuntimeError('BZip2 header magic is missing')
            if self.bits.readbits(8) != ord('h'):
                raise RuntimeError('BZip2 header contains unknown compression method')
            blocksize = self.bits.readbits(8)
            # ASCII digit '1'..'9' encodes the block size multiplier.
            if 0x31 <= blocksize <= 0x39:
                blocksize = blocksize - 0x30
            else:
                raise RuntimeError('BZip2 header specifies invalid block size')
            self.blocksize = blocksize
            self.block_header_size = 48
            # Standard 48-bit "pi"/"sqrt(pi)" block markers.
            self.block_header_type = (0x314159265359, 0x177245385090)

        self.blocksize *= 100_000

    def _readblock(self):
        """Decode one BZip2 block: Huffman stage, MTF/RLE2 stage, inverse
        BWT, then the final byte-level RLE.  Returns False at stream end."""
        out = self.current_block
        if self.done:
            return False
        br = self.bits
        blocktype = br.readbits(self.block_header_size)
        if not self.nsis:
            _ = br.readbits(32) # crc
        if blocktype == self.block_header_type[0]:
            if not self.nsis and br.readbits(1):
                raise RuntimeError('BZip2 randomised support not implemented')
            # BWT origin pointer for this block.
            pointer = br.readbits(24)
            # Two-level bitmap of which of the 256 byte values occur.
            huffman_used_map = br.readbits(16)
            map_mask = 1 << 15
            used = []
            while map_mask > 0:
                if huffman_used_map & map_mask:
                    huffman_used_bitmap = br.readbits(16)
                    bit_mask = 1 << 15
                    while bit_mask > 0:
                        if huffman_used_bitmap & bit_mask:
                            pass
                        used += [bool(huffman_used_bitmap & bit_mask)]
                        bit_mask >>= 1
                else:
                    used += [False] * 16
                map_mask >>= 1
            huffman_groups = br.readbits(3)
            if not 2 <= huffman_groups <= 6:
                raise RuntimeError('BZip2 number of Huffman groups not in range 2..6')
            # Selectors: unary-coded, MTF-decoded table choice per 50 symbols.
            selectors_used = br.readbits(15)
            mtf = list(range(huffman_groups))
            selectors_list = []
            for i in range(selectors_used):
                c = 0
                while br.readbits(1):
                    c += 1
                    if c >= huffman_groups:
                        raise RuntimeError('BZip2 chosen selector greater than number of groups (max 6)')
                if c >= 0:
                    move_to_front(mtf, c)
                selectors_list += mtf[0:1]
            groups_lengths = []
            # Symbols: the used byte values, plus RUNA/RUNB, plus EOB.
            symbols_in_use = sum(used) + 2  # remember RUN[AB] RLE symbols
            for _ in range(huffman_groups):
                # Delta-coded code lengths: start value, then +/-1 steps.
                length = br.readbits(5)
                lengths = []
                for i in range(symbols_in_use):
                    if not 0 <= length <= 20:
                        raise RuntimeError('BZip2 Huffman length code outside range 0..20')
                    while br.readbits(1):
                        length -= (br.readbits(1) * 2) - 1
                    lengths += [length]
                groups_lengths += [lengths]

            tables = []
            for g in groups_lengths:
                codes = OrderedHuffmanTable(g)
                codes.populate_huffman_symbols()
                codes.min_max_bits()
                tables.append(codes)

            # MTF list of the byte values actually present in this block.
            favourites = [y for y, x in enumerate(used) if x]
            selector_pointer = 0
            decoded = 0
            repeat = repeat_power = 0
            buffer = bytearray()
            t = None
            while True:
                # Switch Huffman tables every 50 decoded symbols.
                decoded -= 1
                if decoded <= 0:
                    decoded = 50
                    # NOTE(review): `<=` allows selector_pointer to index one
                    # past the end of selectors_list; upstream pyflate has the
                    # same comparison - confirm before changing.
                    if selector_pointer <= len(selectors_list):
                        t = tables[selectors_list[selector_pointer]]
                        selector_pointer += 1
                r = t.find_next_symbol(br, False)
                if 0 <= r <= 1:
                    # RUNA/RUNB: accumulate a bijective base-2 run length.
                    if repeat == 0:
                        repeat_power = 1
                    repeat += repeat_power << r
                    repeat_power <<= 1
                    continue
                elif repeat > 0:
                    # Flush the pending run of the current front symbol.
                    buffer.extend(itertools.repeat(favourites[0], repeat))
                    repeat = 0
                if r == symbols_in_use - 1:
                    # End-of-block symbol.
                    break
                else:
                    # MTF decode: symbol r maps to favourites[r - 1].
                    o = favourites[r - 1]
                    move_to_front(favourites, r - 1)
                    buffer.append(o)
            # RLE step
            nt = bwt_reverse(buffer, pointer)
            done = bytearray()
            n = len(nt)
            i = 0
            while i < n:
                # Four equal bytes are followed by an extra-count byte.
                if i < n - 4 and nt[i] == nt[i + 1] == nt[i + 2] == nt[i + 3]:
                    done.extend(itertools.repeat(nt[i], nt[i + 4] + 4))
                    i += 5
                else:
                    done.append(nt[i])
                    i += 1
            out.extend(done)
            return True
        elif blocktype == self.block_header_type[1]:
            # End-of-stream marker.
            br.align()
            self.done = True
            return False
        else:
            raise RuntimeError(
                F'unknown BZip2 block value 0x{blocktype:0{self.block_header_size // 4}X}')


class GZipFile(_DecompressionFile):
    """DEFLATE decompressor for GZip streams and raw NSIS deflate data
    (nsis=True skips the GZip header, trailer, and length checks)."""

    def __init__(self, data: BinaryIO, nsis: bool = True):
        super().__init__(data, nsis)
        # DEFLATE is LSB-first.
        br = self.bits = LBitfield(data)
        if not nsis and self.data.read(2) != b'\x1F\x8B':
            raise RuntimeError('Unknown (not 1F8B) header')
        if not nsis and br.readbits(8) != 8:
            raise RuntimeError('Unknown (not type 8 DEFLATE) compression method')
        if not nsis:
            # Parse the optional GZip header fields.
            self.flags = br.readbits(8)
            self.mtime = br.readbits(32)
            self.extra_flags = br.readbits(8)
            self.os_type = br.readbits(8)
            self.file_name = ''
            self.comment = ''

            if self.flags & 0x04:
                # structured GZ_FEXTRA miscellaneous data
                xlen = br.readbits(16)
                br.dropbytes(xlen)
            while self.flags & 0x08:
                # original GZ_FNAME filename
                cc = br.readbits(8)
                if not cc:
                    break
                self.file_name += chr(cc)
            while self.flags & 0x10:
                # human readable GZ_FCOMMENT
                cc = br.readbits(8)
                if not cc:
                    break
                self.comment += chr(cc)
            if self.flags & 0x02:
                # header-only GZ_FHCRC checksum
                br.readbits(16)

    def _readblock(self) -> bool:
        """Decode one DEFLATE block (stored, static Huffman, or dynamic
        Huffman) into current_block.  Returns False after the final block."""
        if self.done:
            return False
        br = self.bits
        out = self.current_block
        lastbit = br.readbits(1)
        blocktype = br.readbits(2)

        def _error_unused(msg):
            # Helper returning (not raising) the error for reserved codes.
            return RuntimeError(F'illegal unused {msg} in use at {br.tell()}')

        if blocktype == 0:
            # Stored (uncompressed) block: byte-aligned LEN/NLEN then data.
            br.align()
            length = br.readbits(16)
            # NLEN is the one's complement of LEN, so LEN & NLEN must be 0.
            if not self.nsis and 0 != length & br.readbits(16):
                raise RuntimeError('stored block lengths do not match each other')
            # Fast path: byte-aligned with an empty bit buffer.
            if not br.bits:
                it = self.data.read(length)
            else:
                it = (br.readbits(8) for _ in range(length))
            out.extend(it)

        elif blocktype == 1 or blocktype == 2:
            main_literals, main_distances = None, None

            if blocktype == 1: # Static Huffman
                static_huffman_bootstrap = [(0, 8), (144, 9), (256, 7), (280, 8), (288, -1)]
                static_huffman_lengths_bootstrap = [(0, 5), (32, -1)]
                main_literals = HuffmanTable(static_huffman_bootstrap)
                main_distances = HuffmanTable(static_huffman_lengths_bootstrap)

            elif blocktype == 2: # Dynamic Huffman
                len_codes = br.readbits(5)
                literals = len_codes + 257
                distances = br.readbits(5) + 1
                code_lengths_length = br.readbits(4) + 4
                table = [0] * 19
                for i in range(code_lengths_length):
                    table[CODE_LENGTH_ORDERS[i]] = br.readbits(3)
                dynamic_codes = OrderedHuffmanTable(table)
                dynamic_codes.populate_huffman_symbols()
                dynamic_codes.min_max_bits()

                # Decode the code_lengths for both tables at once,
                # then split the list later

                code_lengths = []
                n = 0
                while n < (literals + distances):
                    r = dynamic_codes.find_next_symbol(br)
                    if 0 <= r <= 15: # literal bitlength for this code
                        count = 1
                        what = r
                    elif r == 16: # repeat last code
                        count = 3 + br.readbits(2)
                        # Is this supposed to default to '0' if in the zeroth position?
                        what = code_lengths[-1]
                    elif r == 17: # repeat zero
                        count = 3 + br.readbits(3)
                        what = 0
                    elif r == 18: # repeat zero lots
                        count = 11 + br.readbits(7)
                        what = 0
                    else:
                        raise RuntimeError('next code length is outside of the range 0 <= r <= 18')
                    code_lengths += [what] * count
                    n += count

                main_literals = OrderedHuffmanTable(code_lengths[:literals])
                main_distances = OrderedHuffmanTable(code_lengths[literals:])

            main_literals.populate_huffman_symbols()
            main_distances.populate_huffman_symbols()
            main_literals.min_max_bits()
            main_distances.min_max_bits()
            literal_count = 0

            while True:
                r = main_literals.find_next_symbol(br)
                if 0 <= r <= 255:
                    # Plain literal byte.
                    literal_count += 1
                    out.append(r)
                elif r == 256:
                    # End-of-block symbol.
                    if literal_count > 0:
                        literal_count = 0
                    break
                elif 257 <= r <= 285: # dictionary lookup
                    if literal_count > 0:
                        literal_count = 0
                    length_extra = br.readbits(extra_length_bits(r))
                    length = LENGTH_BASE[r - 257] + length_extra

                    r1 = main_distances.find_next_symbol(br)
                    if 0 <= r1 <= 29:
                        distance = DISTANCE_BASE[r1] + br.readbits(extra_distance_bits(r1))
                        # Back-reference copy; when length > distance the
                        # copy overlaps its own output, so repeat the tail.
                        while length > distance:
                            out += out[-distance:]
                            length -= distance
                        if length == distance:
                            out += out[-distance:]
                        else:
                            out += out[-distance:length - distance]
                    elif 30 <= r1 <= 31:
                        raise _error_unused('distance symbol')
                elif 286 <= r <= 287:
                    raise _error_unused('literal/length symbol')
        elif blocktype == 3:
            raise _error_unused('blocktype')

        if lastbit:
            self.done = True
            br.align()
            try:
                # GZip trailer; absent in raw NSIS streams, hence best-effort.
                _ = br.readbits(32) # crc
                _ = br.readbits(32) # length
            except Exception:
                if not self.nsis:
                    raise
            return False
        else:
            return True
# ---------------------------------------------------------------------------
# (file boundary: src/debloat/processor.py begins here)
# ---------------------------------------------------------------------------
"""
This file handles the processing of binaries and helper methods.

Three methods rely heavily on parts of Binary Refinery
https://github.com/binref/refinery
Copyright 2019 Jesko Hüttenhain under the 3-Clause BSD License
The methods are:
    refinery_strip()
    adjust_offsets()
    refinery_trim_resources()
The RSRC Class is also from refinery.
"""
from pathlib import Path
import re
from typing import Tuple, Optional, Any, Callable, List
import pefile
import binascii
import zlib
from pefile import Structure, SectionStructure, DIRECTORY_ENTRY
from typing import Generator, Iterable, Optional

import debloat.utilities.nsisParser as nsisParser
import debloat.utilities.rsrc as rsrc

DEBLOAT_VERSION = "1.6.5"

# Human-readable description for each numeric result code produced by the
# various debloating strategies.
RESULT_CODES = {
    0: "No Solution found.",
    1: "Junk after signature.",
    2: "Single repeated byte in overlay.",
    3: "Pattern in overlay.",
    4: "Sets of repeated bytes in overlay.",
    5: "NSIS Installer.",
    6: "Bloat in PE resources",
    7: "Bloat in PE section",
    8: "Bloat in .NET resource",
    9: "Non-essential, high entropy overlay",
    10: "High compression with bytes at end.",
    11: ".NET Single File with junk",
    12: "Packed file with bloated section",
    13: "Random overlay with high compression",
    14: "Junk interspersed with data",
    15: "VMProtected junk",
    16: "InnoSetup Installer",
    17: "Junk in the certificate",
    18: "SFX Archive",
    19: "Electron Application"
}


_KB = 1000
_MB = _KB * _KB
# NOTE(review): readable_size() below uses 1024-based divisors while _KB/_MB
# are 1000-based; they serve different call sites - confirm before unifying.

def readable_size(value: int) -> str:
    '''Return bytes in human readable format (1024-based units).'''
    if value <= 1024:
        return '%s bytes' % value
    elif value < 1024 * 1024:
        return '%.1f KB' % (float(value) / 1024.0)
    elif value < 1024 * 1024 * 1024:
        return '%.1f MB' % (float(value) / 1024.0 / 1024.0)
    else:
        return '%.1f GB' % (float(value) / 1024.0 / 1024.0 / 1024.0)

def write_multiple_files(out_path: str,
                        files: list, log_message: Callable[[str], None]) -> None:
    '''
    Writes multiple files to disk when applicable.

    Keyword Arguments:
    out_path -- directory the unpacked files are written into
    files -- objects with `.path` (Windows-style) and `.data` attributes
    log_message -- callback used to report progress to the UI/CLI
    '''
    log_message("Installer unpacked!\n")
    log_message(f"The files are being written to {out_path}")
    for file in files:
        # Archive paths use backslashes; normalize so Path nests correctly
        # on every platform.
        out_file_path = Path(out_path) / Path(file.path.replace("\\", "/"))
        out_dir_path = out_file_path.parent
        out_dir_path.mkdir(parents=True, exist_ok=True)
        with open(out_file_path, "wb") as f:
            f.write(file.data)
        log_message("File: " + str(Path(file.path.replace("\\", "/"))))
    log_message("")
    log_message("The user will need to determine which file is malicious if any.")
    log_message("If a file is bloated: resubmit it through the tool to debloat it.")


def write_patched_file(out_path: str,
                       pe: pefile.PE) -> Tuple[int, str]:
    '''Writes the patched file to disk.

    Keyword Arguments:
    out_path -- the path and file name to write
    pe -- the pefile that is being processed

    Returns (final file size, path written).
    '''
    # FIX: pe.write() rebuilds the whole image; the previous version called
    # it twice (once to write, once for len), doubling the work for large
    # files.  Serialize once and reuse the buffer.
    patched = pe.write()
    with open(out_path, 'wb') as writer:
        writer.write(patched)
    return len(patched), out_path

def handle_signature_abnormality(signature_address: int,
                                 signature_size: int,
                                 beginning_file_size: int,
                                 data_to_delete: List) -> Tuple[bool, int]:
    '''Detect junk after (or inside) a PE Authenticode signature.

    Appends the offending (start, end) span to data_to_delete and returns
    (abnormality_found, result_code): 17 for junk within the certificate,
    1 for junk after the signature, 0 for nothing suspicious.
    '''
    # If the signature_address is 0, there was no original signature.
    # We are setting the signature address to the filesize in order to
    # skip the next check.
    if signature_address == 0:
        signature_address = beginning_file_size
    # Check to see if there is data after the signature; if so, it is
    # junk data
    signature_abnormality = False
    if signature_size > (beginning_file_size - signature_size):
        # The "signature" occupies more than half the file: bloat hidden
        # inside the certificate itself.
        result_code = 17
        signature_abnormality = True
    elif beginning_file_size > (signature_address + signature_size):
        result_code = 1
        signature_abnormality = True

    if signature_abnormality is True:
        data_to_delete.append((signature_address + signature_size, beginning_file_size))
    else:
        result_code = 0
    return signature_abnormality, result_code

def check_and_extract_NSIS(possible_header: bytearray, pe: pefile.PE) -> Optional[list]:
    '''Check if the PE is an NSIS installer.

    Returns the list of extracted files, or None when the archive marker is
    not present (FIX: the annotation previously claimed `list` while a bare
    `return` produced None implicitly).
    '''
    extractor = nsisParser.extractNSIS()
    confirm_if_nsis = extractor._find_archive_offset(memoryview(possible_header))
    if confirm_if_nsis is None:
        return None
    extracted_files = extractor.unpack(memoryview(pe.__data__))
    return extracted_files


def find_last_section(pe: pefile.PE) -> Optional[pefile.SectionStructure]:
    '''Iterate through PE sections to identify the last one (by raw offset).'''
    last_section = None
    for section in pe.sections:
        if last_section is None \
                or section.PointerToRawData > last_section.PointerToRawData:
            last_section = section
    return last_section

def get_signature_info(pe: pefile.PE, cert_preservation) -> Tuple[int, int]:
    '''Clear the PE security directory and return its (address, size).

    Keyword Arguments:
    pe -- the pefile being processed
    cert_preservation -- when truthy, keep the Size field so the cert can be
    re-attached later; the VirtualAddress is zeroed either way.
    '''
    security = pe.OPTIONAL_HEADER.DATA_DIRECTORY[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']]
    signature_address = security.VirtualAddress
    signature_size = security.Size
    security.VirtualAddress = 0
    # If the cert is to be preserved, we do not need to modify the size in
    # the header.
    if not cert_preservation:
        security.Size = 0

    return signature_address, signature_size
def adjust_offsets(pe: pefile.PE, gap_offset: int, gap_size: int):
    """Remove a gap of `gap_size` bytes at `gap_offset` from a PE's metadata.

    Shrinks the containing section and walks every parsed pefile structure,
    shifting file offsets and RVA/VA attributes that point past the gap.
    (Ported from Binary Refinery; see module docstring.)
    """
    base = pe.OPTIONAL_HEADER.ImageBase
    alignment = pe.OPTIONAL_HEADER.FileAlignment
    rva_offset = pe.get_rva_from_offset(gap_offset)
    tva_offset = rva_offset + base

    section = pe.get_section_by_offset(gap_offset)
    new_section_size = section.SizeOfRawData - gap_size
    # The shrunken section must still respect file alignment.
    if new_section_size % alignment != 0:
        raise RuntimeError(
            F'trimming 0x{gap_size:X} bytes from section {(section.Name)} of size 0x{section.SizeOfRawData:X} '
            F'violates required section alignment of 0x{alignment:X} bytes')
    inside_section_offset = gap_offset - section.PointerToRawData
    if inside_section_offset > new_section_size:
        overlap = inside_section_offset - new_section_size
        raise RuntimeError(F'trimming from section {(section.Name)}; data extends {overlap} beyond section')

    # Only RVAs/VAs that land inside this section should be shifted.
    rva_lbound = section.VirtualAddress
    rva_ubound = section.VirtualAddress + section.Misc_VirtualSize - 1
    tva_lbound = rva_lbound + base
    tva_ubound = rva_ubound + base

    def adjust_attributes_of_structure(
        structure: Structure,
        threshold: int,
        valid_values_lower_bound: Optional[int],
        valid_values_upper_bound: Optional[int],
        attributes: Iterable[str]
    ):
        # Shift each named attribute down by gap_size when it points past
        # the gap and (optionally) lies within the given bounds.
        # NOTE(review): `threshold` is unused; the comparison below uses
        # gap_offset directly - confirm whether threshold was intended.
        for attribute in attributes:
            old_value = getattr(structure, attribute, 0)
            if old_value <= gap_offset:
                continue
            if valid_values_lower_bound is not None and old_value < valid_values_lower_bound:
                continue
            if valid_values_upper_bound is not None and old_value > valid_values_upper_bound:
                continue
            new_value = old_value - gap_size
            if new_value < gap_offset:
                raise RuntimeError(F'adjusting attribute {attribute} of {structure.name} would result in negative value: {new_value}')
            setattr(structure, attribute, new_value)

    it: Iterable[Structure] = iter(pe.__structures__)
    remove = []

    for index, structure in enumerate(it):
        old_offset = structure.get_file_offset()
        # NOTE(review): subtracting gap_offset looks suspicious - removing
        # gap_size bytes should shift later structures by gap_size
        # (new_offset = old_offset - gap_size).  Confirm against the
        # upstream Binary Refinery implementation before changing.
        new_offset = old_offset - gap_offset

        if old_offset > gap_offset:
            if old_offset < gap_offset + gap_size:
                # Structure lives inside the removed gap: drop it entirely.
                remove.append(index)
                continue
            if isinstance(structure, SectionStructure) and new_offset % alignment != 0:
                raise RuntimeError(
                    F'section {(structure.Name)} would be moved to offset 0x{new_offset:X}, '
                    F'violating section alignment value 0x{alignment:X}.')
            structure.set_file_offset(new_offset)

        try:
            # File-offset-like RVA attributes, bounded to the section.
            adjust_attributes_of_structure(structure, rva_offset, rva_lbound, rva_ubound, (
                'OffsetToData',
                'AddressOfData',
                'VirtualAddress',
                'AddressOfNames',
                'AddressOfNameOrdinals',
                'AddressOfFunctions',
                'AddressOfEntryPoint',
                'AddressOfRawData',
                'BaseOfCode',
                'BaseOfData',
            ))
            # Absolute (image-base-relative) VA attributes.
            adjust_attributes_of_structure(structure, tva_offset, tva_lbound, tva_ubound, (
                'StartAddressOfRawData',
                'EndAddressOfRawData',
                'AddressOfIndex',
                'AddressOfCallBacks',
            ))
            # Raw file offsets (unbounded).
            adjust_attributes_of_structure(structure, gap_offset, None, None, (
                'OffsetModuleName',
                'PointerToRawData',
            ))
        except Exception as e:
            # NOTE(review): silently skips structures that fail to adjust;
            # consider at least logging `e` - confirm this is intentional.
            continue

        # NOTE(review): this loop only tests hasattr and then does nothing -
        # dead code, presumably a stub for debug-directory offsets; confirm.
        for attribute in (
            'CvHeaderOffset',
            'OffsetIn2Qwords',
            'OffsetInQwords',
            'Offset',
            'OffsetLow',
            'OffsetHigh'
        ):
            if not hasattr(structure, attribute):
                continue

    # Delete from the back so earlier indices stay valid.
    while remove:
        index = remove.pop()
        pe.__structures__[index:index + 1] = []

    section.SizeOfRawData = new_section_size
    return pe
refinery_strip(data: memoryview, alignment=1, block_size=_MB) -> int: 260 | if not data: 261 | return 0 262 | threshold = 0.15 263 | data_overhang = len(data) % alignment 264 | result = data_overhang 265 | 266 | if 0 < threshold < 1: 267 | def compression_ratio(offset: int): 268 | ratio = len(zlib.compress(data[:offset], level=1)) / offset 269 | return ratio 270 | upper = len(data) 271 | lower = result 272 | 273 | if compression_ratio(upper) <= threshold: 274 | while block_size < upper - lower: 275 | pivot = (lower + upper) // 2 276 | ratio = compression_ratio(pivot) 277 | if ratio > threshold: 278 | lower = pivot + 1 279 | continue 280 | upper = pivot 281 | if abs(ratio - threshold) < 1e-10: 282 | break 283 | result = upper 284 | while result > 1 and data[result - 2] == data[result -1]: 285 | result -= 1 286 | 287 | result = max(result, data_overhang) 288 | 289 | result = result + (data_overhang - result) % alignment 290 | 291 | if result > len(data): 292 | excess = result - len(data) 293 | excess = excess + (-excess % alignment) 294 | result = result - excess 295 | 296 | return result 297 | 298 | 299 | def refinery_trim_resources(pe: pefile.PE, data_to_delete: List) -> int: 300 | size_limit = 10000 301 | size_removed = 0 302 | 303 | def find_bloated_resources(pe: pefile.PE, directory, level: int = 0, *path) -> Generator[Structure, None, None]: 304 | for entry in directory.entries: 305 | name = getattr(entry, 'name') 306 | numeric_id = getattr(entry, 'id') 307 | if not name: 308 | if level == 0 and numeric_id in iter(rsrc.RSRC): 309 | name = rsrc.RSRC(entry.id) 310 | elif numeric_id is not None: 311 | name = str(numeric_id) 312 | name = name and str(name) or '?' 
def refinery_trim_resources(pe: pefile.PE, data_to_delete: List) -> int:
    """Shrink bloated PE resources in place.

    Walks the resource directory, and for every resource entry larger than
    ``size_limit`` asks :func:`refinery_strip` how much of its data is junk.
    Trimmed byte ranges are appended to ``data_to_delete`` (file offsets in
    the *original* data) and the resource sizes / directory size are patched.

    Returns the total number of bytes removed.  (Fix: the function was
    annotated ``-> int`` but previously fell off the end and returned None.)
    """
    size_limit = 10000  # resources at or below this size are never trimmed
    size_removed = 0

    def find_bloated_resources(pe: pefile.PE, directory, level: int = 0, *path) -> Generator[Structure, None, None]:
        # Depth-first walk yielding (joined_name, data_struct) for every
        # leaf resource whose Size exceeds size_limit.
        for entry in directory.entries:
            name = getattr(entry, 'name')
            numeric_id = getattr(entry, 'id')
            if not name:
                if level == 0 and numeric_id in iter(rsrc.RSRC):
                    # Top-level numeric IDs map to well-known resource types.
                    name = rsrc.RSRC(entry.id)
                elif numeric_id is not None:
                    name = str(numeric_id)
            name = name and str(name) or '?'
            if entry.struct.DataIsDirectory:
                yield from find_bloated_resources(pe, entry.directory, level + 1, *path, name)
                continue
            struct: Structure = entry.data.struct
            name = '/'.join((*path, name))
            if struct.Size <= size_limit:
                continue
            yield name, struct

    RSRC_INDEX = DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
    pe.parse_data_directories(directories=[RSRC_INDEX])

    try:
        resources = pe.DIRECTORY_ENTRY_RESOURCE
    except AttributeError:
        # No resource directory present: nothing to trim.
        return 0
    for name, resource in find_bloated_resources(pe, resources):
        offset = pe.get_offset_from_rva(resource.OffsetToData)
        # Offset may be modified from debloating a previous resource, so map
        # it back to the offset in the original (untrimmed) file data.
        original_offset = offset
        for slice_start, slice_end in data_to_delete:
            if slice_start <= original_offset:
                original_offset += slice_end - slice_start
        old_size = resource.Size
        new_size = refinery_strip(memoryview(pe.__data__)[original_offset:original_offset + old_size], pe.OPTIONAL_HEADER.FileAlignment)
        gap_size = old_size - new_size
        if gap_size <= 0:
            continue
        resource.Size = new_size
        adjust_offsets(pe, offset + new_size, gap_size)
        size_removed += gap_size
        data_to_delete.append((original_offset + new_size, original_offset + old_size))

    pe.OPTIONAL_HEADER.DATA_DIRECTORY[RSRC_INDEX].Size -= size_removed
    return size_removed
def get_compressed_size(data: memoryview, offset: int, level: int = -1):
    """Length in bytes of the zlib stream for ``data[:offset]``.

    Small inputs are compressed in one shot; larger ones are streamed through
    a compressor object in 1 KiB chunks so the compressed output is never
    materialized, only counted.
    """
    if offset <= 1024:
        return len(zlib.compress(data[:offset], level=level))

    compressor = zlib.compressobj(level=level)
    total = 0
    full_chunks = offset // 1024
    for i in range(full_chunks):
        total += len(compressor.compress(data[i * 1024:(i + 1) * 1024]))
    remainder = offset % 1024
    if remainder:
        start = full_chunks * 1024
        total += len(compressor.compress(data[start:start + remainder]))
    # flush() emits whatever the compressor is still buffering.
    total += len(compressor.flush())
    return total
def check_section_compression(pe: pefile.PE, data_to_delete: List,
                              log_message: Callable[[str], None]) -> Tuple[str, int]:
    """Find the largest PE section and, when it looks bloated, trim it.

    Logs a per-section compression summary, then dispatches on the biggest
    section: resource sections go through refinery_trim_resources, .NET
    resources in .text are reported as unsupported, and any other highly
    compressible section is trimmed with trim_junk.

    Returns a (message, result_code) pair.  (Fixes: ``biggest_uncompressed``
    was initialized to the *type* ``int`` rather than a number, which raised
    TypeError for a PE with no qualifying comparison and crashed on an empty
    section table; the return annotation claimed ``Tuple[pefile.PE, int, str]``
    although a (str, int) pair is returned.)
    """
    biggest_section = None
    biggest_uncompressed = 0.0  # fix: was `int` (the type object)
    result = ""
    for section in pe.sections:
        section_name = section.Name.decode("utf8", errors="backslashreplace")
        compressed_section_size = get_compressed_size(
            memoryview(pe.__data__)[section.PointerToRawData : section.PointerToRawData+section.SizeOfRawData],
            section.SizeOfRawData
        )
        # Percentage: >100 means the section shrank under compression.
        section_compression_ratio = section.SizeOfRawData / compressed_section_size * 100
        log_message("Section: " + section_name, end="\t", flush=True)
        log_message(" Compression Ratio: " + str(round(section_compression_ratio, 2)) +"%", end="\t",flush=True)
        log_message("Size of section: " + readable_size(section.SizeOfRawData) +".",flush=True)
        if biggest_section is None or section.SizeOfRawData > biggest_section.SizeOfRawData:
            biggest_section = section
            biggest_uncompressed = section_compression_ratio
    if biggest_section is None:
        # fix: a PE without sections previously crashed on biggest_section.Name
        return result, 0
    # Handle specific bloated sections
    if biggest_section.Name.decode("utf8", errors="backslashreplace") == ".rsrc\x00\x00\x00":
        # Get biggest resource or resources and drop them from the
        # Resource table
        log_message('''
Bloat was located in the resource section. Removing bloat..
''')
        refinery_trim_resources(pe, data_to_delete)
        result_code = 6  # Bloated resource
        return result, result_code

    elif biggest_section.Name.decode("utf8", errors="backslashreplace") == ".text\x00\x00\x00" and biggest_uncompressed > 3000:
        # Data stored in the .text section is often a .NET Resource. The
        # following checks to confirm it is .NET and then drops the resources.
        if pe.OPTIONAL_HEADER.DATA_DIRECTORY[14].Size:
            log_message('''
Bloat was detected in the text section. Bloat is likely in a .NET Resource
This use case cannot be processed at this time. ''')
            result_code = 0  # No solution
            return result, result_code
    if biggest_uncompressed > 3000:
        log_message('''
The compression ratio of ''' + biggest_section.Name.decode("utf8", errors="backslashreplace") + ''' is indicative of a bloated section.
''', end="", flush=True)
        # Get the size of the section.
        biggest_section_end = biggest_section.PointerToRawData + biggest_section.SizeOfRawData
        original_section_size = biggest_section.SizeOfRawData
        biggest_section_data = memoryview(pe.__data__)[biggest_section.PointerToRawData:biggest_section_end]
        delta_last_non_junk, result_code = trim_junk(pe, biggest_section_data, original_section_size)
        # Remove the junk from the section.
        if delta_last_non_junk > original_section_size:
            log_message("Section was not able to be reduced.")
            result_code = 0
            return result, result_code
        data_to_delete.append((biggest_section.PointerToRawData + delta_last_non_junk, biggest_section_end))

        section_bytes_to_remove = original_section_size - delta_last_non_junk
        # Adjust all offsets for the file.
        adjust_offsets(pe, biggest_section.PointerToRawData, section_bytes_to_remove)
        log_message("Bloated section reduced.")
        result_code = 7  # Bloated PE section
        return result, result_code

    # If no bloat was found, return an expected return value
    result_code = 0  # No solution
    return result, result_code
def find_chunk_start(targeted_regex, chunk_start, original_size_with_junk, bloated_content: memoryview, step):
    """Advance ``chunk_start`` over trailing junk that matches a hex pattern.

    Works from the end of ``bloated_content`` backwards in ``step``-sized
    windows: each window is reversed, hex-encoded, and searched with
    ``targeted_regex``; the match length (in hex characters) advances the
    cursor.  On the first window that fails to match, the cursor backs off
    two steps so that real data is not consumed, and the scan stops.
    """
    content_len = len(bloated_content)
    pattern = re.compile(targeted_regex)
    window_end = chunk_start
    while original_size_with_junk > window_end:
        window_end = chunk_start + step
        tail = bytes(bloated_content[max(content_len - window_end, 0):content_len - chunk_start])
        match = pattern.search(binascii.hexlify(tail[::-1]))
        if match is None:
            # The previous window had content which did not match; rewind
            # two steps to make sure we do not remove too much of the file.
            chunk_start -= step * 2
            break
        chunk_start += match.end(0)
    return chunk_start
def trim_junk(pe: "pefile.PE", bloated_content: memoryview,
              original_size_with_junk: int) -> Tuple[int, int]:
    '''Attempt multiple methods to trim junk from the end of a section.

    Returns (delta_last_non_junk, result_code): the number of leading bytes
    to keep, padded to the PE file alignment, and a code describing which
    trimming strategy applied (2 = single repeated byte run, 3 = pattern via
    refinery_strip, 4 = sets of repeated bytes).

    Fixes: the return annotation claimed ``-> int`` for a 2-tuple; the final
    alignment pad added a full ``alignment`` even when the size was already
    aligned (now pads only the shortfall, matching refinery_strip); removed
    the unused ``targeted_regex`` local and the dead ``if not result_code``
    guard (every branch assigns result_code).
    '''
    alignment = pe.OPTIONAL_HEADER.FileAlignment

    delta_last_non_junk = original_size_with_junk
    # First Method: Trims 1 repeating byte.
    # Check against 600 reversed tail bytes for a two-byte pattern repeated
    # more than 20 times; if found, measure the full run below.
    junk_match = re.search(rb'^(..)\1{20,}', bytes(bloated_content[:-601:-1]))
    chunk_start = 0
    if not junk_match:
        # Second method: remove junk using refinery_strip. This method
        # is more efficient than a previous check that was used here.
        delta_last_non_junk = refinery_strip(bloated_content, alignment)
        result_code = 3  # Pattern in overlay.
    else:
        # First method continued: walk backwards in 1000-byte windows while
        # they consist purely of the detected repeating pattern.
        bloated_content_len = len(bloated_content)
        pattern_hex = binascii.hexlify(junk_match.group(1))
        precompiled_chunk = pattern_hex * int(1000 / len(junk_match.group(1)))
        chunk_end = chunk_start
        while original_size_with_junk > chunk_end:
            chunk_end = chunk_start + 1000
            chunk = binascii.hexlify(bytes(bloated_content[max(bloated_content_len - chunk_end, 0):bloated_content_len - chunk_start])[::-1])
            if chunk == precompiled_chunk:
                chunk_start += 1000
                continue
            # If the chunk does not match the precompiled chunk, return to
            # the previous chunk_start so important bytes are not removed.
            if chunk_start > 1000:
                chunk_start -= 1000
            break
        junk_to_remove = chunk_start

        # Third Method: check for a series of one repeated byte.
        # If the trimming did not remove more than half of the bytes then
        # this suggests the attacker may have put a random series of
        # repeated bytes. We use refinery_strip for efficiency.
        if junk_to_remove * 2 < original_size_with_junk / 2:
            delta_last_non_junk = refinery_strip(bloated_content, alignment)
            junk_to_remove = 0  # refinery_strip already accounts for the junk
            result_code = 4  # Sets of repeated bytes in overlay.
        else:
            result_code = 2  # Single repeated byte in overlay
        delta_last_non_junk -= junk_to_remove

    # The returned size must account for the file alignment: pad up to the
    # next multiple only when misaligned (previously an already-aligned size
    # was inflated by a full extra alignment unit).
    delta_last_non_junk += -delta_last_non_junk % alignment
    return delta_last_non_junk, result_code
def process_pe(pe: pefile.PE, out_path: str, last_ditch_processing: bool,
               cert_preservation: bool, log_message: Callable[[str], None],
               beginning_file_size: int = 0) -> int:
    '''Prepare PE, perform checks, remove junk, write patched binary.

    Top-level driver: computes the signature range, dispatches to the
    overlay / NSIS / section trimming strategies, and finally rebuilds the
    file without the byte ranges collected in data_to_delete.

    Returns an integer result code describing which strategy applied
    (0 means no automated method worked).
    '''
    result_code = 0
    if not beginning_file_size:
        # No size supplied by the caller; serialize once to measure it.
        beginning_file_size = len(pe.write())

    # Remove Signature and modify size of Optional Header Security entry.
    signature_address, signature_size = get_signature_info(pe, cert_preservation)

    if cert_preservation == True:
        # Keep a copy of the Authenticode blob so it can be re-appended after
        # the bloat between it and the real data has been removed.
        cert = [(signature_address, signature_address + signature_size)]
        certData = memoryview(pe.__data__)[signature_address:signature_address + signature_size]
        data_to_delete = [(signature_address, signature_address + signature_size)]
    else:
        if signature_size > 0:
            log_message("""A certificate is being removed from this file.\n-To preserve the certificate use the Cert Preservation option.""")
        data_to_delete = [(signature_address, signature_address + signature_size)]

    signature_abnormality, result_code = handle_signature_abnormality(signature_address,
                                                                      signature_size,
                                                                      beginning_file_size,
                                                                      data_to_delete)
    # If the signature itself accounts for a large share of the file, skip
    # straight to the rebuild step below.
    if signature_abnormality is True and sum(slice_end-slice_start for slice_start, slice_end in data_to_delete) >= (beginning_file_size * 0.1):
        pass
    # Handle Overlays: this includes packers and overlays which are completely junk
    elif pe.get_overlay_data_start_offset() and signature_size < len(pe.__data__) - pe.get_overlay_data_start_offset():
        possible_header = pe.__data__[pe.get_overlay_data_start_offset():pe.get_overlay_data_start_offset() + 20_000]
        # Check first to see if the file is NSIS
        nsis_extracted = check_and_extract_NSIS(possible_header, pe)
        if nsis_extracted:
            # NSIS installers are fully extracted instead of trimmed.
            write_multiple_files(out_path, nsis_extracted, log_message)
            result_code = 5  # NSIS Installer
            return result_code

        else:
            log_message("Attempting dynamic trim...")
            last_section = find_last_section(pe)
            if last_section is None:
                log_message("Unable to process. This may indicate the file is malformed.")
                return 0
            overlay = memoryview(pe.__data__)[last_section.PointerToRawData + last_section.SizeOfRawData:signature_address or beginning_file_size]

            # The following checks a sample of the overlay to determine if it will be able to be removed.
            overlay_compression_sample = get_compressed_size(memoryview(overlay)[-2000:], 2000)
            sample_compression = beginning_file_size / overlay_compression_sample
            file_size_wo_overlay = len(memoryview(pe.__data__)[:last_section.PointerToRawData + last_section.SizeOfRawData])
            if sample_compression > 400000:
                # Overlay tail is extremely compressible: treat it as junk.
                required_data_from_overlay, result_code = trim_junk(pe, overlay, beginning_file_size)
                end_of_real_data = file_size_wo_overlay + required_data_from_overlay
                data_to_delete.append(((file_size_wo_overlay + required_data_from_overlay), beginning_file_size))

            else:
                # Overlay looks like real data; fall back to section analysis.
                result, result_code = check_section_compression(pe, data_to_delete, log_message=log_message)
                if len(data_to_delete) == 1:
                    end_of_real_data = beginning_file_size
                else:
                    result_code = 12  # Packed with junk in section
                    end_of_real_data = beginning_file_size - sum(slice_end-slice_start for slice_start, slice_end in data_to_delete)

            if end_of_real_data > beginning_file_size * 0.9:
                # Less than 10% would be removed: only proceed on explicit request.
                if last_ditch_processing is True:
                    log_message("""
"Last ditch" switch detected. Running last ditch debloat technique:\n
This is the last resort that removes the whole overlay: this works in cases where the overlay lacks a pattern.
However, if the file does not run after this, it is in indicator that this method removed critical data.
""")
                    end_of_real_data = last_section.PointerToRawData + last_section.SizeOfRawData
                    data_to_delete.append((end_of_real_data, beginning_file_size))
                else:
                    log_message("""
Overlay was unable to be trimmed. Try unpacking with UniExtract2 or re-running
Debloat with the "--last-ditch" parameter."""
                    )
            elif result_code == 12:
                # The end was already determined and no more data needs to be removed.
                pass
            else:
                data_to_delete.append((end_of_real_data, beginning_file_size))
    # Handle bloated sections
    # TODO: break up into functions
    else:
        # In order to solve some use cases, we will find the biggest section
        # within the binary.
        result, result_code = check_section_compression(pe, data_to_delete, log_message=log_message)
        log_message(result)
    # All processing is done. Report results.
    # There is always the signature in the list
    if len(data_to_delete) == 0 or sum(slice_end-slice_start for slice_start, slice_end in data_to_delete) <= (beginning_file_size * 0.1):
        log_message("""No automated method for reducing the size worked. Please consider sharing the
sample for additional analysis.
Email: Squiblydoo@pm.me
Twitter: @SquiblydooBlog.
""")
        result_code = 0
        return result_code
    else:
        # Rebuild the file, skipping every collected (start, end) slice.
        pe_data = bytearray()
        start = 0
        for slice_start, slice_end in sorted(data_to_delete):
            pe_data += bytearray(pe.__data__[start:slice_start])
            start = slice_end
        pe_data += bytearray(pe.__data__[start:beginning_file_size])
        if cert_preservation == True and signature_size > 0:
            # NOTE(review): result_code 17 appears to be produced by
            # handle_signature_abnormality (not visible here) — confirm.
            if result_code == 17:
                log_message("Certificate is being used for junk and will be removed.")
            else:
                # Re-append the preserved certificate and point the security
                # directory at its new location at the end of the file.
                pe_data += certData
                pe.OPTIONAL_HEADER.DATA_DIRECTORY[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']].VirtualAddress = len(pe_data) - signature_size

        pe.__data__ = pe_data
        final_filesize, new_pe_name = write_patched_file(out_path,
                                                         pe)
        reduction_calculation = round(((beginning_file_size \
                                        - final_filesize) \
                                        / beginning_file_size) * 100, 2)
        log_message("Beginning File size: " \
                    + readable_size(beginning_file_size) + ".")
        log_message("File was reduced by " \
                    + str(reduction_calculation) + "%.")
        log_message("Final file size: " \
                    + readable_size(final_filesize) + ".")
        log_message("Processing complete.\nFile written to '" \
                    + str(new_pe_name) + "'.")
        return result_code
class UnpackResult:
    """One extracted archive member: a path, its data (possibly lazy), and
    arbitrary metadata (``None``-valued metadata keys are dropped)."""

    def get_data(self) -> Union[bytes, bytearray, memoryview]:
        # Fix: the original tested `Callable(self.data)`; typing.Callable is
        # not a predicate (subscripting/calling it raises TypeError), so every
        # call to get_data crashed. The builtin callable() detects a lazy
        # loader, which is materialized and cached on first access.
        if callable(self.data):
            self.data = self.data()
        return self.data

    def __init__(self, _br__path: str, _br__data: Union[Union[bytes, bytearray, memoryview], Callable[[], Union[bytes, bytearray, memoryview]]], **_br__meta):
        self.path = _br__path
        self.data = _br__data
        self.meta = _br__meta
        # Strip metadata entries whose value is None (self.meta aliases
        # _br__meta, so the deletions are reflected in self.meta).
        for key in [key for key, value in _br__meta.items() if value is None]:
            del _br__meta[key]


class ArchiveUnit:
    """Base options holder for archive extractors (port of Binary Refinery's
    ArchiveUnit; parameter names — including ``list`` — are kept for
    compatibility with the original interface)."""

    def __init__(self, *paths,
                 list=False, join_path=False,
                 drop_path=False, fuzzy=0, exact=False,
                 regex=False,
                 date=b'date',
                 path=b'path', **kwargs):
        self.paths = paths
        self.list = list
        self.join_path = join_path
        self.drop_path = drop_path
        self.fuzzy = fuzzy
        self.exact = exact
        self.regex = regex
        self.path = path
        # Fix: a trailing comma (`self.date = date,`) turned this into a
        # one-element tuple, breaking every later use of the attribute.
        self.date = date
        self.kwargs = kwargs

    def _pack(
        self,
        path: str,
        date: Optional[Union[datetime, str]],
        data: Union[Union[bytes, bytearray, memoryview], Callable[[], Union[bytes, bytearray, memoryview]]],
        **meta
    ) -> UnpackResult:
        """Wrap one member as an UnpackResult, recording its date under the
        configured metadata key."""
        if isinstance(date, datetime):
            date = date.isoformat(' ', 'seconds')
        if isinstance(date, str):
            # Fix: the original referenced self.args.date / self.codec, which
            # are never defined on this class (left over from the Binary
            # Refinery port) and raised AttributeError; use the key stored in
            # __init__ instead.
            meta[self.date.decode('utf-8')] = date
        return UnpackResult(path, data, **meta)
class DeflateFile(io.RawIOBase):
    """Readable file object that inflates a raw-deflate stream read from a
    MemoryFile on the fly. Constructing it actually yields an
    io.BufferedReader wrapping this raw object (see __new__)."""

    data: MemoryFile
    # NOTE(review): annotation says zlib.decompress, but the attribute holds a
    # zlib.decompressobj instance (created with wbits=-15 for raw deflate).
    dc: zlib.decompress

    def __new__(cls, data: MemoryFile):
        # Return a BufferedReader over the raw stream instead of the raw
        # object itself, so callers get buffering and readline() for free.
        self = super().__new__(cls)
        self.data = data
        self.dc = zlib.decompressobj(-15)
        return io.BufferedReader(self)

    def readall(self) -> bytes:
        return self.read()

    def readinto(self, __buffer):
        # RawIOBase protocol: fill the caller's buffer, return the byte count.
        data = self.read(len(__buffer))
        size = len(data)
        __buffer[:size] = data
        return size

    def read(self, size=-1):
        # Prefer compressed input left over from the previous call; only hit
        # the underlying stream when the tail is exhausted.
        buffer = self.dc.unconsumed_tail or self.data.read(size)
        kwargs = {}
        if size > 0:
            # Cap the decompressed output; surplus input stays in
            # unconsumed_tail for the next read.
            kwargs.update(max_length=size)
        return self.dc.decompress(buffer, **kwargs)

    def readable(self) -> bool:
        return True

    def seekable(self) -> bool:
        return False

    def writable(self) -> bool:
        return False

    def write(self, __b):
        raise NotImplementedError


class LZMAOptions(NamedTuple):
    # Decoder options parsed from an NSIS LZMA stream header.
    filter_flag: bool
    dictionary_size: int


class NSBlockHeaderOffset(Struct):
    """Offset/size pair for one NSIS header block; offsets are 8 bytes in
    64-bit installers, 4 bytes otherwise."""
    def __init__(self, reader: StructReader, is64bit: bool):
        self.offset = reader.u64() if is64bit else reader.u32()
        self.size = reader.u32()


class NSMethod(str, enum.Enum):
    """Compression methods an NSIS installer can use for its payload."""
    Copy = 'COPY'
    LZMA = 'LZMA'
    BZip2 = 'BZIP2'
    Deflate = 'DEFLATE'
    NSGzip = 'NsisGzip'
class Op(enum.IntEnum):
    """NSIS script instruction opcodes.

    Values come from enum.auto(), so the member ORDER defines the numeric
    opcode IDs — do not reorder or insert members. Commented-out members
    document opcodes from NSIS/Park variants that occupy no slot here.
    """
    INVALID_OPCODE = 0        # noqa
    RET = enum.auto()         # noqa; Return
    NOP = enum.auto()         # noqa; Nop, Goto
    ABORT = enum.auto()       # noqa; Abort
    QUIT = enum.auto()        # noqa; Quit
    CALL = enum.auto()        # noqa; Call, InitPluginsDir
    UPDATETEXT = enum.auto()  # noqa; DetailPrint
    SLEEP = enum.auto()       # noqa; Sleep
    BRINGTOFRONT = enum.auto()       # noqa; BringToFront
    CHDETAILSVIEW = enum.auto()      # noqa; SetDetailsView
    SETFILEATTRIBUTES = enum.auto()  # noqa; SetFileAttributes
    CREATEDIR = enum.auto()    # noqa; CreateDirectory, SetOutPath
    IFFILEEXISTS = enum.auto() # noqa; IfFileExists
    SETFLAG = enum.auto()      # noqa; SetRebootFlag, ...
    IFFLAG = enum.auto()       # noqa; IfAbort, IfSilent, IfErrors, IfRebootFlag
    GETFLAG = enum.auto()      # noqa; GetInstDirError, GetErrorLevel
    RENAME = enum.auto()       # noqa; Rename
    GETFULLPATHNAME = enum.auto()  # noqa; GetFullPathName
    SEARCHPATH = enum.auto()       # noqa; SearchPath
    GETTEMPFILENAME = enum.auto()  # noqa; GetTempFileName
    EXTRACTFILE = enum.auto()  # noqa; File
    DELETEFILE = enum.auto()   # noqa; Delete
    MESSAGEBOX = enum.auto()   # noqa; MessageBox
    RMDIR = enum.auto()        # noqa; RMDir
    STRLEN = enum.auto()       # noqa; StrLen
    ASSIGNVAR = enum.auto()    # noqa; StrCpy
    STRCMP = enum.auto()       # noqa; StrCmp
    READENVSTR = enum.auto()   # noqa; ReadEnvStr, ExpandEnvStrings
    INTCMP = enum.auto()       # noqa; IntCmp, IntCmpU
    INTOP = enum.auto()        # noqa; IntOp
    INTFMT = enum.auto()       # noqa; IntFmt/Int64Fmt
    PUSHPOP = enum.auto()      # noqa; Push/Pop/Exchange
    FINDWINDOW = enum.auto()   # noqa; FindWindow
    SENDMESSAGE = enum.auto()  # noqa; SendMessage
    ISWINDOW = enum.auto()     # noqa; IsWindow
    GETDLGITEM = enum.auto()   # noqa; GetDlgItem
    SETCTLCOLORS = enum.auto() # noqa; SetCtlColors
    SETBRANDINGIMAGE = enum.auto()  # noqa; SetBrandingImage / LoadAndSetImage
    CREATEFONT = enum.auto()   # noqa; CreateFont
    SHOWWINDOW = enum.auto()   # noqa; ShowWindow, EnableWindow, HideWindow
    SHELLEXEC = enum.auto()    # noqa; ExecShell
    EXECUTE = enum.auto()      # noqa; Exec, ExecWait
    GETFILETIME = enum.auto()  # noqa; GetFileTime
    GETDLLVERSION = enum.auto()  # noqa; GetDLLVersion
    # GETFONTVERSION = enum.auto()  # noqa; Park : 2.46.2
    # GETFONTNAME = enum.auto()     # noqa; Park : 2.46.3
    REGISTERDLL = enum.auto()    # noqa; RegDLL, UnRegDLL, CallInstDLL
    CREATESHORTCUT = enum.auto() # noqa; CreateShortCut
    COPYFILES = enum.auto()      # noqa; CopyFiles
    REBOOT = enum.auto()         # noqa; Reboot
    WRITEINI = enum.auto()       # noqa; WriteINIStr, DeleteINISec, DeleteINIStr, FlushINI
    READINISTR = enum.auto()     # noqa; ReadINIStr
    DELREG = enum.auto()         # noqa; DeleteRegValue, DeleteRegKey
    WRITEREG = enum.auto()       # noqa; WriteRegStr, WriteRegExpandStr, WriteRegBin, WriteRegDWORD
    READREGSTR = enum.auto()     # noqa; ReadRegStr, ReadRegDWORD
    REGENUM = enum.auto()        # noqa; EnumRegKey, EnumRegValue
    FCLOSE = enum.auto()         # noqa; FileClose
    FOPEN = enum.auto()          # noqa; FileOpen
    FPUTS = enum.auto()          # noqa; FileWrite, FileWriteByte
    FGETS = enum.auto()          # noqa; FileRead, FileReadByte
    # Park:
    # FPUTWS = enum.auto()  # noqa; FileWriteUTF16LE, FileWriteWord
    # FGETWS = enum.auto()  # noqa; FileReadUTF16LE, FileReadWord
    FSEEK = enum.auto()          # noqa; FileSeek
    FINDCLOSE = enum.auto()      # noqa; FindClose
    FINDNEXT = enum.auto()       # noqa; FindNext
    FINDFIRST = enum.auto()      # noqa; FindFirst
    WRITEUNINSTALLER = enum.auto()  # noqa; WriteUninstaller
    # Park : since 2.46.3 the log is enabled in main Park version
    # LOG = enum.auto()  # noqa; LogSet, LogText
    SECTIONSET = enum.auto()     # noqa; Get*, Set*
    INSTTYPESET = enum.auto()    # noqa; InstTypeSetText, InstTypeGetText, SetCurInstType, GetCurInstType
    # Before NSIS v3.06: Instructions not actually implemented in exehead, but used in compiler.
    # GETLABELADDR = enum.auto()  # noqa; both of these get converted to ASSIGNVAR
    # GETFUNCTIONADDR = enum.auto()  # noqa
    # In NSIS v3.06 and later it was changed to:
    GETOSINFO = enum.auto()      # noqa
    RESERVEDOPCODE = enum.auto() # noqa
    LOCKWINDOW = enum.auto()     # noqa; LockWindow
    # Two unicode commands available only in Unicode archive:
    FPUTWS = enum.auto()         # noqa; FileWriteUTF16LE, FileWriteWord
    FGETWS = enum.auto()         # noqa; FileReadUTF16LE, FileReadWord
    # Since NSIS v3.06 the following ID codes were moved here:
    # Opcodes listed here are not actually used in exehead.
    # No exehead opcodes should be present after these!
    # GETLABELADDR = enum.auto()  # noqa; ASSIGNVAR
    # GETFUNCTIONADDR = enum.auto()  # noqa; ASSIGNVAR
    # The following IDs are not IDs in real order.
    # We just need some IDs to translate any extended layout to main layout:
    LOG = enum.auto()            # noqa; LogSet, LogText
    # Park
    FINDPROC = enum.auto()       # noqa; FindProc
    GETFONTVERSION = enum.auto() # noqa; GetFontVersion
    GETFONTNAME = enum.auto()    # noqa; GetFontName

    @classmethod
    def from_int(cls, value: int) -> 'Op':
        # Unknown raw opcode values degrade to INVALID_OPCODE instead of
        # raising, so a corrupt script does not abort parsing.
        try:
            return cls(value)
        except ValueError:
            return cls.INVALID_OPCODE
# Number of argument slots each NSIS opcode consumes (out of the 6 stored
# per instruction); used when decoding/printing script instructions.
_Op_PARAMETER_COUNT = {
    Op.INVALID_OPCODE    : 0,
    Op.RET               : 0,
    Op.NOP               : 1,
    Op.ABORT             : 1,
    Op.QUIT              : 0,
    Op.CALL              : 2,
    Op.UPDATETEXT        : 6,
    Op.SLEEP             : 1,
    Op.BRINGTOFRONT      : 0,
    Op.CHDETAILSVIEW     : 2,
    Op.SETFILEATTRIBUTES : 2,
    Op.CREATEDIR         : 3,
    Op.IFFILEEXISTS      : 3,
    Op.SETFLAG           : 3,
    Op.IFFLAG            : 4,
    Op.GETFLAG           : 2,
    Op.RENAME            : 4,
    Op.GETFULLPATHNAME   : 3,
    Op.SEARCHPATH        : 2,
    Op.GETTEMPFILENAME   : 2,
    Op.EXTRACTFILE       : 6,
    Op.DELETEFILE        : 2,
    Op.MESSAGEBOX        : 6,
    Op.RMDIR             : 2,
    Op.STRLEN            : 2,
    Op.ASSIGNVAR         : 4,
    Op.STRCMP            : 5,
    Op.READENVSTR        : 3,
    Op.INTCMP            : 6,
    Op.INTOP             : 4,
    Op.INTFMT            : 4,
    Op.PUSHPOP           : 6,
    Op.FINDWINDOW        : 5,
    Op.SENDMESSAGE       : 6,
    Op.ISWINDOW          : 3,
    Op.GETDLGITEM        : 3,
    Op.SETCTLCOLORS      : 2,
    Op.SETBRANDINGIMAGE  : 4,
    Op.CREATEFONT        : 5,
    Op.SHOWWINDOW        : 4,
    Op.SHELLEXEC         : 6,
    Op.EXECUTE           : 3,
    Op.GETFILETIME       : 3,
    Op.GETDLLVERSION     : 4,
    Op.REGISTERDLL       : 6,
    Op.CREATESHORTCUT    : 6,
    Op.COPYFILES         : 4,
    Op.REBOOT            : 1,
    Op.WRITEINI          : 5,
    Op.READINISTR        : 4,
    Op.DELREG            : 5,
    Op.WRITEREG          : 6,
    Op.READREGSTR        : 5,
    Op.REGENUM           : 5,
    Op.FCLOSE            : 1,
    Op.FOPEN             : 4,
    Op.FPUTS             : 3,
    Op.FGETS             : 4,
    Op.FSEEK             : 4,
    Op.FINDCLOSE         : 1,
    Op.FINDNEXT          : 2,
    Op.FINDFIRST         : 3,
    Op.WRITEUNINSTALLER  : 4,
    Op.SECTIONSET        : 5,
    Op.INSTTYPESET       : 4,
    Op.GETOSINFO         : 6,
    Op.RESERVEDOPCODE    : 2,
    Op.LOCKWINDOW        : 1,
    Op.FPUTWS            : 4,
    Op.FGETWS            : 4,
    Op.LOG               : 2,
    Op.FINDPROC          : 2,
    Op.GETFONTVERSION    : 2,
    Op.GETFONTNAME       : 2,
}
# Maps NSIS shell-folder constant indices (CSIDL-style values) to the
# symbolic names used when rendering $SHELL string codes.
NS_SHELL_STRINGS = {
    0x00: 'DESKTOP',
    0x01: 'INTERNET',
    0x02: 'SMPROGRAMS',
    0x03: 'CONTROLS',
    0x04: 'PRINTERS',
    0x05: 'DOCUMENTS',
    0x06: 'FAVORITES',
    0x07: 'SMSTARTUP',
    0x08: 'RECENT',
    0x09: 'SENDTO',
    0x0A: 'BITBUCKET',
    0x0B: 'STARTMENU',
    0x0D: 'MUSIC',
    0x0E: 'VIDEOS',
    0x10: 'DESKTOP',
    0x11: 'DRIVES',
    0x12: 'NETWORK',
    0x13: 'NETHOOD',
    0x14: 'FONTS',
    0x15: 'TEMPLATES',
    0x16: 'STARTMENU',
    0x17: 'SMPROGRAMS',
    0x18: 'SMSTARTUP',
    0x19: 'DESKTOP',
    0x1A: 'APPDATA',
    0x1B: 'PRINTHOOD',
    0x1C: 'LOCALAPPDATA',
    0x1D: 'ALTSTARTUP',
    0x1E: 'ALTSTARTUP',
    0x1F: 'FAVORITES',
    0x20: 'INTERNET_CACHE',
    0x21: 'COOKIES',
    0x22: 'HISTORY',
    0x23: 'APPDATA',
    0x24: 'WINDIR',
    0x25: 'SYSDIR',
    0x26: 'PROGRAM_FILES',
    0x27: 'PICTURES',
    0x28: 'PROFILE',
    0x29: 'SYSTEMX86',
    0x2A: 'PROGRAM_FILESX86',
    0x2B: 'PROGRAM_FILES_COMMON',
    0x2C: 'PROGRAM_FILES_COMMONX8',
    0x2D: 'TEMPLATES',
    0x2E: 'DOCUMENTS',
    0x2F: 'ADMINTOOLS',
    0x30: 'ADMINTOOLS',
    0x31: 'CONNECTIONS',
    0x35: 'MUSIC',
    0x36: 'PICTURES',
    0x37: 'VIDEOS',
    0x38: 'RESOURCES',
    0x39: 'RESOURCES_LOCALIZED',
    0x3A: 'COMMON_OEM_LINKS',
    0x3B: 'CDBURN_AREA',
    0x3D: 'COMPUTERSNEARME',
}
"INSTDIR", 378 | "OUTDIR", 379 | "EXEDIR", 380 | "LANGUAGE", 381 | "TEMP", 382 | "PLUGINSDIR", 383 | "EXEPATH", # NSIS 2.26+ 384 | "EXEFILE", # NSIS 2.26+ 385 | "HWNDPARENT", 386 | "CLICK", # set from page->clicknext 387 | "OUTDIR", # NSIS 2.04+ 388 | ) 389 | 390 | class NSHeaderFlags(enum.IntFlag): 391 | Undefined = 0 392 | Uninstall = 1 393 | Silent = 2 394 | NoCrc = 4 395 | ForceCrc = 8 396 | LongOffset = 16 397 | ExternalFileSupport = 32 398 | ExternalFile = 64 399 | IsStubInstaller = 128 400 | 401 | 402 | 403 | class NSType(enum.IntEnum): 404 | Nsis2 = 0 405 | Nsis3 = enum.auto() 406 | Park1 = enum.auto() 407 | Park2 = enum.auto() 408 | Park3 = enum.auto() 409 | 410 | class NSScriptInstruction(Struct): 411 | def __init__(self, reader: StructReader): 412 | self.opcode = reader.u32() 413 | self.arguments = [reader.u32() for _ in range(6)] 414 | 415 | class NSScriptExtendedInstruction(Struct): 416 | def __init__(self, reader: StructReader): 417 | self.opcode = reader.u32() 418 | self.arguments = [reader.u32() for _ in range(8)] 419 | 420 | 421 | class NSCharCode(enum.IntEnum): 422 | NONE = 0 423 | CHAR = enum.auto() 424 | SKIP = enum.auto() 425 | SHELL = enum.auto() 426 | VAR = enum.auto() 427 | LANG = enum.auto() 428 | 429 | @property 430 | def special(self): 431 | return self > NSCharCode.CHAR 432 | 433 | @dataclasses.dataclass 434 | class NSItem: 435 | offset: int 436 | name: Optional[str] = None 437 | mtime: Optional[datetime] = None 438 | is_compressed: bool = True 439 | is_uninstaller: bool = False 440 | attributes: Optional[int] = None 441 | size: Optional[int] = None 442 | compressed_size: Optional[int] = None 443 | estimated_size: Optional[int] = None 444 | dictionary_size: int = 1 445 | patch_size: int = 0 446 | prefix: Optional[str] = None 447 | 448 | @property 449 | def path(self) -> str: 450 | path = self.name 451 | if self.prefix: 452 | path = F'{self.prefix}\\{path}' 453 | return path 454 | 455 | def __str__(self) -> str: 456 | return self.name 457 
@dataclasses.dataclass
class NSItem:
    """Metadata for one file entry inside an NSIS installer payload."""
    offset: int
    name: Optional[str] = None
    mtime: Optional[datetime] = None
    is_compressed: bool = True
    is_uninstaller: bool = False
    attributes: Optional[int] = None
    size: Optional[int] = None
    compressed_size: Optional[int] = None
    estimated_size: Optional[int] = None
    dictionary_size: int = 1
    patch_size: int = 0
    prefix: Optional[str] = None

    @property
    def path(self) -> str:
        # Join the directory prefix (when present) with a backslash, as NSIS
        # paths are Windows-style.
        if not self.prefix:
            return self.name
        return F'{self.prefix}\\{self.name}'

    def __str__(self) -> str:
        return self.name

    def __eq__(self, other) -> bool:
        # Two items are equal when every descriptive attribute (including the
        # computed path) matches; non-NSItem values compare unequal.
        if not other or not isinstance(other, self.__class__):
            return False
        attributes = (
            'offset', 'mtime', 'is_compressed', 'is_uninstaller',
            'attributes', 'size', 'compressed_size', 'estimated_size',
            'dictionary_size', 'patch_size', 'path',
        )
        return all(getattr(self, a) == getattr(other, a) for a in attributes)
elif self.type is NSType.Nsis2: 523 | lookup = { 524 | 0x00FC: NSCharCode.SKIP, 525 | 0x00FD: NSCharCode.VAR, 526 | 0x00FE: NSCharCode.SHELL, 527 | } 528 | else: 529 | raise ValueError(F'Unknown NSIS type {self.type}.') 530 | return lookup.get(char, NSCharCode.NONE) 531 | 532 | def _string_code_shell(self, index1: int, 533 | index2: Optional[int] = None) -> str: 534 | if index2 is None: 535 | index2 = index1 >> 8 536 | index1 &= 0xFF 537 | if index1 & 0x80 != 0: 538 | offset = index1 & 0x3F 539 | with StreamDetour(self.strings, offset): 540 | if self.strings.tell() != offset: 541 | raise ValueError(F'Failed to detour to offset 0x{offset:02X}.') 542 | path = self._read_current_string() 543 | if path.startswith('ProgramFilesDir'): 544 | return '$PROGRAMFILES' 545 | if path.startswith('CommonFilesDir'): 546 | return '$COMMONFILES' 547 | suffix = 32 * (index1 >> 5 & 2) 548 | return F'$REG{suffix}({path})' 549 | for index in (index1, index2): 550 | shell = NS_SHELL_STRINGS.get(index) 551 | if shell is not None: 552 | return F'$SHELL:{shell}' 553 | else: 554 | return F'Error:$SHELL:{index1:02X}{index2:02X}' 555 | 556 | def _string_code_variable(self, index: int) -> str: 557 | varcount = 20 + len(NS_VARIABLE_STRINGS) 558 | if self._is_nsis200: 559 | varcount -= 3 560 | elif self._is_nsis225: 561 | varcount -= 2 562 | if index < 20: 563 | if index >= 10: 564 | return F'$R{index - 10}' 565 | return F'$V{index}' 566 | else: 567 | if index < varcount: 568 | if self._is_nsis225 and index >= self.NS_EXEPATH: 569 | index += 2 570 | try: 571 | variable = NS_VARIABLE_STRINGS[index - 20] 572 | except IndexError: 573 | return F'Error:$V:{index}' 574 | else: 575 | return F'${variable}' 576 | return F'Error:$V:{index}' 577 | 578 | def _string_code_language(self, index: int) -> str: 579 | return F'$LANGUAGE:{index}' 580 | 581 | @property 582 | def _read_char(self) -> str: 583 | return self.strings.u16 if self.unicode else self.strings.u8 584 | 585 | def _seek_to_string(self, position: 
int) -> bool: 586 | pos = position * self.charsize 587 | return self.strings.seek(pos) == pos 588 | 589 | def _read_string(self, position: int) -> Optional[str]: 590 | if position < 0: 591 | return self._string_code_language(-(position + 1)) 592 | if not self._seek_to_string(position): 593 | return None 594 | return self._read_current_string() 595 | 596 | def _read_string_raw(self, position: int) -> Optional[str]: 597 | if not self._seek_to_string(position): 598 | return None 599 | if self.unicode: 600 | return self.strings.read_w_string() 601 | else: 602 | return self.strings.read_c_string() 603 | 604 | def _is_var_absolute_path(self, position: int) -> bool: 605 | var = self._get_var_index(position) 606 | if var is None: 607 | return False 608 | return var in ( 609 | self.NS_INSTDIR, 610 | self.NS_EXEDIR, 611 | self.NS_TEMP, 612 | self.NS_PLUGINSDIR, 613 | ) 614 | 615 | def _is_good_string(self, position: int) -> bool: 616 | if position == 0: 617 | return False 618 | if not self._seek_to_string(position - 1): 619 | return False 620 | prefix = self._read_char() 621 | return prefix == 0 or prefix == self.BACKSLASH 622 | 623 | def _is_var_str(self, position: int, index: int) -> bool: 624 | if index > 0x7FFF: 625 | return False 626 | var_index = self._get_var_index(position) 627 | if var_index is None: 628 | return False 629 | if self._get_resource_finished(position, 0) is None: 630 | return False 631 | return var_index == index 632 | 633 | def _get_var_index(self, position: int) -> Optional[int]: 634 | if not self._seek_to_string(position): 635 | raise LookupError(F'Failed to seek to string at position 0x{position:08X}.') 636 | try: 637 | code = self._read_char() 638 | if self._get_char_code(code) is not NSCharCode.VAR: 639 | return None 640 | arg1 = self._read_char() 641 | if arg1 == 0: 642 | return None 643 | if self.unicode: 644 | args = arg1, 645 | else: 646 | arg2 = self._read_char() 647 | if arg2 == 0: 648 | return None 649 | args = arg1, arg2 650 | return 
self._string_args_to_single_arg(*args) 651 | except EOFError: 652 | return None 653 | 654 | def _get_resource(self, position: int) -> Optional[int]: 655 | if self.unicode: 656 | if len(self.strings) - position >= 4: 657 | return 2 658 | else: 659 | if len(self.strings) - position >= 3: 660 | return 3 661 | return None 662 | 663 | def _get_resource_finished(self, position: int, 664 | terminator: int) -> Optional[int]: 665 | if not self._seek_to_string(position): 666 | return None 667 | self.strings.seek_relative(3) 668 | if self.unicode: 669 | self.strings.seek_relative(1) 670 | if self.strings.remaining_bytes < self.charsize: 671 | return None 672 | if self._read_char() != terminator: 673 | return None 674 | return 3 if self.unicode else 4 675 | 676 | 677 | @property 678 | def charsize(self) -> int: 679 | return 2 if self.unicode else 1 680 | 681 | def _read_current_string(self) -> str: 682 | string = io.StringIO() 683 | chars = iter(self._read_char, 0) 684 | for letter in chars: 685 | code = self._get_char_code(letter) 686 | if code is NSCharCode.CHAR: 687 | string.write(chr(letter)) 688 | continue 689 | if code.special: 690 | try: 691 | var1 = next(chars) 692 | except StopIteration: 693 | break 694 | if var1 == 0: 695 | break 696 | if code is NSCharCode.SKIP: 697 | letter = var1 698 | else: 699 | if not self.unicode: 700 | try: 701 | var2 = next(chars) 702 | except StopIteration: 703 | break 704 | if var2 == 0: 705 | break 706 | vars = var1, var2 707 | else: 708 | vars = var1, 709 | if code is NSCharCode.SHELL: 710 | string.write(self._string_code_shell(*vars)) 711 | continue 712 | else: 713 | var = self._string_args_to_single_arg(*vars) 714 | if code is NSCharCode.VAR: 715 | string.write(self._string_code_variable(var)) 716 | if code is NSCharCode.LANG: 717 | string.write(self._string_code_language(var)) 718 | continue 719 | string.write(chr(letter)) 720 | return string.getvalue() 721 | 722 | def opcode(self, cmd: NSScriptInstruction) -> Op: 723 | code = 
cmd.opcode 724 | if self.type < NSType.Park1: 725 | if self._log_cmd_is_enabled: 726 | return Op.from_int(code) 727 | if code < Op.SECTIONSET: 728 | return Op.from_int(code) 729 | if code is Op.SECTIONSET: 730 | return Op.LOG 731 | return Op.from_int(code - 1) 732 | if code < Op.REGISTERDLL: 733 | return Op.from_int(code) 734 | if self.type >= NSType.Park2: 735 | if code == Op.REGISTERDLL: 736 | return Op.GETFONTVERSION 737 | code -= 1 738 | if self.type >= NSType.Park3: 739 | if code == Op.REGISTERDLL: 740 | return Op.GETFONTNAME 741 | code -= 1 742 | if code >= Op.FSEEK: 743 | if self.unicode: 744 | if code == Op.FSEEK: 745 | return Op.FPUTWS 746 | if code == Op.FSEEK + 1: 747 | return Op.FGETWS 748 | code -= 2 749 | if code >= Op.SECTIONSET and self._log_cmd_is_enabled: 750 | if code == Op.SECTIONSET: 751 | return Op.LOG 752 | return Op.from_int(code - 1) 753 | if code == Op.FPUTWS: 754 | return Op.FINDPROC 755 | return Op.from_int(code) 756 | 757 | def _find_bad_cmd(self) -> None: 758 | self._bad_cmd = -1 759 | for instruction in self.instructions: 760 | cmd = self.opcode(instruction) 761 | arg = instruction.arguments 762 | if cmd is Op.INVALID_OPCODE: 763 | continue 764 | if cmd >= self._bad_cmd >= 0: 765 | continue 766 | if self.type is NSType.Nsis3: 767 | if cmd == Op.RESERVEDOPCODE: 768 | self._bad_cmd = cmd 769 | continue 770 | else: 771 | if cmd == Op.RESERVEDOPCODE or cmd == Op.GETOSINFO: 772 | self._bad_cmd = cmd 773 | continue 774 | last_non_empty_index = max((k for k, a in enumerate(arg, 1) if a), default=0) 775 | if cmd == Op.FINDPROC and last_non_empty_index == 0: 776 | self._bad_cmd = cmd 777 | continue 778 | if _Op_PARAMETER_COUNT[cmd] < last_non_empty_index: 779 | self._bad_cmd = cmd 780 | 781 | def _guess_nsis_version(self): 782 | self.strong_nsis = False 783 | self.strong_park = False 784 | char_mask = 0x8080 if self.unicode else 0x80 785 | self.strings.seek(0) 786 | while not self.strings.is_eof: 787 | string = self._read_current_string() 788 
| if string is None: 789 | continue 790 | if len(string) < 2: 791 | continue 792 | if ord(string[0]) != 3: 793 | continue 794 | if ord(string[1]) & char_mask == char_mask: 795 | self.type = NSType.Nsis3 796 | self.strong_nsis = True 797 | break 798 | if self.unicode: 799 | if not self.strong_nsis: 800 | self.type = NSType.Park1 801 | self.strong_park = True 802 | elif self.type is NSType.Nsis2: 803 | for instruction in self.instructions: 804 | cmd = self.opcode(instruction) 805 | arg = instruction.arguments 806 | if cmd is Op.GETDLGITEM: 807 | if self._is_var_str(arg[1], self.NS_HWNDPARENT_225): 808 | self._is_nsis225 = True 809 | if arg[0] == self.NS_OUTDIR_225: 810 | self._is_nsis200 = True 811 | break 812 | if cmd is Op.ASSIGNVAR: 813 | if arg[0] == self.NS_OUTDIR_225 and arg[2] == 0 and arg[3] == 0: 814 | self._is_nsis225 = self._is_var_str(arg[1], self.NS_OUTDIR) 815 | got_park_version = False 816 | mask = 0 817 | IN = 4 if self.unicode else 2 818 | if not self.strong_nsis and not self._is_nsis225 and not self._is_nsis200: 819 | for instruction in self.instructions: 820 | cmd = instruction.opcode 821 | arg = instruction.arguments 822 | alt = arg[3] 823 | if cmd < Op.WRITEUNINSTALLER or cmd > Op.WRITEUNINSTALLER + IN: 824 | continue 825 | if arg[4] != 0 or arg[5] != 0 or arg[0] <= 1 or alt <= 1: 826 | continue 827 | if not self._is_good_string(arg[0]) or not self._is_good_string(alt): 828 | continue 829 | index = self._get_var_index(alt) 830 | if index is None: 831 | continue 832 | additional = self._get_resource_finished(alt, self.BACKSLASH) 833 | if index != self.NS_INSTDIR: 834 | continue 835 | if self._read_string_raw(alt + additional) == self._read_string_raw(arg[0]): 836 | inserts = cmd - Op.WRITEUNINSTALLER.value 837 | mask |= 1 << inserts 838 | if mask == 1: 839 | got_park_version = True 840 | elif mask: 841 | shift = 0 842 | nt = self.type 843 | if self.unicode: 844 | shift = 2 845 | if mask == 1 << (shift + 1): 846 | nt = NSType.Park2 847 | if mask == 
1 << (shift + 2): 848 | nt = NSType.Park3 849 | if nt != self.type: 850 | got_park_version = True 851 | self.type = nt 852 | self._find_bad_cmd() 853 | if self._bad_cmd < Op.REGISTERDLL: 854 | return 855 | if self.strong_park and not got_park_version: 856 | if self._bad_cmd < Op.SECTIONSET: 857 | self.type = NSType.Park3 858 | self._log_cmd_is_enabled = True 859 | self._find_bad_cmd() 860 | if self._bad_cmd in range(Op.SECTIONSET): 861 | self.type = NSType.Park2 862 | self._log_cmd_is_enabled = False 863 | self._find_bad_cmd() 864 | if self._bad_cmd in range(Op.SECTIONSET): 865 | self.type = NSType.Park1 866 | self._find_bad_cmd() 867 | if self._bad_cmd >= Op.SECTIONSET: 868 | self._log_cmd_is_enabled = not self._log_cmd_is_enabled 869 | self._find_bad_cmd() 870 | if self._bad_cmd >= Op.SECTIONSET and self._log_cmd_is_enabled: 871 | self._log_cmd_is_enabled = False 872 | self._find_bad_cmd() 873 | 874 | def _read_items(self) -> List[NSItem]: 875 | prefixes = ['$INSTDIR'] 876 | out_dir = '' 877 | out_dir_index = ( 878 | self.NS_OUTDIR_225 879 | ) if self._is_nsis225 else ( 880 | self.NS_OUTDIR_226 881 | ) 882 | items: List[NSItem] = [] 883 | 884 | for cmd_index, instruction in enumerate(self.instructions): 885 | def set_path(index:int) -> None: 886 | item.prefix = None 887 | item.name = self._read_string(index) 888 | if not self._is_var_absolute_path(index): 889 | item.prefix = prefixes[-1] 890 | 891 | cmd = self.opcode(instruction) 892 | arg = instruction.arguments 893 | 894 | if cmd is Op.INVALID_OPCODE: 895 | continue 896 | elif cmd is Op.CREATEDIR: 897 | if not arg[1]: 898 | continue 899 | _path = arg[0] 900 | index = self._get_var_index(_path) 901 | if index in (out_dir_index, self.NS_OUTDIR): 902 | _path += self._get_resource(_path) 903 | path = self._read_string(_path) 904 | if index == out_dir_index: 905 | path = out_dir + path 906 | elif index == self.NS_OUTDIR: 907 | path = prefixes[-1] + path 908 | prefixes.append(path) 909 | elif cmd is Op.ASSIGNVAR: 910 
| if arg[0] != out_dir_index: 911 | continue 912 | if self._is_var_str(arg[1], self.NS_OUTDIR) and arg[2] == 0 and arg[3] == 0: 913 | out_dir = prefixes[-1] 914 | elif cmd is Op.EXTRACTFILE: 915 | try: 916 | time = datetime.fromtimestamp(arg[4] << 32 | arg[3]) 917 | except Exception: 918 | time = None 919 | item = NSItem(arg[2], mtime=time) 920 | set_path(arg[1]) 921 | items.append(item) 922 | if not self._is_var_str(arg[1], 10): 923 | continue 924 | cmd_back_offset = 28 925 | if cmd_index > 1: 926 | previous = self.instructions[cmd_index - 1] 927 | if self.opcode(previous) is Op.NOP: 928 | cmd_back_offset -= 2 929 | if cmd_index <= cmd_back_offset: 930 | continue 931 | previous = self.instructions[cmd_index - cmd_back_offset] 932 | if self.opcode(previous) is Op.ASSIGNVAR: 933 | previous_arguments = previous.arguments 934 | if previous_arguments[0] == 14 and previous_arguments[2] == 0 and previous_arguments[3] == 0: 935 | set_path(previous_arguments[1]) 936 | elif cmd is Op.SETFILEATTRIBUTES: 937 | if cmd_index > 0: 938 | previous = self.instructions[cmd_index - 1] 939 | previous_arguments = previous.arguments 940 | if self.opcode(previous) is Op.EXTRACTFILE and arg[0] == previous_arguments[1]: 941 | item = items[-1] 942 | item.attributes = arg[1] 943 | elif cmd is Op.WRITEUNINSTALLER: 944 | if arg[4] or arg[5] or arg[0] <=1 or arg[3] <= 1: 945 | continue 946 | if not self._is_good_string(arg[0]): 947 | continue 948 | if self._bad_cmd in range(Op.WRITEUNINSTALLER): 949 | continue 950 | item = NSItem(arg[1]) 951 | set_path(arg[0]) 952 | item.patch_size = arg[2] 953 | item.is_uninstaller = True 954 | items.append(item) 955 | return items 956 | 957 | @property 958 | def script(self): 959 | script = io.StringIO() 960 | name_width = max(len(op.name) for op in Op) 961 | addr_width = len(F'{len(self.instructions):X}') 962 | for k, instruction in enumerate(self.instructions): 963 | if k > 0: 964 | script.write('\n') 965 | opcode = self.opcode(instruction) 966 | 
script.write(F'{k:0{addr_width}X} {opcode.name:{name_width}}') 967 | for j, arg in enumerate(instruction.arguments[:_Op_PARAMETER_COUNT.get(opcode, 6)]): 968 | if j > 0: 969 | script.write(', ') 970 | if arg > 20 and self._is_good_string(arg): 971 | script.write(repr(self._read_string(arg))) 972 | elif arg < 0x100: 973 | script.write(str(arg)) 974 | elif arg < 0x10000: 975 | script.write(F'${arg:04X}') 976 | else: 977 | script.write(F'${arg:08X}') 978 | return script.getvalue() 979 | 980 | def _string_code_language(self, index: int) -> str: 981 | return F'$LANGUAGE:{index:04X}' 982 | 983 | 984 | def __init__(self, reader: StructReader[bytearray], size: int, extended: bool): 985 | self.is64bit = size >= 4 + 12 * 8 and not any( 986 | struct.unpack('8xI' * 8, reader.peek(12 * 8))) 987 | block_header_offset_size = 12 if self.is64bit else 8 988 | required_size = block_header_offset_size * 8 + 4 989 | if size < required_size: 990 | raise ValueError(F'Header size 0x{size:08X} is too small. Minimum required size is 0x{required_size:08X}.') 991 | # TODO: Confirm role of unknown value. Copilot believes it to be 992 | # a signature indicating the end of the NSIS installer header. 
993 | self.unknown_value = reader.u32() 994 | self.block_header_offsets = [NSBlockHeaderOffset( 995 | reader.read(block_header_offset_size), 996 | is64bit=self.is64bit) for _ in range(8)] 997 | self.block_header_entries = self.block_header_offsets[2] 998 | self.block_header_strings = self.block_header_offsets[3] 999 | self.block_header_langtables = self.block_header_offsets[4] 1000 | 1001 | for key, offset in enumerate(self.block_header_offsets): 1002 | width = 0x10 if self.is64bit else 8 1003 | table = {2: 'entries', 3: 'strings', 4: 'langtables'}.get(key) 1004 | message = F'Block {key}: offset=0x{offset.offset:0{width}X}, size=0x{offset.size:0{width}X}' 1005 | if table is not None: 1006 | message += F'{message} ({table})' 1007 | logging.debug(message) 1008 | 1009 | self.type = NSType.Nsis2 # Default to NSIS 2 1010 | 1011 | reader.seek_set(self.block_header_entries.offset) 1012 | InsnParser = NSScriptExtendedInstruction if extended else NSScriptInstruction 1013 | self.instructions: List[NSScriptInstruction] = [ 1014 | InsnParser(reader) for _ in range(self.block_header_entries.size)] 1015 | 1016 | if self.block_header_entries.offset > size: 1017 | raise ValueError(F'Header indicates {self.block_header_entries.size} entries, but only {size} bytes remain.') 1018 | if self.block_header_strings.offset > size: 1019 | raise ValueError(F'Header indicates {self.block_header_strings.size} strings, but only {size} bytes remain.') 1020 | if self.block_header_langtables.offset > size: 1021 | raise ValueError(F'Header indicates {self.block_header_langtables.size} langtables, but only {size} bytes remain.') 1022 | if self.block_header_langtables.offset < self.block_header_strings.offset: 1023 | raise ValueError(F'Langtables block is before strings block.') 1024 | string_table_size = self.block_header_langtables.offset - self.block_header_strings.offset 1025 | if string_table_size < 2: 1026 | raise ValueError(F'String table size is too small.') 1027 | 
reader.seek_set(self.block_header_strings.offset) 1028 | strings = reader.read(string_table_size) 1029 | self.unicode = strings[:2] == B'\0\0' 1030 | if strings[-1] != 0 or (self.unicode and strings[-2] != 0): 1031 | raise ValueError(F'String table is not null-terminated.') 1032 | if self.unicode and string_table_size % 2 != 0: 1033 | raise ValueError(F'String table is not even-sized.') 1034 | 1035 | self.strings = StructReader(strings) 1036 | if self.block_header_entries.size > (1 << 25): 1037 | raise ValueError(F'Header indicates {self.block_header_entries.size} entries, which is too large.') 1038 | 1039 | self._log_cmd_is_enabled = False 1040 | self._is_nsis225 = False 1041 | self._is_nsis200 = False 1042 | self_bad_cmd = -1 1043 | 1044 | self._guess_nsis_version() 1045 | 1046 | items: Dict[(str, int), NSItem] = {} 1047 | for item in self._read_items(): 1048 | if items.setdefault((item.path, item.offset), item) != item: 1049 | raise ValueError(F'Duplicate item: {item.path} at 0x{item.offset:08X}') 1050 | 1051 | self.items = [items[t] for t in sorted(items.keys())] 1052 | 1053 | @property 1054 | def nsis_deflate(self): 1055 | return self.type is not NSType.Nsis3 1056 | 1057 | @property 1058 | def encoding(self): 1059 | return 'utf-16' if self.unicode else 'latin1' 1060 | 1061 | @property 1062 | def charsize(self): 1063 | return 2 if self.unicode else 1 1064 | 1065 | 1066 | 1067 | class NSArchive(Struct): 1068 | MAGICS = [ 1069 | # https://nsis.sourceforge.io/Can_I_decompile_an_existing_installer 1070 | B'\xEF\xBE\xAD\xDE' B'Null' B'soft' B'Inst', # v1.6 1071 | B'\xEF\xBE\xAD\xDE' B'Null' B'Soft' B'Inst', # v1.3 1072 | B'\xED\xBE\xAD\xDE' B'Null' B'Soft' B'Inst', # v1.1 1073 | B'\xEF\xBE\xAD\xDE' B'nsis' B'inst' B'all\0', # v1.0 1074 | ] 1075 | 1076 | @dataclasses.dataclass 1077 | class Entry: 1078 | offset: int 1079 | data: bytearray 1080 | compressed_size: int 1081 | decompression_failed: bool = False 1082 | 1083 | 1084 | def __init__(self, reader: 
StructReader[bytearray]): 1085 | self.flags = NSHeaderFlags(reader.u32()) 1086 | self.signature = reader.read(0x10) 1087 | header_data = None 1088 | header_size = reader.u32() 1089 | header_data_length = None 1090 | archive_size = reader.u32() 1091 | self.archive_offset = reader.tell() 1092 | body_size = archive_size - self.archive_offset 1093 | if body_size < 0: 1094 | raise ValueError("Invalid archive size") 1095 | if header_size < self.archive_offset: 1096 | raise ValueError("Invalid header size") 1097 | if reader.remaining_bytes < body_size: 1098 | raise ValueError( 1099 | F'Header indicates archive size 0x{archive_size:08X}, ' 1100 | F'but only 0x{reader.remaining_bytes:08X} bytes remain.') 1101 | 1102 | 1103 | 1104 | # Preview_bytes and preview check will check the compression format. This takes 1105 | # a few bytes and checks the header to determine the format 1106 | 1107 | # Header Matching Logic: 1108 | # X is the header size as given by the first header 1109 | # T is a value less than 0xE 1110 | # Y is a value different from 0x80 1111 | # XX XX XX XX __ __ __ __ __ __ __ non-solid, uncompressed 1112 | # 00 00 00 00 00 00 00 00 XX XX XX XX non-solid, uncompressed, extended 1113 | # 5D 00 00 DD DD 00 __ __ __ __ __ solid LZMA 1114 | # 00 5D 00 00 DD DD 00 __ __ __ __ solid LZMA, empty filter 1115 | # 01 5D 00 00 DD DD 00 __ __ __ __ solid LZMA, BCJ filter 1116 | # __ __ __ 80 5D 00 00 DD DD 00 __ non-solid LZMA 1117 | # __ __ __ 80 00 5D 00 00 DD DD 00 non-solid LZMA, empty filter 1118 | # __ __ __ 80 01 5D 00 00 DD DD 00 non-solid LZMA, BCJ filter 1119 | # __ __ __ 80 01 0T __ __ __ __ __ non-solid BZip 1120 | # __ __ __ 80 __ __ __ __ __ __ __ non-solid deflate 1121 | # 01 0T __ YY __ __ __ __ __ __ __ solid BZip 1122 | # __ __ __ YY __ __ __ __ __ __ __ solid Deflate 1123 | 1124 | def lzmacheck(preview): 1125 | if B'\x5D\0\0' not in preview[:4]: 1126 | return False 1127 | filter_flag = preview_bytes[0] <= 1 1128 | reader.seek_relative(3 + 
int(filter_flag)) 1129 | self.lzma_options = LZMAOptions(filter_flag, reader.u32()) 1130 | return True 1131 | 1132 | def bzipcheck(preview): 1133 | return preview[0] == 0x31 and preview[1] < 14 1134 | 1135 | preview_bytes = bytes(reader.peek(16)) 1136 | preview_check = preview_bytes.find(header_size.to_bytes(4, byteorder='little')) 1137 | 1138 | # The default "solid" value is True and default method is deflate. 1139 | # Regarding Solid: 1140 | # "If /SOLID is used, all of the installer data is compressed in one block. This results in greater compression ratios." 1141 | # We determine if the compression is solid or not by checking the headers. 1142 | # https://nsis.sourceforge.io/Docs/Chapter4.html# 1143 | self.solid = True 1144 | self.extended = False 1145 | self.lzma_options: Optional[LZMAOptions] = None 1146 | self.method = NSMethod.Deflate 1147 | self.entries: Dict[int, bytearray] = {} 1148 | self.entry_offset_delta = 4 1149 | self._solid_iter = None 1150 | if preview_check >= 0: 1151 | header_data_length = header_size 1152 | self.method = NSMethod.Copy 1153 | self.solid = False 1154 | if not preview_check: 1155 | header_prefix_size = 0x04 1156 | elif preview_check == 8: 1157 | header_prefix_size = 0x10 1158 | self.extended = True 1159 | else: 1160 | raise ValueError(F'Invalid header size: 0x{header_size:08X}, unknown NSIS format') 1161 | reader.seek_relative(header_prefix_size) 1162 | self.entry_offset_delta = header_prefix_size 1163 | header_data = reader.read_exactly(header_data_length) 1164 | elif lzmacheck(preview_bytes): 1165 | self.method = NSMethod.LZMA 1166 | elif preview_bytes[3] == 0x80: 1167 | self.solid = False 1168 | reader.seek_relative(4) 1169 | preview_bytes = bytes(reader.peek(4)) 1170 | if lzmacheck(preview_bytes): 1171 | self.method = NSMethod.LZMA 1172 | elif bzipcheck(preview_bytes): 1173 | self.method = NSMethod.BZip2 1174 | elif bzipcheck(preview_bytes): 1175 | self.method = NSMethod.BZip2 1176 | 1177 | 
reader.seek_set(self.archive_offset) 1178 | self.entries: Dict[int, bytearray] = {} 1179 | #self.entry_offset_delta = 0 1180 | #self._solid_iter = None 1181 | 1182 | if header_data is None: 1183 | item = self._decompress_items(reader) 1184 | header_entry = next(item) 1185 | if header_entry.decompression_failed: 1186 | raise ValueError( 1187 | 'This archive seems to use an NSIS-specific deflate ' 1188 | 'algorithm which has not been implemented yet.') 1189 | if self.solid: 1190 | self._solid_iter = item 1191 | self.entry_offset_delta += header_entry.compressed_size 1192 | header_data = header_entry.data 1193 | else: 1194 | self.entry_offset_delta += len(header_data) 1195 | 1196 | if not header_data: 1197 | raise ValueError("Empty header") 1198 | logging.debug(F'Header size: 0x{header_size:08X}') 1199 | 1200 | self.header = NSHeader(header_data, size=header_size, extended=self.extended) 1201 | self.reader = reader 1202 | 1203 | if self.method is NSMethod.Deflate and self.header.nsis_deflate: 1204 | self.method = NSMethod.NSGzip 1205 | 1206 | @property 1207 | def script(self): 1208 | return self.header.script 1209 | 1210 | @property 1211 | def offset_items(self): 1212 | return self.archive_offset + self.entry_offset_delta 1213 | 1214 | def _extract_item_data(self, item: NSItem) -> Entry: 1215 | if self.solid: 1216 | while True: 1217 | try: 1218 | entry = self.entries[item.offset] 1219 | except KeyError: 1220 | try: 1221 | entry = next(self._solid_iter) 1222 | except StopIteration: 1223 | raise LookupError(F'Failed to find item at offset 0x{item.offset:08X}.') 1224 | self.entries[entry.offset - self.entry_offset_delta] = entry.data 1225 | else: 1226 | return entry 1227 | else: 1228 | self.reader.seek(self.offset_items + item.offset) 1229 | decompressed = self._decompress_items(self.reader) 1230 | entry = next(decompressed).data 1231 | return entry 1232 | 1233 | class SolidReader(Iterable[Entry]): 1234 | def __init__(self, src: BinaryIO, prefix_length: int): 1235 | 
self.src = src 1236 | self.pos = 0 1237 | self.prefix_length = prefix_length 1238 | 1239 | def __iter__(self): 1240 | return self 1241 | 1242 | def __next__(self): 1243 | offset = self.pos 1244 | mask = (1 << ((self.prefix_length * 8) - 1)) - 1 1245 | size = self.src.read(self.prefix_length) 1246 | if len(size) != self.prefix_length: 1247 | raise StopIteration 1248 | size = int.from_bytes(size, byteorder='little') 1249 | read = size & mask 1250 | data = self.src.read(read) 1251 | if len(data) != read: 1252 | raise EOFError('Unexpected end of stream while decompressing archive entries.') 1253 | self.pos = offset + read + 4 1254 | return NSArchive.Entry(offset, data, size) 1255 | 1256 | class PartsReader(SolidReader): 1257 | def __init__(self, src: BinaryIO, decompressor: Optional[Type[BinaryIO]], prefix_length: int): 1258 | super().__init__(src, prefix_length) 1259 | self._dc = decompressor 1260 | 1261 | def __next__(self): 1262 | item = super().__next__() 1263 | is_compressed = bool(item.compressed_size & 0x80000000) 1264 | item.compressed_size &= 0x7FFFFFFF 1265 | if is_compressed: 1266 | try: 1267 | dc = self._dc(MemoryFile(item.data)) 1268 | item.data = dc.read() 1269 | except Exception: 1270 | item.decompression_failed = True 1271 | return item 1272 | 1273 | class LZMAFix: 1274 | ''' Creates a wrapper to compensate for how NSIS handles LZMA''' 1275 | def __init__(self, src: MemoryFile): 1276 | self._src = src 1277 | self._fix = MemoryFile(bytes(src.read(5)) + B'\xFF' * 8) 1278 | 1279 | def __getattr__(self, key): 1280 | return getattr(self._src, key) 1281 | 1282 | def read(self, size: int = -1): 1283 | src = self._src 1284 | fix = self._fix 1285 | if not fix.remaining_bytes: 1286 | return src.read(size) 1287 | if size < 0: 1288 | size = fix.remaining_bytes + src.remaining_bytes 1289 | data = bytearray(size) 1290 | wrapper = fix.read(size) 1291 | data[:len(wrapper)] = wrapper 1292 | data[len(wrapper):] = src.read(size - len(wrapper)) 1293 | return data 1294 | 
1295 | 1296 | 1297 | def _decompress_items(self, reader: StructReader[bytearray]) -> Iterator[Entry]: 1298 | """ Decompresses the items in the archive. """ 1299 | def NSISLZMAFile(d): 1300 | if use_filter := self.lzma_options.filter_flag: 1301 | use_filter = d.u8() 1302 | if use_filter > 1: 1303 | raise ValueError(F'LZMA/BCJ chunk with invalid filter indicator byte 0x{use_filter:X}') 1304 | if not use_filter: 1305 | _filter = None 1306 | _format = None 1307 | _stream = self.LZMAFix(d) 1308 | else: 1309 | pv = d.u8() 1310 | ds = max(self.lzma_options.dictionary_size, d.u32()) 1311 | if (pv >= 225): 1312 | raise ValueError('Unexpected LZMA properties; value exceeds 225.') 1313 | pv, lc = divmod(pv, 9) 1314 | pb, lp = divmod(pv, 5) 1315 | _filter = [ 1316 | dict(id=lzma.FILTER_X86), 1317 | dict(id=lzma.FILTER_LZMA1, dict_size=ds, lc=lc, lp=lp, pb=pb)] 1318 | _format = lzma.FORMAT_RAW 1319 | _stream = d 1320 | 1321 | return lzma.LZMAFile(_stream, filters=_filter, format=_format) 1322 | 1323 | decompressor: Type[BinaryIO]= { 1324 | NSMethod.Copy : None, 1325 | NSMethod.Deflate : DeflateFile, 1326 | NSMethod.NSGzip : GZipFile, 1327 | NSMethod.LZMA : NSISLZMAFile, 1328 | NSMethod.BZip2 : BZip2File, 1329 | }[self.method] 1330 | prefix_length = 8 if self.extended else 4 1331 | if self.solid: 1332 | return self.SolidReader(decompressor(reader), prefix_length) 1333 | else: 1334 | return self.PartsReader(reader, decompressor, prefix_length) 1335 | 1336 | 1337 | class extractNSIS(ArchiveUnit): 1338 | """ 1339 | A class to extract an NSIS file. 
1340 | """ 1341 | @classmethod 1342 | def _find_archive_offset(cls, data: memoryview, before: int = -1, flaw_max=2) -> int: 1343 | def signatures(*magics): 1344 | for changes in range(flaw_max + 1): 1345 | for magic in magics: 1346 | if not changes: 1347 | yield 0, magic 1348 | continue 1349 | for positions in itertools.permutations(range(len(magic)), r=changes): 1350 | signature = bytearray(magic) 1351 | for position in positions: 1352 | signature[position] = 0x2E 1353 | yield changes, bytes(signature) 1354 | best_guess = None 1355 | search_space = memoryview(data) 1356 | for flaws, sig in signatures(*NSArchive.MAGICS): 1357 | if flaws > 1: 1358 | search_space = search_space[:0x20_000] 1359 | matches = [m.start() - 4 for m in re.finditer(sig, 1360 | search_space, 1361 | flags=re.DOTALL)] 1362 | if before >= 0: 1363 | matches = [match for match in matches if match < before] 1364 | matches.reverse() 1365 | archive = None 1366 | for match in matches: 1367 | if match % 0x200 == 0: 1368 | archive = match 1369 | break 1370 | if not archive: 1371 | if matches and not best_guess: 1372 | best_guess = matches[-1] 1373 | else: 1374 | message = F'Archive signature was found at offset 0x{archive:08X}.' 1375 | if flaws > 0: 1376 | message += F' the signature has {flaws} flaws and was likely modified.' 1377 | logging.debug(message) 1378 | return archive 1379 | if best_guess: 1380 | message = F'Archive signature was found at offset 0x{best_guess:08X}, but it has too many flaws to be reliable.' 
1381 | logging.debug(message) 1382 | return best_guess 1383 | 1384 | 1385 | def unpack(self, data: memoryview): 1386 | memory = memoryview(data) 1387 | before = -1 1388 | _error = None 1389 | while True: 1390 | offset = self._find_archive_offset(data, before) 1391 | if offset is None: 1392 | _error = _error or ValueError("Unable to find NSIS archive marker") 1393 | raise _error 1394 | try: 1395 | archive = NSArchive(memory[offset:]) 1396 | except Exception as e: 1397 | _error = e 1398 | before = offset 1399 | else: 1400 | break 1401 | 1402 | unpacked_items = [] 1403 | for item in archive.header.items: 1404 | unpacked_items.append(self._pack(item.path, item.mtime, archive._extract_item_data(item))) 1405 | unpacked_items.append(self._pack('setup.nsis', None, archive.script.encode('utf-8'))) 1406 | return unpacked_items 1407 | 1408 | 1409 | 1410 | --------------------------------------------------------------------------------