├── elfesteem ├── macho_init.py ├── macho │ ├── __init__.py │ └── common.py ├── __init__.py ├── compatibility_python23.py ├── binary.py ├── strpatchwork.py ├── intervals.py ├── rprc.py ├── new_cstruct.py ├── minidump_init.py ├── cstruct.py └── jclass_init.py ├── tests ├── binary_input │ ├── Ange │ │ ├── d_tiny.dll │ │ ├── dllfw.dll │ │ ├── delayfake.exe │ │ ├── dllbound-ld.exe │ │ ├── exportobf.exe │ │ ├── nosectionW7.exe │ │ ├── tinydllXP.dll │ │ ├── weirdsord.exe │ │ ├── bottomsecttbl.exe │ │ ├── namedresource.exe │ │ ├── resourceloop.exe │ │ ├── imports_relocW7.exe │ │ └── imports_tinyXP.exe │ ├── elf_cpp.o │ ├── macho │ │ ├── sh │ │ ├── OSXII │ │ ├── Decibels │ │ ├── LyonMetro │ │ ├── SweetHome3D │ │ ├── macho_32.o │ │ ├── macho_64.o │ │ ├── MacTheRipper │ │ ├── macho_32.out │ │ ├── macho_64.out │ │ ├── macho_fat.out │ │ ├── TelephonyUtil.o │ │ ├── libcoretls.dylib │ │ ├── libSystem.B.dylib │ │ ├── libecpg.6.5.dylib │ │ ├── macho_lcbuild.out │ │ ├── libdns_services.dylib │ │ ├── libPrintServiceQuota.1.dylib │ │ └── libATCommandStudioDynamic.dylib │ ├── tiny45.bin │ ├── tiny52.bin │ ├── tiny64.bin │ ├── tiny76.bin │ ├── tiny84.bin │ ├── pe_mingw.exe │ ├── windows.dmp │ ├── cku192.irix40 │ ├── cku196.clix-3.1 │ ├── coff_mingw.obj │ ├── elf64_small.out │ ├── elf_small.out │ ├── pe_vstudio.dll │ ├── minidump-i386.dmp │ ├── cku200.dec-osf-1.3a │ ├── ducati-m3_p768.bin │ ├── minidump-x86_64.dmp │ ├── cku190.rs6aix32c-3.2.4 │ ├── cku192.ultrix43c-mips3 │ ├── notle-tesla-dsp.xe64T │ ├── cku193a05.apollo-sr10-s5r3 │ ├── C28346_Load_Program_to_Flash.out │ ├── tiny45.asm │ ├── tiny52.asm │ ├── tiny64.asm │ ├── tiny76.asm │ ├── tiny84.asm │ └── README.txt ├── examples_macos.sh ├── examples_linux.sh ├── test_minidump_manipulation.py ├── test_intervals.py ├── test_all.py ├── test_rprc_manipulation.py ├── test_elf_manipulation.py └── test_pe_manipulation.py ├── setup.py ├── elfcli ├── .github └── workflows │ ├── codeql.yml │ ├── tools.yml │ └── python-versions.yml ├── 
examples ├── test_pe.py ├── minidump_to_pe.py ├── readelf.py └── otool.py ├── .travis.yml └── README.md /elfesteem/macho_init.py: -------------------------------------------------------------------------------- 1 | from elfesteem.macho import * 2 | -------------------------------------------------------------------------------- /elfesteem/macho/__init__.py: -------------------------------------------------------------------------------- 1 | from elfesteem.macho.init import * 2 | -------------------------------------------------------------------------------- /tests/binary_input/Ange/d_tiny.dll: -------------------------------------------------------------------------------- 1 | MZPE * tiny data PE (61 bytes) 2 |  -------------------------------------------------------------------------------- /tests/binary_input/elf_cpp.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/elf_cpp.o -------------------------------------------------------------------------------- /tests/binary_input/macho/sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/sh -------------------------------------------------------------------------------- /tests/binary_input/tiny45.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny45.bin -------------------------------------------------------------------------------- /tests/binary_input/tiny52.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny52.bin -------------------------------------------------------------------------------- /tests/binary_input/tiny64.bin: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny64.bin -------------------------------------------------------------------------------- /tests/binary_input/tiny76.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny76.bin -------------------------------------------------------------------------------- /tests/binary_input/tiny84.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny84.bin -------------------------------------------------------------------------------- /tests/binary_input/macho/OSXII: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/OSXII -------------------------------------------------------------------------------- /tests/binary_input/pe_mingw.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/pe_mingw.exe -------------------------------------------------------------------------------- /tests/binary_input/windows.dmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/windows.dmp -------------------------------------------------------------------------------- /tests/binary_input/Ange/dllfw.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/dllfw.dll -------------------------------------------------------------------------------- /tests/binary_input/cku192.irix40: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku192.irix40 -------------------------------------------------------------------------------- /tests/binary_input/cku196.clix-3.1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku196.clix-3.1 -------------------------------------------------------------------------------- /tests/binary_input/coff_mingw.obj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/coff_mingw.obj -------------------------------------------------------------------------------- /tests/binary_input/elf64_small.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/elf64_small.out -------------------------------------------------------------------------------- /tests/binary_input/elf_small.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/elf_small.out -------------------------------------------------------------------------------- /tests/binary_input/macho/Decibels: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/Decibels -------------------------------------------------------------------------------- /tests/binary_input/macho/LyonMetro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/LyonMetro -------------------------------------------------------------------------------- 
/tests/binary_input/pe_vstudio.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/pe_vstudio.dll -------------------------------------------------------------------------------- /elfesteem/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __all__ = ['pe_init', 'elf_init', 'jclass_init', 'strpatchwork'] 4 | -------------------------------------------------------------------------------- /tests/binary_input/macho/SweetHome3D: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/SweetHome3D -------------------------------------------------------------------------------- /tests/binary_input/macho/macho_32.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_32.o -------------------------------------------------------------------------------- /tests/binary_input/macho/macho_64.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_64.o -------------------------------------------------------------------------------- /tests/binary_input/minidump-i386.dmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/minidump-i386.dmp -------------------------------------------------------------------------------- /tests/binary_input/Ange/delayfake.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/delayfake.exe 
-------------------------------------------------------------------------------- /tests/binary_input/Ange/dllbound-ld.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/dllbound-ld.exe -------------------------------------------------------------------------------- /tests/binary_input/Ange/exportobf.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/exportobf.exe -------------------------------------------------------------------------------- /tests/binary_input/Ange/nosectionW7.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/nosectionW7.exe -------------------------------------------------------------------------------- /tests/binary_input/Ange/tinydllXP.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/tinydllXP.dll -------------------------------------------------------------------------------- /tests/binary_input/Ange/weirdsord.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/weirdsord.exe -------------------------------------------------------------------------------- /tests/binary_input/cku200.dec-osf-1.3a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku200.dec-osf-1.3a -------------------------------------------------------------------------------- /tests/binary_input/ducati-m3_p768.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/ducati-m3_p768.bin -------------------------------------------------------------------------------- /tests/binary_input/macho/MacTheRipper: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/MacTheRipper -------------------------------------------------------------------------------- /tests/binary_input/macho/macho_32.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_32.out -------------------------------------------------------------------------------- /tests/binary_input/macho/macho_64.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_64.out -------------------------------------------------------------------------------- /tests/binary_input/macho/macho_fat.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_fat.out -------------------------------------------------------------------------------- /tests/binary_input/minidump-x86_64.dmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/minidump-x86_64.dmp -------------------------------------------------------------------------------- /tests/binary_input/Ange/bottomsecttbl.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/bottomsecttbl.exe -------------------------------------------------------------------------------- /tests/binary_input/Ange/namedresource.exe: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/namedresource.exe -------------------------------------------------------------------------------- /tests/binary_input/Ange/resourceloop.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/resourceloop.exe -------------------------------------------------------------------------------- /tests/binary_input/cku190.rs6aix32c-3.2.4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku190.rs6aix32c-3.2.4 -------------------------------------------------------------------------------- /tests/binary_input/cku192.ultrix43c-mips3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku192.ultrix43c-mips3 -------------------------------------------------------------------------------- /tests/binary_input/macho/TelephonyUtil.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/TelephonyUtil.o -------------------------------------------------------------------------------- /tests/binary_input/macho/libcoretls.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libcoretls.dylib -------------------------------------------------------------------------------- /tests/binary_input/notle-tesla-dsp.xe64T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/notle-tesla-dsp.xe64T 
-------------------------------------------------------------------------------- /tests/binary_input/Ange/imports_relocW7.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/imports_relocW7.exe -------------------------------------------------------------------------------- /tests/binary_input/Ange/imports_tinyXP.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/imports_tinyXP.exe -------------------------------------------------------------------------------- /tests/binary_input/macho/libSystem.B.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libSystem.B.dylib -------------------------------------------------------------------------------- /tests/binary_input/macho/libecpg.6.5.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libecpg.6.5.dylib -------------------------------------------------------------------------------- /tests/binary_input/macho/macho_lcbuild.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_lcbuild.out -------------------------------------------------------------------------------- /tests/binary_input/cku193a05.apollo-sr10-s5r3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku193a05.apollo-sr10-s5r3 -------------------------------------------------------------------------------- /tests/binary_input/macho/libdns_services.dylib: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libdns_services.dylib -------------------------------------------------------------------------------- /tests/binary_input/C28346_Load_Program_to_Flash.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/C28346_Load_Program_to_Flash.out -------------------------------------------------------------------------------- /tests/binary_input/macho/libPrintServiceQuota.1.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libPrintServiceQuota.1.dylib -------------------------------------------------------------------------------- /tests/binary_input/macho/libATCommandStudioDynamic.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libATCommandStudioDynamic.dylib -------------------------------------------------------------------------------- /tests/examples_macos.sh: -------------------------------------------------------------------------------- 1 | #! /bin/zsh 2 | 3 | # Note that we don't test all files, because some are not well parsed by the 4 | # system's otool. 5 | 6 | for file in tests/binary_input/macho/{[DLST],lib[AScde],macho_}*; do 7 | echo "=== $file ===" 8 | diff -c =(otool -l $file) =(python ./examples/otool.py --llvm=native -l $file 2>/dev/null) 9 | done 10 | -------------------------------------------------------------------------------- /tests/examples_linux.sh: -------------------------------------------------------------------------------- 1 | #! 
#! /usr/bin/env python
# Installation script for ELF-Esteem.
#
# distutils was removed from the standard library in Python 3.12 (PEP 632),
# so setuptools is preferred when it is available.  Very old interpreters
# that have no setuptools still ship distutils, which is kept as a fallback
# so that the script keeps working there.

try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup

setup(
    name = 'ELF-Esteem',
    version = '0.1',
    packages = ['elfesteem', 'elfesteem.macho'],
    requires = ['python (>= 2.3)'],
    # Command-line tools installed alongside the library.
    scripts = ['examples/readelf.py','examples/otool.py','examples/readpe.py'],
    # Metadata
    author = 'Philippe BIONDI',
    author_email = 'phil(at)secdev.org',
    description = 'ELF-Esteem: ELF file manipulation library',
    license = 'LGPLv2.1',
    url = 'https://github.com/airbus-seclab/elfesteem',
    # keywords = '',
)
#! /usr/bin/env python
"""Interactive Python console preloaded with an ELF file parsed by elfesteem."""

import sys
import code
import argparse
import importlib

try:
    # Line editing and completion are a convenience only: the readline
    # module is not available on every platform.
    import readline
    import rlcompleter
except ImportError:
    readline = None


def usage():
    """Print a one-line usage message on stderr and exit.

    Raises:
        SystemExit: always.
    """
    # The argument parser in main() takes the input file as a positional
    # argument, so that is what the message documents.
    sys.stderr.write("Usage: elfcli infile\n")
    raise SystemExit


def main():
    """Parse the ELF file named on the command line and open a console.

    The console namespace contains ``options`` (the parsed command line),
    every submodule listed in ``elfesteem.__all__`` and ``elf``, the parsed
    input file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("infile")
    options = parser.parse_args()

    # 'from elfesteem import *' is only legal at module level on Python 3,
    # so the submodules the package exports are imported one by one instead.
    import elfesteem
    from elfesteem import elf_init

    namespace = {"options": options, "elf_init": elf_init}
    for name in getattr(elfesteem, "__all__", ()):
        namespace[name] = importlib.import_module("elfesteem." + name)

    # An ELF file is binary data: read it in binary mode and make sure the
    # file is closed even if reading fails.
    with open(options.infile, "rb") as fd:
        raw = fd.read()
    namespace["elf"] = elf_init.ELF(raw)

    if readline is not None:
        # Complete on the names of the interactive session rather than on
        # the names of this script.
        readline.set_completer(rlcompleter.Completer(namespace).complete)
        readline.parse_and_bind("tab: complete")

    code.interact(local=namespace)


if __name__ == "__main__":
    main()
# Example: build a small PE file from scratch and write it to 'uu.bin'.

try:
    # Same package-qualified imports as the other examples and the tests,
    # which run with the repository root on PYTHONPATH.
    from elfesteem import pe
    from elfesteem.pe_init import PE
except ImportError:
    # Legacy layout: the elfesteem/ directory itself is on sys.path.
    import pe
    from pe_init import PE
import rlcompleter,readline,pdb, sys
from pprint import pprint as pp
readline.parse_and_bind("tab: complete")


e_ = PE()
# Content of the code section: the single byte 0xC3 (x86 'ret').
mysh = "\xc3"
s_text = e_.SHList.add_section(name = "text", addr = 0x1000, rawsize = 0x1000, data = mysh)
# Execution starts at the beginning of the new section.
e_.Opthdr.AddressOfEntryPoint = s_text.addr
# Imports to add: one (descriptor, list of function names) pair per DLL.
new_dll = [({"name":"kernel32.dll",
             "firstthunk":s_text.addr+0x100},
            ["CreateFileA", "SetFilePointer", "WriteFile", "CloseHandle"]
            )
           ,
           ({"name":"USER32.dll",
             "firstthunk":None},
            ["SetDlgItemInt", "GetMenu", "HideCaret"]
            )
           ]
e_.DirImport.add_dlldesc(new_dll)

# Dedicated section that receives the import directory.
s_myimp = e_.SHList.add_section(name = "myimp", rawsize = 0x1000)
e_.DirImport.set_rva(s_myimp.addr)
fd = open('uu.bin', 'wb')
try:
    fd.write(str(e_))
finally:
    fd.close()
actions/checkout@v3 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | - name: Comparison with readelf 43 | run: | 44 | readelf --version 45 | export PYTHONPATH=$PYTHONPATH:$(pwd) 46 | bash ./tests/examples_linux.sh 47 | -------------------------------------------------------------------------------- /tests/binary_input/tiny45.asm: -------------------------------------------------------------------------------- 1 | ; tiny.asm 2 | 3 | BITS 32 4 | 5 | org 0x00010000 6 | 7 | db 0x7F, "ELF" ; e_ident 8 | dd 1 ; p_type 9 | dd 0 ; p_offset 10 | dd $$ ; p_vaddr 11 | dw 2 ; e_type ; p_paddr 12 | dw 3 ; e_machine 13 | dd _start ; e_version ; p_filesz 14 | dd _start ; e_entry ; p_memsz 15 | dd 4 ; e_phoff ; p_flags 16 | _start: 17 | mov bl, 42 ; e_shoff ; p_align 18 | xor eax, eax 19 | inc eax ; e_flags 20 | int 0x80 21 | db 0 22 | dw 0x34 ; e_ehsize 23 | dw 0x20 ; e_phentsize 24 | db 1 ; e_phnum 25 | ; e_shentsize 26 | ; e_shnum 27 | ; e_shstrndx 28 | 29 | filesize equ $ - $$ 30 | -------------------------------------------------------------------------------- /tests/binary_input/tiny52.asm: -------------------------------------------------------------------------------- 1 | ; tiny.asm 2 | 3 | BITS 32 4 | 5 | org 0x00010000 6 | 7 | db 0x7F, "ELF" ; e_ident 8 | dd 1 ; p_type 9 | dd 0 ; p_offset 10 | dd $$ ; p_vaddr 11 | dw 2 ; e_type ; p_paddr 12 | dw 3 ; e_machine 13 | dd _start ; e_version ; p_filesz 14 | dd _start ; e_entry ; p_memsz 15 | dd 4 ; e_phoff ; p_flags 16 | _start: 17 | mov bl, 42 ; e_shoff ; p_align 18 | xor eax, eax 19 | inc eax ; e_flags 20 | int 0x80 21 | db 0 22 | dw 0x34 ; e_ehsize 23 | dw 0x20 ; e_phentsize 24 | dw 1 ; e_phnum 25 | dw 0 ; e_shentsize 26 | dw 0 ; e_shnum 27 | dw 0 ; e_shstrndx 28 | 29 | filesize equ $ - $$ 30 | -------------------------------------------------------------------------------- /.travis.yml: 
-------------------------------------------------------------------------------- 1 | language: python 2 | jobs: 3 | include: 4 | - python: '3.7' 5 | - python: '2.7' 6 | - name: 'Python: 2.3' 7 | # python 2.3 not available in travis 8 | install: 9 | - cd .. 10 | - curl -O https://www.python.org/ftp/python/2.3.7/Python-2.3.7.tgz 11 | - tar xzf Python-2.3.7.tgz 12 | - cd Python-2.3.7 13 | # We need to disable FORTIFY_SOURCE to compile python 2.3 14 | # cf. https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/286334 15 | - ./configure BASECFLAGS=-U_FORTIFY_SOURCE 16 | - make 17 | - export PATH=$(pwd):$PATH 18 | - cd ../elfesteem 19 | - python -c 'import sys;print(sys.version)' 20 | script: 21 | - python ./tests/test_all.py 22 | after_success: 23 | - true # coverage needs python >= 2.6 24 | - python: 'pypy3' 25 | - python: 'pypy' 26 | - python: '3.8' 27 | - python: '3.4' 28 | install: 29 | - pip install coverage codecov 30 | before_script: 31 | export PYTHONPATH=$PYTHONPATH:$(pwd) 32 | script: 33 | - coverage run ./tests/test_all.py 34 | # We don't use e.g. tox for non-regression tests, because we want to have 35 | # a script that works with old python too, and tox needs python2.5 36 | # python2.4 ./tests/test_all.py will work fine :-) 37 | # Note that coverage is incompatible with python 3.2, cf. 38 | # https://github.com/menegazzo/travispy/issues/20 39 | after_success: 40 | - codecov 41 | -------------------------------------------------------------------------------- /examples/minidump_to_pe.py: -------------------------------------------------------------------------------- 1 | #! 
"""Minidump to PE example

Usage: minidump_to_pe.py <minidump file>

Each memory region found in the minidump becomes one section of a new PE,
which is written to 'out_pe.bin' in the current directory.
"""
import sys
from elfesteem.minidump_init import Minidump
from elfesteem.pe_init import PE

# A minidump is binary data: in text mode, newline translation would
# corrupt it on Windows.
fd = open(sys.argv[1], "rb")
try:
    raw = fd.read()
finally:
    fd.close()
minidump = Minidump(raw)

# itervalues() only exists on Python 2 dictionaries; use values() when it is
# not available.
memory_values = getattr(minidump.memory, "itervalues", None)
if memory_values is None:
    memory_values = minidump.memory.values

pe = PE()
# Sections are created in increasing address order.
for i, memory in enumerate(sorted(memory_values(),
                                  key=lambda x:x.address)):
    # Get section name
    name = str(memory.name)
    if not name:
        # Unnamed region: generate a name from its rank
        name = "s_%02d" % i
    else:
        # Keep only the last component of a backslash-separated path
        name = name.split('\\')[-1]

    # Get section protection
    protect = memory.pretty_protect
    # 0x20 is the PE 'section contains code' flag
    protect_mask = 0x20
    if protect == "UNKNOWN":
        # Unknown protection: allow execute, read and write
        protect_mask |= 0xe0000000
    else:
        if "EXECUTE" in protect:
            protect_mask |= 1 << 29
        if "READ" in protect:
            protect_mask |= 1 << 30
        if "WRITE" in protect:
            protect_mask |= 1 << 31

    # Add the section
    pe.SHList.add_section(name=name, addr=memory.address, rawsize=memory.size,
                          data=memory.content, flags=protect_mask)

# Find entry point
# NOTE(review): this reads Eip from the first thread, so it presumably only
# makes sense for 32-bit x86 dumps -- confirm before using on other dumps.
entry_point = minidump.threads.Threads[0].ThreadContext.Eip[0]
pe.Opthdr.AddressOfEntryPoint = entry_point

# Binary mode, like examples/test_pe.py: the PE image must be written as is.
fd = open("out_pe.bin", "wb")
try:
    fd.write(str(pe))
finally:
    fd.close()
e_shstrndx 27 | dd filesize ; p_memsz 28 | dd 5 ; p_flags 29 | dd 0x1000 ; p_align 30 | 31 | filesize equ $ - $$ 32 | -------------------------------------------------------------------------------- /tests/binary_input/tiny76.asm: -------------------------------------------------------------------------------- 1 | ; tiny.asm 2 | 3 | BITS 32 4 | 5 | org 0x08048000 6 | 7 | ehdr: 8 | db 0x7F, "ELF" ; e_ident 9 | db 1, 1, 1, 0, 0 10 | _start: mov bl, 42 11 | xor eax, eax 12 | inc eax 13 | int 0x80 14 | dw 2 ; e_type 15 | dw 3 ; e_machine 16 | dd 1 ; e_version 17 | dd _start ; e_entry 18 | dd phdr - $$ ; e_phoff 19 | dd 0 ; e_shoff 20 | dd 0 ; e_flags 21 | dw ehdrsize ; e_ehsize 22 | dw phdrsize ; e_phentsize 23 | phdr: dd 1 ; e_phnum ; p_type 24 | ; e_shentsize 25 | dd 0 ; e_shnum ; p_offset 26 | ; e_shstrndx 27 | ehdrsize equ $ - ehdr 28 | dd $$ ; p_vaddr 29 | dd $$ ; p_paddr 30 | dd filesize ; p_filesz 31 | dd filesize ; p_memsz 32 | dd 5 ; p_flags 33 | dd 0x1000 ; p_align 34 | phdrsize equ $ - phdr 35 | 36 | filesize equ $ - $$ 37 | -------------------------------------------------------------------------------- /tests/test_minidump_manipulation.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | import os 4 | __dir__ = os.path.dirname(__file__) 5 | 6 | from test_all import run_tests, assertion, hashlib, open_read 7 | from elfesteem.minidump_init import Minidump 8 | 9 | def test_MD_windows(assertion): 10 | md = open_read(__dir__+'/binary_input/windows.dmp') 11 | assertion('82a09a9d801bddd1dc94dfb9ba6eddf0', 12 | hashlib.md5(md).hexdigest(), 13 | 'Reading windows.dmp') 14 | e = Minidump(md) 15 | d = e.dump().encode('latin1') 16 | assertion('48cae6cc782305b611f6e8b82049b9a0', 17 | hashlib.md5(d).hexdigest(), 18 | 'Displaying the content of windows.dmp') 19 | 20 | def test_MD_i386(assertion): 21 | md = open_read(__dir__+'/binary_input/minidump-i386.dmp') 22 | assertion('0f2ee1a0a2e6351e64929197c07679e6', 23 | hashlib.md5(md).hexdigest(), 24 | 'Reading minidump-i386.dmp') 25 | e = Minidump(md) 26 | d = e.dump().encode('latin1') 27 | assertion('c89c01352e515874b00d998b1ad06998', 28 | hashlib.md5(d).hexdigest(), 29 | 'Displaying the content of minidump-i386.dmp') 30 | 31 | def test_MD_x86_64(assertion): 32 | md = open_read(__dir__+'/binary_input/minidump-x86_64.dmp') 33 | assertion('ecde7af61615e05ffcde1f064c1a22f8', 34 | hashlib.md5(md).hexdigest(), 35 | 'Reading minidump-x86_64.dmp') 36 | e = Minidump(md) 37 | d = e.dump().encode('latin1') 38 | assertion('4357695a7e265aca04bb2809485b8634', 39 | hashlib.md5(d).hexdigest(), 40 | 'Displaying the content of minidump-x86_64.dmp') 41 | 42 | def run_test(assertion): 43 | for name, value in dict(globals()).items(): 44 | if name.startswith('test_'): 45 | value(assertion) 46 | 47 | if __name__ == "__main__": 48 | run_tests(run_test) 49 | -------------------------------------------------------------------------------- /.github/workflows/python-versions.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install various versions of Python and run non-regression tests. 
2 | 3 | name: Python versions 4 | 5 | on: 6 | push: 7 | branches: [ "master" ] 8 | pull_request: 9 | branches: [ "master" ] 10 | 11 | jobs: 12 | build: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: ["ubuntu-latest", "macos-latest"] 18 | python-version: ["3.10", "3.12", "pypy2.7", "pypy3.9"] 19 | steps: 20 | - uses: actions/checkout@v3 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install coverage codecov 28 | - name: Non-regression tests 29 | run: | 30 | export PYTHONPATH=$PYTHONPATH:$(pwd) 31 | coverage run ./tests/test_all.py 32 | - name: Update codecov 33 | run: | 34 | codecov 35 | 36 | python23: 37 | name: python2.3 38 | runs-on: "ubuntu-latest" 39 | strategy: 40 | fail-fast: false 41 | steps: 42 | - uses: actions/checkout@v3 43 | - name: Set up Python 2.3 44 | run: | 45 | cd .. 46 | curl -O https://www.python.org/ftp/python/2.3.7/Python-2.3.7.tgz 47 | tar xzf Python-2.3.7.tgz 48 | cd Python-2.3.7 49 | # We need to disable FORTIFY_SOURCE to compile python 2.3 50 | # cf. 
https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/286334 51 | ./configure BASECFLAGS=-U_FORTIFY_SOURCE 52 | make 53 | sudo ln -fs $(pwd)/python /usr/local/bin/python 54 | - name: Non-regression tests 55 | run: | 56 | python -c 'import sys;print(sys.version)' 57 | export PYTHONPATH=$PYTHONPATH:$(pwd) 58 | python ./tests/test_all.py 59 | -------------------------------------------------------------------------------- /tests/binary_input/tiny84.asm: -------------------------------------------------------------------------------- 1 | ; tiny.asm 2 | 3 | BITS 32 4 | 5 | org 0x08048000 6 | 7 | ehdr: ; Elf32_Ehdr 8 | db 0x7F, "ELF" ; e_ident 9 | db 1, 1, 1, 0, 0 10 | _start: mov bl, 42 11 | xor eax, eax 12 | inc eax 13 | int 0x80 14 | dw 2 ; e_type 15 | dw 3 ; e_machine 16 | dd 1 ; e_version 17 | dd _start ; e_entry 18 | dd phdr - $$ ; e_phoff 19 | dd 0 ; e_shoff 20 | dd 0 ; e_flags 21 | dw ehdrsize ; e_ehsize 22 | dw phdrsize ; e_phentsize 23 | dw 1 ; e_phnum 24 | dw 0 ; e_shentsize 25 | dw 0 ; e_shnum 26 | dw 0 ; e_shstrndx 27 | 28 | ehdrsize equ $ - ehdr 29 | 30 | phdr: ; Elf32_Phdr 31 | dd 1 ; p_type 32 | dd 0 ; p_offset 33 | dd $$ ; p_vaddr 34 | dd $$ ; p_paddr 35 | dd filesize ; p_filesz 36 | dd filesize ; p_memsz 37 | dd 5 ; p_flags 38 | dd 0x1000 ; p_align 39 | 40 | phdrsize equ $ - phdr 41 | 42 | filesize equ $ - $$ 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ELF Esteem # 2 | 3 | ## Overview 4 | 5 | The goal of this library is to manipulate various containers of executable code. 6 | ELF, PE, COFF and Mach-O files are fully supported. 7 | It includes a partial support of Minidump and RPRC files, and a non-working implementation of Java classes. 8 | 9 | It aims at being self-contained and portable: it is pure python, compatible from python 2.3 upwards (including python 3.x). 
10 | 11 | ## Parsing with ELF Esteem 12 | 13 | [binary.py](elfesteem/binary.py) 14 | can be used to read a binary of any known format and display its main characteristics. 15 | 16 | [readelf.py](examples/readelf.py) 17 | outputs the same as binutils' readelf, using ELF Esteem. 18 | 19 | [otool.py](examples/otool.py) 20 | outputs the same as MacOSX otool and dyldinfo, using ELF Esteem. 21 | 22 | [readpe.py](examples/readpe.py) 23 | analyses the content of a PE or COFF file, including a hierarchical display of the layout of the file. 24 | 25 | ## File manipulation with ELF Esteem 26 | 27 | Most of the internal representation of the file parsed by ELF Esteem is based on [cstruct.py](elfesteem/cstruct.py) which is a generic framework to manipulate binary data structures. 28 | 29 | The file is fully loaded using one of the classes `ELF`, `PE`, `COFF`, `MACHO`, `RPRC`, or `Minidump`. This class is the root of a tree of subclasses (e.g. file header, list of sections, ...) and each subtree can be modified. The method `pack()` reconstructs a binary. 30 | 31 | The philosophy behind ELF Esteem is that if the input file is valid, and no modification is made to the internal representation, then `pack()` will recover the input. 32 | When modifications are made, then (depending on the details of the file format) some values are automatically recomputed (e.g. fields containing lengths, checksums). 33 | 34 | **More doc soon.** 35 | 36 | ## Development status 37 | 38 | [![codecov](https://codecov.io/gh/LRGH/elfesteem/branch/master/graph/badge.svg)](https://codecov.io/gh/LRGH/elfesteem) 39 | [![Unit tests](https://github.com/LRGH/elfesteem/actions/workflows/python-versions.yml/badge.svg)](https://github.com/LRGH/elfesteem/actions/workflows/python-versions.yml) 40 | -------------------------------------------------------------------------------- /elfesteem/binary.py: -------------------------------------------------------------------------------- 1 | #! 
class UnknownFormat(object):
    '''
    Placeholder container used when no known binary format is recognized.
    It keeps the raw input and exposes constant values for the attributes
    that are read through the BINARY properties.
    '''
    class virt_stub(object):
        # Minimal replacement for the 'virt' object of real containers.
        def max_addr(self):
            return -1
    # Constant values shared by all instances
    architecture = 'UNKNOWN'
    entrypoint = -1
    sections = ()
    symbols = ()
    dynsyms = ()
    virt = virt_stub()
    def __init__(self, raw):
        # The unparsed input is kept as is.
        self.raw = raw
symbol in e.dynsyms: 67 | print(" %s" % symbol) 68 | -------------------------------------------------------------------------------- /tests/test_intervals.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | from test_all import run_tests, assertion 4 | from elfesteem.intervals import Intervals 5 | 6 | def test_intervals(assertion): 7 | i = Intervals() 8 | assertion(i.ranges, [], 9 | 'Empty interval') 10 | i.add(10, 90) 11 | assertion(i.ranges, [slice(10, 90)], 12 | 'Interval [10:90]') 13 | i.add(0, 100) 14 | assertion(i.ranges, [slice(0, 100)], 15 | 'Addition of bigger interval') 16 | i.add(0, 100) 17 | assertion(i.ranges, [slice(0, 100)], 18 | 'Addition of identical interval') 19 | i.delete(8, 25) 20 | assertion(i.ranges, [slice(0, 8), slice(25, 100)], 21 | '[0:100] minus [8:25]') 22 | assertion(False, i.contains(18, 30), 23 | '[0:8]+[25:100] contains [18:30]') 24 | assertion(True, i.contains(30, 30), 25 | '[0:8]+[25:100] contains [30:30]') 26 | assertion(True, i.excludes(10, 20), 27 | '[0:8]+[25:100] excludes [10:20]') 28 | assertion(False, i.excludes(10, 30), 29 | '[0:8]+[25:100] excludes [10:30]') 30 | assertion(True, i.excludes(-10, -5), 31 | '[0:8]+[25:100] excludes [-10:-5]') 32 | assertion(True, i.excludes(110, 130), 33 | '[0:8]+[25:100] excludes [110:130]') 34 | i.add(12, 16) 35 | assertion(i.ranges, [slice(0, 8), slice(12, 16), slice(25, 100)], 36 | 'Addition of disjoint interval') 37 | i.add(11, 14) 38 | assertion(i.ranges, [slice(0, 8), slice(11, 16), slice(25, 100)], 39 | 'Addition of overlapping interval') 40 | i.add(1, 11) 41 | assertion(i.ranges, [slice(0, 16), slice(25, 100)], 42 | 'Addition generating a merge') 43 | i.delete(8, 15) 44 | assertion(i.ranges, [slice(0, 8), slice (15, 16), slice(25, 100)], 45 | 'Deletion within an interval') 46 | i.add(10, 30) 47 | assertion(i.ranges, [slice(0, 8), slice(10, 100)], 48 | 'Addition of encompassing interval') 49 | i.delete(0, 100) 50 | 
from array import array
# To be compatible with python 2 and python 3
import sys
import struct
data_null = struct.pack("B",0)
data_empty = struct.pack("")

class StrPatchwork(object):
    '''
    Mutable sequence of bytes, stored as an array of unsigned bytes.
    Reading after the end of the stored data returns 'paddingbyte',
    and writing after the end extends the data with 'paddingbyte'.
    Use pack() to get the content as a byte string.
    '''
    def __init__(self, s=data_empty, paddingbyte=data_null):
        if s is None: s = data_empty
        if isinstance(s, StrPatchwork): s = s.pack()
        self.s = array("B",s)
        # Packed copy of the data, used by find() and rfind(); it is
        # built lazily and reset to None each time the data is modified.
        self.s_cache = None
        self.paddingbyte=paddingbyte
    def __str__(self):
        raise AttributeError("Use pack() instead of str()")
    def _to_bytes(self, a):
        # Convert an array to a byte string; python 2 arrays have no
        # tobytes() method, and python 3 uses tobytes().
        if sys.version_info[0] >= 3:
            return a.tobytes()
        else:
            return a.tostring()
    def pack(self):
        ''' Return the whole content as a byte string. '''
        return self._to_bytes(self.s)

    def __getitem__(self, item):
        ''' Return the byte(s) at index or slice 'item', as a byte string,
            padded with 'paddingbyte' after the end of the data. '''
        s = self.s
        if type(item) is slice:
            r = s[item]
            end = item.stop
            if end is not None and len(s) < end:
                if item.step is not None:
                    raise NotImplementedError(
                        "Padding of slices with a step is not implemented")
                elif len(r) > 0:
                    # We go beyond the end of 's'
                    r.extend(array("B",self.paddingbyte*(end-len(s))))
                else:
                    # We are entirely after the end of 's'
                    start = item.start
                    if start is None: start = 0
                    r = array("B",self.paddingbyte*(end-start))
        else:
            # Valid indexes stop at len(s)-1; everything from len(s)
            # upwards is padding.
            if item >= len(s):
                return self.paddingbyte
            r = array("B",[s[item]])
        return self._to_bytes(r)
    def __setitem__(self, item, val):
        ''' Write 'val' at index or slice 'item'; if 'item' is an index,
            the bytes are written starting from this position.
            Nothing is done if 'val' is None. '''
        if val is None:
            return
        if sys.version_info[0] >= 3 and type(val) == str:
            val = val.encode(encoding="latin1")
        val = array("B",val)
        if type(item) is not slice:
            item = slice(item, item+len(val))
        end = item.stop
        l = len(self.s)
        # A slice with no upper bound never needs padding
        if end is not None and l < end:
            self.s.extend(array("B", self.paddingbyte*(end-l)))
        self.s[item] = val
        self.s_cache = None


    def __repr__(self):
        return "<Patchwork %r>" % self.pack()
    def __len__(self):
        return len(self.s)
    def __contains__(self, val):
        return val in self.pack()
    def __iadd__(self, other):
        self.s.extend(array("B", other))
        # The data changed, the packed copy is no longer valid
        self.s_cache = None
        return self

    def find(self, pattern, *args):
        ''' Same as the find() method of byte strings. '''
        if self.s_cache is None:
            self.s_cache = self.pack()
        return self.s_cache.find(pattern, *args)

    def rfind(self, pattern, *args):
        ''' Same as the rfind() method of byte strings. '''
        if self.s_cache is None:
            self.s_cache = self.pack()
        return self.s_cache.rfind(pattern, *args)
_split(self, *poslist): 20 | def _split_slice(l, s): 21 | for pos in sorted(poslist): 22 | if s.start < pos < s.stop: 23 | l.append(slice(s.start, pos)) 24 | s = slice(pos, s.stop) 25 | l.append(s) 26 | return l 27 | self.ranges = reduce(_split_slice, self.ranges, []) 28 | def _merge(self): 29 | def _merge_two_slices(l, s): 30 | if len(l) and (l[-1].stop == s.start): 31 | l[-1] = slice(l[-1].start, s.stop) 32 | else: 33 | l.append(s) 34 | return l 35 | self.ranges = reduce(_merge_two_slices, self.ranges, []) 36 | # Interface of the class 37 | def __iter__(self): 38 | for s in self.ranges: 39 | for t in range(s.start, s.stop): 40 | yield t 41 | def contains(self, start, stop): 42 | for s in self.ranges: 43 | if s.start <= start and stop <= s.stop: 44 | return True 45 | return False 46 | def excludes(self, start, stop): 47 | if len(self.ranges) == 0: 48 | return True 49 | if stop <= self.ranges[0].start: 50 | return True 51 | if self.ranges[-1].stop <= start: 52 | return True 53 | for i in range(len(self.ranges)-1): 54 | if self.ranges[i].stop <= start and stop <= self.ranges[i+1].start: 55 | return True 56 | return False 57 | def delete(self, start, stop): 58 | def _remove_slices(l, s): 59 | if start > s.start or stop < s.stop: 60 | l.append(s) 61 | return l 62 | self._split(start, stop) 63 | self.ranges = reduce(_remove_slices, self.ranges, []) 64 | return self 65 | def add(self, start, stop): 66 | if len(self.ranges) == 0: 67 | self.ranges.append(slice(start, stop)) 68 | return self 69 | new_ranges = [] 70 | prev_stop = None 71 | for l in self.ranges: 72 | if start <= l.start: 73 | if prev_stop is None: 74 | new_ranges.append(slice(start, min(stop,l.start))) 75 | elif prev_stop < stop: 76 | new_ranges.append(slice(max(start,prev_stop), min(stop,l.start))) 77 | new_ranges.append(l) 78 | prev_stop = l.stop 79 | if new_ranges[-1].stop < stop: 80 | new_ranges.append(slice(max(start,new_ranges[-1].stop), stop)) 81 | self.ranges = new_ranges 82 | self._merge() 83 | 
return self 84 | -------------------------------------------------------------------------------- /tests/binary_input/README.txt: -------------------------------------------------------------------------------- 1 | Ange 2 | Some files from https://github.com/corkami/pocs/tree/master/PE/bin 3 | 4 | tiny*.asm 5 | tiny*.bin 6 | Cf. http://www.muppetlabs.com/%7Ebreadbox/software/tiny/teensy.html 7 | 8 | C28346_Load_Program_to_Flash.out 9 | Source https://github.com/slavaprokopiy/Mini-TMS320C28346/blob/master/For_user/C28346_Load_Program_to_Flash/Debug/C28346_Load_Program_to_Flash.out 10 | 11 | cku190.rs6aix32c-3.2.4 12 | cku192.irix40 13 | cku192.ultrix43c-mips3 14 | cku193a05.apollo-sr10-s5r3 15 | cku196.clix-3.1 16 | cku200.dec-osf-1.3a 17 | Source ftp://kermit.columbia.edu/kermit/bin/ 18 | 19 | notle-tesla-dsp.xe64T 20 | ducati-m3_p768.bin 21 | Source https://drive.google.com/drive/folders/0B2AlG69ZVaWldU1vUnRFUklCek0 22 | Linked from https://github.com/radare/radare2/issues/1602 23 | 24 | coff_mingw.obj 25 | elf64_small.o 26 | elf64_small.out 27 | elf_cpp.o 28 | elf_small.o 29 | elf_small.out 30 | pe_mingw.exe 31 | pe_vstudio.dll 32 | macho/macho_32.o 33 | macho/macho_32.out 34 | macho/macho_64.o 35 | macho/macho_64.out 36 | macho/macho_fat.out 37 | Built by Louis Granboulan for elfesteem non-regression tests 38 | 39 | macho/sh 40 | An example of Mach-O with more symbol stubs than symbols (/bin/sh) 41 | 42 | macho/libPrintServiceQuota.1.dylib 43 | An example of big-endian Mach-O (from an old MacOSX for PowerPC) 44 | 45 | macho/Decibels 46 | An example of iPhone app, with two ARM architectures and Encryption 47 | 48 | macho/LyonMetro 49 | Another example of iPhone app, with a LC_VERSION_MIN_IPHONEOS 50 | 51 | macho/TelephonyUtil.o 52 | An example of object file with a LC_LINKER_OPTION 53 | Extracted from /usr/lib/libATCommandStudio.a from a recent MacOSX 54 | 55 | macho/libdns_services.dylib 56 | An example of file with a LC_SOURCE_VERSION 57 | Copied from 
/usr/lib/libdns_services.dylib from a recent MacOSX 58 | 59 | macho/libecpg.6.5.dylib 60 | An example of file with a section size "past end of file" 61 | Copied from /usr/lib/libecpg.6.5.dylib from a recent MacOSX 62 | 63 | macho/libATCommandStudioDynamic.dylib 64 | An example of file with weak binding 65 | Copied from /usr/lib/libATCommandStudioDynamic.dylib from a recent MacOSX 66 | 67 | macho/libcoretls.dylib 68 | An example of file with no binding, no weak binding, no lazy binding 69 | Copied from /usr/lib/libcoretls.dylib from a recent MacOSX 70 | 71 | macho/libSystem.B.dylib 72 | An example of file with BIND_OPCODE_SET_DYLIB_SPECIAL_IMM 73 | Copied from /usr/lib/libSystem.B.dylib from a recent MacOSX 74 | 75 | macho/OSXII 76 | An example of old universal binary, ppc & i386, with LC_UNIXTHREAD 77 | The OSXII software has been discontinued, cf. 78 | https://www.macupdate.com/app/mac/10578/osxii 79 | 80 | macho/SweetHome3D 81 | An example of universal binary, ppc, i386 & x86_64, with LC_UNIXTHREAD 82 | SweetHome3D is open source and available at http://www.sweethome3d.com/ 83 | 84 | macho/MacTheRipper 85 | Another old Mach-O binary, with LC_PREBOUND_DYLIB 86 | This is the version 2.6.6, downloadable at a link available at its 87 | Wikipedia page 88 | 89 | minidump-i386.dmp 90 | minidump-x86_64.dmp 91 | Source https://github.com/OutOfOrder/BreakpadTest/tree/master/Samples 92 | 93 | windows.dmp 94 | Source https://github.com/electron/node-minidump/tree/master/test/fixtures 95 | -------------------------------------------------------------------------------- /tests/test_all.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | # These non-regression tests should be OK from python2.3 to python3.x 4 | 5 | # How to import by name, compatible with python2 and python3 6 | import sys, os 7 | __dir__ = os.path.dirname(__file__) 8 | try: 9 | # The following is working starting with python2.7 10 | import importlib 11 | import_by_name = importlib.import_module 12 | except ImportError: 13 | # The following is working for python2.3 to python3.11 14 | import imp 15 | def import_by_name(name): 16 | fp, pathname, description = imp.find_module(name, [__dir__]) 17 | try: 18 | module = imp.load_module(name, fp, pathname, description) 19 | finally: 20 | if fp is not None: fp.close() 21 | return module 22 | 23 | try: 24 | import hashlib 25 | except ImportError: 26 | # Python 2.4 does not have hashlib 27 | # but 'md5' is deprecated since python2.5 28 | import md5 as oldpy_md5 29 | class hashlib(object): 30 | def md5(self, data): 31 | return oldpy_md5.new(data) 32 | md5 = classmethod(md5) 33 | 34 | try: 35 | # This way, we can use our code with pytest, but we can also 36 | # use it directly, e.g. when testing for python2.3. 37 | # No decorator, the syntax is forbidden in python2.3. 
38 | import pytest 39 | def assertion(): 40 | def inner_assertion(target, value, message): 41 | assert target == value 42 | return inner_assertion 43 | assertion = pytest.fixture(assertion) 44 | except Exception: 45 | assertion = None 46 | 47 | class print_colored(object): # Namespace 48 | end = '\033[0m' 49 | def bold(self, txt): 50 | print('\033[1m'+txt+self.end) 51 | bold = classmethod(bold) 52 | def boldred(self, txt): 53 | print('\033[91;1m'+txt+self.end) 54 | boldred = classmethod(boldred) 55 | def boldgreen(self, txt): 56 | print('\033[92;1m'+txt+self.end) 57 | boldgreen = classmethod(boldgreen) 58 | 59 | def assertion_status(target, value, message, status_ptr): 60 | if target != value: 61 | print_colored.boldred('Non-regression failure for %r' % message) 62 | status_ptr[0] = False 63 | 64 | def run_tests(run_test): 65 | status_ptr = [True] 66 | run_test(lambda target, value, msg, status_ptr=status_ptr: 67 | assertion_status(target, value, msg, status_ptr)) 68 | if status_ptr[0]: 69 | print_colored.boldgreen('OK') 70 | return status_ptr[0] 71 | 72 | def test_MD5(assertion): 73 | import struct 74 | assertion('f71dbe52628a3f83a77ab494817525c6', 75 | hashlib.md5(struct.pack('BBBB',116,111,116,111)).hexdigest(), 76 | 'MD5') 77 | 78 | def open_read(f): 79 | fd = open(f, 'rb') 80 | try: 81 | data = fd.read() 82 | finally: 83 | fd.close() 84 | return data 85 | 86 | if __name__ == "__main__": 87 | exit_value = 0 88 | print_colored.bold('test_MD5') 89 | if not run_tests(test_MD5): 90 | exit_value = 1 91 | for name in ( 92 | 'visual_studio_mangling', 93 | 'pe_manipulation', 94 | 'elf_manipulation', 95 | 'macho_manipulation', 96 | 'rprc_manipulation', 97 | 'minidump_manipulation', 98 | 'intervals', 99 | ): 100 | module = import_by_name('test_' + name) 101 | print_colored.bold(name) 102 | if not run_tests(module.run_test): 103 | exit_value = 1 104 | sys.exit(exit_value) 105 | -------------------------------------------------------------------------------- 
/tests/test_rprc_manipulation.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import os 4 | __dir__ = os.path.dirname(__file__) 5 | 6 | from test_all import run_tests, assertion, hashlib, open_read 7 | from elfesteem.rprc import RPRC 8 | 9 | def test_RPRC_empty(assertion): 10 | e = RPRC() 11 | d = e.pack() 12 | assertion('865001a37fa24754bd17012e85d2bfff', 13 | hashlib.md5(d).hexdigest(), 14 | 'Creation of a standard empty RPRC') 15 | d = RPRC(d).pack() 16 | assertion('865001a37fa24754bd17012e85d2bfff', 17 | hashlib.md5(d).hexdigest(), 18 | 'Creation of a standard empty RPRC; fix point') 19 | 20 | def test_RPRC_ducati(assertion): 21 | rprc_m3 = open_read(__dir__+'/binary_input/ducati-m3_p768.bin') 22 | assertion('d31c5887b98b37f949da3570b8688983', 23 | hashlib.md5(rprc_m3).hexdigest(), 24 | 'Reading ducati-m3_p768.bin') 25 | e = RPRC(rprc_m3) 26 | d = e.pack() 27 | assertion('d31c5887b98b37f949da3570b8688983', 28 | hashlib.md5(d).hexdigest(), 29 | 'Packing after reading ducati-m3_p768.bin') 30 | # Packed file is identical :-) 31 | d = e.display().encode('latin1') 32 | assertion('c691ff75fffede7701086f6b3c981b3b', 33 | hashlib.md5(d).hexdigest(), 34 | 'Display RPRC file content') 35 | d = e.getsectionbyvad(0x00004000).pack() 36 | assertion('c77c8edf39114343b16b284ffddd2dff', 37 | hashlib.md5(d).hexdigest(), 38 | 'Get existing section by address') 39 | d = e.getsectionbyvad(0x00400000) 40 | assertion(None, d, 'Get non-existing section by address') 41 | d = e.content[0x100:0x120] 42 | assertion('604e845109bba89a3dfa00da8c65cbd1', 43 | hashlib.md5(d).hexdigest(), 44 | 'Extract chunk from raw data') 45 | d = e.virt[0x00004000] 46 | assertion('6b31bdfa7f9bfece263381ffa91bd6a9', 47 | hashlib.md5(d).hexdigest(), 48 | 'Extract byte from mapped memory') 49 | d = e.virt[0x00004000:0x00004020] 50 | assertion('4b22b71399e1e0a6820c769456ce7483', 51 | hashlib.md5(d).hexdigest(), 52 | 'Extract chunk from 
mapped memory') 53 | d = e.virt[0x00003ff0:0x00004020] 54 | assertion('ff2e5ba4b1c82e231f477c01ec805e06', 55 | hashlib.md5(d).hexdigest(), 56 | 'Extract chunk from mapped and unmapped memory') 57 | e.virt[0x00004000:0x00004100] = e.virt[0x00004000:0x00004100] 58 | d = e.pack() 59 | assertion('d31c5887b98b37f949da3570b8688983', 60 | hashlib.md5(d).hexdigest(), 61 | 'Writing in memory (interval)') 62 | e.virt[0x00004000] = e.virt[0x00004000:0x00004100] 63 | d = e.pack() 64 | assertion('d31c5887b98b37f949da3570b8688983', 65 | hashlib.md5(d).hexdigest(), 66 | 'Writing in memory (address)') 67 | try: 68 | e.virt[0x00040000] = e.virt[0x00004000:0x00004100] 69 | assertion(0,1, 'Writing in non-mapped memory') 70 | except ValueError: 71 | pass 72 | try: 73 | e.virt[0x00003ff0:0x00004020] = e.virt[0x00003ff0:0x00004020] 74 | assertion(0,1, 'Writing in partially non-mapped memory') 75 | except ValueError: 76 | pass 77 | 78 | def test_RPRC_invalid(assertion): 79 | try: 80 | e = RPRC(open_read(__dir__+'/binary_input/README.txt')) 81 | assertion(0,1, 'Not an RPRC') 82 | except ValueError: 83 | pass 84 | 85 | def run_test(assertion): 86 | for name, value in dict(globals()).items(): 87 | if name.startswith('test_'): 88 | value(assertion) 89 | 90 | if __name__ == "__main__": 91 | run_tests(run_test) 92 | -------------------------------------------------------------------------------- /elfesteem/rprc.py: -------------------------------------------------------------------------------- 1 | # RPRC syntax: firmware format used by rpmsg 2 | 3 | # The main source of information on this format is 4 | # https://github.com/ohadbc/sysbios-rpmsg 5 | # A tool that reads the content of a RPRC .bin file is 6 | # https://github.com/ohadbc/sysbios-rpmsg/blob/master/src/utils/rprcfmt.h 7 | # https://github.com/ohadbc/sysbios-rpmsg/blob/master/src/utils/readrprc.c 8 | # But the last version of this tool (tagged "new ABI") does not correspond 9 | # to the RPRC files downloadable at 
http://goo.gl/4dndeg 10 | # For example, the size of resources is 76 bytes, while in the new ABI it 11 | # is 96 bytes. All examples of output of 'readrprc' that are found in this 12 | # repository and in the following links have 76-bytes long resources. 13 | # https://github.com/radare/radare2/issues/1602 14 | # http://omappedia.org/wiki/RPMsg_BIOS_Sources 15 | # http://www.omappedia.com/wiki/RPMsg_Tesla 16 | # http://omappedia.org/wiki/Debugging_RPMsg#Readrprc_Utility 17 | # http://omappedia.org/wiki/RPMsg_BIOS_Sources#SYS.2FBIOS_RPMsg_Customizations 18 | # http://omappedia.org/wiki/Design_Overview_-_RPMsg#Firmware_Image_Format 19 | # Currently, we don't know if there is a flag that tells when the "new ABI" 20 | # is used, e.g. a value of 'version' greater than 2 in the header. 21 | 22 | import struct 23 | from elfesteem.cstruct import CData, CStruct, data_null, data_empty 24 | from elfesteem.strpatchwork import StrPatchwork 25 | 26 | # Section types 27 | FW_RESOURCE = 0 28 | FW_TEXT = 1 29 | FW_DATA = 2 30 | 31 | # Resource types (old ABI) 32 | RSC_CARVEOUT = 0 33 | RSC_DEVMEM = 1 34 | RSC_DEVICE = 2 35 | RSC_IRQ = 3 36 | RSC_TRACE = 4 37 | RSC_BOOTADDR = 5 38 | RSC_VRING = 6 39 | 40 | # Resource types (new ABI) 41 | RSC_CARVEOUT = 0 42 | RSC_DEVMEM = 1 43 | RSC_TRACE = 2 44 | RSC_VRING = 3 45 | RSC_VIRTIO_HDR = 4 46 | RSC_VIRTIO_CFG = 5 47 | 48 | class Header(CStruct): 49 | _fields = [ ("magic","4s"), 50 | ("version","u32"), 51 | ("header_len","u32"), 52 | ("data",CData(lambda _:_.header_len))] 53 | magic_txt = property(lambda _:_.magic.decode('latin1')) 54 | def _initialize(self): 55 | CStruct._initialize(self) 56 | # Change default values 57 | self.magic = 'RPRC'.encode('latin1') 58 | self.version = 2 59 | self.header_len = 1012 60 | self.data[0] = data_null * self.header_len 61 | self._size += self.header_len 62 | def display(self): 63 | rep = [] 64 | rep.append('magic number %(magic_txt)s' % self) 65 | rep.append('header version %(version)d' % self) 66 | 
rep.append('header size %(header_len)d' % self) 67 | rep.append('header data') 68 | rep.append(str(self.data)) 69 | return '\n'.join(rep) 70 | 71 | # NB: the following definition is taken from 72 | # https://github.com/ohadbc/sysbios-rpmsg/blob/master/src/utils/rprcfmt.h 73 | # It does not correspond to the RPRC files we have 74 | class ResourceNewABI(CStruct): 75 | _fields = [ ("type","u32"), 76 | ("id","u32"), 77 | ("da","u64"), # Device Address 78 | ("pa","u64"), # Physical Address 79 | ("len","u32"), 80 | ("flags","u32"), 81 | ("reserved","16s"), 82 | ("name","48s"), 83 | ] 84 | 85 | class Resource(CStruct): 86 | _fields = [ ("type","u32"), 87 | ("da","u64"), # Device Address 88 | ("pa","u64"), # Physical Address 89 | ("len","u32"), 90 | ("flags","u32"), 91 | ("name","48s"), 92 | ] 93 | name_txt = property(lambda _:_.name.strip(data_null).decode('latin1')) 94 | def unpack(self, c, o): 95 | CStruct.unpack(self, c, o) 96 | self.offset = o 97 | def display(self): 98 | return 'resource %(type)d, da: %(da)#010x, pa: %(pa)#010x, len: %(len)#010x, name: %(name_txt)s' % self 99 | 100 | class Section(CStruct): 101 | _fields = [ ("type","u32"), 102 | ("da","u64"), # Device Address 103 | ("len","u32"), 104 | ("data",CData(lambda _:_.len))] 105 | def unpack(self, c, o): 106 | CStruct.unpack(self, c, o) 107 | self.offset = o 108 | if self.type == FW_RESOURCE: 109 | self.res_len = Resource(parent=self).bytelen 110 | if self.data.bytelen % self.res_len != 0: 111 | raise ValueError('Section data length %#x not multiple of %#x' % (self.data.bytelen, self.res_len)) 112 | of = 0 113 | self.res = [] 114 | while of + self.res_len <= self.data.bytelen: 115 | r = Resource(parent=self, content=self.data, start=of) 116 | self.res.append(r) 117 | of += self.res_len 118 | def display(self): 119 | rep = [] 120 | rep.append('section %(type)d, address: %(da)#010x, size: %(len)#010x' % self) 121 | if self.type == FW_RESOURCE: 122 | rep.append('resource table: %d' % self.res_len) 123 | for r 
class Layout(object):
    ''' This class manages the layout of the file when loaded in memory.

        The memory is described by 'self.layout', a list of (offset, data)
        sorted by offset: 'data' is what is loaded from 'offset' up to the
        offset of the next element, and None means that nothing is loaded.
    '''
    def __init__(self, overlap=None):
        ''' Initialize with an empty memory.
            'overlap' is mandatory and only 'silent' is implemented;
            'warning' and 'error' raise NotImplementedError and any other
            value raises ValueError. '''
        if overlap == 'silent':
            pass
        elif overlap == 'warning' or overlap == 'error':
            raise NotImplementedError(
                'overlap=%r is not implemented in %s'%(overlap,self.__class__))
        else:
            raise ValueError('Define overlap in %s'%self.__class__)
        self.layout = [(0, None)]
    def __setitem__(self, item, data):
        ''' Load 'data' in memory at interval 'item'. '''
        if item.start == item.stop:
            return
        # Find the position in the layout where the data is loaded
        for i, (o, _) in enumerate(self.layout):
            if o >= item.start: break
        else:
            i = len(self.layout)
        # Find the position in the layout where the data loading ends
        for j, (o, _) in enumerate(self.layout):
            if o > item.stop: break
        else:
            j = len(self.layout)
        # Find what is the value after the end
        _, prv_data = self.layout[j-1]
        # Boundaries inside the interval are replaced by the new interval,
        # followed by what was previously loaded at its end
        self.layout[i:j] = [(item.start, data),(item.stop, prv_data)]
    def __getitem__(self, item):
        ''' Return a list of (slice, data) which indicates what is in
            memory at interval 'item'; the slices that are returned
            are contiguous and add up to the whole 'item' slice. '''
        res = []
        for i, (stop, _) in enumerate(self.layout):
            if item.start >= stop:
                continue
            # The chunk ending at this boundary starts at the previous one
            start, data = self.layout[i-1]
            if item.stop <= start:
                continue
            res.append((slice(max(item.start,start),min(item.stop,stop)),data))
        if stop < item.stop:
            # What is after the last boundary; it should not start before
            # 'item', else the slices would not add up to 'item'
            _, data = self.layout[-1]
            res.append((slice(max(item.start,stop),item.stop),data))
        return res
    def max_addr(self):
        ''' Return the offset of the last boundary of the layout. '''
        return self.layout[-1][0]
192 | if type(item) is slice: 193 | assert item.step is None 194 | start, stop = item.start, item.stop 195 | else: 196 | start, stop = item, item+1 197 | res = data_empty 198 | for i, s in self.layout[start:stop]: 199 | if s is None: res += data_null * (i.stop-i.start) # non-mapped 200 | else: res += s.data[i.start-s.da:i.stop-s.da] 201 | return res 202 | def __setitem__(self, item, data): 203 | # If 'item' is an integer, we write starting from this address 204 | if type(item) is slice: 205 | assert item.step is None 206 | start, stop = item.start, item.stop 207 | assert len(data) == stop-start 208 | else: 209 | start, stop = item, item+len(data) 210 | l = self.layout[start:stop] 211 | if None in [ s for _, s in l]: 212 | raise ValueError('Addresses %#x:%#x not entirely mapped in memory'%(start,stop)) 213 | for i, s in l: 214 | of = i.start-start 215 | s.data[i.start-s.da:i.stop-s.da] = data[i.start-s.da+of:i.stop-s.da+of] 216 | def max_addr(self): 217 | return self.layout.max_addr() 218 | 219 | class RPRC(object): 220 | # API shared by all/most binary containers 221 | architecture = property(lambda _:'ARM') 222 | entrypoint = property(lambda _:-1) 223 | #sections = property(lambda _:_.SHList.shlist) 224 | symbols = property(lambda _:()) 225 | dynsyms = property(lambda _:()) 226 | 227 | sex = '<' 228 | wsize = 32 229 | virt = property(lambda _:_._virt) 230 | def __init__(self, data = None, **kargs): 231 | self.sections = [] 232 | if data is not None: 233 | self.content = StrPatchwork(data) 234 | self.parse_content() 235 | else: 236 | # Create a RPRC file with no section 237 | self.hdr = Header(parent=self) 238 | self._virt = Virtual(self) 239 | def parse_content(self): 240 | h = struct.unpack("B"*4, self.content[:4]) 241 | if h != ( 0x52,0x50,0x52,0x43 ): # magic number, RPRC 242 | raise ValueError("Not an RPRC") 243 | self.hdr = Header(parent=self, content=self.content) 244 | of = self.hdr.bytelen 245 | while of < len(self.content): 246 | s = Section(parent=self, 
content=self.content, start=of) 247 | self.sections.append(s) 248 | of += s.bytelen 249 | def pack(self): 250 | c = StrPatchwork() 251 | c[0] = self.hdr.pack() 252 | of = self.hdr.bytelen 253 | for s in self.sections: 254 | c[of] = s.pack() 255 | of += s.bytelen 256 | return c.pack() 257 | def display(self): 258 | # Same output as 'readrprc' 259 | rep = [self.hdr.display()] + [s.display() for s in self.sections] 260 | return '\n'.join(rep) 261 | def getsectionbyvad(self, ad): 262 | # Same API as ELF or PE, but different implementation for accessing 263 | # data by virtual addresses: a mechanism entirely inside 'virt' 264 | # rather than split between two classes; future versions of 265 | # elfesteem should probably do the same for all binary containers. 266 | return self.virt.layout[ad:ad+1][0][1] 267 | 268 | if __name__ == "__main__": 269 | import sys, code 270 | if len(sys.argv) > 2: 271 | for f in sys.argv[1:]: 272 | print('File: %s'%f) 273 | fd = open(f, 'rb') 274 | try: 275 | raw = fd.read() 276 | finally: 277 | fd.close() 278 | e = RPRC(raw) 279 | print (e.display()) 280 | sys.exit(0) 281 | if len(sys.argv) == 2: 282 | fd = open(sys.argv[1], 'rb') 283 | try: 284 | raw = fd.read() 285 | finally: 286 | fd.close() 287 | e = RPRC(raw) 288 | code.interact('Interactive Python Console', None, locals()) 289 | -------------------------------------------------------------------------------- /elfesteem/macho/common.py: -------------------------------------------------------------------------------- 1 | #! 
# Variables defined below and that need to be visible when import *.
def ImportAll(**kargs):
    # Each keyword argument becomes a global variable of this module,
    # and its name is appended to __all__.
    exported, namespace = __all__, globals()
    for name in kargs.keys():
        exported.append(name)
    for name, value in kargs.items():
        namespace[name] = value
18, 57 | CPU_TYPE_POWERPC64 = 18 | CPU_ARCH_ABI64, 58 | CPU_TYPE_VEO = 255, 59 | no_name = ('CPU_TYPE_I386', 'CPU_TYPE_I860_LITTLE',) 60 | ) 61 | 62 | ImportAll( 63 | CPU_SUBTYPE_MASK = 0xff000000, # mask for feature flags 64 | CPU_SUBTYPE_LIB64 = 0x80000000, # 64 bit libraries 65 | ) 66 | 67 | # VAX subtypes. 68 | ImportAll( 69 | CPU_SUBTYPE_VAX_ALL = 0, 70 | CPU_SUBTYPE_VAX780 = 1, 71 | CPU_SUBTYPE_VAX785 = 2, 72 | CPU_SUBTYPE_VAX750 = 3, 73 | CPU_SUBTYPE_VAX730 = 4, 74 | CPU_SUBTYPE_UVAXI = 5, 75 | CPU_SUBTYPE_UVAXII = 6, 76 | CPU_SUBTYPE_VAX8200 = 7, 77 | CPU_SUBTYPE_VAX8500 = 8, 78 | CPU_SUBTYPE_VAX8600 = 9, 79 | CPU_SUBTYPE_VAX8650 = 10, 80 | CPU_SUBTYPE_VAX8800 = 11, 81 | CPU_SUBTYPE_UVAXIII = 12, 82 | ) 83 | 84 | # ROMP subtypes. 85 | ImportAll( 86 | CPU_SUBTYPE_RT_ALL = 0, 87 | CPU_SUBTYPE_RT_PC = 1, 88 | CPU_SUBTYPE_RT_APC = 2, 89 | CPU_SUBTYPE_RT_135 = 3, 90 | ) 91 | 92 | # 2032/32332/32532 subtypes. 93 | ImportAll( 94 | CPU_SUBTYPE_MMAX_ALL = 0, 95 | CPU_SUBTYPE_MMAX_DPC = 1, # 032 CPU 96 | CPU_SUBTYPE_SQT = 2, 97 | CPU_SUBTYPE_MMAX_APC_FPU = 3, # 32081 FPU 98 | CPU_SUBTYPE_MMAX_APC_FPA = 4, # Weitek FPA 99 | CPU_SUBTYPE_MMAX_XPC = 5, # 532 CPU 100 | ) 101 | 102 | # 680x0 subtypes 103 | # NeXT used to consider 68030 code as generic 68000 code. 104 | # For backwards compatability: 105 | # * CPU_SUBTYPE_MC68030 symbol has been preserved for source code 106 | # compatability. 107 | # * CPU_SUBTYPE_MC680x0_ALL has been defined to be the same 108 | # subtype as CPU_SUBTYPE_MC68030 for binary comatability. 109 | # * CPU_SUBTYPE_MC68030_ONLY has been added to allow new object 110 | # files to be tagged as containing 68030-specific instructions. 111 | ImportAll( 112 | CPU_SUBTYPE_MC680x0_ALL = 1, 113 | CPU_SUBTYPE_MC68030 = 1, 114 | CPU_SUBTYPE_MC68040 = 2, 115 | CPU_SUBTYPE_MC68030_ONLY = 3, 116 | ) 117 | 118 | # I386 subtypes. 
def CPU_SUBTYPE_INTEL(f, m):
    # The subtype value is 'f' plus 'm' shifted left by 4 bits.
    shifted_m = m << 4
    return f + shifted_m
# MC98000 (PowerPC) subtypes
ImportAll(
  CPU_SUBTYPE_MC98000_ALL = 0,
  # Misspelled variant of CPU_SUBTYPE_MC98000_ALL, which was the only
  # name defined here; kept for backwards compatibility.
  CPU_SUBTYPE_MC98000_AL  = 0,
  CPU_SUBTYPE_MC98601     = 1,
  )
231 | ImportAll( 232 | CPU_SUBTYPE_VEO_1 = 1, 233 | CPU_SUBTYPE_VEO_2 = 2, 234 | CPU_SUBTYPE_VEO_3 = 3, 235 | CPU_SUBTYPE_VEO_4 = 4, 236 | CPU_SUBTYPE_VEO_ALL = 2, # CPU_SUBTYPE_VEO_2 237 | ) 238 | 239 | # Acorn subtypes 240 | ImportAll( 241 | CPU_SUBTYPE_ARM_ALL = 0, 242 | CPU_SUBTYPE_ARM_V4T = 5, 243 | CPU_SUBTYPE_ARM_V6 = 6, 244 | CPU_SUBTYPE_ARM_V5TEJ = 7, 245 | CPU_SUBTYPE_ARM_XSCALE = 8, 246 | CPU_SUBTYPE_ARM_V7 = 9, 247 | CPU_SUBTYPE_ARM_V7F = 10, # Cortex A9 248 | CPU_SUBTYPE_ARM_V7S = 11, # Swift 249 | CPU_SUBTYPE_ARM_V7K = 12, 250 | CPU_SUBTYPE_ARM_V8 = 13, 251 | CPU_SUBTYPE_ARM_V6M = 14, # Not meant to be run under xnu 252 | CPU_SUBTYPE_ARM_V7M = 15, # Not meant to be run under xnu 253 | CPU_SUBTYPE_ARM_V7EM = 16, # Not meant to be run under xnu 254 | 255 | CPU_SUBTYPE_ARM64_ALL = 0, 256 | CPU_SUBTYPE_ARM64_V8 = 1, 257 | ) 258 | 259 | 260 | #### Source: /usr/include/mach-o/reloc.h 261 | 262 | # * In reloc.h, there are two data structures: relocation_info and scattered_relocation_info, which are merged in one structure below. 
class relocation_info(CStruct):
    # Merge of the two data structures of reloc.h: relocation_info and
    # scattered_relocation_info. Both are made of two 32-bit words, and
    # the highest bit of the first word (R_SCATTERED) tells which one
    # it is. The properties with suffix _1 decode the scattered layout,
    # the ones with suffix _0 decode the non-scattered layout, and the
    # generic ones select the relevant variant.
    _fields = [
        ("relocaddr","u32"),
        ("relocsym","u32"),
        ]
    scattered = property(lambda _:(_.relocaddr&0x80000000)>>31)
    def address(self):
        # Only scattered relocations have a 24-bit address, the other
        # bits of the word being pcrel/length/type. For non-scattered
        # relocations the whole word is the address, and it must not be
        # truncated.
        if self.scattered: return self.relocaddr&0x00ffffff
        else:              return self.relocaddr
    address = property(address)
    # Scattered
    pcrel_1  = property(lambda _:(_.relocaddr&0x40000000)>>30)
    length_1 = property(lambda _:(_.relocaddr&0x30000000)>>28)
    type_1   = property(lambda _:(_.relocaddr&0x0f000000)>>24)
    # Not scattered
    type_0   = property(lambda _:(_.relocsym&0xf0000000)>>28)
    extern_0 = property(lambda _:(_.relocsym&0x08000000)>>27)
    length_0 = property(lambda _:(_.relocsym&0x06000000)>>25)
    pcrel_0  = property(lambda _:(_.relocsym&0x01000000)>>24)
    value    = property(lambda _:(_.relocsym&0x00ffffff))
    # Generic
    type   = property(lambda _:getattr(_,"type_%s"%_.scattered))
    # NB: there is no extern_1, 'extern' is only available for
    # non-scattered relocations.
    extern = property(lambda _:getattr(_,"extern_%s"%_.scattered))
    length = property(lambda _:getattr(_,"length_%s"%_.scattered))
    pcrel  = property(lambda _:getattr(_,"pcrel_%s"%_.scattered))
    def symbolNumOrValue(self):
        # Scattered: the whole second word; else its 24 lowest bits.
        if self.scattered: return self.relocsym
        else:              return self.value
    symbolNumOrValue = property(symbolNumOrValue)
    def __repr__(self):
        fields = [ "pcrel", "length" ]
        if not self.scattered:
            fields.append("extern")
        fields.extend(["type", "scattered", "symbolNumOrValue"])
        return "<" + self.__class__.__name__ + " " + " -- ".join([x + " " + hex(getattr(self,x)) for x in fields]) + ">"
305 | ImportAll( 306 | GENERIC_RELOC_VANILLA = 0, # generic relocation as described above 307 | GENERIC_RELOC_PAIR = 1, # Only follows a GENERIC_RELOC_SECTDIFF 308 | GENERIC_RELOC_SECTDIFF = 2, 309 | GENERIC_RELOC_PB_LA_PTR = 3, # prebound lazy pointer */ 310 | GENERIC_RELOC_LOCAL_SECTDIFF = 4, 311 | GENERIC_RELOC_TLV = 5, # thread local variables */ 312 | ) 313 | 314 | #### Source: /usr/include/mach-o/x86_64/reloc.h 315 | # Relocations for x86_64 are a bit different than for other architectures in 316 | # Mach-O: Scattered relocations are not used. Almost all relocations produced 317 | # by the compiler are external relocations. An external relocation has the 318 | # r_extern bit set to 1 and the r_symbolnum field contains the symbol table 319 | # index of the target label. 320 | # (...) 321 | ImportAll( 322 | X86_64_RELOC_UNSIGNED = 0, # for absolute addresses 323 | X86_64_RELOC_SIGNED = 1, # for signed 32-bit displacement 324 | X86_64_RELOC_BRANCH = 2, # a CALL/JMP instruction with 32-bit displacement 325 | X86_64_RELOC_GOT_LOAD = 3, # a MOVQ load of a GOT entry 326 | X86_64_RELOC_GOT = 4, # other GOT references 327 | X86_64_RELOC_SUBTRACTOR = 5, # must be followed by a X86_64_RELOC_UNSIGNED 328 | X86_64_RELOC_SIGNED_1 = 6, # for signed 32-bit displacement with a -1 addend 329 | X86_64_RELOC_SIGNED_2 = 7, # for signed 32-bit displacement with a -2 addend 330 | X86_64_RELOC_SIGNED_4 = 8, # for signed 32-bit displacement with a -4 addend 331 | X86_64_RELOC_TLV = 9, # for thread local variables 332 | ) 333 | -------------------------------------------------------------------------------- /elfesteem/new_cstruct.py: -------------------------------------------------------------------------------- 1 | #! 
import struct
import re

# To be compatible with python 2 and python 3
data_empty = struct.pack("")
data_null = struct.pack("B",0)

# type_size maps a type name to its struct format character (for the
# names 'u08', 's16', 'd', ...) or to its size in bits (for the raw
# format characters of the two loops below, except 'q').
type_size = {}
# size2type and size2type_s map a size in bits to the struct format
# character of the unsigned (resp. signed) integer of that size.
size2type = {}
size2type_s = {}

for t in 'B', 'H', 'I', 'Q':
    s = struct.calcsize(t)
    type_size[t] = s*8
    size2type[s*8] = t

for t in 'b', 'h', 'i', 'q':
    s = struct.calcsize(t)
    type_size[t] = s*8
    size2type_s[s*8] = t

type_size['u08'] = size2type[8]
type_size['u16'] = size2type[16]
type_size['u32'] = size2type[32]
type_size['u64'] = size2type[64]

type_size['s08'] = size2type_s[8]
type_size['s16'] = size2type_s[16]
type_size['s32'] = size2type_s[32]
type_size['s64'] = size2type_s[64]

type_size['d'] = 'd'
type_size['f'] = 'f'
type_size['q'] = 'q'
type_size['ptr'] = 'ptr'

# Byte order prefix of struct formats, indexed by the '_sex' parameter
sex_types = {0:'<', 1:'>'}

def fix_size(fields, wsize):
    # Return a copy of the list of (name, type) pairs 'fields' where each
    # type is replaced by its value in type_size; types ending with 's'
    # are kept as is and 'ptr' becomes the unsigned integer of 'wsize'
    # bits. Raise ValueError for an unknown type.
    out = []
    for name, v in fields:
        if v.endswith("s"):
            pass
        elif v == "ptr":
            v = size2type[wsize]
        elif not v in type_size:
            raise ValueError("unkown Cstruct type", v)
        else:
            v = type_size[v]
        out.append((name, v))
    fields = out
    return fields

def real_fmt(fmt, wsize):
    # Return the value of type_size for 'fmt'; 'ptr' becomes the unsigned
    # integer of 'wsize' bits and unknown formats (e.g. '4s') are
    # returned unchanged.
    if fmt == "ptr":
        v = size2type[wsize]
    elif fmt in type_size:
        v = type_size[fmt]
    else:
        v = fmt
    return v

# All the subclasses of CStruct, by name; used to find sub-structures
all_cstructs = {}
class Cstruct_Metaclass(type):
    # Metaclass of CStruct. For each field of '_fields' it creates a
    # property, with the value stored in attribute '_field_<name>'; the
    # accessors can be overridden by defining get_<name>, set_<name> or
    # del_<name> in the class. It also provides the constructors
    # 'unpack' and 'unpack_l' to the classes.
    _prefix = "_field_"
    def __new__(cls, name, bases, dct):
        if name == 'CStructBase':
            o = type.__new__(cls, name, bases, dct)
            return o
        # A subclass that does not define '_fields' inherits the fields
        # and the properties of its parent.
        for fields in dct.get('_fields', ()):
            fname = fields[0]
            if fname in ['parent', 'parent_head']:
                raise ValueError('field name will confuse internal structs',
                                 repr(fname))
            # NB: fname is bound as a default argument, because closures
            # would all see the last value of the loop variable.
            dct[fname] = property(dct.pop("get_"+fname,
                                          lambda self,fname=fname: getattr(self,cls._prefix+fname)),
                                  dct.pop("set_"+fname,
                                          lambda self,v,fname=fname: setattr(self,cls._prefix+fname,v)),
                                  dct.pop("del_"+fname, None))

        o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct)
        if name != "CStruct":
            all_cstructs[name] = o
        return o

    def unpack_l(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
        # Parse an instance of 'cls' from 's', starting at offset 'off'.
        # Return (instance, number of bytes that have been parsed).
        # When neither '_sex' nor '_wsize' is given, they are taken from
        # 'parent_head', or default to 0 (little endian) and 32.
        if _sex is None and _wsize is None:
            # get sex and size from parent
            if parent_head:
                _sex = parent_head._sex
                _wsize = parent_head._wsize
            else:
                _sex = 0
                _wsize = 32
        c = cls(_sex = _sex, _wsize = _wsize)
        if parent_head is None:
            parent_head = c
        c.parent_head = parent_head

        # of1 is the offset where the next value starts, of2 where it ends
        of1 = off
        for field in c._fields:
            cpt = None
            of2 = of1
            if len(field) == 2:
                fname, ffmt = field
            elif len(field) == 3:
                # cpt is a function computing the number of elements
                # from the fields that have already been parsed
                fname, ffmt, cpt = field
            if ffmt in type_size or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)):
                # basic types
                if cpt:
                    value = []
                    i = 0
                    while i < cpt(c):
                        fmt = real_fmt(ffmt, _wsize)
                        of2 = of1+struct.calcsize(fmt)
                        value.append(struct.unpack(c.sex+fmt, s[of1:of2])[0])
                        of1 = of2
                        i+=1
                else:
                    fmt = real_fmt(ffmt, _wsize)
                    of2 = of1+struct.calcsize(fmt)
                    value = struct.unpack(c.sex+fmt, s[of1:of2])[0]
            elif ffmt == "sz": # null terminated special case
                of2 = s.find(data_null, of1)
                if of2 == -1:
                    raise ValueError('no null char in string!')
                of2 += 1
                value = s[of1:of2-1]
            elif ffmt in all_cstructs:
                # sub structures
                if cpt:
                    value = []
                    i = 0
                    while i < cpt(c):
                        v, l = all_cstructs[ffmt].unpack_l(s, of1, parent_head, _sex, _wsize)
                        v.parent = c
                        value.append(v)
                        of2 = of1 + l
                        of1 = of2
                        i += 1
                else:
                    value, l = all_cstructs[ffmt].unpack_l(s, of1, parent_head, _sex, _wsize)
                    value.parent = c
                    of2 = of1 + l
            elif isinstance(ffmt, tuple):
                # custom (unpack, pack) functions
                f_get, f_set = ffmt
                value, of2 = f_get(c, s, of1)
            else:
                raise ValueError('unknown class', ffmt)
            of1 = of2
            setattr(c, CStruct._prefix+fname, value)

        # of1 is equal to of2 after each field, and is also defined for
        # a structure with no field.
        return c, of1-off

    def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
        # Same as unpack_l, but return only the instance.
        c, l = cls.unpack_l(s, off = off,
                            parent_head = parent_head, _sex=_sex, _wsize=_wsize)
        return c

CStructBase = Cstruct_Metaclass('CStructBase', (object,), {})
class CStruct(CStructBase):
    # Base class of the structures. A subclass defines '_fields', a list
    # of (name, type) or (name, type, count function), where type is
    #  - a basic type known by type_size, or '<n>s' for a string of
    #    fixed length,
    #  - 'sz' for a null terminated string,
    #  - the name of another subclass of CStruct,
    #  - a tuple (unpack function, pack function).
    # If '_packformat' is defined, it is used as byte order prefix
    # instead of the one deduced from '_sex'.
    _packformat = ""
    _fields = []

    def __init__(self, parent_head = None, _sex = None, _wsize = None, **kargs):
        # Create a structure where every field is None, except the
        # ones given as keyword arguments.
        self.parent_head = parent_head
        kargs = dict(kargs)
        #if not sex or size: get the one of the parent
        if _sex is None and _wsize is None:
            if parent_head:
                _sex = parent_head._sex
                _wsize = parent_head._wsize
            else:
                # else default sex & size
                _sex = 0
                _wsize = 32
        self.sex = _sex
        self.wsize = _wsize
        if self._packformat:
            self.sex = self._packformat
        else:
            self.sex = sex_types[_sex]
        for f in self._fields:
            setattr(self, CStruct._prefix+f[0], None)
        if kargs:
            for k, v in kargs.items():
                self.__dict__[CStruct._prefix+k] = v

    def pack(self):
        # Return the binary representation of the structure. For basic
        # types, a value None is output as null bytes.
        out = data_empty
        for field in self._fields:
            cpt = None
            if len(field) == 2:
                fname, ffmt = field
            elif len(field) == 3:
                fname, ffmt, cpt = field

            value = getattr(self, CStruct._prefix+fname)
            if ffmt in type_size or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)):
                # basic types
                fmt = real_fmt(ffmt, self.wsize)
                if cpt is None:
                    if value is None:
                        o = struct.calcsize(fmt)*data_null
                    else:
                        o = struct.pack(self.sex+fmt, value)
                else:
                    o = data_empty
                    for v in value:
                        # The test is on the element, not on the list
                        if v is None:
                            o += struct.calcsize(fmt)*data_null
                        else:
                            o += struct.pack(self.sex+fmt, v)

            elif ffmt == "sz": # null terminated special case
                o = value+data_null
            elif ffmt in all_cstructs:
                # sub structures
                if cpt is None:
                    o = value.pack()
                else:
                    o = data_empty
                    for v in value:
                        o += v.pack()
            elif isinstance(ffmt, tuple):
                f_get, f_set = ffmt
                o = f_set(self, value)

            else:
                raise ValueError('unknown class', ffmt)
            out += o

        return out

    def __len__(self):
        return len(self.pack())

    def __str__(self):
        raise AttributeError("Use pack() instead of str()")

    def __repr__(self):
        return "<%s=%s>" % (self.__class__.__name__, "/".join(map(lambda x:repr(getattr(self,x[0])),self._fields)))

    def __getitem__(self, item): # to work with format strings
        return getattr(self, item)
def sets(self, value): 299 | return str(value)+'\x00' 300 | 301 | """ 302 | h field is a 4 len string 303 | """ 304 | class c5(CStruct): 305 | _fields = [("g", "u16"), 306 | ("h", "4s"), 307 | ] 308 | 309 | """ 310 | j field is a nul terminated string 311 | """ 312 | class c6(CStruct): 313 | _fields = [("i", "u16"), 314 | ("j", "sz"), 315 | ("k", "u16"), 316 | ] 317 | 318 | print(all_cstructs) 319 | 320 | s1 = struct.pack('HHI', 1111, 2222, 333333333) 321 | c = c1.unpack(s1) 322 | print(repr(c)) 323 | assert len(c) == 8 324 | s2 = c.pack() 325 | assert s1 == s2 326 | print(repr(s2)) 327 | print(repr(c1.unpack(s2))) 328 | 329 | s3 = struct.pack('HHI', 4444, 5555, 666666666)+s2 330 | print(repr(s3)) 331 | assert len(s3) == 16 332 | c = c2.unpack(s3) 333 | print(repr(c)) 334 | s4 = c.pack() 335 | print("%r %r"%(s3,s4)) 336 | assert s3 == s4 337 | assert c.c2_c.parent_head == c 338 | 339 | 340 | s5 = struct.pack('HHH', 2, 5555, 6666)+s1*2+struct.pack('H', 9999) 341 | c = c3.unpack(s5) 342 | assert len(c) == 24 343 | print(repr(c)) 344 | print(c.b) 345 | print(c.c) 346 | print(c.c[0].c1_field1) 347 | 348 | s6 = c.pack() 349 | print("%r %r"%(s5,s6)) 350 | assert s5 == s6 351 | 352 | c = c1() 353 | c.c1_field1 = 1111 354 | c.c1_field2 = 2222 355 | c.c1_field3 = 333333333 356 | assert c.pack() == s1 357 | 358 | s7 = struct.pack('H', 8888)+"fffff\x00"+struct.pack('H', 9999) 359 | c = c4.unpack(s7) 360 | print(repr(c)) 361 | print(repr(c.e)) 362 | print(repr(c.f)) 363 | 364 | print(repr(s7)) 365 | print(repr(c.pack())) 366 | assert s7 == c.pack() 367 | 368 | s8 = struct.pack('H4s', 8888, "abcd") 369 | c = c5.unpack(s8) 370 | print(repr(c)) 371 | assert s8 == c.pack() 372 | 373 | 374 | s9 = struct.pack('H', 9999)+ "toto\x00" + struct.pack('H', 1010) 375 | print(repr(s9)) 376 | c = c6.unpack(s9) 377 | print("%r %r"%(c,c.pack())) 378 | assert s9 == c.pack() 379 | 380 | -------------------------------------------------------------------------------- /elfesteem/minidump_init.py: 
class MemorySegment(object):
    """Stand for a segment in memory with additionnal information"""

    def __init__(self, offset, memory_desc, module=None, memory_info=None):
        self.offset, self.memory_desc = offset, memory_desc
        self.module, self.memory_info = module, memory_info
        # The Minidump object is the head of the memory descriptor
        self.minidump = memory_desc.parent_head

    # Address where the segment starts
    address = property(lambda self: self.memory_desc.StartOfMemoryRange)

    def size(self):
        # The size is not stored at the same place in the two kinds of
        # memory descriptors
        desc = self.memory_desc
        if isinstance(desc, mp.MemoryDescriptor64):
            return desc.DataSize
        if isinstance(desc, mp.MemoryDescriptor):
            return desc.Memory.DataSize
        raise TypeError
    size = property(size)

    def name(self):
        # Name of the module, or an empty string when there is no module
        if not self.module:
            return ""
        return self.module.ModuleName
    name = property(name)

    # Part of the content of the minidump corresponding to this segment
    content = property(
        lambda self: self.minidump._content[self.offset:self.offset + self.size])

    def protect(self):
        # Protection of the memory, or None when there is no memory info
        info = self.memory_info
        if info:
            return info.Protect
        return None
    protect = property(protect)

    def pretty_protect(self):
        flags = self.protect
        if flags is None:
            return "UNKNOWN"
        return mp.memProtect[flags]
    pretty_protect = property(pretty_protect)

    def dump(self):
        return mp.data_str(self.content)
(LocationDescriptor handling) 67 | - only Stream relative to memory mapping are implemented 68 | 69 | Official description is available on MSDN: 70 | https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx 71 | """ 72 | 73 | _sex = 0 74 | _wsize = 32 75 | 76 | def entrypoint(self): 77 | if not len(self.threads.Threads): return -1 78 | pc_reg = () 79 | if self.systeminfo.ProcessorArchitecture == \ 80 | mp.processorArchitecture.PROCESSOR_ARCHITECTURE_X86: 81 | pc_reg = self.threads.Threads[0].ThreadContext.Eip 82 | if self.systeminfo.ProcessorArchitecture == \ 83 | mp.processorArchitecture.PROCESSOR_ARCHITECTURE_AMD64: 84 | pc_reg = self.threads.Threads[0].ThreadContext.Rip 85 | if not len(pc_reg): return -1 86 | return pc_reg[0] 87 | architecture = property(lambda _:_.systeminfo.pretty_processor_architecture[23:]) 88 | entrypoint = property(entrypoint) 89 | sections = property(lambda _:_.memory.values()) 90 | symbols = () 91 | dynsyms = () 92 | 93 | def __init__(self, minidump_str): 94 | self._content = StrPatchwork(minidump_str) 95 | 96 | # Specific streams 97 | self.modulelist = None 98 | self.memory64list = None 99 | self.memorylist = None 100 | self.memoryinfolist = None 101 | self.systeminfo = None 102 | 103 | # Get information 104 | self.streams = [] 105 | self.threads = None 106 | self.parse_content() 107 | 108 | # Memory information 109 | self.memory = {} # base address (virtual) -> Memory information 110 | self.build_memory() 111 | self.virt = ContentVirtual(self) 112 | 113 | def parse_content(self): 114 | """Build structures corresponding to current content""" 115 | 116 | # Header 117 | offset = 0 118 | self.minidumpHDR = mp.MinidumpHDR.unpack(self._content, offset, self) 119 | assert self.minidumpHDR.Magic == 0x504d444d 120 | 121 | # Streams 122 | base_offset = self.minidumpHDR.StreamDirectoryRva.rva 123 | empty_stream = mp.StreamDirectory(StreamType=0, 124 | Location=mp.LocationDescriptor(DataSize=0, 125 | Rva=mp.Rva(rva=0) 126 | ) 127 | ) 128 | 
streamdir_size = len(empty_stream) 129 | for i in range(self.minidumpHDR.NumberOfStreams): 130 | stream_offset = base_offset + i * streamdir_size 131 | stream = mp.StreamDirectory.unpack(self._content, stream_offset, self) 132 | self.streams.append(stream) 133 | 134 | # Launch specific action depending on the stream 135 | datasize = stream.Location.DataSize 136 | offset = stream.Location.Rva.rva 137 | if stream.StreamType == mp.streamType.ModuleListStream: 138 | self.modulelist = mp.ModuleList.unpack(self._content, offset, self) 139 | if datasize == 8+self.modulelist.NumberOfModules*108: 140 | self.modulelist = mp.ModuleListWithPadding.unpack(self._content, offset, self) 141 | elif stream.StreamType == mp.streamType.MemoryListStream: 142 | self.memorylist = mp.MemoryList.unpack(self._content, offset, self) 143 | if datasize == 8+self.memorylist.NumberOfMemoryRanges*16: 144 | self.memorylist = mp.MemoryListWithPadding.unpack(self._content, offset, self) 145 | elif stream.StreamType == mp.streamType.Memory64ListStream: 146 | self.memory64list = mp.Memory64List.unpack(self._content, offset, self) 147 | elif stream.StreamType == mp.streamType.MemoryInfoListStream: 148 | self.memoryinfolist = mp.MemoryInfoList.unpack(self._content, offset, self) 149 | elif stream.StreamType == mp.streamType.SystemInfoStream: 150 | self.systeminfo = mp.SystemInfo.unpack(self._content, offset, self) 151 | elif stream.StreamType == mp.streamType.MiscInfoStream: 152 | self.miscinfo = mp.MiscInfo.unpack(self._content, offset, self) 153 | # Breakpad extension types 154 | elif stream.StreamType == mp.MDminidumpType.MD_ASSERTION_INFO_STREAM: 155 | self.breakpad_assertion = mp.BreakpadAssertion.unpack(self._content, offset, self) 156 | elif stream.StreamType == mp.MDminidumpType.MD_BREAKPAD_INFO_STREAM: 157 | self.breakpad_info = mp.BreakpadRawInfo.unpack(self._content, offset, self) 158 | 159 | # Some streams need the SystemInfo stream to work 160 | if self.systeminfo is None: 161 | return 162 
def get(self, virt_start, virt_stop):
    """Return the raw bytes backing the virtual range [virt_start:virt_stop].

    The first key of ``self.memory`` that lies inside
    ``[virt_start, virt_stop]`` selects the segment to read from.
    Returns an empty string when no key lies in that range.
    Raises RuntimeError when the requested end goes past the size of the
    selected segment (reading across segments is not implemented).
    """
    # Pick the first segment whose base address lies in the requested range
    base = next((candidate for candidate in self.memory
                 if virt_start <= candidate <= virt_stop), None)
    if base is None:
        return ""

    segment = self.memory[base]
    first = base - virt_start
    past_end = virt_stop - base
    if past_end > segment.size:
        raise RuntimeError("Multi-page not implemented")

    # Offsets are relative to the position of the segment in the file content
    begin = segment.offset + first
    end = segment.offset + past_end
    return self._content[begin:end]
class ContentVirtual(object):
    """ Stub for binary.py: virtual-memory view attached to a minidump """

    def __init__(self, minidump):
        # The minidump object whose 'memory' mapping is inspected
        self.parent = minidump

    def max_addr(self):
        """Return the highest end address (address + size) among the
        memory segments of the parent, or -1 when there is none."""
        segment_ends = [segment.address + segment.size
                        for segment in self.parent.memory.values()]
        return max([-1] + segment_ends)
def test_ELF_empty(assertion):
    """Check the default ELF object: its packed digest, its lack of
    symbols, the pack/parse/pack fix point and its relocatable status."""
    empty_md5 = '0ddf18391c150850c72257b3f3caa67b'
    blank = ELF()
    packed = blank.pack()
    assertion(empty_md5,
              hashlib.md5(packed).hexdigest(),
              'Creation of a standard empty ELF')
    assertion(0,
              len(blank.symbols),
              'Empty ELF has no symbols')
    # Parsing the packed bytes and packing again must give the same digest
    repacked = ELF(packed).pack()
    assertion(empty_md5,
              hashlib.md5(repacked).hexdigest(),
              'Creation of a standard empty ELF; fix point')
    assertion(True,
              blank.has_relocatable_sections(),
              'Standard empty ELF is relocatable')
hashlib.md5(elf_small).hexdigest(), 62 | 'Reading elf_small.out') 63 | e = ELF(elf_small) 64 | d = e.pack() 65 | assertion('d5284d5f438e25ef5502a0c1de97d84f', 66 | hashlib.md5(d).hexdigest(), 67 | 'Packing after reading elf_small.out') 68 | # Packed file is identical :-) 69 | d = repr(e.ph).encode('latin1') 70 | assertion('ab4b1e52e7532789592878872910a2a1', 71 | hashlib.md5(d).hexdigest(), 72 | 'Display Program Headers') 73 | d = repr(e.sh).encode('latin1') 74 | assertion('ddf01165114eb70bd27910e4c5b03c09', 75 | hashlib.md5(d).hexdigest(), 76 | 'Display Section Headers (repr)') 77 | d = e.sh.readelf_display().encode('latin1') 78 | assertion('08da11fa164d7013561db398c068ac71', 79 | hashlib.md5(d).hexdigest(), 80 | 'Display Section Headers (readelf)') 81 | d = e.getsectionbyname('.symtab').readelf_display().encode('latin1') 82 | assertion('943434f4cde658b1659b7d8db39d9e60', 83 | hashlib.md5(d).hexdigest(), 84 | 'Display Symbol Table') 85 | assertion(' 49: 0804a01c 0 NOTYPE GLOBAL DEFAULT ABS _edata', 86 | e.getsectionbyname('.symtab')['_edata'].readelf_display(), 87 | 'Get symbol by name, found') 88 | assertion(' 2: 00000000 0 FUNC GLOBAL DEFAULT UND __stack_chk_fail', 89 | e.getsectionbyname('.dynsym')[2].readelf_display(), 90 | 'Get symbol by index, found') 91 | d = e.getsectionbytype(elf.SHT_SYMTAB).pack() 92 | assertion('4ed5a808faff1ca7c6a766ae45ebf377', 93 | hashlib.md5(d).hexdigest(), 94 | 'Get existing section by type') 95 | d = e.getsectionbyname('.text').pack() 96 | assertion('7149c6e4b8baaab8beebfeb818585638', 97 | hashlib.md5(d).hexdigest(), 98 | 'Get existing section by name') 99 | d = e.getsectionbyvad(0x080483d0+0x100).pack() 100 | assertion('7149c6e4b8baaab8beebfeb818585638', 101 | hashlib.md5(d).hexdigest(), 102 | 'Get existing section by address') 103 | d = e.getsectionbyname('no_sect') 104 | assertion(None, d, 'Get non-existing section by name') 105 | d = e.getsectionbyvad(0x1000) 106 | assertion(None, d, 'Get non-existing section by address') 107 
def test_ELF_small64(assertion):
    """Read elf64_small.out, check that packing gives back the same
    bytes, then compare the readelf-style renderings by MD5 digest."""
    def md5hex(data):
        return hashlib.md5(data).hexdigest()

    file_md5 = 'dc21d928bb6a3a0fa59b17fafe803d50'
    raw = open_read(__dir__+'/binary_input/elf64_small.out')
    assertion(file_md5,
              md5hex(raw),
              'Reading elf64_small.out')
    parsed = ELF(raw)
    # Packed file is identical :-)
    assertion(file_md5,
              md5hex(parsed.pack()),
              'Packing after reading elf64_small.out')
    headers = parsed.sh.readelf_display().encode('latin1')
    assertion('0454c8b5354b3eda58fce252d5d48621',
              md5hex(headers),
              'Display Section Headers (readelf, 64bit)')
    # Per-section renderings: (section name, expected digest, message)
    section_checks = (
        ('.symtab', '452e64fb0f2dad5c0e44d83e57b9d82b',
         'Display Symbol Table (elf64)'),
        ('.rela.dyn', '650cf3f99117d39d63fae73232e09acf',
         'Display Reloc Table (elf64)'),
    )
    for section_name, digest, message in section_checks:
        section = parsed.getsectionbyname(section_name)
        rendering = section.readelf_display().encode('latin1')
        assertion(digest, md5hex(rendering), message)
def test_ELF_invalid_shstrndx(assertion):
    """Set shstrndx to 20 in a default ELF, parse the packed result and
    check the logged error and the value of Ehdr.shoff."""
    global log_history
    broken = ELF()
    broken.Ehdr.shstrndx = 20
    reparsed = ELF(broken.pack())
    expected_logs = [('error', ('No section of index shstrndx=20',), {})]
    assertion(expected_logs,
              log_history,
              'Invalid shstrndx (logs)')
    assertion(88,
              reparsed.Ehdr.shoff,
              'Normal e.Ehdr.shoff')
    # Start the next test with an empty log
    log_history = []
# Lookup tables shared by the CStruct machinery.
# type_size maps a struct format character to its size in bits, and a
# symbolic name ('u08' ... 's64') to the struct format character.
# size2type / size2type_s map a size in bits to the unsigned / signed
# struct format character of that size.
type_size = {}
size2type = {}
size2type_s = {}

for t in 'B', 'H', 'I', 'Q':
    s = struct.calcsize(t)
    type_size[t] = s*8
    size2type[s*8] = t

for t in 'b', 'h', 'i', 'q':
    s = struct.calcsize(t)
    type_size[t] = s*8
    size2type_s[s*8] = t

# Symbolic names: 'uNN' is unsigned and 'sNN' is signed, NN bits wide
for bits in (8, 16, 32, 64):
    type_size['u%02d' % bits] = size2type[bits]
    type_size['s%02d' % bits] = size2type_s[bits]

def convert_size2type(ftype, wsize):
    """Convert a CStruct field type to a struct format fragment.

    ftype: the field type as found in a '_fields' list.
    wsize: word size in bits, used only for the 'ptr' type.

    Returns '' when ftype is not a string, ftype itself when it starts
    with a byte-string format such as '16s', the unsigned format of
    wsize bits for 'ptr', and the type_size entry for any other known
    name.
    Raises ValueError for an unknown type name, and KeyError when 'ptr'
    is requested with a wsize that is not in size2type.
    """
    if not isinstance(ftype, str):
        # Non-string types contribute nothing to the pack string
        return ''
    if re.match(r'\d+s', ftype):
        return ftype
    if ftype == "ptr":
        return size2type[wsize]
    if ftype in type_size:
        return type_size[ftype]
    raise ValueError("unknown CStruct type", ftype)
class CStruct(CStruct_base):
    """
    Base class for objects that are a concatenation of typed fields.

    How to create a CStruct class:
        _fields lists the pairs (field_name, field_type)
            if the last fields are (field_name, class), they are optional
        _align: an optional integer value for alignment of optional fields

    How to create a CStruct object:
        the keywords not used by CBase initialise the object fields

    How to use a CStruct object:
        in addition to the CBase interface, the fields can be modified

    Field types:
        basic types with fixed size (u08, ..., 16s)
        wsize-dependent type (ptr)
    """

    # Prepended to the struct format string built from _fields
    _packformat = ""

    def getf(self, fname):
        # Field values are stored in attributes named '_0' + field name
        return getattr(self, '_0' + fname)

    def setf(self, fname, v):
        return setattr(self, '_0' + fname, v)

    def _parent_parse(self, kargs):
        """Compute the struct format string and split the fields into
        packed ones (_names) and class-typed ones (_opt)."""
        CBase._parent_parse(self, kargs)
        if self._packformat:
            self.sex = ""
        self._format = {}
        fragments = []
        for field_name, field_type in self._fields:
            code = convert_size2type(field_type, self.wsize)
            self._format[field_name] = code
            fragments.append(code)
        self._packstring = self.sex + self._packformat + "".join(fragments)
        self._names = []
        self._opt = []
        for field in self._fields:
            if isinstance(field[1], str):
                self._names.append(field[0])
            else:
                self._opt.append(field)

    def unpack(self, c, o):
        """Read the fields from content c, starting at offset o."""
        self._size = struct.calcsize(self._packstring)
        raw = c[o:o + self._size]
        # Pad with null bytes when the content is too short
        raw += data_null * (self._size - len(raw))
        values = struct.unpack(self._packstring, raw)
        for name, value in zip(self._names, values):
            setattr(self, name, value)
        # If the last fields are optional data, their types are a class
        for fname, fclass in self._opt:
            tail = fclass(parent=self, content=c, start=o + self._size)
            self._size += self._size_align(tail)
            self.setf(fname, tail)

    def _initialize(self):
        """Give every field its default value."""
        self._size = struct.calcsize(self._packstring)
        for name in self._names:
            # Default values: empty data for 's' formats, zero otherwise
            is_string = self._format[name].endswith('s')
            self.setf(name, data_empty if is_string else 0)
        for fname, fclass in self._opt:
            tail = fclass(parent=self)
            self._size += self._size_align(tail)
            self.setf(fname, tail)

    def update(self, **kargs):
        """Set the packed fields named in kargs, then forward kargs to
        the class-typed fields, keeping _size consistent."""
        for key in kargs:
            if key in self._names:
                self.setf(key, kargs[key])
        for fname, _ in self._opt:
            tail = self.getf(fname)
            self._size -= self._size_align(tail)
            tail.update(**kargs)
            self._size += self._size_align(tail)

    def pack(self):
        """Return the binary representation of the structure.

        Raises ValueError when its length differs from self.bytelen.
        """
        values = [getattr(self, name) for name in self._names]
        packed = struct.pack(self._packstring, *values)
        for fname, _ in self._opt:
            packed += self._pack_align(self.getf(fname))
        if self.bytelen != len(packed):
            raise ValueError("Inconsistent size %d != %d for %r"
                % (self.bytelen, len(packed), self.__class__.__name__))
        return packed

    def __str__(self):
        raise AttributeError("Use pack() instead of str()")

    def pprint(self):
        """Return a pair (class name in angle brackets, dict of fields),
        where values having a pprint method are replaced by its result."""
        rep = {}
        for fname, _ in self._fields:
            value = getattr(self, fname)
            if hasattr(value, 'pprint'):
                value = value.pprint()
            rep[fname] = value
        return ("<%s>" % self.__class__.__name__, rep)

    def __repr__(self):
        shown = [repr(getattr(self, field[0])) for field in self._fields]
        return "<%s=%s>" % (self.__class__.__name__, "/".join(shown))

    def __getitem__(self, item): # to work with format strings
        return getattr(self, item)
class CArray_metaclass(type):
    """
    metaclass, with a syntax compatible with python2 and python3

    Refuses to create a class that neither has a name starting with
    'CArray' nor defines '_cls', either itself or in one of its direct
    bases.
    """
    def __new__(cls, name, bases, dct):
        exempt = name.startswith('CArray')
        # Only the class body and the direct bases are inspected
        has_element_class = ('_cls' in dct
                             or any('_cls' in base.__dict__ for base in bases))
        if not (exempt or has_element_class):
            raise ValueError("Class %r should define '_cls'"%name)
        return type.__new__(cls, name, bases, dct)
'_last'): s += self._pack_align(self._last) 343 | if self._size != len(s): 344 | raise ValueError("Inconsistent size %d != %d for %r" 345 | % (self._size,len(s), self.__class__.__name__)) 346 | return s 347 | 348 | def stop(self, elt): 349 | return elt.pack() == self._last.pack() 350 | 351 | def unpack(self, c, o): 352 | if o is None: return 353 | self._off = o 354 | if hasattr(self, 'count'): 355 | # self.count() is recomputed each time 356 | # This enables complicated conditions for array termination 357 | idx = 0 358 | while idx < self.count(): 359 | if o+self._size >= len(c): 360 | break 361 | elt = self._cls(parent=self, content=c, start=o+self._size) 362 | self._array.append(elt) 363 | self._size += self._size_align(elt) 364 | idx += 1 365 | else: 366 | pos = 0 367 | while True: 368 | if o+pos >= len(c): 369 | break 370 | elt = self._cls(parent=self, content=c, start=o+pos) 371 | if self.stop(elt): 372 | break 373 | self._array.append(elt) 374 | pos += self._size_align(elt) 375 | self._size += pos 376 | 377 | def __getitem__(self, item): 378 | return self._array[item] 379 | 380 | def __len__(self): 381 | return len(self._array) 382 | 383 | def append(self, obj): 384 | self._array.append(obj) 385 | self._size += self._size_align(self._array[-1]) 386 | return obj 387 | 388 | def pprint(self): 389 | return ("<%s>"%self.__class__.__name__, 390 | [x.pprint() for x in self._array], 391 | ) 392 | 393 | def __repr__(self): 394 | return "<%s of length %d>" % (self.__class__.__name__, len(self)) 395 | 396 | # Method that defines constants (as in .h headers) and tables that 397 | # can recover the constant's name from its value. 
def popen_read_out_err(cmd):
    """Run cmd and return everything it wrote to stdout followed by
    everything it wrote to stderr, as read from the pipes.

    cmd: the argument list given to subprocess.Popen.
    """
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() closes stdin and drains both pipes while waiting for
    # the child. Calling wait() before reading blocks forever as soon as
    # the child fills a pipe buffer, or if it waits for the end of stdin.
    out, err = p.communicate()
    return out + err
def expand_code(table, val):
    """Return the text associated with val in table; when val is not a
    key of table, return val formatted in hexadecimal after ': '."""
    return table[val] if val in table else ': %#x' % val
machine_code[elf.EM_88K] = 'MC88000' 87 | machine_code[elf.EM_486] = 'Intel 80486' 88 | machine_code[elf.EM_860] = 'Intel 80860' 89 | machine_code[elf.EM_MIPS] = 'MIPS R3000' 90 | machine_code[elf.EM_S370] = 'IBM System/370' 91 | machine_code[elf.EM_MIPS_RS3_LE] = 'MIPS R4000 big-endian' 92 | machine_code[elf.EM_PARISC] = 'HPPA' 93 | machine_code[elf.EM_SPARC32PLUS] = 'Sparc v8+' 94 | machine_code[elf.EM_960] = 'Intel 80960' 95 | machine_code[elf.EM_PPC] = 'PowerPC' 96 | machine_code[elf.EM_PPC64] = 'PowerPC64' 97 | machine_code[elf.EM_V800] = 'NEC V800' 98 | machine_code[elf.EM_FR20] = 'Fujitsu FR20' 99 | machine_code[elf.EM_RH32] = 'TRW RH32' 100 | machine_code[elf.EM_ARM] = 'ARM' 101 | machine_code[elf.EM_FAKE_ALPHA] = 'Digital Alpha (old)' 102 | machine_code[elf.EM_SH] = 'Renesas / SuperH SH' 103 | machine_code[elf.EM_SPARCV9] = 'Sparc v9' 104 | machine_code[elf.EM_TRICORE] = 'Siemens Tricore' 105 | machine_code[elf.EM_ARC] = 'ARC' 106 | machine_code[elf.EM_H8_300] = 'Renesas H8/300' 107 | machine_code[elf.EM_H8_300H] = 'Renesas H8/300H' 108 | machine_code[elf.EM_H8S] = 'Renesas H8S' 109 | machine_code[elf.EM_H8_500] = 'Renesas H8/500' 110 | machine_code[elf.EM_IA_64] = 'Intel IA-64' 111 | machine_code[elf.EM_MIPS_X] = 'Stanford MIPS-X' 112 | machine_code[elf.EM_COLDFIRE] = 'Motorola Coldfire' 113 | machine_code[elf.EM_X86_64] = 'Advanced Micro Devices X86-64' 114 | print(" Machine: %s"%expand_code(machine_code, e.Ehdr.machine)) 115 | print(" Version: %#x"%e.Ehdr.version) 116 | print(" Entry point address: %#x"%e.Ehdr.entry) 117 | print(" Start of program headers: %d (bytes into file)"%e.Ehdr.phoff) 118 | print(" Start of section headers: %d (bytes into file)"%e.Ehdr.shoff) 119 | print(" Flags: %#x"%e.Ehdr.flags) 120 | print(" Size of this header: %d (bytes)"%e.Ehdr.ehsize) 121 | print(" Size of program headers: %d (bytes)"%e.Ehdr.phentsize) 122 | print(" Number of program headers: %d"%e.Ehdr.phnum) 123 | print(" Size of section headers: %d 
(bytes)"%e.Ehdr.shentsize) 124 | print(" Number of section headers: %d"%e.Ehdr.shnum) 125 | print(" Section header string table index: %d"%e.Ehdr.shstrndx) 126 | 127 | def display_program_headers(e): 128 | # Output format similar to readelf -l 129 | if len(e.ph.phlist) == 0: 130 | print("\nThere are no program headers in this file.") 131 | return 132 | print("\nElf file type is %s" % expand_code(et_strings, e.Ehdr.type)) 133 | print("Entry point 0x%x" % e.Ehdr.entry) 134 | print("There are %d program headers, starting at offset %d" % (e.Ehdr.phnum, e.Ehdr.phoff)) 135 | print("\nProgram Headers:") 136 | if e.wsize == 32: 137 | header = " Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align" 138 | format = " %-14s 0x%06x 0x%08x 0x%08x 0x%05x 0x%05x %-3s 0x%x" 139 | elif e.wsize == 64: 140 | header = " Type Offset VirtAddr PhysAddr\n FileSiz MemSiz Flags Align" 141 | format = " %-14s 0x%016x 0x%016x 0x%016x\n 0x%016x 0x%016x %-3s %x" 142 | print(header) 143 | for p in e.ph: 144 | flags = [' ', ' ', ' '] 145 | if p.ph.flags & 4: flags[0] = 'R' 146 | if p.ph.flags & 2: flags[1] = 'W' 147 | if p.ph.flags & 1: flags[2] = 'E' 148 | print(format%(elf.constants['PT'][p.ph.type], 149 | p.ph.offset, p.ph.vaddr, p.ph.paddr, 150 | p.ph.filesz, p.ph.memsz, ''.join(flags), 151 | p.ph.align)) 152 | if p.ph.type == elf.PT_INTERP: 153 | s = p.shlist[0] 154 | print(' [Requesting program interpreter: %s]' % e[s.sh.offset:s.sh.offset+s.sh.size].strip('\0')) 155 | if len(e.sh.shlist) == 0: 156 | return 157 | print("\n Section to Segment mapping:") 158 | print(" Segment Sections...") 159 | for i, p in enumerate(e.ph): 160 | res = " %02d " % i 161 | for s in p.shlist: 162 | res += s.sh.name + " " 163 | print(res) 164 | 165 | def display_dynamic(e): 166 | machine = elf.constants['EM'][e.Ehdr.machine] 167 | for i, sh in enumerate(e.sh): 168 | if sh.sh.type != elf.SHT_DYNAMIC: 169 | continue 170 | if e.wsize == 32: 171 | header = " Tag Type Name/Value" 172 | format = "%#010x %-28s %s" 173 | 
def display_symbols(sections):
    """Print the readelf_display() output of each section, each one
    preceded by an empty line."""
    for section in sections:
        rendering = section.readelf_display()
        print("\n" + rendering)
argparse.ArgumentParser(add_help=False) 229 | parser.add_argument('-H', '--help', action='help', default=argparse.SUPPRESS, help='Display this information') 230 | parser.add_argument('-h', '--file-header', dest='options', action='append_const', const='headers', help='Display the ELF file header') 231 | parser.add_argument('-S', '--section-headers', '--sections', dest='options', action='append_const', const='sections', help="Display the sections' header") 232 | parser.add_argument('-r', '--relocs', dest='options', action='append_const', const='reltab', help='Display the relocations (if present)') 233 | parser.add_argument('-s', '--syms', '--symbols', dest='options', action='append_const', const='symtab', help='Display the symbol table') 234 | parser.add_argument('--dyn-syms', dest='options', action='append_const', const='dynsym', help='Display the dynamic symbol table') 235 | parser.add_argument('-d', '--dynamic', dest='options', action='append_const', const='dynamic', help='Display the dynamic section (if present)') 236 | parser.add_argument('-l', '--program-headers', '--segments', dest='options', action='append_const', const='program', help='Display the program headers') 237 | parser.add_argument('-g', '--section-groups', dest='options', action='append_const', const='groups', help='Display the section groups') 238 | parser.add_argument('--readelf', dest='readelf_version', action='append', help='Simulate the output of a given version of readelf') 239 | parser.add_argument('file', nargs='+', help='ELF file(s)') 240 | args = parser.parse_args() 241 | if args.options is None: 242 | args.options = [] 243 | 244 | elf.is_pie = lambda _: False 245 | if args.readelf_version: 246 | for readelf in args.readelf_version: 247 | if 'native' in readelf: 248 | readelf_version = get_readelf_version() 249 | else: 250 | readelf_version = float(readelf) 251 | if True: 252 | # TODO: readelf has a different output if "do_section_details" or "do_wide" 253 | elf.Shdr.header64 = [" [Nr] 
Name Type Address Offset", 254 | " Size EntSize Flags Link Info Align"] 255 | elf.Shdr.format64 = (" [%(idx)2d] %(name17)-17s %(type_txt)-15s %(addr)016x %(offset)08x\n" 256 | " %(size)016x %(entsize)016x %(flags_txt)3s %(link)2d %(info)2d %(addralign)d") 257 | if readelf_version >= 2.26: 258 | # 2016-01-20 https://github.com/bminor/binutils-gdb/commit/9fb71ee49fc37163697e4f34e16097928eb83d66 259 | elf.Shdr.footer = property(lambda _: [ 260 | "Key to Flags:", 261 | " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", 262 | " L (link order), O (extra OS processing required), G (group), T (TLS),", 263 | " C (compressed), x (unknown), o (OS specific), E (exclude),", 264 | " %sp (processor specific)" % ( 265 | "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else 266 | "y (noread), " if e.Ehdr.machine == elf.EM_ARM else 267 | "" ), 268 | ]) 269 | if readelf_version >= 2.27: 270 | # 2016-07-05 https://github.com/bminor/binutils-gdb/commit/f0728ee368f217f2473798ad7ccfe9feae4412ce 271 | elf.Shdr.footer = property(lambda _: [ 272 | "Key to Flags:", 273 | " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", 274 | " L (link order), O (extra OS processing required), G (group), T (TLS),", 275 | " C (compressed), x (unknown), o (OS specific), E (exclude),", 276 | " %sp (processor specific)" % ( 277 | "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else 278 | "y (purecode), " if e.Ehdr.machine == elf.EM_ARM else 279 | "" ), 280 | ]) 281 | if readelf_version >= 2.29: # more precisely 2.29.1 282 | # 2017-09-05 https://github.com/bminor/binutils-gdb/commit/83eef883581525d04df3a8e53a82c01d0d12b56a 283 | elf.Shdr.footer = property(lambda _: [ 284 | "Key to Flags:", 285 | " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", 286 | " L (link order), O (extra OS processing required), G (group), T (TLS),", 287 | " C (compressed), x (unknown), o (OS specific), E (exclude),", 
288 | " %sp (processor specific)" % ( 289 | "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else 290 | "y (purecode), " if e.Ehdr.machine == elf.EM_ARM else 291 | "v (VLE), " if e.Ehdr.machine == elf.EM_PPC else 292 | "" ), 293 | ]) 294 | if readelf_version >= 2.36: # more precisely 2.36.1 295 | # 2021-02-02 https://github.com/bminor/binutils-gdb/commit/5424d7ed94cf5a7ca24636ab9f4e6d5c353fc0d3 296 | elf.Shdr.footer = property(lambda _: [ 297 | "Key to Flags:", 298 | " W (write), A (alloc), X (execute), M (merge), S (strings), I (info),", 299 | " L (link order), O (extra OS processing required), G (group), T (TLS),", 300 | " C (compressed), x (unknown), o (OS specific), E (exclude),", 301 | " %s%sp (processor specific)" % ( 302 | "R (retain), D (mbind), " if e.Ehdr.ident[elf.EI_OSABI] in (elf.ELFOSABI_GNU, elf.ELFOSABI_FREEBSD) else 303 | "D (mbind), " if e.Ehdr.ident[elf.EI_OSABI] == elf.ELFOSABI_NONE else 304 | "" 305 | , 306 | "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else 307 | "y (noread), " if e.Ehdr.machine == elf.EM_ARM else 308 | "" ), 309 | ]) 310 | if readelf_version >= 2.35: 311 | # 2020-07-02 https://github.com/bminor/binutils-gdb/commit/0942c7ab94e554657c3e11ab85ae7f15373ee80d 312 | elf.Shdr.name17 = property(lambda _: _.name[:12]+"[...]" if len(_.name) > 17 else _.name) 313 | if readelf_version >= 2.37: 314 | # 2021-06-15 https://github.com/bminor/binutils-gdb/commit/93df3340fd5ad32f784214fc125de71811da72ff 315 | elf.is_pie = is_pie 316 | 317 | 318 | for file in args.file: 319 | if len(args.file) > 1: 320 | print("\nFile: %s" % file) 321 | fd = open(file, 'rb') 322 | try: 323 | raw = fd.read() 324 | finally: 325 | fd.close() 326 | e = elf_init.ELF(raw) 327 | if 'headers' in args.options: 328 | display_headers(e) 329 | if 'sections' in args.options: 330 | print(e.sh.readelf_display()) 331 | if 'reltab' in args.options: 332 | for sh in e.sh: 333 | if not 'rel' in dir(sh): continue 334 | 
import subprocess
def popen_read_out_err(cmd):
    """Run *cmd* and return what it wrote on stdout followed by stderr.

    cmd: the argument list given to subprocess.Popen.
    Returns the concatenation of the captured stdout and stderr (bytes).

    The child's stdin is a pipe that is closed without anything being
    written, so a command that reads its standard input sees end-of-file.
    """
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() closes stdin, drains both pipes while the child runs,
    # then waits for it.  Calling wait() before reading, as was done before,
    # blocks forever as soon as the child fills one of the pipe buffers
    # (or waits for its stdin to be closed).
    out, err = p.communicate()
    return out + err
def print_indirect(e, **fargs):
    """Print the indirect symbols of the stub and symbol-pointer sections.

    For each section of *e* whose type is one of the symbol stub / symbol
    pointer types, prints a title line, a column header and one row per
    entry, with the index found in the indirect symbol table and the name
    of the corresponding symbol.  Extra keyword arguments are ignored.

    Raises ValueError if more than one 'indirectsym' table is found.
    """
    import struct
    # Find section with indirect symbols and indirect symbols table
    indirectsym_table = None
    indirectsym_section = []
    for s in e.sect:
        if getattr(s, 'type', None) == 'indirectsym':
            if indirectsym_table is not None:
                raise ValueError("Only one IndirectSymbolTable per Mach-O file")
            indirectsym_table = s
        if not hasattr(s, 'sh'): continue
        if s.sh.type in [
            macho.S_SYMBOL_STUBS,
            macho.S_LAZY_SYMBOL_POINTERS,
            macho.S_NON_LAZY_SYMBOL_POINTERS,
            macho.S_LAZY_DYLIB_SYMBOL_POINTERS,
            ]:
            indirectsym_section.append(s)
    # Display
    verbose = False # Exactly the same output as 'otool -Iv'
    # Position in the indirect symbol table; it keeps increasing from one
    # section to the next.
    idx = 0
    for s in indirectsym_section:
        print("Indirect symbols for (%s,%s) %u entries"
              % (s.sh.segname, s.sh.sectname, len(s)))
        if e.wsize == 64:
            header = "%-18s %5s"
            row_fmt = "0x%016x %5s"
            valfmt = e.sex+"Q"
        elif e.wsize == 32:
            header = "%-10s %5s"
            row_fmt = "0x%08x %5s"
            valfmt = e.sex+"I"
        # NOTE(review): indentation is lost in the text this was rebuilt
        # from; this test is assumed to apply to both word sizes - confirm.
        if s.sh.type == macho.S_SYMBOL_STUBS:
            # First two bytes are 0xff 0x25
            valfmt = e.sex+"HI"
        address = s.addr
        data = [ "address", "index", " name" ]
        if verbose:
            # The value read in the table is not output by otool
            # it may be useless ???
            header += "%-20s "
            row_fmt += "%-20s "
            # One additional column title; `data += "value"` would have
            # added five one-letter items and broken the header formatting.
            data.append("value")
        header += "%s"
        row_fmt += "%s"
        print(header % tuple(data))
        for entry in s:
            if verbose: content = struct.unpack(valfmt,entry.content)[-1]
            index = indirectsym_table.entries[idx].index
            name = ''
            if index == macho.INDIRECT_SYMBOL_LOCAL: index = "LOCAL"
            elif index == macho.INDIRECT_SYMBOL_ABS: index = "ABSOLUTE"
            else: name = ' '+e.symbols.symbols[index].name
            data = [ address, index, name ]
            if verbose: data.append(content)
            print(row_fmt % tuple(data))
            idx += 1
            address += entry.bytelen
def print_bind(e, **fargs):
    """Print the binding information of the first 'bind_' section of *e*.

    Prints a title, the column header and every item of the section's
    ``info``; when *e* has no section of type 'bind_', prints a notice
    instead.  Extra keyword arguments are ignored.
    """
    # Only the first matching section is displayed.
    bind_sect = next((sect for sect in e.sect
                      if getattr(sect, 'type', None) == 'bind_'), None)
    if bind_sect is None:
        print("no compressed binding info")
        return
    print("bind information:")
    print("segment section address type addend dylib symbol")
    for item in bind_sect.info:
        print(item)
# Architecture name for each (cputype, cpusubtype) pair, where cpusubtype
# has its capability bits (CPU_SUBTYPE_MASK) cleared.
archi = {
    (macho.CPU_TYPE_MC680x0,   macho.CPU_SUBTYPE_MC680x0_ALL):  'm68k',
    (macho.CPU_TYPE_MC680x0,   macho.CPU_SUBTYPE_MC68030_ONLY): 'm68030',
    (macho.CPU_TYPE_MC680x0,   macho.CPU_SUBTYPE_MC68040):      'm68040',
    (macho.CPU_TYPE_MC88000,   macho.CPU_SUBTYPE_MC88000_ALL):  'm88k',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_I386_ALL):     'i386',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_486):          'i486',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_486SX):        'i486SX',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENT):         'pentium',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTPRO):      'pentpro',
   #(macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTIUM_4):    'pentium4',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTII_M3):    'pentIIm3',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTII_M5):    'pentIIm5',
    (macho.CPU_TYPE_X86_64,    macho.CPU_SUBTYPE_X86_64_ALL):   'x86_64',
    (macho.CPU_TYPE_X86_64,    macho.CPU_SUBTYPE_X86_64_H):     'x86_64h',
    (macho.CPU_TYPE_I860,      macho.CPU_SUBTYPE_I860_ALL):     'i860',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_ALL):  'ppc',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_601):  'ppc601',
    # This entry used the 603 subtype, which made it a duplicate key
    # silently replaced by the 'ppc603' entry below.
    # NOTE(review): 2 is the value of CPU_SUBTYPE_POWERPC_602 in Apple's
    # <mach/machine.h>; it is only a fallback in case the macho module
    # does not define that constant - confirm and simplify.
    (macho.CPU_TYPE_POWERPC,   getattr(macho, 'CPU_SUBTYPE_POWERPC_602', 2)): 'ppc602',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_603):  'ppc603',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_603e): 'ppc603e',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_603ev):'ppc603ev',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_604):  'ppc604',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_604e): 'ppc604e',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_620):  'ppc620',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_750):  'ppc750',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_7400): 'ppc7400',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_7450): 'ppc7450',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_970):  'ppc970',
    (macho.CPU_TYPE_POWERPC64, macho.CPU_SUBTYPE_POWERPC64_ALL):'ppc64',
    (macho.CPU_TYPE_POWERPC64, macho.CPU_SUBTYPE_POWERPC_970):  'ppc970-64',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_ALL):      'veo',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_1):        'veo1',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_2):        'veo2',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_3):        'veo3',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_4):        'veo4',
    (macho.CPU_TYPE_HPPA,      macho.CPU_SUBTYPE_HPPA_ALL):     'hppa',
    (macho.CPU_TYPE_HPPA,      macho.CPU_SUBTYPE_HPPA_7100LC):  'hppa7100LC',
    (macho.CPU_TYPE_SPARC,     macho.CPU_SUBTYPE_SPARC_ALL):    'sparc',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_ALL):      'arm',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V4T):      'armv4t',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V5TEJ):    'armv5',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_XSCALE):   'xscale',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V6):       'armv6',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V6M):      'armv6m',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7):       'armv7',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7F):      'armv7f',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7S):      'armv7s',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7K):      'armv7k',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7M):      'armv7m',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7EM):     'armv7em',
    (macho.CPU_TYPE_ARM64,     macho.CPU_SUBTYPE_ARM64_ALL):    'arm64',
    (macho.CPU_TYPE_ARM64,     macho.CPU_SUBTYPE_ARM64_V8):     'arm64v8',
    }
action='append_const', const='weak_bind', help='symbols which dyld must coalesce') 306 | parser.add_argument('-lazy_bind', dest='options', action='append_const', const='lazy_bind', help='addresses dyld will lazily set on first use') 307 | parser.add_argument('-export', dest='options', action='append_const', const='export', help='addresses of all symbols this file exports') 308 | parser.add_argument('file', nargs='*', help='object file') 309 | args = parser.parse_args() 310 | if args.options is None: 311 | args.options = [] 312 | if len(args.file) == 0: 313 | parser.print_help() 314 | functions = [] 315 | fargs = {} 316 | dyldinfo_simulation = False 317 | if args.llvm_version: 318 | # Hypothesis: the major number of the version of Xcode is sufficient 319 | # to determine what the output format of llvm-otool is. 320 | for llvm in args.llvm_version: 321 | if 'native' in llvm: 322 | fargs['llvm'] = get_otool_version() 323 | else: 324 | fargs['llvm'] = int(llvm) 325 | if 'header' in args.options: 326 | functions.append(print_header) 327 | if 'load' in args.options: 328 | if fargs.get('llvm',8) in (8, 9, 10, 11) and not 'header' in args.options: 329 | functions.append(print_header) 330 | functions.append(print_lc) 331 | if 'symbols' in args.options: 332 | functions.append(print_symbols) 333 | if 'dysym' in args.options: 334 | functions.append(print_dysym) 335 | if 'reloc' in args.options: 336 | functions.append(print_relocs) 337 | if 'indirect' in args.options: 338 | functions.append(print_indirect) 339 | if 'rebase' in args.options: 340 | functions.append(print_rebase) 341 | dyldinfo_simulation = True 342 | if 'bind' in args.options: 343 | functions.append(print_bind) 344 | dyldinfo_simulation = True 345 | if 'weak_bind' in args.options: 346 | functions.append(print_weak_bind) 347 | dyldinfo_simulation = True 348 | if 'lazy_bind' in args.options: 349 | functions.append(print_lazy_bind) 350 | dyldinfo_simulation = True 351 | if 'export' in args.options: 352 | 
functions.append(print_export) 353 | dyldinfo_simulation = True 354 | if 'opcodes' in args.options: 355 | functions.append(print_opcodes) 356 | dyldinfo_simulation = True 357 | 358 | for file in args.file: 359 | fd = open(file, 'rb') 360 | try: 361 | raw = fd.read() 362 | finally: 363 | fd.close() 364 | filesize = os.path.getsize(file) 365 | try: 366 | e = macho_init.MACHO(raw, 367 | parseSymbols = False) 368 | except ValueError as err: 369 | print("%s:" %file) 370 | print(" %s" % err) 371 | continue 372 | if args.arch_type is None: 373 | if hasattr(e, 'Fhdr'): 374 | # Select the current architecture, if present 375 | current = platform.machine() 376 | for _ in e.arch: 377 | if current == arch_name(_): 378 | e = _ 379 | break 380 | else: 381 | # Display all architectures 382 | e = [ _ for _ in e.arch ] 383 | elif 'all' in args.arch_type: 384 | if hasattr(e, 'Fhdr'): 385 | # Display all architectures 386 | e = [ _ for _ in e.arch ] 387 | elif len(args.arch_type) == 1: 388 | if hasattr(e, 'Fhdr'): 389 | # Display one architecture 390 | current = args.arch_type[0] 391 | for _ in e.arch: 392 | if current == arch_name(_): 393 | e = _ 394 | break 395 | else: 396 | sys.stderr.write("error: otool: file: %s does not contain architecture: %s\n" % (file, current)) 397 | e = [] 398 | else: 399 | # Display if it is the architecture 400 | current = args.arch_type[0] 401 | if current != arch_name(e): 402 | e = [] 403 | else: 404 | if hasattr(e, 'Fhdr'): 405 | # Display some architectures, in the order appearing in the args 406 | f = [] 407 | for current in args.arch_type: 408 | for _ in e.arch: 409 | if current == arch_name(_): 410 | f.append(_) 411 | break 412 | else: 413 | sys.stderr.write("error: otool: file: %s does not contain architecture: %s\n" % (file, current)) 414 | e = f 415 | else: 416 | # Display if one is the architecture 417 | for current in args.arch_type: 418 | if current == arch_name(e): 419 | break 420 | else: 421 | e = [] 422 | 423 | if dyldinfo_simulation and 
def gensapce(lvl):
    """Return the indentation prefix used for nesting level *lvl*."""
    return ' '*lvl

def out_attrs(o, lvl = None):
    """Return an indented, multi-line text dump of *o*.

    o:   either a list of (field, value) pairs, or any other object.
    lvl: nesting level used for the indentation; None means level 0.

    An object that is not a list is dumped as its repr() on one line.
    For a list, each field is dumped as its repr(), followed either by the
    repr() of its value on the same line or, when the value is itself a
    list, by the dump of each of its items one level deeper.
    """
    if lvl is None:
        lvl = 0
    indent = gensapce(lvl)
    if not isinstance(o, list):
        return indent+repr(o)+'\n'
    # Pieces are collected and joined once, instead of growing a string
    # with += in nested loops.
    parts = []
    for f, v in o:
        parts.append(indent+repr(f))
        if isinstance(v, list):
            parts.append('\n')
            parts.extend([out_attrs(x, lvl+1) for x in v])
        else:
            parts.append(" "+repr(v))
        parts.append("\n")
    return "".join(parts)
pp(self): 55 | return "%r"%(self.value) 56 | 57 | class CPInteger(CStruct): 58 | _packformat = ">" 59 | _fields = [ ("tag", "u08"), 60 | ("value", "u32")] 61 | 62 | class CPFloat(CStruct): 63 | _packformat = ">" 64 | _fields = [ ("tag", "u08"), 65 | ("value", "f")] 66 | 67 | class CPLong(CStruct): 68 | _packformat = ">" 69 | _fields = [ ("tag", "u08"), 70 | ("value", "q")] 71 | 72 | class CPDouble(CStruct): 73 | _packformat = ">" 74 | _fields = [ ("tag", "u08"), 75 | ("value", "d")] 76 | 77 | class CPClass(CStruct): 78 | _packformat = ">" 79 | _fields = [ ("tag", "u08"), 80 | ("name", "u16")] 81 | 82 | def get_name(self): 83 | return self.parent_head.get_constant_pool_by_index(self.name_value).value 84 | def pp(self): 85 | return "%r"%(self.name) 86 | 87 | class CPString(CStruct): 88 | _packformat = ">" 89 | _fields = [ ("tag", "u08"), 90 | ("value", "u16")] 91 | 92 | def get_value(self): 93 | return self.parent_head.get_constant_pool_by_index(self.value_value).value 94 | def set_value(self, v): 95 | self.parent_head.get_constant_pool_by_index(self.value_value).set_str(v) 96 | def pp(self): 97 | s = self.value 98 | """ 99 | if len(s) > 40: 100 | s = str(s)[:40]+'...' 
# From hachoir project
code_to_type_name = {
    'B': "byte",
    'C': "char",
    'D': "double",
    'F': "float",
    'I': "int",
    'J': "long",
    'S': "short",
    'Z': "boolean",
    'V': "void",
}


def demangle_java_name(c_name, c_typetype, c_typename):
    """
    Return a human-readable signature for the method named c_typename of
    class c_name (with '/' as package separator), where c_typetype is the
    method descriptor.
    """
    t = c_name.replace('/', '.')
    return parse_method_descriptor(c_typetype, t+'->'+c_typename)


def eat_descriptor(descr):
    """
    Read head of a field/method descriptor.  Returns a pair of strings, where
    the first one is a human-readable string representation of the first found
    type, and the second one is the tail of the parameter.

    Raises ValueError if descr does not start with a valid type: unknown
    type code, class name not terminated by ';', or nothing left to read.
    """
    full_descr = descr
    array_dim = 0
    # Slices are used instead of indexes, to have a ValueError below rather
    # than an IndexError when the descriptor is empty or truncated.
    while descr[:1] == '[':
        array_dim += 1
        descr = descr[1:]
    if not descr:
        raise ValueError("Not a valid descriptor string: %s" % full_descr)
    if descr[0] == 'L':
        # str.find returns -1 when there is no ';': it never raises, and
        # using -1 as an index silently gave a truncated class name.
        end = descr.find(';')
        if end == -1:
            raise ValueError("Not a valid descriptor string: " + descr)
        type_name = descr[1:end]
        tail = descr[end+1:]
    else:
        try:
            type_name = code_to_type_name[descr[0]]
        except KeyError:
            raise ValueError("Not a valid descriptor string: %s" % descr)
        tail = descr[1:]
    return (type_name.replace("/", ".") + array_dim * "[]", tail)

def parse_field_descriptor(descr, name=None):
    """
    Parse a field descriptor (single type), and returns it as human-readable
    string representation.
    """
    assert descr
    (type_name, tail) = eat_descriptor(descr)
    assert not tail
    if name:
        return type_name + " " + name
    else:
        return type_name

def parse_method_descriptor(descr, name=None):
    """
    Parse a method descriptor (params type and return type), and returns it
    as human-readable string representation.

    Raises ValueError if a parameter or the return type is not valid, or if
    the list of parameters is not terminated by ')'.
    """
    assert descr and (descr[0] == '(')
    descr = descr[1:]
    params_list = []
    # When ')' is missing, descr ends up empty and eat_descriptor raises
    # ValueError.
    while descr[:1] != ')':
        (param, descr) = eat_descriptor(descr)
        params_list.append(param)
    (type_name, tail) = eat_descriptor(descr[1:])
    assert not tail
    params = ", ".join(params_list)
    if name:
        return "%s %s(%s)" % (type_name, name, params)
    else:
        return "%s (%s)" % (type_name, params)
ValueError('unknown type', hex(tag)) 262 | c, l = CONSTANT_TYPES[tag].unpack_l(s, off, parent_head, _sex, _wsize) 263 | return c, l 264 | 265 | @classmethod 266 | def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None): 267 | c, l = cls.unpack_l(s, off = off, 268 | parent_head = parent_head, _sex=_sex, _wsize=_wsize) 269 | return c 270 | 271 | 272 | class CException_table(CStruct): 273 | _packformat = ">" 274 | _fields = [ ("start_pc", "u16"), 275 | ("end_pc", "u16"), 276 | ("handler_pc", "u16"), 277 | ("catch_type", "u16") 278 | ] 279 | 280 | class CAttribute_code(CStruct): 281 | _packformat = ">" 282 | _fields = [ ("name", "u16"), 283 | ("attribute_length", "u32"), 284 | ("max_stack", "u16"), 285 | ("max_locals", "u16"), 286 | ("code_length", "u32"), 287 | ("code", (lambda c, s, of:c.getcode(s, of), 288 | lambda c, value:c.setcode(value))), 289 | ("exception_table_length", "u16"), 290 | ("exception_table", "CException_table", lambda c:c.exception_table_length), 291 | ("attributes_count", "u16"), 292 | ("attributes", "CAttributeInfo", lambda c:c.attributes_count), 293 | ] 294 | def getcode(self, s, of): 295 | v = s[of:of+self.code_length] 296 | return v, of+self.code_length 297 | def setcode(self, value): 298 | return str(value) 299 | 300 | def get_name(self): 301 | return self.parent_head.get_constant_pool_by_index(self.name_value).value 302 | 303 | class LineNumberTableEntry(CStruct): 304 | _packformat = ">" 305 | _fields = [ ("start_pc", "u16"), 306 | ("line_number", "u16") 307 | ] 308 | 309 | class CLineNumberTable(CStruct): 310 | _packformat = ">" 311 | _fields = [ ("name", "u16"), 312 | ("attribute_length", "u32"), 313 | ("line_number_table_length", "u16"), 314 | ("line_number_table", "LineNumberTableEntry", lambda c:c.line_number_table_length), 315 | ] 316 | def get_name(self): 317 | return self.parent_head.get_constant_pool_by_index(self.name_value).value 318 | 319 | 320 | class CException(CStruct): 321 | _packformat = ">" 322 | _fields = [ 
class CClass(CStruct):
    """Entry type of the ``classes`` field of CInnerClasses.

    Declares four "u16" fields; the first three are constant-pool indexes
    that the get_* accessors below resolve through ``parent_head``.
    """
    _packformat = ">"
    _fields = [ ("inner_class_info", "u16"),
                ("outer_class_info", "u16"),
                ("inner_name", "u16"),
                ("inner_class_access_flags", "u16"),
                ]

    # The raw index of a field is read through ``<field>_value``, the naming
    # every other accessor of this file relies on (name_value,
    # sourcefile_value, descriptor_value, interfaces_value).  The previous
    # spellings (inner_value_class_info, ...) did not follow that scheme.
    # NOTE(review): a zero index makes get_constant_pool_by_index return None,
    # and inner_name presumably designates a UTF-8 entry (exposing ``.value``
    # rather than ``.name``) -- confirm before relying on these accessors for
    # anonymous or top-level classes.
    def get_inner_class_info(self):
        """Return ``.name`` of the pool entry designated by inner_class_info."""
        return self.parent_head.get_constant_pool_by_index(self.inner_class_info_value).name
    def get_outer_class_info(self):
        """Return ``.name`` of the pool entry designated by outer_class_info."""
        return self.parent_head.get_constant_pool_by_index(self.outer_class_info_value).name
    def get_inner_name(self):
        """Return ``.name`` of the pool entry designated by inner_name."""
        return self.parent_head.get_constant_pool_by_index(self.inner_name_value).name

class CInnerClasses(CStruct):
    """Layout used by CAttributeInfo for attributes named "InnerClasses"."""
    _packformat = ">"
    _fields = [ ("name", "u16"),
                ("attribute_length", "u32"),
                ("classes_count", "u16"),
                # Entries are CClass; the lambda returns the counter read
                # just before.
                ("classes", "CClass", lambda c:c.classes_count),
                ]

    def get_name(self):
        """Return ``.value`` of the constant-pool entry at index ``self.name_value``."""
        return self.parent_head.get_constant_pool_by_index(self.name_value).value

class CSourceFile(CStruct):
    """Layout used by CAttributeInfo for attributes named "SourceFile"."""
    _packformat = ">"
    _fields = [ ("name", "u16"),
                ("attribute_length", "u32"),
                ("sourcefile", "u16"),
                ]
    def get_name(self):
        """Return ``.value`` of the constant-pool entry at index ``self.name_value``."""
        return self.parent_head.get_constant_pool_by_index(self.name_value).value
    def get_sourcefile(self):
        """Return ``.value`` of the constant-pool entry at index ``self.sourcefile_value``."""
        return self.parent_head.get_constant_pool_by_index(self.sourcefile_value).value

class CSynthetic(CStruct):
    """Layout used by CAttributeInfo for attributes named "Synthetic".

    Only the common attribute header (name, attribute_length) is declared.
    """
    _packformat = ">"
    _fields = [ ("name", "u16"),
                ("attribute_length", "u32")
                ]

    def get_name(self):
        """Return ``.value`` of the constant-pool entry at index ``self.name_value``."""
        return self.parent_head.get_constant_pool_by_index(self.name_value).value
class CAttributeInfo(CStruct):
    """Dispatcher that parses an attribute with the layout matching its name.

    The attribute starts with a u16 constant-pool index; the UTF-8 constant
    found there selects the concrete CStruct subclass used for parsing.
    """
    _packformat = ">"
    _fields = [("name", "u16")]

    @classmethod
    def unpack_l(cls, s, off = 0, parent_head = None, _sex=1, _wsize=32):
        """Parse the attribute at ``s[off:]`` and return the pair given by the
        selected parser's own ``unpack_l``.

        Raises ValueError when the name index does not designate a CPUtf8.
        Unknown names are logged and parsed with CAttributeInfo_default.
        """
        (name_index,) = struct.unpack('>H', s[off:off+2])
        entry = parent_head.get_constant_pool_by_index(name_index)
        if not isinstance(entry, CPUtf8):
            raise ValueError('Error in parsing, should be string', hex(name_index))

        # Attribute names with a dedicated layout, tested in this order.
        dedicated = (
            ("Code", CAttribute_code),
            ("LineNumberTable", CLineNumberTable),
            ("Exceptions", CException),
            ("InnerClasses", CInnerClasses),
            ("SourceFile", CSourceFile),
            ("Synthetic", CSynthetic),
        )
        attr_name = entry.value
        for known_name, parser in dedicated:
            if attr_name == known_name:
                break
        else:
            # No dedicated layout: keep the payload as an opaque blob.
            log.warning("unsupported attribute, skipping:\n%r"%(entry))
            parser = CAttributeInfo_default

        parsed, size = parser.unpack_l(s, off, parent_head, _sex, _wsize)
        return parsed, size

    @classmethod
    def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
        """Same as unpack_l(), but return only the parsed object."""
        parsed, _ = cls.unpack_l(s, off = off, parent_head = parent_head,
                                 _sex=_sex, _wsize=_wsize)
        return parsed
class CMethods(CStruct):
    """Entry type of the ``methods`` field of Jclass_description.

    Declares access_flags, two constant-pool indexes (name, descriptor) and
    a counted list of CAttributeInfo.
    """
    _packformat = ">"
    _fields = [ ("access_flags", "u16"),
                ("name", "u16"),
                ("descriptor", "u16"),
                ("attributes_count", "u16"),
                # Entries are CAttributeInfo; the lambda returns the counter
                # read just before.
                ("attributes", "CAttributeInfo", lambda c:c.attributes_count),
                ]
    def get_name(self):
        """Return ``.value`` of the constant-pool entry at index ``self.name_value``."""
        return self.parent_head.get_constant_pool_by_index(self.name_value).value
    def get_descriptor(self):
        """Return ``.value`` of the constant-pool entry at index ``self.descriptor_value``."""
        return self.parent_head.get_constant_pool_by_index(self.descriptor_value).value

class Jclass_hdr(CStruct):
    """First structure parsed by JCLASS.parse_content, from offset 0.

    Declares magic, the two version numbers, the constant pool (count plus
    entries) and the bitmask/this/super u16 fields.
    """
    _packformat = ">"
    _fields = [ ("magic", "u32"),
                ("minor_version","u16"),
                ("major_version","u16"),
                ("constants_pool_count","u16"),
                # Custom field: a (reader, writer) pair of callables that
                # delegate to gets()/sets() below.
                ("constants_pool", (lambda c, s, of:c.gets(s, of),
                                    lambda c, value:c.sets(value))),
                ("bitmask", "u16"),
                ("this","u16"),
                ("super","u16")
                ]

    def gets(self, s, of):
        """Parse the constant pool found at ``s[of:]``.

        Returns ``(pool, offset)``.  ``pool`` ends up with
        ``constants_pool_count - 1`` slots; ``offset`` is ``of`` advanced by
        the second item of every CPoolfield.unpack_l result (presumably the
        size of the entry -- TODO confirm).
        """
        v = []
        while len(v) < self.constants_pool_count-1:
            c, l = CPoolfield.unpack_l(s, of, self.parent_head)
            v.append(c)
            of += l
            if c.tag in [5, 6]:
                # XXX constants with tag 5 or 6 occupy two slots: insert a
                # supplementary (empty) one so that indexes stay aligned.
                v.append(None)
        return v, of
    def sets(self, value):
        """Return the concatenation of ``str(x)`` for every real pool entry.

        The ``None`` placeholders inserted by gets() are skipped.  They are
        recognised by identity, so the filter does not depend on how pool
        entries implement comparison.
        """
        out = "".join([str(x) for x in value if x is not None])
        return out
class JCLASS(object):
    """Parsed view of a Java class file.

    ``content`` keeps the raw input; ``hdr`` (a Jclass_hdr) and
    ``description`` (a Jclass_description) are built from it by
    parse_content().  The add_* helpers append entries to
    ``hdr.constants_pool`` and return the 1-based index of the new entry.
    """
    def __getitem__(self, item):
        """Return ``self.content[item]``."""
        return self.content[item]
    def __setitem__(self, item, data):
        """Store ``data`` in ``self.content``; ``hdr``/``description`` are not re-parsed."""
        self.content.__setitem__(item, data)
        return

    def __init__(self, pestr = None):
        """Keep ``pestr`` as ``content`` and parse it immediately."""
        self._sex = 0
        self._wsize = 32
        self.content = pestr
        self.parse_content()

    def get_constant_pool_by_index(self, index):
        """Return the pool entry with 1-based ``index``, or None when out of range.

        The placeholder slot following a two-slot constant also yields None.
        """
        index -=1
        if 0 <= index < len(self.hdr.constants_pool):
            return self.hdr.constants_pool[index]
        return None

    def parse_content(self):
        """Parse ``content`` into ``hdr``, then ``description`` right after it."""
        self.hdr, l = Jclass_hdr.unpack_l(self.content, 0, self, self)
        self.description = Jclass_description.unpack(self.content, l, self, self)

    def __str__(self):
        """Return ``str(hdr)`` followed by ``str(description)``."""
        out = ''
        out += str(self.hdr)
        out += str(self.description)
        return out


    def add_constant(self, c):
        """Append ``c`` to the constant pool and return its 1-based index.

        A constant whose ``tag`` is 5 or 6 takes two pool slots: a ``None``
        placeholder is appended after it, exactly as Jclass_hdr.gets does
        while parsing, so that later indexes and ``constants_pool_count``
        stay consistent with a parsed file.
        """
        pool = self.hdr.constants_pool
        pool.append(c)
        index = len(pool)
        if getattr(c, 'tag', None) in (5, 6):
            pool.append(None)
        self.hdr.constants_pool_count = len(pool) + 1
        return index

    def add_integer(self, i):
        """Add a CPInteger holding ``i``; return its pool index."""
        c = CPInteger(parent_head = self, value = i)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_float(self, i):
        """Add a CPFloat holding ``i``; return its pool index."""
        c = CPFloat(parent_head = self, value = i)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_long(self, i):
        """Add a CPLong holding ``i``; return its pool index."""
        c = CPLong(parent_head = self, value = i)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_double(self, i):
        """Add a CPDouble holding ``i``; return its pool index."""
        c = CPDouble(parent_head = self, value = i)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_utf8(self, i):
        """Add a CPUtf8 with value ``i`` and length ``len(i)``; return its pool index."""
        c = CPUtf8(parent_head = self, length = len(i), value = i)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_string(self, i):
        """Add a CPUtf8 for ``i`` and a CPString pointing to it; return the CPString index."""
        x = self.add_utf8(i)
        c = CPString(parent_head = self, value = x)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_nameandtype(self, name, t):
        """Add CPUtf8 entries for ``name`` and ``t`` and a CPNameandType pointing to both."""
        namei = self.add_utf8(name)
        typei = self.add_utf8(t)
        c = CPNameandType(parent_head = self, name = namei, type = typei)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_class(self, i):
        """Add a CPUtf8 for ``i`` and a CPClass pointing to it; return the CPClass index."""
        x = self.add_utf8(i)
        c = CPClass(parent_head = self, name = x)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_methodref(self, name, typetype, typename):
        """Add a CPMethodref for class ``name`` and member (``typename``, ``typetype``)."""
        namei = self.add_class(name)
        typei = self.add_nameandtype(typename, typetype)
        c = CPMethodref(parent_head = self, name = namei, type = typei)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_fieldref(self, name, typetype, typename):
        """Add a CPFieldref for class ``name`` and member (``typename``, ``typetype``)."""
        namei = self.add_class(name)
        typei = self.add_nameandtype(typename, typetype)
        c = CPFieldref(parent_head = self, name = namei, type = typei)
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)



if __name__ == "__main__":
    import sys
    # A class file is binary data: read it in binary mode so that no newline
    # translation or text decoding alters the bytes.
    fd = open(sys.argv[1], 'rb')
    try:
        data = fd.read()
    finally:
        fd.close()
    e = JCLASS(data)
# We want to be able to verify warnings in non-regression test:
# log.warning and log.error are replaced by recorders that append
# (level, args, kwargs) tuples to log_history.
log_history = []
log.warning = lambda *args, **kargs: log_history.append(('warn',args,kargs))
log.error = lambda *args, **kargs: log_history.append(('error',args,kargs))

def test_PE_addsections_32(assertion):
    """Create a default PE and add sections until the section list is full.

    ``assertion`` is called as assertion(expected, actual, label).
    """
    global log_history
    e = PE()
    d = e.pack()
    assertion('901e6383ee161b569af1d35d3f77b038',
              hashlib.md5(d).hexdigest(),
              'Creation of a standard empty PE')
    e.SHList.add_section(name = 'new', rawsize = 0x1000)
    d = e.pack()
    assertion('15aefbcc8f4b39e9484df8b1ed277c75',
              hashlib.md5(d).hexdigest(),
              'Adding a section to an empty PE')
    e.SHList.add_section(name = 'nxt', rawsize = 0x1000)
    d = e.virt[0x401000:0x402000]
    assertion('620f0b67a91f7f74151bc5be745b7110',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory, across multiple sections')
    # 2 sections exist already; 89 more attempts, the last one must fail.
    for _ in range(89):
        e.SHList.add_section(name = 'nxt', rawsize = 0x1000)
    assertion([('error', ('Cannot add section %s: not enough space for section list', 'nxt'), {})],
              log_history,
              'Add too many sections (logs)')
    log_history = []
    assertion(90, # Should be 91 if the last section could have been added
              len(e.SHList),
              'Add too many sections')

def test_PE_empty64(assertion):
    """Create a default PE with wsize=64 and check the md5 of its packed form."""
    e = PE(wsize=64)
    d = e.pack()
    assertion('863bf62f521b0cad3209e42cff959eed',
              hashlib.md5(d).hexdigest(),
              'Creation of a standard empty PE+')

def test_PE_manipulate(assertion):
    """Read pe_mingw.exe and exercise lookups, raw/virtual access and edits.

    The steps are order-dependent: each one checks either an md5 of the
    current state or the content of log_history, which is reset after every
    step that is expected to log.
    """
    global log_history
    pe_mingw = open_read(__dir__+'/binary_input/pe_mingw.exe')
    e = PE(pe_mingw)
    # Packed file is not identical :-(
    # Are missing:
    # - the data between the end of DOS header and the start of PE header
    # - the padding after the list of sections, before the first section
    # - many parts of directories
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Packing after reading pe_mingw.exe')
    d = PE(d).pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Packing after reading pe_mingw.exe; fix point')
    d = e.SHList.display().encode('latin1')
    assertion('ba631f3f172712b6526e284269c1ecbb',
              hashlib.md5(d).hexdigest(),
              'Display Sections from PE')
    d = e.Symbols.display().encode('latin1')
    assertion('1ee89dc3dc2104190734747d148b7511',
              hashlib.md5(d).hexdigest(),
              'Display COFF Symbols')
    assertion('__gnu_exception_handler@4',
              e.Symbols.getbyindex(2).name,
              'Get symbol by index, found')
    assertion(None,
              e.Symbols.getbyindex(2000),
              'Get symbol by index, not existing')
    # The same section is reached by name, by file offset and by address.
    d = e.getsectionbyname('.text').pack()
    assertion('ad0d51a670cb6cd2015499840ffefb8f',
              hashlib.md5(d).hexdigest(),
              'Get existing section by name')
    d = e.getsectionbyoff(0x400+0x100).pack()
    assertion('ad0d51a670cb6cd2015499840ffefb8f',
              hashlib.md5(d).hexdigest(),
              'Get existing section by offset')
    d = e.getsectionbyvad(0x400000+0x1000+0x100).pack()
    assertion('ad0d51a670cb6cd2015499840ffefb8f',
              hashlib.md5(d).hexdigest(),
              'Get existing section by address')
    d = e.getsectionbyname('no_sect')
    assertion(None, d, 'Get non-existing section by name')
    d = e.getsectionbyoff(0x80000)
    assertion(None, d, 'Get non-existing section by offset')
    d = e.getsectionbyvad(0x1000)
    assertion(None, d, 'Get non-existing section by address')
    d = e[0x100:0x120]
    assertion('6b8897a89909959320f8adfc1d81c9ee',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from raw data')
    assertion(True,
              e.virt.is_addr_in(0x401000),
              'Address in mapped virtual memory')
    assertion(False,
              e.virt.is_addr_in(0x201000),
              'Address not in mapped virtual memory')
    d = e.virt[0x401000]
    assertion('4c614360da93c0a041b22e537de151eb',
              hashlib.md5(d).hexdigest(),
              'Extract byte from mapped memory, in a section')
    d = e.virt[0x400100]
    assertion('93b885adfe0da089cdf634904fd59f71',
              hashlib.md5(d).hexdigest(),
              'Extract byte from mapped memory, in no section')
    d = e.virt[0x400100:0x400120]
    assertion('6b8897a89909959320f8adfc1d81c9ee',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory, in headers')
    d = e.virt[0x401000:0x401020]
    assertion('21ac18c2564a3b408b31aae0af19d502',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory, in a section')
    # NOTE(review): the original remark says "One null byte", but the md5
    # asserted two steps below is the digest of empty input -- confirm.
    d = e.virt[0x100:0x200] # One null byte
    assertion([('warn', ('unknown rva address! -3fff00',), {})],
              log_history,
              'Extract chunk from non-mapped memory (logs)')
    log_history = []
    assertion('d41d8cd98f00b204e9800998ecf8427e',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from non-mapped memory')
    assertion(e.virt[0x401000:0x401020],
              e.virt(0x401000,0x401020),
              'Extract chunk from mapped memory, old API')
    # Writing back identical bytes must leave the packed file unchanged.
    e[0x100:0x120] = e[0x100:0x120]
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Writing in raw data')
    e.virt[0x401100:0x401120] = e.virt[0x401100:0x401120]
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (interval)')
    e.virt[0x401100] = e.virt[0x401100:0x401120]
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (address)')
    e.virt[0x400100:0x400120] = e.virt[0x400100:0x400120]
    assertion([('warn', ('Cannot write at RVA %s', slice(256, 288, None)), {})],
              log_history,
              'Writing at invalid RVA (logs)')
    log_history = []
    assertion(0x468e71, len(e.virt), 'Max virtual address')
    assertion([('warn', ('__len__ deprecated',), {})],
              log_history,
              '__len__ deprectated (logs)')
    log_history = []
    # Find leave; ret
    assertion(0x401294,
              e.virt.find(struct.pack('BB', 0xc9, 0xc3)),
              'Find pattern (from the start)')
    assertion(0x4014B4,
              e.virt.rfind(struct.pack('BB', 0xc9, 0xc3)),
              'Find pattern (from the end)')
    e.SHList.align_sections()
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Align sections')
    # Remove Bound Import directory
    # Usually, its content is not stored in any section...
    # A future version of elfesteem will need to manage this
    # specific directory in a specific way.
    e.NThdr.optentries[pe.DIRECTORY_ENTRY_BOUND_IMPORT].rva = 0
    e.NThdr.optentries[pe.DIRECTORY_ENTRY_BOUND_IMPORT].size = 0
    # Create new sections with all zero content
    s_redir = e.SHList.add_section(name = "redir", size = 0x1000)
    s_test = e.SHList.add_section(name = "test", size = 0x1000)
    s_rel = e.SHList.add_section(name = "rel", size = 0x5000)
    d = e.pack()
    assertion('439f6c698d3d5238d88c5ccef99761e2',
              hashlib.md5(d).hexdigest(),
              'Adding sections')
    d = PE(d).pack()
    assertion('439f6c698d3d5238d88c5ccef99761e2',
              hashlib.md5(d).hexdigest(),
              'Adding sections; fix point')
    # Restart from the pristine file; s_test from the previous object is
    # still used below for its address.
    e = PE(pe_mingw)
    # Delete the last sections => OK
    for _ in range(2):
        del e.SHList._array[-1]
        e.SHList._size -= 40
        e.COFFhdr.numberofsections -= 1
    # Add two Descriptors in the Import Directory
    e.DirImport.add_dlldesc(
        [({"name":"kernel32.dll",
           "firstthunk":s_test.addr},
          ["CreateFileA",
           "SetFilePointer",
           "WriteFile",
           "CloseHandle",
           ]
          ),
         ({"name":"USER32.dll",
           "firstthunk":None},
          ["SetDlgItemInt",
           "GetMenu",
           "HideCaret",
           ]
          )
         ]
        )
    s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport))
    e.DirImport.set_rva(s_myimp.addr)
    assertion(0x4050a8,
              e.DirImport.get_funcvirt('KERNEL32.dll','ExitProcess'),
              'Import ExitProcess')
    assertion(None,
              e.DirImport.get_funcvirt(None,'LoadStringW'),
              'Import LoadStringW')
    assertion(None,
              e.DirExport.get_funcvirt('SetUserGeoID'),
              'Export SetUserGeoID')
    d = e.pack()
    assertion('8a3a1c8c9aa2db211e1d34c7efbb8473',
              hashlib.md5(d).hexdigest(),
              'Adding new imports')
    d = PE(d).pack()
    assertion([('warn', ('Section %d size %#x not aligned to %#x', 5, 294, 512), {})],
              log_history,
              'Adding new imports (logs)')
    log_history = []
    assertion('8a3a1c8c9aa2db211e1d34c7efbb8473',
              hashlib.md5(d).hexdigest(),
              'Adding new imports; fix point')
    # Add an export
    if e.DirExport.expdesc is None:
        e.DirExport.create(['coco'])
    assertion(0x40703e,
              e.DirExport.get_funcvirt('coco'),
              'Export: get_funcvirt')
    # 'eval' avoids warnings with python2.3
    assertion({1: eval("0xdeedc0fe"), 'coco': eval("0xdeedc0fe")},
              e.export_funcs(),
              'Export: export_funcs')
    d = e.pack()
    assertion('47a864481296d88f908126fb822ded59',
              hashlib.md5(d).hexdigest(),
              'Adding new exports')
    d = PE(d).pack()
    assertion([('warn', ('Section %d size %#x not aligned to %#x', 5, 294, 512), {})],
              log_history,
              'Adding new exports (logs)')
    log_history = []
    assertion('47a864481296d88f908126fb822ded59',
              hashlib.md5(d).hexdigest(),
              'Adding new exports; fix point')
    # Add a new Descriptor in the Import Directory
    e.DirImport.add_dlldesc([ ({"name":"MyDLL.dll"}, ["MyFunc"]) ])
    e.DirImport.set_rva(None)
    # NOTE(review): ``d`` is not re-packed after the two calls above, so this
    # compares the same bytes as the previous assertion -- confirm intent.
    assertion('47a864481296d88f908126fb822ded59',
              hashlib.md5(d).hexdigest(),
              'Adding imports, no specified section')
def test_PE_ange(assertion):
    """Parse the ill-formed samples of binary_input/Ange and check the logs.

    log_history accumulates across PE() calls: it is reset only after a
    sample expected to log something, so an ``[]`` expectation also checks
    that the preceding reset happened.
    """
    global log_history
    # Parse some ill-formed PE made by Ange Albertini
    PE(open_read(__dir__+'/binary_input/Ange/resourceloop.exe'))
    assertion([('warn', ('Resource tree too deep',), {})]*212,
              log_history,
              'Ange/resourceloop.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/namedresource.exe'))
    assertion([],
              log_history,
              'Ange/namedresource.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/weirdsord.exe'))
    assertion([('warn', ('Section %d offset %#x not aligned to %#x', 0, 513, 16384), {}), ('warn', ('Section %d size %#x not aligned to %#x', 0, 270, 16384), {})],
              log_history,
              'Ange/weirdsord.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/nosectionW7.exe'))
    assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 16, 0), {})],
              log_history,
              'Ange/nosectionW7.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/imports_relocW7.exe'))
    assertion([],
              log_history,
              'Ange/imports_relocW7.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/imports_tinyXP.exe'))
    assertion([],
              log_history,
              'Ange/imports_tinyXP.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/bottomsecttbl.exe'))
    assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 16, 696), {})],
              log_history,
              'Ange/bottomsecttbl.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/delayfake.exe'))
    assertion([],
              log_history,
              'Ange/delayfake.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/exportobf.exe'))
    assertion([],
              log_history,
              'Ange/exportobf.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/dllbound-ld.exe'))
    assertion([],
              log_history,
              'Ange/dllbound-ld.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/d_tiny.dll'))
    assertion([('warn', ('Opthdr magic %#x', 31074), {}),
               ('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 0, 13864), {}),
               ('warn', ('Windows 8 needs at least 13 directories, %d found', 0), {}),
               ('warn', ('Too many symbols: %d', 541413408), {}),
               ('warn', ('File too short for StrTable -0x61746127 != 0x0',), {})],
              log_history,
              'Ange/d_tiny.dll (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/dllfw.dll'))
    assertion([],
              log_history,
              'Ange/dllfw.dll (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/tinydllXP.dll'))
    assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 0, 0), {}),
               ('warn', ('Windows 8 needs at least 13 directories, %d found', 0), {}),
               ('warn', ('File too short for StrTable 0x55 != 0xc258016a',), {})],
              log_history,
              'Ange/tinydllXP.dll (logs)')
    log_history = []
    # Parse resourceloop.exe again, discard the warnings it logs, and check
    # only the rendering of its resource directory.
    e = PE(open_read(__dir__+'/binary_input/Ange/resourceloop.exe'))
    log_history = []
    d = e.DirRes.display().encode('latin1')
    assertion('98701be30b09759a64340e5245e48195',
              hashlib.md5(d).hexdigest(),
              'Display Directory RESOURCE that is too deep')
0x2000 381 | e = PE(e.pack()) 382 | assertion(0,1, 'Not a PE, invalid NTsig') 383 | except ValueError: 384 | pass 385 | try: 386 | e.DOShdr.lfanew = 0x200000 387 | data[60] = struct.pack("