├── elfesteem
    ├── macho_init.py
    ├── macho
    │   ├── __init__.py
    │   └── common.py
    ├── __init__.py
    ├── compatibility_python23.py
    ├── binary.py
    ├── strpatchwork.py
    ├── intervals.py
    ├── rprc.py
    ├── new_cstruct.py
    ├── minidump_init.py
    ├── cstruct.py
    └── jclass_init.py
├── tests
    ├── binary_input
    │   ├── Ange
    │   │   ├── d_tiny.dll
    │   │   ├── dllfw.dll
    │   │   ├── delayfake.exe
    │   │   ├── dllbound-ld.exe
    │   │   ├── exportobf.exe
    │   │   ├── nosectionW7.exe
    │   │   ├── tinydllXP.dll
    │   │   ├── weirdsord.exe
    │   │   ├── bottomsecttbl.exe
    │   │   ├── namedresource.exe
    │   │   ├── resourceloop.exe
    │   │   ├── imports_relocW7.exe
    │   │   └── imports_tinyXP.exe
    │   ├── elf_cpp.o
    │   ├── macho
    │   │   ├── sh
    │   │   ├── OSXII
    │   │   ├── Decibels
    │   │   ├── LyonMetro
    │   │   ├── SweetHome3D
    │   │   ├── macho_32.o
    │   │   ├── macho_64.o
    │   │   ├── MacTheRipper
    │   │   ├── macho_32.out
    │   │   ├── macho_64.out
    │   │   ├── macho_fat.out
    │   │   ├── TelephonyUtil.o
    │   │   ├── libcoretls.dylib
    │   │   ├── libSystem.B.dylib
    │   │   ├── libecpg.6.5.dylib
    │   │   ├── macho_lcbuild.out
    │   │   ├── libdns_services.dylib
    │   │   ├── libPrintServiceQuota.1.dylib
    │   │   └── libATCommandStudioDynamic.dylib
    │   ├── tiny45.bin
    │   ├── tiny52.bin
    │   ├── tiny64.bin
    │   ├── tiny76.bin
    │   ├── tiny84.bin
    │   ├── pe_mingw.exe
    │   ├── windows.dmp
    │   ├── cku192.irix40
    │   ├── cku196.clix-3.1
    │   ├── coff_mingw.obj
    │   ├── elf64_small.out
    │   ├── elf_small.out
    │   ├── pe_vstudio.dll
    │   ├── minidump-i386.dmp
    │   ├── cku200.dec-osf-1.3a
    │   ├── ducati-m3_p768.bin
    │   ├── minidump-x86_64.dmp
    │   ├── cku190.rs6aix32c-3.2.4
    │   ├── cku192.ultrix43c-mips3
    │   ├── notle-tesla-dsp.xe64T
    │   ├── cku193a05.apollo-sr10-s5r3
    │   ├── C28346_Load_Program_to_Flash.out
    │   ├── tiny45.asm
    │   ├── tiny52.asm
    │   ├── tiny64.asm
    │   ├── tiny76.asm
    │   ├── tiny84.asm
    │   └── README.txt
    ├── examples_macos.sh
    ├── examples_linux.sh
    ├── test_minidump_manipulation.py
    ├── test_intervals.py
    ├── test_all.py
    ├── test_rprc_manipulation.py
    ├── test_elf_manipulation.py
    └── test_pe_manipulation.py
├── setup.py
├── elfcli
├── .github
    └── workflows
    │   ├── codeql.yml
    │   ├── tools.yml
    │   └── python-versions.yml
├── examples
    ├── test_pe.py
    ├── minidump_to_pe.py
    ├── readelf.py
    └── otool.py
├── .travis.yml
└── README.md


/elfesteem/macho_init.py:
--------------------------------------------------------------------------------
1 | from elfesteem.macho import *
2 | 


--------------------------------------------------------------------------------
/elfesteem/macho/__init__.py:
--------------------------------------------------------------------------------
1 | from elfesteem.macho.init import *
2 | 


--------------------------------------------------------------------------------
/tests/binary_input/Ange/d_tiny.dll:
--------------------------------------------------------------------------------
1 | MZPE   * tiny data PE (61 bytes)
2 |                           


--------------------------------------------------------------------------------
/tests/binary_input/elf_cpp.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/elf_cpp.o


--------------------------------------------------------------------------------
/tests/binary_input/macho/sh:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/sh


--------------------------------------------------------------------------------
/tests/binary_input/tiny45.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny45.bin


--------------------------------------------------------------------------------
/tests/binary_input/tiny52.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny52.bin


--------------------------------------------------------------------------------
/tests/binary_input/tiny64.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny64.bin


--------------------------------------------------------------------------------
/tests/binary_input/tiny76.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny76.bin


--------------------------------------------------------------------------------
/tests/binary_input/tiny84.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/tiny84.bin


--------------------------------------------------------------------------------
/tests/binary_input/macho/OSXII:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/OSXII


--------------------------------------------------------------------------------
/tests/binary_input/pe_mingw.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/pe_mingw.exe


--------------------------------------------------------------------------------
/tests/binary_input/windows.dmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/windows.dmp


--------------------------------------------------------------------------------
/tests/binary_input/Ange/dllfw.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/dllfw.dll


--------------------------------------------------------------------------------
/tests/binary_input/cku192.irix40:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku192.irix40


--------------------------------------------------------------------------------
/tests/binary_input/cku196.clix-3.1:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku196.clix-3.1


--------------------------------------------------------------------------------
/tests/binary_input/coff_mingw.obj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/coff_mingw.obj


--------------------------------------------------------------------------------
/tests/binary_input/elf64_small.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/elf64_small.out


--------------------------------------------------------------------------------
/tests/binary_input/elf_small.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/elf_small.out


--------------------------------------------------------------------------------
/tests/binary_input/macho/Decibels:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/Decibels


--------------------------------------------------------------------------------
/tests/binary_input/macho/LyonMetro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/LyonMetro


--------------------------------------------------------------------------------
/tests/binary_input/pe_vstudio.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/pe_vstudio.dll


--------------------------------------------------------------------------------
/elfesteem/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Submodules that are imported by `from elfesteem import *`.
4 | # NOTE(review): other modules of the package (e.g. macho_init, minidump_init,
5 | # rprc) are not listed here -- confirm that this is intentional.
6 | __all__ = ['pe_init', 'elf_init', 'jclass_init', 'strpatchwork']
7 | 


--------------------------------------------------------------------------------
/tests/binary_input/macho/SweetHome3D:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/SweetHome3D


--------------------------------------------------------------------------------
/tests/binary_input/macho/macho_32.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_32.o


--------------------------------------------------------------------------------
/tests/binary_input/macho/macho_64.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_64.o


--------------------------------------------------------------------------------
/tests/binary_input/minidump-i386.dmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/minidump-i386.dmp


--------------------------------------------------------------------------------
/tests/binary_input/Ange/delayfake.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/delayfake.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/dllbound-ld.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/dllbound-ld.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/exportobf.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/exportobf.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/nosectionW7.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/nosectionW7.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/tinydllXP.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/tinydllXP.dll


--------------------------------------------------------------------------------
/tests/binary_input/Ange/weirdsord.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/weirdsord.exe


--------------------------------------------------------------------------------
/tests/binary_input/cku200.dec-osf-1.3a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku200.dec-osf-1.3a


--------------------------------------------------------------------------------
/tests/binary_input/ducati-m3_p768.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/ducati-m3_p768.bin


--------------------------------------------------------------------------------
/tests/binary_input/macho/MacTheRipper:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/MacTheRipper


--------------------------------------------------------------------------------
/tests/binary_input/macho/macho_32.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_32.out


--------------------------------------------------------------------------------
/tests/binary_input/macho/macho_64.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_64.out


--------------------------------------------------------------------------------
/tests/binary_input/macho/macho_fat.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_fat.out


--------------------------------------------------------------------------------
/tests/binary_input/minidump-x86_64.dmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/minidump-x86_64.dmp


--------------------------------------------------------------------------------
/tests/binary_input/Ange/bottomsecttbl.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/bottomsecttbl.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/namedresource.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/namedresource.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/resourceloop.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/resourceloop.exe


--------------------------------------------------------------------------------
/tests/binary_input/cku190.rs6aix32c-3.2.4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku190.rs6aix32c-3.2.4


--------------------------------------------------------------------------------
/tests/binary_input/cku192.ultrix43c-mips3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku192.ultrix43c-mips3


--------------------------------------------------------------------------------
/tests/binary_input/macho/TelephonyUtil.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/TelephonyUtil.o


--------------------------------------------------------------------------------
/tests/binary_input/macho/libcoretls.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libcoretls.dylib


--------------------------------------------------------------------------------
/tests/binary_input/notle-tesla-dsp.xe64T:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/notle-tesla-dsp.xe64T


--------------------------------------------------------------------------------
/tests/binary_input/Ange/imports_relocW7.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/imports_relocW7.exe


--------------------------------------------------------------------------------
/tests/binary_input/Ange/imports_tinyXP.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/Ange/imports_tinyXP.exe


--------------------------------------------------------------------------------
/tests/binary_input/macho/libSystem.B.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libSystem.B.dylib


--------------------------------------------------------------------------------
/tests/binary_input/macho/libecpg.6.5.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libecpg.6.5.dylib


--------------------------------------------------------------------------------
/tests/binary_input/macho/macho_lcbuild.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/macho_lcbuild.out


--------------------------------------------------------------------------------
/tests/binary_input/cku193a05.apollo-sr10-s5r3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/cku193a05.apollo-sr10-s5r3


--------------------------------------------------------------------------------
/tests/binary_input/macho/libdns_services.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libdns_services.dylib


--------------------------------------------------------------------------------
/tests/binary_input/C28346_Load_Program_to_Flash.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/C28346_Load_Program_to_Flash.out


--------------------------------------------------------------------------------
/tests/binary_input/macho/libPrintServiceQuota.1.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libPrintServiceQuota.1.dylib


--------------------------------------------------------------------------------
/tests/binary_input/macho/libATCommandStudioDynamic.dylib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LRGH/elfesteem/HEAD/tests/binary_input/macho/libATCommandStudioDynamic.dylib


--------------------------------------------------------------------------------
/tests/examples_macos.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/zsh
 2 | 
 3 | # Compare the output of the system's "otool -l" with the output of
 4 | # examples/otool.py on Mach-O test inputs; any difference is printed as a
 5 | # context diff. Paths are relative: run from the root of the repository.
 6 | #
 7 | # Note that we don't test all files, because some are not well parsed by the
 8 | # system's otool.
 9 | 
10 | # The glob keeps the files whose name starts with D, L, S, T, libA, libS,
11 | # libc, libd, libe or macho_.
12 | for file in tests/binary_input/macho/{[DLST],lib[AScde],macho_}*; do
13 | echo "=== $file ==="
14 | # =(...) is zsh syntax: the output of the command is stored in a temporary
15 | # file whose name is passed to diff. stderr of otool.py is discarded.
16 | diff -c =(otool -l $file) =(python ./examples/otool.py --llvm=native -l $file 2>/dev/null)
17 | done
18 | 


--------------------------------------------------------------------------------
/tests/examples_linux.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | # Compare the output of the system's readelf with the output of
 4 | # examples/readelf.py, one option and one file at a time; any difference is
 5 | # printed as a context diff. Paths are relative: run from the root of the
 6 | # repository.
 7 | 
 8 | # The second assignment overrides the first one, so only -h and -S are
 9 | # compared; the longer list is kept for reference.
10 | options="-h -S -r -s --dyn-syms -d -l -g"
11 | options="-h -S"
12 | for option in $options; do
13 | for file in /bin/sh tests/binary_input/elf_small.out; do
14 | echo "=== readelf $option $file ==="
15 | # stderr of readelf.py is discarded, only its stdout is compared.
16 | diff -c <(readelf $option $file) <(python ./examples/readelf.py $option --readelf=native $file 2>/dev/null)
17 | done
18 | done
19 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | # Installation script for the elfesteem packages and the example tools.
 4 | 
 5 | # NOTE(review): distutils was removed from the standard library in
 6 | # Python 3.12 (PEP 632); this import fails on such interpreters.
 7 | from distutils.core import setup
 8 | 
 9 | setup(
10 |     name = 'ELF-Esteem',
11 |     version = '0.1',    
12 |     packages = ['elfesteem', 'elfesteem.macho'],
13 |     requires = ['python (>= 2.3)'],
14 |     # Example tools installed as command-line scripts.
15 |     # NOTE(review): examples/readpe.py does not appear in the file listing
16 |     # visible here -- confirm that it exists.
17 |     scripts = ['examples/readelf.py','examples/otool.py','examples/readpe.py'],
18 |     # Metadata
19 |     author = 'Philippe BIONDI',
20 |     author_email = 'phil(at)secdev.org',
21 |     description = 'ELF-Esteem: ELF file manipulation library',
22 |     license = 'LGPLv2.1',
23 |     url = 'https://github.com/airbus-seclab/elfesteem',
24 |     # keywords = '',
25 | )
26 | 


--------------------------------------------------------------------------------
/elfesteem/compatibility_python23.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | if sys.version_info[0] == 2 and sys.version_info[1] <= 3:
 3 |     # Python 2.3 does not know 'sorted' nor 'reversed'
 4 |     # (both builtins appeared in Python 2.4), so minimal replacements are
 5 |     # defined here; on any later interpreter this whole block is skipped.
 6 |     def sorted(l, key=None, reverse=False):
 7 |         """Return a new sorted list built from the items of l.
 8 | 
 9 |         key, when given, is called on each item to obtain the value that
10 |         is compared; reverse=True swaps the operands of every comparison.
11 |         """
12 |         # Work on a copy, so that the argument itself is not sorted in place.
13 |         l = [_ for _ in l]
14 |         if key is None:
15 |             if reverse: l.sort(lambda x,y: cmp(y,x))
16 |             else:       l.sort()
17 |         else:
18 |             # list.sort() has no key= argument before Python 2.4, hence
19 |             # comparison functions that apply key themselves.
20 |             if reverse: l.sort(lambda x,y: cmp(key(y),key(x)))
21 |             else:       l.sort(lambda x,y: cmp(key(x),key(y)))
22 |         return l
23 |     def reversed(l):
24 |         """Return a list (not an iterator) with the items of l, last first.
25 | 
26 |         l must support len() and integer indexing.
27 |         """
28 |         length = len(l)
29 |         return [ l[length-idx] for idx in range(1,length+1) ]
30 |     import warnings
31 |     # FutureWarning messages are silenced on this old interpreter.
32 |     warnings.simplefilter("ignore", FutureWarning)
33 | 


--------------------------------------------------------------------------------
/elfcli:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import sys
 4 | import code
 5 | #import elfesteem.elf
 6 | import readline
 7 | import argparse
 8 | 
 9 | 
10 | 
11 | def usage():
12 |     print >>sys.stderr,"Usage: elfcli [-i inputfile]"
13 |     raise SystemExit
14 | 
15 | def main():
16 |     
17 |     parser = argparse.ArgumentParser()
18 |     parser.add_argument("infile")
19 | 
20 |     options = parser.parse_args()
21 | 
22 |     # prepare locals and binding for interactive session
23 |     readline.parse_and_bind("tab: complete")
24 |     del(parser)
25 |     from elfesteem import *
26 |     
27 |     elf = elf_init.ELF(open(options.infile).read())
28 |     
29 |     code.interact(local=locals())
30 |     
31 | 
32 | if __name__ == "__main__":
33 |     main()
34 | 


--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
 1 | name: "CodeQL"
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "master" ]
 6 |   pull_request:
 7 |     branches: [ "master" ]
 8 | 
 9 | jobs:
10 |   analyze:
11 |     name: CodeQL analysis
12 |     runs-on: ubuntu-latest
13 |     permissions:
14 |       actions: read
15 |       contents: read
16 |       security-events: write
17 | 
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         language: [ python ]
22 | 
23 |     steps:
24 |       - name: Checkout
25 |         uses: actions/checkout@v4
26 | 
27 |       - name: Initialize CodeQL
28 |         uses: github/codeql-action/init@v3
29 |         with:
30 |           languages: ${{ matrix.language }}
31 |           queries: +security-and-quality
32 | 
33 |       - name: Autobuild
34 |         uses: github/codeql-action/autobuild@v3
35 | 
36 |       - name: Perform CodeQL Analysis
37 |         uses: github/codeql-action/analyze@v3
38 |         with:
39 |           category: "/language:${{ matrix.language }}"
40 | 


--------------------------------------------------------------------------------
/examples/test_pe.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import pe
 4 | from pe_init import PE
 5 | import rlcompleter,readline,pdb, sys
 6 | from pprint import pprint as pp
 7 | readline.parse_and_bind("tab: complete")
 8 | 
 9 | 
10 | e_ = PE()
11 | mysh = "\xc3"
12 | s_text = e_.SHList.add_section(name = "text", addr = 0x1000, rawsize = 0x1000, data = mysh)
13 | e_.Opthdr.AddressOfEntryPoint = s_text.addr
14 | new_dll = [({"name":"kernel32.dll",
15 |              "firstthunk":s_text.addr+0x100},
16 |             ["CreateFileA", "SetFilePointer", "WriteFile", "CloseHandle"]
17 |             )
18 |            ,
19 |            ({"name":"USER32.dll",
20 |              "firstthunk":None},
21 |             ["SetDlgItemInt", "GetMenu", "HideCaret"]
22 |             )
23 |            ]
24 | e_.DirImport.add_dlldesc(new_dll)
25 | 
26 | s_myimp = e_.SHList.add_section(name = "myimp", rawsize = 0x1000)
27 | e_.DirImport.set_rva(s_myimp.addr)
28 | fd = open('uu.bin', 'wb')
29 | try:
30 |     fd.write(str(e_))
31 | finally:
32 |     fd.close()
33 | 


--------------------------------------------------------------------------------
/.github/workflows/tools.yml:
--------------------------------------------------------------------------------
 1 | # This workflow compares the outputs of elfesteem with native tools on the OS
 2 | 
 3 | name: Native tools
 4 | 
 5 | on:
 6 |   push:
 7 |     branches: [ "master" ]
 8 |   pull_request:
 9 |     branches: [ "master" ]
10 | 
11 | jobs:
12 |   macos:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os: ["macos-12", "macos-13"]
18 |         python-version: ["3.10"]
19 |     steps:
20 |     - uses: actions/checkout@v3
21 |     - name: Set up Python ${{ matrix.python-version }}
22 |       uses: actions/setup-python@v4
23 |       with:
24 |         python-version: ${{ matrix.python-version }}
25 |     - name: Comparison with otool
26 |       run: |
27 |         export PYTHONPATH=$PYTHONPATH:$(pwd)
28 |         zsh ./tests/examples_macos.sh
29 |   linux:
30 |     runs-on: ${{ matrix.os }}
31 |     strategy:
32 |       fail-fast: false
33 |       matrix:
34 |         os: ["ubuntu-latest", "ubuntu-22.04", "ubuntu-20.04"]
35 |         python-version: ["3.10"]
36 |     steps:
37 |     - uses: actions/checkout@v3
38 |     - name: Set up Python ${{ matrix.python-version }}
39 |       uses: actions/setup-python@v4
40 |       with:
41 |         python-version: ${{ matrix.python-version }}
42 |     - name: Comparison with readelf
43 |       run: |
44 |         readelf --version
45 |         export PYTHONPATH=$PYTHONPATH:$(pwd)
46 |         bash ./tests/examples_linux.sh
47 | 


--------------------------------------------------------------------------------
/tests/binary_input/tiny45.asm:
--------------------------------------------------------------------------------
 1 |   ; tiny.asm
 2 |   
 3 |   BITS 32
 4 |   
 5 |                 org     0x00010000
 6 |   
 7 |                 db      0x7F, "ELF"             ; e_ident
 8 |                 dd      1                                       ; p_type
 9 |                 dd      0                                       ; p_offset
10 |                 dd      $$                                      ; p_vaddr 
11 |                 dw      2                       ; e_type        ; p_paddr
12 |                 dw      3                       ; e_machine
13 |                 dd      _start                  ; e_version     ; p_filesz
14 |                 dd      _start                  ; e_entry       ; p_memsz
15 |                 dd      4                       ; e_phoff       ; p_flags
16 |   _start:
17 |                 mov     bl, 42                  ; e_shoff       ; p_align
18 |                 xor     eax, eax
19 |                 inc     eax                     ; e_flags
20 |                 int     0x80
21 |                 db      0
22 |                 dw      0x34                    ; e_ehsize
23 |                 dw      0x20                    ; e_phentsize
24 |                 db      1                       ; e_phnum
25 |                                                 ; e_shentsize
26 |                                                 ; e_shnum
27 |                                                 ; e_shstrndx
28 |   
29 |   filesize      equ     $ - $$
30 | 


--------------------------------------------------------------------------------
/tests/binary_input/tiny52.asm:
--------------------------------------------------------------------------------
 1 |   ; tiny.asm
 2 |   
 3 |   BITS 32
 4 |   
 5 |                 org     0x00010000
 6 |   
 7 |                 db      0x7F, "ELF"             ; e_ident
 8 |                 dd      1                                       ; p_type
 9 |                 dd      0                                       ; p_offset
10 |                 dd      $$                                      ; p_vaddr 
11 |                 dw      2                       ; e_type        ; p_paddr
12 |                 dw      3                       ; e_machine
13 |                 dd      _start                  ; e_version     ; p_filesz
14 |                 dd      _start                  ; e_entry       ; p_memsz
15 |                 dd      4                       ; e_phoff       ; p_flags
16 |   _start:
17 |                 mov     bl, 42                  ; e_shoff       ; p_align
18 |                 xor     eax, eax
19 |                 inc     eax                     ; e_flags
20 |                 int     0x80
21 |                 db      0
22 |                 dw      0x34                    ; e_ehsize
23 |                 dw      0x20                    ; e_phentsize
24 |                 dw      1                       ; e_phnum
25 |                 dw      0                       ; e_shentsize
26 |                 dw      0                       ; e_shnum
27 |                 dw      0                       ; e_shstrndx
28 |   
29 |   filesize      equ     $ - $$
30 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | jobs:
 3 |   include:
 4 |     - python: '3.7'
 5 |     - python: '2.7'
 6 |     - name: 'Python: 2.3'
 7 |       # python 2.3 not available in travis
 8 |       install:
 9 |         - cd ..
10 |         - curl -O https://www.python.org/ftp/python/2.3.7/Python-2.3.7.tgz
11 |         - tar xzf Python-2.3.7.tgz
12 |         - cd Python-2.3.7
13 |         # We need to disable FORTIFY_SOURCE to compile python 2.3
14 |         # cf. https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/286334
15 |         - ./configure BASECFLAGS=-U_FORTIFY_SOURCE
16 |         - make
17 |         - export PATH=$(pwd):$PATH
18 |         - cd ../elfesteem
19 |         - python -c 'import sys;print(sys.version)'
20 |       script:
21 |         - python ./tests/test_all.py
22 |       after_success:
23 |         - true # coverage needs python >= 2.6
24 |     - python: 'pypy3'
25 |     - python: 'pypy'
26 |     - python: '3.8'
27 |     - python: '3.4'
28 | install:
29 |   - pip install coverage codecov
30 | before_script:
31 |   export PYTHONPATH=$PYTHONPATH:$(pwd)
32 | script:
33 |   - coverage run ./tests/test_all.py
34 |   # We don't use e.g. tox for non-regression tests, because we want to have
35 |   # a script that works with old python too, and tox needs python2.5
36 |   # python2.4 ./tests/test_all.py will work fine :-)
37 |   # Note that coverage is incompatible with python 3.2, cf.
38 |   # https://github.com/menegazzo/travispy/issues/20
39 | after_success:
40 |   - codecov
41 | 


--------------------------------------------------------------------------------
/examples/minidump_to_pe.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | """Minidump to PE example"""
 3 | import sys
 4 | from elfesteem.minidump_init import Minidump
 5 | from elfesteem.pe_init import PE
 6 | 
 7 | fd = open(sys.argv[1])
 8 | try:
 9 |     raw = fd.read()
10 | finally:
11 |     fd.close()
12 | minidump = Minidump(raw)
13 | 
14 | pe = PE()
15 | for i, memory in enumerate(sorted(minidump.memory.itervalues(),
16 |                                   key=lambda x:x.address)):
17 |     # Get section name
18 |     name = str(memory.name)
19 |     if not name:
20 |         name = "s_%02d" % i
21 |     else:
22 |         name = name.split('\\')[-1]
23 | 
24 |     # Get section protection
25 |     protect = memory.pretty_protect
26 |     protect_mask = 0x20
27 |     if protect == "UNKNOWN":
28 |         protect_mask |= 0xe0000000
29 |     else:
30 |         if "EXECUTE" in protect:
31 |             protect_mask |= 1 << 29
32 |         if "READ" in protect:
33 |             protect_mask |= 1 << 30
34 |         if "WRITE" in protect:
35 |             protect_mask |= 1 << 31
36 | 
37 |     # Add the section
38 |     pe.SHList.add_section(name=name, addr=memory.address, rawsize=memory.size,
39 |                           data=memory.content, flags=protect_mask)
40 | 
41 | # Find entry point
42 | entry_point = minidump.threads.Threads[0].ThreadContext.Eip[0]
43 | pe.Opthdr.AddressOfEntryPoint = entry_point
44 | 
45 | fd = open("out_pe.bin", "w")
46 | try:
47 |     fd.write(str(pe))
48 | finally:
49 |     fd.close()
50 | 


--------------------------------------------------------------------------------
/tests/binary_input/tiny64.asm:
--------------------------------------------------------------------------------
  ; tiny.asm
  ;
  ; 64-byte image of a 32-bit ELF executable. The code is stored in the
  ; padding bytes of e_ident, and the program header (phdr) is overlaid on
  ; the ELF header starting at e_shoff; the paired field names in the
  ; comments below show which ELF header field shares its bytes with which
  ; program header field.
  ; NASM syntax; presumably assembled as a flat binary -- see the "teensy"
  ; reference in tests/binary_input/README.txt.
  
  BITS 32
  
  ; The load address 0x00200000 is also the dword stored at e_ehsize /
  ; e_phentsize: its upper 16 bits (0x0020) give e_phentsize = 32.
                org     0x00200000
  
                db      0x7F, "ELF"             ; e_ident
                db      1, 1, 1, 0, 0
  _start:
  ; int 0x80 with eax = 1 and bl = 42 (presumably the Linux exit syscall
  ; with status 42)
                mov     bl, 42
                xor     eax, eax
                inc     eax
                int     0x80
                dw      2                       ; e_type
                dw      3                       ; e_machine
                dd      1                       ; e_version
                dd      _start                  ; e_entry
                dd      phdr - $$               ; e_phoff
  phdr:         dd      1                       ; e_shoff       ; p_type
                dd      0                       ; e_flags       ; p_offset
                dd      $$                      ; e_ehsize      ; p_vaddr
                                                ; e_phentsize
                dw      1                       ; e_phnum       ; p_paddr
                dw      0                       ; e_shentsize
                dd      filesize                ; e_shnum       ; p_filesz
                                                ; e_shstrndx
                dd      filesize                                ; p_memsz
                dd      5                                       ; p_flags
                dd      0x1000                                  ; p_align
  
  ; Total size of the image, in bytes
  filesize      equ     $ - $$
32 | 


--------------------------------------------------------------------------------
/tests/binary_input/tiny76.asm:
--------------------------------------------------------------------------------
  ; tiny.asm
  ;
  ; 76-byte image of a 32-bit ELF executable. The code is stored in the
  ; padding bytes of e_ident, and the first 8 bytes of the program header
  ; (p_type, p_offset) are shared with the last 8 bytes of the ELF header
  ; (e_phnum, e_shentsize, e_shnum, e_shstrndx).
  ; NASM syntax; presumably assembled as a flat binary -- see the "teensy"
  ; reference in tests/binary_input/README.txt.
  
  BITS 32
  
                org     0x08048000
  
  ehdr:
                db      0x7F, "ELF"             ; e_ident
                db      1, 1, 1, 0, 0
  ; int 0x80 with eax = 1 and bl = 42 (presumably the Linux exit syscall
  ; with status 42)
  _start:       mov     bl, 42
                xor     eax, eax
                inc     eax
                int     0x80
                dw      2                       ; e_type
                dw      3                       ; e_machine
                dd      1                       ; e_version
                dd      _start                  ; e_entry
                dd      phdr - $$               ; e_phoff
                dd      0                       ; e_shoff
                dd      0                       ; e_flags
                dw      ehdrsize                ; e_ehsize
                dw      phdrsize                ; e_phentsize
  phdr:         dd      1                       ; e_phnum       ; p_type
                                                ; e_shentsize
                dd      0                       ; e_shnum       ; p_offset
                                                ; e_shstrndx
  ; The ELF header ends here, 8 bytes after the start of the program header
  ehdrsize      equ     $ - ehdr
                dd      $$                                      ; p_vaddr
                dd      $$                                      ; p_paddr
                dd      filesize                                ; p_filesz
                dd      filesize                                ; p_memsz
                dd      5                                       ; p_flags
                dd      0x1000                                  ; p_align
  phdrsize      equ     $ - phdr
  
  ; Total size of the image, in bytes
  filesize      equ     $ - $$
37 | 


--------------------------------------------------------------------------------
/tests/test_minidump_manipulation.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import os
 4 | __dir__ = os.path.dirname(__file__)
 5 | 
 6 | from test_all import run_tests, assertion, hashlib, open_read
 7 | from elfesteem.minidump_init import Minidump
 8 | 
 9 | def test_MD_windows(assertion):
10 |     md = open_read(__dir__+'/binary_input/windows.dmp')
11 |     assertion('82a09a9d801bddd1dc94dfb9ba6eddf0',
12 |               hashlib.md5(md).hexdigest(),
13 |               'Reading windows.dmp')
14 |     e = Minidump(md)
15 |     d = e.dump().encode('latin1')
16 |     assertion('48cae6cc782305b611f6e8b82049b9a0',
17 |               hashlib.md5(d).hexdigest(),
18 |               'Displaying the content of windows.dmp')
19 | 
def test_MD_i386(assertion):
    """Non-regression test for minidump-i386.dmp: input md5, then md5 of dump()."""
    raw = open_read(__dir__+'/binary_input/minidump-i386.dmp')
    raw_digest = hashlib.md5(raw).hexdigest()
    assertion('0f2ee1a0a2e6351e64929197c07679e6',
              raw_digest,
              'Reading minidump-i386.dmp')
    # The textual display is hashed after conversion to bytes
    parsed = Minidump(raw)
    displayed = parsed.dump().encode('latin1')
    displayed_digest = hashlib.md5(displayed).hexdigest()
    assertion('c89c01352e515874b00d998b1ad06998',
              displayed_digest,
              'Displaying the content of minidump-i386.dmp')
30 | 
def test_MD_x86_64(assertion):
    """Non-regression test for minidump-x86_64.dmp: input md5, then md5 of dump()."""
    raw = open_read(__dir__+'/binary_input/minidump-x86_64.dmp')
    raw_digest = hashlib.md5(raw).hexdigest()
    assertion('ecde7af61615e05ffcde1f064c1a22f8',
              raw_digest,
              'Reading minidump-x86_64.dmp')
    # The textual display is hashed after conversion to bytes
    parsed = Minidump(raw)
    displayed = parsed.dump().encode('latin1')
    displayed_digest = hashlib.md5(displayed).hexdigest()
    assertion('4357695a7e265aca04bb2809485b8634',
              displayed_digest,
              'Displaying the content of minidump-x86_64.dmp')
41 | 
def run_test(assertion):
    """Call every module-level object whose name starts with 'test_'."""
    # Work on a copy of the namespace, in case a test modifies the globals
    namespace = dict(globals())
    for name in namespace:
        if not name.startswith('test_'):
            continue
        namespace[name](assertion)

if __name__ == "__main__":
    # Standalone use, without pytest
    run_tests(run_test)
49 | 


--------------------------------------------------------------------------------
/.github/workflows/python-versions.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install various versions of Python and run non-regression tests.
 2 | 
 3 | name: Python versions
 4 | 
 5 | on:
 6 |   push:
 7 |     branches: [ "master" ]
 8 |   pull_request:
 9 |     branches: [ "master" ]
10 | 
11 | jobs:
12 |   build:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os: ["ubuntu-latest", "macos-latest"]
18 |         python-version: ["3.10", "3.12", "pypy2.7", "pypy3.9"]
19 |     steps:
20 |     - uses: actions/checkout@v3
21 |     - name: Set up Python ${{ matrix.python-version }}
22 |       uses: actions/setup-python@v4
23 |       with:
24 |         python-version: ${{ matrix.python-version }}
25 |     - name: Install dependencies
26 |       run: |
27 |         python -m pip install coverage codecov
28 |     - name: Non-regression tests
29 |       run: |
30 |         export PYTHONPATH=$PYTHONPATH:$(pwd)
31 |         coverage run ./tests/test_all.py
32 |     - name: Update codecov
33 |       run: |
34 |         codecov
35 | 
36 |   python23:
37 |     name: python2.3
38 |     runs-on: "ubuntu-latest"
39 |     strategy:
40 |       fail-fast: false
41 |     steps:
42 |     - uses: actions/checkout@v3
43 |     - name: Set up Python 2.3
44 |       run: |
45 |         cd ..
46 |         curl -O https://www.python.org/ftp/python/2.3.7/Python-2.3.7.tgz
47 |         tar xzf Python-2.3.7.tgz
48 |         cd Python-2.3.7
49 |         # We need to disable FORTIFY_SOURCE to compile python 2.3
50 |         # cf. https://bugs.launchpad.net/ubuntu/+source/gcc-defaults/+bug/286334
51 |         ./configure BASECFLAGS=-U_FORTIFY_SOURCE
52 |         make
53 |         sudo ln -fs $(pwd)/python /usr/local/bin/python
54 |     - name: Non-regression tests
55 |       run: |
56 |         python -c 'import sys;print(sys.version)'
57 |         export PYTHONPATH=$PYTHONPATH:$(pwd)
58 |         python ./tests/test_all.py
59 | 


--------------------------------------------------------------------------------
/tests/binary_input/tiny84.asm:
--------------------------------------------------------------------------------
  ; tiny.asm
  ;
  ; 84-byte image of a 32-bit ELF executable: a complete ELF header followed
  ; by one program header, with no overlap between the two headers. The code
  ; is stored in the padding bytes of e_ident.
  ; NASM syntax; presumably assembled as a flat binary -- see the "teensy"
  ; reference in tests/binary_input/README.txt.
  
  BITS 32
  
                org     0x08048000
  
  ehdr:                                                 ; Elf32_Ehdr
                db      0x7F, "ELF"                     ;   e_ident
                db      1, 1, 1, 0, 0
  ; int 0x80 with eax = 1 and bl = 42 (presumably the Linux exit syscall
  ; with status 42)
  _start:       mov     bl, 42
                xor     eax, eax
                inc     eax
                int     0x80
                dw      2                               ;   e_type
                dw      3                               ;   e_machine
                dd      1                               ;   e_version
                dd      _start                          ;   e_entry
                dd      phdr - $$                       ;   e_phoff
                dd      0                               ;   e_shoff
                dd      0                               ;   e_flags
                dw      ehdrsize                        ;   e_ehsize
                dw      phdrsize                        ;   e_phentsize
                dw      1                               ;   e_phnum
                dw      0                               ;   e_shentsize
                dw      0                               ;   e_shnum
                dw      0                               ;   e_shstrndx
  
  ehdrsize      equ     $ - ehdr
  
  phdr:                                                 ; Elf32_Phdr
                dd      1                               ;   p_type
                dd      0                               ;   p_offset
                dd      $$                              ;   p_vaddr
                dd      $$                              ;   p_paddr
                dd      filesize                        ;   p_filesz
                dd      filesize                        ;   p_memsz
                dd      5                               ;   p_flags
                dd      0x1000                          ;   p_align
  
  phdrsize      equ     $ - phdr
  
  ; Total size of the image, in bytes
  filesize      equ     $ - $$
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ELF Esteem #
 2 | 
 3 | ## Overview
 4 | 
 5 | The goal of this library is to manipulate various containers of executable code.
 6 | ELF, PE, COFF and Mach-O files are fully supported.
 7 | It includes partial support for Minidump and RPRC files, and a non-working implementation of Java classes.
 8 | 
 9 | It aims at being self-contained and portable: it is pure python, compatible from python 2.3 upwards (including python 3.x).
10 | 
11 | ## Parsing with ELF Esteem
12 | 
13 | [binary.py](elfesteem/binary.py)
14 | can be used to read a binary of any known format and display its main characteristics.
15 | 
16 | [readelf.py](examples/readelf.py)
17 | outputs the same as binutils' readelf, using ELF Esteem.
18 | 
19 | [otool.py](examples/otool.py)
20 | outputs the same as MacOSX otool and dyldinfo, using ELF Esteem.
21 | 
22 | [readpe.py](examples/readpe.py)
23 | analyses the content of a PE or COFF file, including a hierarchical display of the layout of the file.
24 | 
25 | ## File manipulation with ELF Esteem
26 | 
27 | Most of the internal representation of the file parsed by ELF Esteem is based on [cstruct.py](elfesteem/cstruct.py) which is a generic framework to manipulate binary data structures.
28 | 
29 | The file is fully loaded using one of the classes `ELF`, `PE`, `COFF`, `MACHO`, `RPRC`, or `Minidump`. This class is the root of a tree of subclasses (e.g. file header, list of sections, ...) and each subtree can be modified. The method `pack()` reconstructs a binary.
30 | 
31 | The philosophy behind ELF Esteem is that if the input file is valid, and no modification is made to the internal representation, then `pack()` will recover the input.
32 | When modifications are made, then (depending on the details of the file format) some values are automatically recomputed (e.g. fields containing lengths, checksums).
33 | 
34 | **More doc soon.**
35 | 
36 | ## Development status
37 | 
38 | [![codecov](https://codecov.io/gh/LRGH/elfesteem/branch/master/graph/badge.svg)](https://codecov.io/gh/LRGH/elfesteem)
39 | [![Unit tests](https://github.com/LRGH/elfesteem/actions/workflows/python-package.yml/badge.svg)](https://github.com/LRGH/elfesteem/actions/workflows/python-package.yml)
40 | 


--------------------------------------------------------------------------------
/elfesteem/binary.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | # Generic container for all binary types known by elfesteem,
 3 | # with auto-recognition of the binary type.
 4 | 
 5 | import sys, os
 6 | sys.path.insert(1, os.path.abspath(sys.path[0]+'/..'))
 7 | 
 8 | from elfesteem.elf_init import ELF
 9 | from elfesteem.pe_init import PE, COFF
10 | from elfesteem.minidump_init import Minidump
11 | from elfesteem.macho import MACHO
12 | from elfesteem.rprc import RPRC
13 | 
class UnknownFormat(object):
    """
    Placeholder with constant 'empty' values for the attributes read by
    BINARY, wrapping raw data that is kept unparsed in 'raw'.
    """
    class virt_stub(object):
        # Stand-in for the 'virt' attribute: only max_addr() is provided
        def max_addr(self):
            return -1
    architecture = 'UNKNOWN'
    entrypoint = -1
    sections = ()
    symbols = ()
    dynsyms = ()
    virt = virt_stub()
    def __init__(self, raw):
        self.raw = raw
25 | 
class BINARY(object):
    """
    Generic container: 'raw' is given to each known format in turn, and the
    first one that accepts it is stored in 'e'; UnknownFormat is the fallback.
    The properties below forward to the parsed object.
    """
    def __init__(self, raw):
        for parser in (ELF, PE, Minidump, MACHO, RPRC, COFF):
            # A parser rejects the input with ValueError or AssertionError;
            # any other exception is propagated
            try:
                parsed = parser(raw)
            except (ValueError, AssertionError):
                continue
            self.e = parsed
            return
        self.e = UnknownFormat(raw)
    def _get_container(self):
        return self.e.__class__.__name__
    def _get_architecture(self):
        return self.e.architecture
    def _get_entrypoint(self):
        return self.e.entrypoint
    def _get_max_addr(self):
        return self.e.virt.max_addr()
    def _get_sections(self):
        return self.e.sections
    def _get_symbols(self):
        return self.e.symbols
    def _get_dynsyms(self):
        return self.e.dynsyms
    # No decorator, to stay compatible with old python versions
    container    = property(_get_container)
    architecture = property(_get_architecture)
    entrypoint   = property(_get_entrypoint)
    max_addr     = property(_get_max_addr)
    sections     = property(_get_sections)
    symbols      = property(_get_symbols)
    dynsyms      = property(_get_dynsyms)
45 | 
if __name__ == "__main__":
    # Display the main characteristics of each file given as argument
    for path in sys.argv[1:]:
        print("File: %s"%path)
        fd = open(path, 'rb')
        try:
            raw = fd.read()
        finally:
            fd.close()
        binary = BINARY(raw)
        print("  container    %s" % binary.container)
        print("  architecture %s" % binary.architecture)
        print("  entrypoint   %#x" % binary.entrypoint)
        print("  max address  %#x" % binary.max_addr)
        # The three lists are displayed the same way: count, then one per line
        for header, attribute in (("  %d sections:", "sections"),
                                  ("  %d symbols:", "symbols"),
                                  ("  %d dynamic symbols:", "dynsyms")):
            items = getattr(binary, attribute)
            print(header % len(items))
            for item in items:
                print("    %s" % item)
68 | 


--------------------------------------------------------------------------------
/tests/test_intervals.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | from test_all import run_tests, assertion
 4 | from elfesteem.intervals import Intervals
 5 | 
def test_intervals(assertion):
    """
    Non-regression test of Intervals: add, delete, contains, excludes,
    str() and iteration.
    The same object is used for the whole test, each step depends on the
    state left by the previous ones.
    """
    i = Intervals()
    assertion(i.ranges, [],
              'Empty interval')
    i.add(10, 90)
    assertion(i.ranges, [slice(10, 90)],
              'Interval [10:90]')
    i.add(0, 100)
    assertion(i.ranges, [slice(0, 100)],
              'Addition of bigger interval')
    i.add(0, 100)
    assertion(i.ranges, [slice(0, 100)],
              'Addition of identical interval')
    i.delete(8, 25)
    assertion(i.ranges, [slice(0, 8), slice(25, 100)],
              '[0:100] minus [8:25]')
    # Queries below don't modify the object
    assertion(False, i.contains(18, 30),
              '[0:8]+[25:100] contains [18:30]')
    assertion(True,  i.contains(30, 30),
              '[0:8]+[25:100] contains [30:30]')
    assertion(True,  i.excludes(10, 20),
              '[0:8]+[25:100] excludes [10:20]')
    assertion(False, i.excludes(10, 30),
              '[0:8]+[25:100] excludes [10:30]')
    assertion(True,  i.excludes(-10, -5),
              '[0:8]+[25:100] excludes [-10:-5]')
    assertion(True,  i.excludes(110, 130),
              '[0:8]+[25:100] excludes [110:130]')
    # Additions: disjoint, overlapping, filling a gap
    i.add(12, 16)
    assertion(i.ranges, [slice(0, 8), slice(12, 16), slice(25, 100)],
              'Addition of disjoint interval')
    i.add(11, 14)
    assertion(i.ranges, [slice(0, 8), slice(11, 16), slice(25, 100)],
              'Addition of overlapping interval')
    i.add(1, 11)
    assertion(i.ranges, [slice(0, 16), slice(25, 100)],
              'Addition generating a merge')
    i.delete(8, 15)
    assertion(i.ranges, [slice(0, 8), slice (15, 16), slice(25, 100)],
              'Deletion within an interval')
    i.add(10, 30)
    assertion(i.ranges, [slice(0, 8), slice(10, 100)],
              'Addition of encompassing interval')
    i.delete(0, 100)
    assertion(i.ranges, [],
              'Deletion of everyting')
    # Queries on an empty object
    assertion(False, i.contains(18, 30),
              'Empty contains [18:30]')
    assertion(True,  i.excludes(10, 30),
              'Empty excludes [10:30]')
    # Display and enumeration
    i.add(10, 30)
    i.delete(14, 27)
    assertion(str(i), '[10:14] [27:30]',
              'Display [10:14] [27:30]')
    assertion([_ for _ in i], [10, 11, 12, 13, 27, 28, 29],
              'Enumerate [10:14] [27:30]')
def run_test(assertion):
    """Call every module-level object whose name starts with 'test_'."""
    # Work on a copy of the namespace, in case a test modifies the globals
    namespace = dict(globals())
    for name in namespace:
        if not name.startswith('test_'):
            continue
        namespace[name](assertion)

if __name__ == "__main__":
    # Standalone use, without pytest
    run_tests(run_test)
70 | 


--------------------------------------------------------------------------------
/elfesteem/strpatchwork.py:
--------------------------------------------------------------------------------
 1 | from array import array
 2 | # To be compatible with python 2 and python 3
 3 | import sys
 4 | import struct
 5 | data_null = struct.pack("B",0)
 6 | data_empty = struct.pack("")
 7 | 
 8 | class StrPatchwork(object):
 9 |     def __init__(self, s=data_empty, paddingbyte=data_null):
10 |         if s is None: s = data_empty
11 |         if isinstance(s, StrPatchwork): s = s.pack()
12 |         self.s = array("B",s)
13 |         # cache s to avoid rebuilding str after each find
14 |         self.s_cache = s
15 |         self.paddingbyte=paddingbyte
16 |     def __str__(self):
17 |         raise AttributeError("Use pack() instead of str()")
18 |     def pack(self):
19 |         if sys.version_info[0] >= 3:
20 |             return self.s.tobytes()
21 |         else:
22 |             return self.s.tostring()
23 | 
24 |     def __getitem__(self, item):
25 |         s = self.s
26 |         if type(item) is slice:
27 |             r = s[item]
28 |             end = item.stop
29 |             if end != None and len(s) < end:
30 |                 if item.step is not None:
31 |                     TODO
32 |                 elif len(r) > 0:
33 |                     # We go beyond the end of 's'
34 |                     r.extend(array("B",self.paddingbyte*(end-len(s))))
35 |                 else:
36 |                     # We are entirely after the end of 's'
37 |                     start = item.start
38 |                     if start is None: start = 0
39 |                     r = array("B",self.paddingbyte*(end-start))
40 |         else:
41 |             if item > len(s):
42 |                 return self.paddingbyte
43 |             else:
44 |                 r = array("B",[s[item]])
45 |         if sys.version_info[0] >= 3:
46 |             return r.tobytes()
47 |         else:
48 |             return r.tostring()
49 |     def __setitem__(self, item, val):
50 |         if val is None:
51 |             return
52 |         if sys.version_info[0] >= 3 and type(val) == str:
53 |             val = val.encode(encoding="latin1")
54 |         val = array("B",val)
55 |         if type(item) is not slice:
56 |             item = slice(item, item+len(val))
57 |         end = item.stop
58 |         l = len(self.s)
59 |         if l < end:
60 |             self.s.extend(array("B", self.paddingbyte*(end-l)))
61 |         self.s[item] = val
62 |         self.s_cache = None
63 | 
64 | 
65 |     def __repr__(self):
66 |         return "<Patchwork %r>" % self.pack()
67 |     def __len__(self):
68 |         return len(self.s)
69 |     def __contains__(self, val):
70 |         return val in self.pack()
71 |     def __iadd__(self, other):
72 |         self.s.extend(array("B", other))
73 |         return self
74 | 
75 |     def find(self, pattern, *args):
76 |         if not self.s_cache:
77 |             self.s_cache = self.pack()
78 |         return self.s_cache.find(pattern, *args)
79 | 
80 |     def rfind(self, pattern, *args):
81 |         if not self.s_cache:
82 |             self.s_cache = self.pack()
83 |         return self.s_cache.rfind(pattern, *args)
84 | 
85 | 


--------------------------------------------------------------------------------
/elfesteem/intervals.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | if sys.version_info[0] >= 3:
 3 |     from functools import reduce
 4 | if sys.version_info[0:2] == (2, 3):
 5 |     from elfesteem.compatibility_python23 import sorted
 6 | 
 7 | class Intervals(object):
 8 |     '''
 9 |     Represent a subset of the integers, to be used to detect which parts
10 |     of the file have been parsed
11 |     '''
12 |     def __init__(self):
13 |         self.ranges = [ ]
14 |     def __str__(self):
15 |         if len(self.ranges) == 0: return "[]"
16 |         return reduce(lambda x, y: x+" "+y,
17 |                map(lambda x: "[%s:%s]"%(x.start,x.stop), self.ranges))
18 |     # Internal methods to make object manipulation easier
19 |     def _split(self, *poslist):
20 |         def _split_slice(l, s):
21 |             for pos in sorted(poslist):
22 |                 if s.start < pos < s.stop:
23 |                     l.append(slice(s.start, pos))
24 |                     s = slice(pos, s.stop)
25 |             l.append(s)
26 |             return l
27 |         self.ranges = reduce(_split_slice, self.ranges, [])
28 |     def _merge(self):
29 |         def _merge_two_slices(l, s):
30 |             if len(l) and (l[-1].stop == s.start):
31 |                 l[-1] = slice(l[-1].start, s.stop)
32 |             else:
33 |                 l.append(s)
34 |             return l
35 |         self.ranges = reduce(_merge_two_slices, self.ranges, [])
36 |     # Interface of the class
37 |     def __iter__(self):
38 |         for s in self.ranges:
39 |             for t in range(s.start, s.stop):
40 |                 yield t
41 |     def contains(self, start, stop):
42 |         for s in self.ranges:
43 |             if s.start <= start and stop <= s.stop:
44 |                 return True
45 |         return False
46 |     def excludes(self, start, stop):
47 |         if len(self.ranges) == 0:
48 |             return True
49 |         if stop <= self.ranges[0].start:
50 |             return True
51 |         if self.ranges[-1].stop <= start:
52 |             return True
53 |         for i in range(len(self.ranges)-1):
54 |             if self.ranges[i].stop <= start and stop <= self.ranges[i+1].start:
55 |                 return True
56 |         return False
57 |     def delete(self, start, stop):
58 |         def _remove_slices(l, s):
59 |             if start > s.start or stop < s.stop:
60 |                 l.append(s)
61 |             return l
62 |         self._split(start, stop)
63 |         self.ranges = reduce(_remove_slices, self.ranges, [])
64 |         return self
65 |     def add(self, start, stop):
66 |         if len(self.ranges) == 0:
67 |             self.ranges.append(slice(start, stop))
68 |             return self
69 |         new_ranges = []
70 |         prev_stop = None
71 |         for l in self.ranges:
72 |             if start <= l.start:
73 |                 if prev_stop is None:
74 |                     new_ranges.append(slice(start, min(stop,l.start)))
75 |                 elif prev_stop < stop:
76 |                     new_ranges.append(slice(max(start,prev_stop), min(stop,l.start)))
77 |             new_ranges.append(l)
78 |             prev_stop = l.stop
79 |         if new_ranges[-1].stop < stop:
80 |             new_ranges.append(slice(max(start,new_ranges[-1].stop), stop))
81 |         self.ranges = new_ranges
82 |         self._merge()
83 |         return self
84 | 


--------------------------------------------------------------------------------
/tests/binary_input/README.txt:
--------------------------------------------------------------------------------
 1 | Ange
 2 |     Some files from https://github.com/corkami/pocs/tree/master/PE/bin
 3 | 
 4 | tiny*.asm
 5 | tiny*.bin
 6 |     Cf. http://www.muppetlabs.com/%7Ebreadbox/software/tiny/teensy.html
 7 | 
 8 | C28346_Load_Program_to_Flash.out
 9 |     Source https://github.com/slavaprokopiy/Mini-TMS320C28346/blob/master/For_user/C28346_Load_Program_to_Flash/Debug/C28346_Load_Program_to_Flash.out
10 | 
11 | cku190.rs6aix32c-3.2.4
12 | cku192.irix40
13 | cku192.ultrix43c-mips3
14 | cku193a05.apollo-sr10-s5r3
15 | cku196.clix-3.1
16 | cku200.dec-osf-1.3a
17 |     Source ftp://kermit.columbia.edu/kermit/bin/
18 | 
19 | notle-tesla-dsp.xe64T
20 | ducati-m3_p768.bin
21 |     Source https://drive.google.com/drive/folders/0B2AlG69ZVaWldU1vUnRFUklCek0
22 |     Linked from https://github.com/radare/radare2/issues/1602
23 | 
24 | coff_mingw.obj
25 | elf64_small.o
26 | elf64_small.out
27 | elf_cpp.o
28 | elf_small.o
29 | elf_small.out
30 | pe_mingw.exe
31 | pe_vstudio.dll
32 | macho/macho_32.o
33 | macho/macho_32.out
34 | macho/macho_64.o
35 | macho/macho_64.out
36 | macho/macho_fat.out
37 |     Built by Louis Granboulan for elfesteem non-regression tests
38 | 
39 | macho/sh
40 |     An example of Mach-O with more symbol stubs than symbols (/bin/sh)
41 | 
42 | macho/libPrintServiceQuota.1.dylib
43 |     An example of big-endian Mach-O (from an old MacOSX for PowerPC)
44 | 
45 | macho/Decibels
46 |     An example of iPhone app, with two ARM architectures and Encryption
47 | 
48 | macho/LyonMetro
49 |     Another example of iPhone app, with a LC_VERSION_MIN_IPHONEOS
50 | 
51 | macho/TelephonyUtil.o
52 |     An example of object file with a LC_LINKER_OPTION
53 |     Extracted from /usr/lib/libATCommandStudio.a from a recent MacOSX
54 | 
55 | macho/libdns_services.dylib
56 |     An example of file with a LC_SOURCE_VERSION
57 |     Copied from /usr/lib/libdns_services.dylib from a recent MacOSX
58 | 
59 | macho/libecpg.6.5.dylib
60 |     An example of file with a section size "past end of file"
61 |     Copied from /usr/lib/libecpg.6.5.dylib from a recent MacOSX
62 | 
63 | macho/libATCommandStudioDynamic.dylib
64 |     An example of file with weak binding
65 |     Copied from /usr/lib/libATCommandStudioDynamic.dylib from a recent MacOSX
66 | 
67 | macho/libcoretls.dylib
68 |     An example of file with no binding, no weak binding, no lazy binding
69 |     Copied from /usr/lib/libcoretls.dylib from a recent MacOSX
70 | 
71 | macho/libSystem.B.dylib
72 |     An example of file with BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
73 |     Copied from /usr/lib/libSystem.B.dylib from a recent MacOSX
74 | 
75 | macho/OSXII
76 |     An example of old universal binary, ppc & i386, with LC_UNIXTHREAD
77 |     The OSXII software has been discontinued, cf.
78 |     https://www.macupdate.com/app/mac/10578/osxii
79 | 
80 | macho/SweetHome3D
81 |     An example of universal binary, ppc, i386 & x86_64, with LC_UNIXTHREAD
82 |     SweetHome3D is open source and available at http://www.sweethome3d.com/
83 | 
84 | macho/MacTheRipper
85 |     Another old Mach-O binary, with LC_PREBOUND_DYLIB
86 |     This is the version 2.6.6, downloadable at a link available at its
87 |     Wikipedia page
88 | 
89 | minidump-i386.dmp
90 | minidump-x86_64.dmp
91 |     Source https://github.com/OutOfOrder/BreakpadTest/tree/master/Samples
92 | 
93 | windows.dmp
94 |     Source https://github.com/electron/node-minidump/tree/master/test/fixtures
95 | 


--------------------------------------------------------------------------------
/tests/test_all.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | # These non-regression tests should be OK from python2.3 to python3.x
  4 | 
  5 | # How to import by name, compatible with python2 and python3
  6 | import sys, os
  7 | __dir__ = os.path.dirname(__file__)
# import_by_name(name) imports a module, given its name as a string
try:
    # The following is working starting with python2.7
    import importlib
    import_by_name = importlib.import_module
except ImportError:
    # The following is working for python2.3 to python3.11
    import imp
    def import_by_name(name):
        # The module is searched in the directory of this file only
        fp, pathname, description = imp.find_module(name, [__dir__])
        try:
            module = imp.load_module(name, fp, pathname, description)
        finally:
            # find_module may return an open file, that we have to close
            if fp is not None: fp.close()
        return module

# 'hashlib' is exported to the test modules, which only use hashlib.md5
try:
    import hashlib
except ImportError:
    # Python 2.4 does not have hashlib
    # but 'md5' is deprecated since python2.5
    import md5 as oldpy_md5
    class hashlib(object):
        # Replacement that only provides md5()
        def md5(self, data):
            return oldpy_md5.new(data)
        md5 = classmethod(md5)

# 'assertion' is a pytest fixture if pytest is available, None otherwise
try:
    # This way, we can use our code with pytest, but we can also
    # use it directly, e.g. when testing for python2.3.
    # No decorator, the syntax is forbidden in python2.3.
    import pytest
    def assertion():
        def inner_assertion(target, value, message):
            # 'message' is not used here, only the comparison is asserted
            assert target == value
        return inner_assertion
    assertion = pytest.fixture(assertion)
except Exception:
    assertion = None
 46 | 
 47 | class print_colored(object): # Namespace
 48 |     end = '\033[0m'
 49 |     def bold(self, txt):
 50 |         print('\033[1m'+txt+self.end)
 51 |     bold = classmethod(bold)
 52 |     def boldred(self, txt):
 53 |         print('\033[91;1m'+txt+self.end)
 54 |     boldred = classmethod(boldred)
 55 |     def boldgreen(self, txt):
 56 |         print('\033[92;1m'+txt+self.end)
 57 |     boldgreen = classmethod(boldgreen)
 58 | 
 59 | def assertion_status(target, value, message, status_ptr):
 60 |     if target != value:
 61 |         print_colored.boldred('Non-regression failure for %r' % message)
 62 |         status_ptr[0] = False
 63 | 
 64 | def run_tests(run_test):
 65 |     status_ptr = [True]
 66 |     run_test(lambda target, value, msg, status_ptr=status_ptr:
 67 |         assertion_status(target, value, msg, status_ptr))
 68 |     if status_ptr[0]:
 69 |         print_colored.boldgreen('OK')
 70 |     return status_ptr[0]
 71 | 
 72 | def test_MD5(assertion):
 73 |     import struct
 74 |     assertion('f71dbe52628a3f83a77ab494817525c6',
 75 |               hashlib.md5(struct.pack('BBBB',116,111,116,111)).hexdigest(),
 76 |               'MD5')
 77 | 
 78 | def open_read(f):
 79 |     fd = open(f, 'rb')
 80 |     try:
 81 |         data = fd.read()
 82 |     finally:
 83 |         fd.close()
 84 |     return data
 85 | 
 86 | if __name__ == "__main__":
 87 |     exit_value = 0
 88 |     print_colored.bold('test_MD5')
 89 |     if not run_tests(test_MD5):
 90 |         exit_value = 1
 91 |     for name in (
 92 |             'visual_studio_mangling',
 93 |             'pe_manipulation',
 94 |             'elf_manipulation',
 95 |             'macho_manipulation',
 96 |             'rprc_manipulation',
 97 |             'minidump_manipulation',
 98 |             'intervals',
 99 |             ):
100 |         module = import_by_name('test_' + name)
101 |         print_colored.bold(name)
102 |         if not run_tests(module.run_test):
103 |             exit_value = 1
104 |     sys.exit(exit_value)
105 | 


--------------------------------------------------------------------------------
/tests/test_rprc_manipulation.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/env python
 2 | 
 3 | import os
 4 | __dir__ = os.path.dirname(__file__)
 5 | 
 6 | from test_all import run_tests, assertion, hashlib, open_read
 7 | from elfesteem.rprc import RPRC
 8 | 
 9 | def test_RPRC_empty(assertion):
10 |     e = RPRC()
11 |     d = e.pack()
12 |     assertion('865001a37fa24754bd17012e85d2bfff',
13 |               hashlib.md5(d).hexdigest(),
14 |               'Creation of a standard empty RPRC')
15 |     d = RPRC(d).pack()
16 |     assertion('865001a37fa24754bd17012e85d2bfff',
17 |               hashlib.md5(d).hexdigest(),
18 |               'Creation of a standard empty RPRC; fix point')
19 | 
def test_RPRC_ducati(assertion):
    # Non-regression test on a real RPRC file.  The same RPRC object 'e'
    # is used all along, and the writes at the end are expected to leave
    # the packed output unchanged, so the order of the steps matters.
    rprc_m3 = open_read(__dir__+'/binary_input/ducati-m3_p768.bin')
    assertion('d31c5887b98b37f949da3570b8688983',
              hashlib.md5(rprc_m3).hexdigest(),
              'Reading ducati-m3_p768.bin')
    e = RPRC(rprc_m3)
    d = e.pack()
    assertion('d31c5887b98b37f949da3570b8688983',
              hashlib.md5(d).hexdigest(),
              'Packing after reading ducati-m3_p768.bin')
    # Packed file is identical :-)
    d = e.display().encode('latin1')
    assertion('c691ff75fffede7701086f6b3c981b3b',
              hashlib.md5(d).hexdigest(),
              'Display RPRC file content')
    # Lookup of a section by virtual address: mapped, then not mapped
    d = e.getsectionbyvad(0x00004000).pack()
    assertion('c77c8edf39114343b16b284ffddd2dff',
              hashlib.md5(d).hexdigest(),
              'Get existing section by address')
    d = e.getsectionbyvad(0x00400000)
    assertion(None, d, 'Get non-existing section by address')
    # Read access by file offset
    d = e.content[0x100:0x120]
    assertion('604e845109bba89a3dfa00da8c65cbd1',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from raw data')
    # Read access by virtual address: one byte, a chunk, and a chunk
    # starting before the address 0x4000 used above
    d = e.virt[0x00004000]
    assertion('6b31bdfa7f9bfece263381ffa91bd6a9',
              hashlib.md5(d).hexdigest(),
              'Extract byte from mapped memory')
    d = e.virt[0x00004000:0x00004020]
    assertion('4b22b71399e1e0a6820c769456ce7483',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory')
    d = e.virt[0x00003ff0:0x00004020]
    assertion('ff2e5ba4b1c82e231f477c01ec805e06',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped and unmapped memory')
    # Write access by virtual address: the bytes that are written are the
    # ones already there, therefore the packed file keeps the same hash
    e.virt[0x00004000:0x00004100] = e.virt[0x00004000:0x00004100]
    d = e.pack()
    assertion('d31c5887b98b37f949da3570b8688983',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (interval)')
    e.virt[0x00004000] = e.virt[0x00004000:0x00004100]
    d = e.pack()
    assertion('d31c5887b98b37f949da3570b8688983',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (address)')
    # Writing where memory is not (entirely) mapped must raise ValueError;
    # the assertion(0,1,...) is reached only if no exception was raised
    try:
        e.virt[0x00040000] = e.virt[0x00004000:0x00004100]
        assertion(0,1, 'Writing in non-mapped memory')
    except ValueError:
        pass
    try:
        e.virt[0x00003ff0:0x00004020] = e.virt[0x00003ff0:0x00004020]
        assertion(0,1, 'Writing in partially non-mapped memory')
    except ValueError:
        pass
77 | 
def test_RPRC_invalid(assertion):
    ''' Parsing a file that is not an RPRC must raise ValueError. '''
    try:
        path = __dir__+'/binary_input/README.txt'
        RPRC(open_read(path))
        # Reached only if the parsing did not fail
        assertion(0,1, 'Not an RPRC')
    except ValueError:
        pass
84 | 
def run_test(assertion):
    ''' Call, with 'assertion' as argument, every module-level object
        whose name starts with 'test_'. '''
    # Work on a copy, the module namespace may change while tests run
    snapshot = dict(globals())
    for name in snapshot:
        if not name.startswith('test_'):
            continue
        snapshot[name](assertion)
89 | 
90 | if __name__ == "__main__":
91 |     run_tests(run_test)
92 | 


--------------------------------------------------------------------------------
/elfesteem/rprc.py:
--------------------------------------------------------------------------------
  1 | # RPRC syntax: firmware format used by rpmsg
  2 | 
  3 | # The main source of information on this format is
  4 | #  https://github.com/ohadbc/sysbios-rpmsg
  5 | # A tool that reads the content of a RPRC .bin file is
  6 | #  https://github.com/ohadbc/sysbios-rpmsg/blob/master/src/utils/rprcfmt.h
  7 | #  https://github.com/ohadbc/sysbios-rpmsg/blob/master/src/utils/readrprc.c
  8 | # But the last version of this tool (tagged "new ABI") does not correspond
  9 | # to the RPRC files downloadable at http://goo.gl/4dndeg
 10 | # For example, the size of resources is 76 bytes, while in the new ABI it
 11 | # is 96 bytes. All examples of output of 'readrprc' that are found in this
 12 | # repository and in the following links have 76-bytes long resources.
 13 | #  https://github.com/radare/radare2/issues/1602
 14 | #  http://omappedia.org/wiki/RPMsg_BIOS_Sources
 15 | #  http://www.omappedia.com/wiki/RPMsg_Tesla
 16 | #  http://omappedia.org/wiki/Debugging_RPMsg#Readrprc_Utility
 17 | #  http://omappedia.org/wiki/RPMsg_BIOS_Sources#SYS.2FBIOS_RPMsg_Customizations
 18 | #  http://omappedia.org/wiki/Design_Overview_-_RPMsg#Firmware_Image_Format
 19 | # Currently, we don't know if there is a flag that tells when the "new ABI"
 20 | # is used, e.g. a value of 'version' greater than 2 in the header.
 21 | 
 22 | import struct
 23 | from elfesteem.cstruct import CData, CStruct, data_null, data_empty
 24 | from elfesteem.strpatchwork import StrPatchwork
 25 | 
 26 | # Section types
 27 | FW_RESOURCE    = 0
 28 | FW_TEXT        = 1
 29 | FW_DATA        = 2
 30 | 
 31 | # Resource types (old ABI)
 32 | RSC_CARVEOUT    = 0
 33 | RSC_DEVMEM      = 1
 34 | RSC_DEVICE      = 2
 35 | RSC_IRQ         = 3
 36 | RSC_TRACE       = 4
 37 | RSC_BOOTADDR    = 5
 38 | RSC_VRING       = 6
 39 | 
 40 | # Resource types (new ABI); NB: these assignments override the old ABI values of the same names defined just above
 41 | RSC_CARVEOUT    = 0
 42 | RSC_DEVMEM      = 1
 43 | RSC_TRACE       = 2
 44 | RSC_VRING       = 3
 45 | RSC_VIRTIO_HDR  = 4
 46 | RSC_VIRTIO_CFG  = 5
 47 | 
 48 | class Header(CStruct):
 49 |     _fields = [ ("magic","4s"), 
 50 |                 ("version","u32"),
 51 |                 ("header_len","u32"),
 52 |                 ("data",CData(lambda _:_.header_len))]
 53 |     magic_txt = property(lambda _:_.magic.decode('latin1'))
 54 |     def _initialize(self):
 55 |         CStruct._initialize(self)
 56 |         # Change default values
 57 |         self.magic      = 'RPRC'.encode('latin1')
 58 |         self.version    = 2
 59 |         self.header_len = 1012
 60 |         self.data[0]    = data_null * self.header_len
 61 |         self._size     += self.header_len
 62 |     def display(self):
 63 |         rep = []
 64 |         rep.append('magic number %(magic_txt)s' % self)
 65 |         rep.append('header version %(version)d' % self)
 66 |         rep.append('header size %(header_len)d' % self)
 67 |         rep.append('header data')
 68 |         rep.append(str(self.data))
 69 |         return '\n'.join(rep)
 70 | 
 71 | # NB: the following definition is taken from
 72 | # https://github.com/ohadbc/sysbios-rpmsg/blob/master/src/utils/rprcfmt.h
 73 | # It does not correspond to the RPRC files we have
class ResourceNewABI(CStruct):
    # Resource entry of the "new ABI".  Assuming that u32/u64 are 4/8
    # bytes long and that "NNs" is a NN-bytes string, these fields add up
    # to 96 bytes.  Not used elsewhere in the part of the file shown here.
    _fields = [ ("type","u32"),
                ("id","u32"),
                ("da","u64"),   # Device Address
                ("pa","u64"),   # Physical Address
                ("len","u32"),
                ("flags","u32"),
                ("reserved","16s"),
                ("name","48s"),
                ]
 84 | 
 85 | class Resource(CStruct):
 86 |     _fields = [ ("type","u32"),
 87 |                 ("da","u64"),   # Device Address
 88 |                 ("pa","u64"),   # Physical Address
 89 |                 ("len","u32"),
 90 |                 ("flags","u32"),
 91 |                 ("name","48s"),
 92 |                 ]
 93 |     name_txt = property(lambda _:_.name.strip(data_null).decode('latin1'))
 94 |     def unpack(self, c, o):
 95 |         CStruct.unpack(self, c, o)
 96 |         self.offset = o
 97 |     def display(self):
 98 |         return 'resource %(type)d, da: %(da)#010x, pa: %(pa)#010x, len: %(len)#010x, name: %(name_txt)s' % self
 99 | 
class Section(CStruct):
    # Section: type, device address, length, then 'len' bytes of data
    _fields = [ ("type","u32"),
                ("da","u64"),   # Device Address
                ("len","u32"),
                ("data",CData(lambda sec:sec.len))]
    def unpack(self, c, o):
        ''' Unpack the section; for a section of type FW_RESOURCE, the
            data is also parsed as a list of Resource, stored in
            'self.res'.
            Raises ValueError if the data length of such a section is
            not a multiple of the length of a Resource. '''
        CStruct.unpack(self, c, o)
        self.offset = o
        if self.type != FW_RESOURCE:
            return
        self.res_len = Resource(parent=self).bytelen
        if self.data.bytelen % self.res_len != 0:
            raise ValueError('Section data length %#x not multiple of %#x' % (self.data.bytelen, self.res_len))
        self.res = []
        # The data length is a multiple of res_len: every step is a resource
        for of in range(0, self.data.bytelen, self.res_len):
            self.res.append(Resource(parent=self, content=self.data, start=of))
    def display(self):
        ''' Return a description of the section, followed by one line
            per resource if this is a resource section. '''
        lines = ['section %(type)d, address: %(da)#010x, size: %(len)#010x' % self]
        if self.type == FW_RESOURCE:
            lines.append('resource table: %d' % self.res_len)
            lines.extend([r.display() for r in self.res])
        return '\n'.join(lines)
    def __str__(self):
        return 'section %(type)d, address: %(da)#010x, size: %(len)#010x' % self
128 | 
class Layout(object):
    ''' This class manages the layout of the file when loaded in memory.

        'self.layout' is a list of (address, data) pairs kept in
        increasing address order: 'data' is what is found in memory from
        'address' up to the address of the next pair, and is None where
        nothing has been loaded. '''
    def __init__(self, overlap=None):
        ''' Initialize with an empty memory.
            'overlap' tells what to do when some data is loaded over
            other data; only 'silent' is implemented.
            Raises NotImplementedError for 'warning' and 'error', and
            ValueError for any other value. '''
        if   overlap == 'silent':
            pass
        elif overlap in ('warning', 'error'):
            # These policies used to fail with a NameError
            raise NotImplementedError('overlap=%r is not implemented in %s'
                                      % (overlap, self.__class__))
        else:
            raise ValueError('Define overlap in %s'%self.__class__)
        self.layout = [(0, None)]
    def __setitem__(self, item, data):
        ''' Load 'data' in memory at interval 'item'. '''
        if item.start == item.stop:
            # Empty interval, nothing is loaded
            return
        # Find the position in the layout where the data is loaded
        for i, (o, _) in enumerate(self.layout):
            if o >= item.start: break
        else:
            i = len(self.layout)
        # Find the position in the layout where the data loading ends
        for j, (o, _) in enumerate(self.layout):
            if o > item.stop: break
        else:
            j = len(self.layout)
        # Find what is the value after the end
        _, prv_data = self.layout[j-1]
        # Entries covered by the new interval are dropped; what was in
        # memory at 'item.stop' is still there after the loaded data
        self.layout[i:j] = [(item.start, data),(item.stop, prv_data)]
    def __getitem__(self, item):
        ''' Return a list of (slice, data) which indicates what is in
            memory at interval 'item'; the slices that are returned
            are contiguous and add up to the whole 'item' slice. '''
        res = []
        for i, (stop, _) in enumerate(self.layout):
            # layout[i-1] describes the memory from 'start' to 'stop'
            if item.start >= stop:
                continue
            start, data = self.layout[i-1]
            if item.stop <= start:
                continue
            res.append((slice(max(item.start,start),min(item.stop,stop)),data))
        # What remains after the last entry of the layout; it is clipped
        # to 'item.start', which may itself be after the last entry
        last, data = self.layout[-1]
        if last < item.stop:
            res.append((slice(max(item.start,last),item.stop),data))
        return res
    def max_addr(self):
        ''' Return the address of the last entry of the layout. '''
        return self.layout[-1][0]
177 | 
class Virtual(object):
    # This class manages 'virtual addresses', i.e. the addresses when
    # the RPRC file is loaded in memory.
    # These addresses are the ones used by absolute addressing in the
    # executable code.
    def __init__(self, e):
        ''' Build the memory layout from the sections of 'e': each
            section 's' is mapped at addresses s.da to s.da+s.len. '''
        self.parent = e
        self.layout = Layout(overlap='silent')
        for s in self.parent.sections:
            self.layout[s.da:s.da+s.len] = s
    def __getitem__(self, item):
        # If 'item' is an integer, we return the byte at this address,
        # else 'item' is a slice and we return the corresponding bytes,
        # padded with zeroes.
        if isinstance(item, slice):
            assert item.step is None
            start, stop = item.start, item.stop
        else:
            start, stop = item, item+1
        res = data_empty
        for i, s in self.layout[start:stop]:
            if s is None: res += data_null * (i.stop-i.start) # non-mapped
            else: res += s.data[i.start-s.da:i.stop-s.da]
        return res
    def __setitem__(self, item, data):
        # If 'item' is an integer, we write starting from this address,
        # else 'item' is a slice which has the same length as 'data'.
        # Raises ValueError if some of the addresses are not mapped.
        if isinstance(item, slice):
            assert item.step is None
            start, stop = item.start, item.stop
            assert len(data) == stop-start
        else:
            start, stop = item, item+len(data)
        l = self.layout[start:stop]
        if [ s for _, s in l if s is None ]:
            raise ValueError('Addresses %#x:%#x not entirely mapped in memory'%(start,stop))
        for i, s in l:
            # data[k] is written at address start+k: the chunk for the
            # addresses i.start to i.stop is found at offsets relative to
            # 'start' in 'data', and relative to 's.da' in the section.
            s.data[i.start-s.da:i.stop-s.da] = data[i.start-start:i.stop-start]
    def max_addr(self):
        ''' Return the highest address known by the layout. '''
        return self.layout.max_addr()
218 | 
class RPRC(object):
    ''' An RPRC file: a header followed by sections. '''
    # API shared by all/most binary containers
    architecture = property(lambda _:'ARM')
    entrypoint = property(lambda _:-1)
    #sections = property(lambda _:_.SHList.shlist)
    symbols = property(lambda _:())
    dynsyms = property(lambda _:())

    sex = '<'
    wsize = 32
    virt = property(lambda _:_._virt)
    def __init__(self, data = None, **kargs):
        ''' Parse 'data' if it is given, else create an RPRC with a
            default header and no section. '''
        self.sections = []
        if data is None:
            # Create a RPRC file with no section
            self.hdr = Header(parent=self)
        else:
            self.content = StrPatchwork(data)
            self.parse_content()
        self._virt = Virtual(self)
    def parse_content(self):
        ''' Parse 'self.content': the header, then sections up to the
            end of the content.
            Raises ValueError if the content does not start with the
            magic number. '''
        magic = struct.unpack("4B", self.content[:4])
        if magic != ( 0x52,0x50,0x52,0x43 ): # magic number, RPRC
            raise ValueError("Not an RPRC")
        self.hdr = Header(parent=self, content=self.content)
        position = self.hdr.bytelen
        while position < len(self.content):
            section = Section(parent=self, content=self.content, start=position)
            self.sections.append(section)
            position += section.bytelen
    def pack(self):
        ''' Return the content of the file: the packed header followed
            by the packed sections. '''
        out = StrPatchwork()
        out[0] = self.hdr.pack()
        position = self.hdr.bytelen
        for section in self.sections:
            out[position] = section.pack()
            position += section.bytelen
        return out.pack()
    def display(self):
        # Same output as 'readrprc'
        parts = [self.hdr.display()]
        parts.extend([s.display() for s in self.sections])
        return '\n'.join(parts)
    def getsectionbyvad(self, ad):
        # Same API as ELF or PE, but different implementation for accessing
        # data by virtual addresses: a mechanism entirely inside 'virt'
        # rather than split between two classes; future versions of
        # elfesteem should probably do the same for all binary containers.
        pieces = self.virt.layout[ad:ad+1]
        _, section = pieces[0]
        return section
267 | 
if __name__ == "__main__":
    # Command-line use:
    # - with more than one file name, display each file then exit;
    # - with one file name, parse it as 'e' then start a console;
    # - with no argument, start a console.
    import sys, code
    if len(sys.argv) > 2:
        for f in sys.argv[1:]:
            print('File: %s'%f)
            fd = open(f, 'rb')
            try:
                raw = fd.read()
            finally:
                fd.close()
            e = RPRC(raw)
            print (e.display())
        sys.exit(0)
    if len(sys.argv) == 2:
        fd = open(sys.argv[1], 'rb')
        try:
            raw = fd.read()
        finally:
            fd.close()
        e = RPRC(raw)
    # The console gets the names defined here (e.g. 'e' and 'raw' when
    # a file has been parsed), because locals() is given as namespace.
    code.interact('Interactive Python Console', None, locals())
289 | 


--------------------------------------------------------------------------------
/elfesteem/macho/common.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | from elfesteem.cstruct import Constants, CStruct
  4 | from elfesteem.cstruct import data_empty, data_null
  5 | from elfesteem.cstruct import bytes_to_name, name_to_bytes
  6 | 
  7 | import logging
  8 | log = logging.getLogger("mach-o")
  9 | console_handler = logging.StreamHandler()
 10 | console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
 11 | log.addHandler(console_handler)
 12 | log.setLevel(logging.WARN)
 13 | 
 14 | __all__ = [ 'data_empty', 'data_null', 'bytes_to_name', 'name_to_bytes',
 15 |             'log', 'relocation_info' ]
 16 | 
 17 | # Variables defined below and that need to be visible when import *.
 18 | def ImportAll(**kargs):
 19 |     __all__.extend(kargs.keys())
 20 |     globals().update(kargs)
 21 | 
 22 | # In addition to needing to be visible when import *, these values
 23 | # are added to constants, built in a way allowing to recover the
 24 | # constant's name from its value.
 25 | constants = {}
 26 | def SetConstants(**kargs):
 27 |     __all__.extend([_ for _ in kargs.keys() if _ != 'no_name'])
 28 |     Constants(globs = globals(), table = constants, **kargs)
 29 | 
 30 | #### Main source: /usr/include/mach/machine.h
 31 | # VEO is found on http://www.opensource.apple.com/source/cctools/cctools-809/include/mach/machine.h
 32 | ImportAll(
 33 | CPU_ARCH_ABI64  = 0x01000000
 34 | )
 35 | SetConstants(
 36 | CPU_TYPE_VAX         = 1,
 37 | CPU_TYPE_ROMP        = 2, # Deprecated
 38 | CPU_TYPE_NS32032     = 4, # Deprecated
 39 | CPU_TYPE_NS32332     = 5, # Deprecated
 40 | CPU_TYPE_MC680x0     = 6,
 41 | CPU_TYPE_X86         = 7,
 42 | CPU_TYPE_I386        = 7,
 43 | CPU_TYPE_X86_64      = 7 | CPU_ARCH_ABI64,
 44 | CPU_TYPE_MIPS        = 8,
 45 | CPU_TYPE_NS32532     = 9,  # Deprecated
 46 | CPU_TYPE_MC98000     = 10,
 47 | CPU_TYPE_HPPA        = 11,
 48 | CPU_TYPE_ARM         = 12,
 49 | CPU_TYPE_ARM64       = 12 | CPU_ARCH_ABI64,
 50 | CPU_TYPE_MC88000     = 13,
 51 | CPU_TYPE_SPARC       = 14,
 52 | CPU_TYPE_I860        = 15,
 53 | CPU_TYPE_I860_LITTLE = 16, # Deprecated
 54 | CPU_TYPE_ALPHA       = 16,
 55 | CPU_TYPE_RS6000      = 17, # Deprecated
 56 | CPU_TYPE_POWERPC     = 18,
 57 | CPU_TYPE_POWERPC64   = 18 | CPU_ARCH_ABI64,
 58 | CPU_TYPE_VEO         = 255,
 59 | no_name = ('CPU_TYPE_I386', 'CPU_TYPE_I860_LITTLE',)
 60 | )
 61 | 
 62 | ImportAll(
 63 | CPU_SUBTYPE_MASK      = 0xff000000,  # mask for feature flags
 64 | CPU_SUBTYPE_LIB64     = 0x80000000,  # 64 bit libraries
 65 | )
 66 | 
 67 | # VAX subtypes.
 68 | ImportAll(
 69 | CPU_SUBTYPE_VAX_ALL  = 0,
 70 | CPU_SUBTYPE_VAX780   = 1,
 71 | CPU_SUBTYPE_VAX785   = 2,
 72 | CPU_SUBTYPE_VAX750   = 3,
 73 | CPU_SUBTYPE_VAX730   = 4,
 74 | CPU_SUBTYPE_UVAXI    = 5,
 75 | CPU_SUBTYPE_UVAXII   = 6,
 76 | CPU_SUBTYPE_VAX8200  = 7,
 77 | CPU_SUBTYPE_VAX8500  = 8,
 78 | CPU_SUBTYPE_VAX8600  = 9,
 79 | CPU_SUBTYPE_VAX8650  = 10,
 80 | CPU_SUBTYPE_VAX8800  = 11,
 81 | CPU_SUBTYPE_UVAXIII  = 12,
 82 | )
 83 | 
 84 | # ROMP subtypes.
 85 | ImportAll(
 86 | CPU_SUBTYPE_RT_ALL = 0,
 87 | CPU_SUBTYPE_RT_PC  = 1,
 88 | CPU_SUBTYPE_RT_APC = 2,
 89 | CPU_SUBTYPE_RT_135 = 3,
 90 | )
 91 | 
 92 | # 2032/32332/32532 subtypes.
 93 | ImportAll(
 94 | CPU_SUBTYPE_MMAX_ALL      = 0,
 95 | CPU_SUBTYPE_MMAX_DPC      = 1, # 032 CPU
 96 | CPU_SUBTYPE_SQT           = 2,
 97 | CPU_SUBTYPE_MMAX_APC_FPU  = 3, # 32081 FPU
 98 | CPU_SUBTYPE_MMAX_APC_FPA  = 4, # Weitek FPA
 99 | CPU_SUBTYPE_MMAX_XPC      = 5, # 532 CPU
100 | )
101 | 
102 | # 680x0 subtypes
103 | #   NeXT used to consider 68030 code as generic 68000 code.
104 | #   For backwards compatibility:
105 | #   * CPU_SUBTYPE_MC68030 symbol has been preserved for source code
106 | #     compatibility.
107 | #   * CPU_SUBTYPE_MC680x0_ALL has been defined to be the same
108 | #     subtype as CPU_SUBTYPE_MC68030 for binary compatibility.
109 | #   * CPU_SUBTYPE_MC68030_ONLY has been added to allow new object
110 | #     files to be tagged as containing 68030-specific instructions.
111 | ImportAll(
112 | CPU_SUBTYPE_MC680x0_ALL  = 1,
113 | CPU_SUBTYPE_MC68030      = 1,
114 | CPU_SUBTYPE_MC68040      = 2,
115 | CPU_SUBTYPE_MC68030_ONLY = 3,
116 | )
117 | 
118 | # I386 subtypes.
def CPU_SUBTYPE_INTEL(f, m):
    ''' Return the subtype value combining 'f', stored in the lowest
        bits, with 'm', shifted left by 4 bits. '''
    shifted = m << 4
    return f + shifted
120 | ImportAll(
121 | CPU_SUBTYPE_I386_ALL       = CPU_SUBTYPE_INTEL(3, 0),
122 | CPU_SUBTYPE_386            = CPU_SUBTYPE_INTEL(3, 0),
123 | CPU_SUBTYPE_486            = CPU_SUBTYPE_INTEL(4, 0),
124 | CPU_SUBTYPE_486SX          = CPU_SUBTYPE_INTEL(4, 8),
125 | CPU_SUBTYPE_586            = CPU_SUBTYPE_INTEL(5, 0),
126 | CPU_SUBTYPE_PENT           = CPU_SUBTYPE_INTEL(5, 0),
127 | CPU_SUBTYPE_PENTPRO        = CPU_SUBTYPE_INTEL(6, 1),
128 | CPU_SUBTYPE_PENTII_M3      = CPU_SUBTYPE_INTEL(6, 3),
129 | CPU_SUBTYPE_PENTII_M5      = CPU_SUBTYPE_INTEL(6, 5),
130 | CPU_SUBTYPE_CELERON        = CPU_SUBTYPE_INTEL(7, 6),
131 | CPU_SUBTYPE_CELERON_MOBILE = CPU_SUBTYPE_INTEL(7, 7),
132 | CPU_SUBTYPE_PENTIUM_3      = CPU_SUBTYPE_INTEL(8, 0),
133 | CPU_SUBTYPE_PENTIUM_3_M    = CPU_SUBTYPE_INTEL(8, 1),
134 | CPU_SUBTYPE_PENTIUM_3_XEON = CPU_SUBTYPE_INTEL(8, 2),
135 | CPU_SUBTYPE_PENTIUM_M      = CPU_SUBTYPE_INTEL(9, 0),
136 | CPU_SUBTYPE_PENTIUM_4      = CPU_SUBTYPE_INTEL(10, 0),
137 | CPU_SUBTYPE_PENTIUM_4_M    = CPU_SUBTYPE_INTEL(10, 1),
138 | CPU_SUBTYPE_ITANIUM        = CPU_SUBTYPE_INTEL(11, 0),
139 | CPU_SUBTYPE_ITANIUM_2      = CPU_SUBTYPE_INTEL(11, 1),
140 | CPU_SUBTYPE_XEON           = CPU_SUBTYPE_INTEL(12, 0),
141 | CPU_SUBTYPE_XEON_MP        = CPU_SUBTYPE_INTEL(12, 1),
142 | )
143 | 
144 | ImportAll(
145 | CPU_SUBTYPE_X86_ALL    = 3,
146 | CPU_SUBTYPE_X86_64_ALL = 3,
147 | CPU_SUBTYPE_X86_ARCH1  = 4,
148 | CPU_SUBTYPE_X86_64_H   = 8, # Haswell feature subset
149 | )
150 | 
151 | # Mips subtypes.
152 | ImportAll(
153 | CPU_SUBTYPE_MIPS_ALL     = 0,
154 | CPU_SUBTYPE_MIPS_R2300   = 1,
155 | CPU_SUBTYPE_MIPS_R2600   = 2,
156 | CPU_SUBTYPE_MIPS_R2800   = 3,
157 | CPU_SUBTYPE_MIPS_R2000a  = 4, # pmax
158 | CPU_SUBTYPE_MIPS_R2000   = 5,
159 | CPU_SUBTYPE_MIPS_R3000a  = 6, # 3max
160 | CPU_SUBTYPE_MIPS_R3000   = 7,
161 | )
162 | 
163 | # HPPA subtypes for Hewlett-Packard HP-PA family of risc processors.
164 | # Port by NeXT to 700 series. 
165 | ImportAll(
166 | CPU_SUBTYPE_HPPA_ALL     = 0,
167 | CPU_SUBTYPE_HPPA_7100    = 0,
168 | CPU_SUBTYPE_HPPA_7100LC  = 1,
169 | )
170 | 
171 | # MC88000 subtypes
172 | ImportAll(
173 | CPU_SUBTYPE_MC88000_ALL  = 0,
174 | CPU_SUBTYPE_MMAX_JPC     = 1,
175 | CPU_SUBTYPE_MC88100      = 1,
176 | CPU_SUBTYPE_MC88110      = 2,
177 | )
178 | 
179 | # MC98000 (PowerPC) subtypes
ImportAll(
CPU_SUBTYPE_MC98000_ALL  = 0,
CPU_SUBTYPE_MC98000_AL   = 0, # Misspelled name, kept for backward compatibility
CPU_SUBTYPE_MC98601      = 1,
)
184 | 
185 | 
186 | # I860 subtypes
187 | ImportAll(
188 | CPU_SUBTYPE_I860_ALL     = 0,
189 | CPU_SUBTYPE_I860_860     = 1,
190 | 
191 | CPU_SUBTYPE_I860_LITTLE_ALL = 0,
192 | CPU_SUBTYPE_I860_LITTLE     = 1,
193 | )
194 | 
195 | # RS6000 subtypes
196 | ImportAll(
197 | CPU_SUBTYPE_RS6000_ALL = 0,
198 | CPU_SUBTYPE_RS6000     = 1,
199 | )
200 | 
201 | # Sun4 subtypes - port done at CMU
202 | ImportAll(
203 | CPU_SUBTYPE_SUN4_ALL     = 0,
204 | CPU_SUBTYPE_SUN4_260     = 1,
205 | CPU_SUBTYPE_SUN4_110     = 2,
206 | CPU_SUBTYPE_SPARC_ALL    = 0,
207 | )
208 | 
209 | # PowerPC subtypes
210 | ImportAll(
211 | CPU_SUBTYPE_POWERPC_ALL   = 0,
212 | CPU_SUBTYPE_POWERPC_601   = 1,
213 | CPU_SUBTYPE_POWERPC_602   = 2,
214 | CPU_SUBTYPE_POWERPC_603   = 3,
215 | CPU_SUBTYPE_POWERPC_603e  = 4,
216 | CPU_SUBTYPE_POWERPC_603ev = 5,
217 | CPU_SUBTYPE_POWERPC_604   = 6,
218 | CPU_SUBTYPE_POWERPC_604e  = 7,
219 | CPU_SUBTYPE_POWERPC_620   = 8,
220 | CPU_SUBTYPE_POWERPC_750   = 9,
221 | CPU_SUBTYPE_POWERPC_7400  = 10,
222 | CPU_SUBTYPE_POWERPC_7450  = 11,
223 | CPU_SUBTYPE_POWERPC_970   = 100,
224 | 
225 | CPU_SUBTYPE_POWERPC64_ALL = 0,
226 | )
227 | 
228 | # VEO subtypes
229 | #  Note: the CPU_SUBTYPE_VEO_ALL will likely change over time to be defined as
230 | #  one of the specific subtypes.
231 | ImportAll(
232 | CPU_SUBTYPE_VEO_1     = 1,
233 | CPU_SUBTYPE_VEO_2     = 2,
234 | CPU_SUBTYPE_VEO_3     = 3,
235 | CPU_SUBTYPE_VEO_4     = 4,
236 | CPU_SUBTYPE_VEO_ALL   = 2, # CPU_SUBTYPE_VEO_2
237 | )
238 | 
239 | # Acorn subtypes
240 | ImportAll(
241 | CPU_SUBTYPE_ARM_ALL    = 0,
242 | CPU_SUBTYPE_ARM_V4T    = 5,
243 | CPU_SUBTYPE_ARM_V6     = 6,
244 | CPU_SUBTYPE_ARM_V5TEJ  = 7,
245 | CPU_SUBTYPE_ARM_XSCALE = 8,
246 | CPU_SUBTYPE_ARM_V7     = 9,
247 | CPU_SUBTYPE_ARM_V7F    = 10, # Cortex A9
248 | CPU_SUBTYPE_ARM_V7S    = 11, # Swift
249 | CPU_SUBTYPE_ARM_V7K    = 12,
250 | CPU_SUBTYPE_ARM_V8     = 13,
251 | CPU_SUBTYPE_ARM_V6M    = 14, # Not meant to be run under xnu
252 | CPU_SUBTYPE_ARM_V7M    = 15, # Not meant to be run under xnu
253 | CPU_SUBTYPE_ARM_V7EM   = 16, # Not meant to be run under xnu
254 | 
255 | CPU_SUBTYPE_ARM64_ALL  = 0,
256 | CPU_SUBTYPE_ARM64_V8   = 1,
257 | )
258 | 
259 | 
260 | #### Source: /usr/include/mach-o/reloc.h
261 | 
262 | # * In reloc.h, there are two data structures: relocation_info and scattered_relocation_info, which are merged in one structure below.
263 | ImportAll(
264 | R_SCATTERED = 0x80000000
265 | )
class relocation_info(CStruct):
    # A relocation entry, made of two 32-bit words.  The highest bit of
    # the first word ('scattered') tells how the other bits are decoded:
    # the properties with suffix _1 are for scattered entries, the ones
    # with suffix _0 are for non-scattered entries.
    _fields = [
        ("relocaddr","u32"),
        ("relocsym","u32"),
        ]
    # Bit 31 of the first word
    scattered = property(lambda _:(_.relocaddr&0x80000000)>>31)
    # The 24 lowest bits of the first word
    address   = property(lambda _:(_.relocaddr&0x00ffffff))
    # Scattered
    # (bit 30, bits 28-29 and bits 24-27 of the first word)
    pcrel_1  = property(lambda _:(_.relocaddr&0x40000000)>>30)
    length_1 = property(lambda _:(_.relocaddr&0x30000000)>>28)
    type_1   = property(lambda _:(_.relocaddr&0x0f000000)>>24)
    # Not scattered
    # (bits 28-31, bit 27, bits 25-26, bit 24 and the 24 lowest bits of
    # the second word)
    type_0   = property(lambda _:(_.relocsym&0xf0000000)>>28)
    extern_0 = property(lambda _:(_.relocsym&0x08000000)>>27)
    length_0 = property(lambda _:(_.relocsym&0x06000000)>>25)
    pcrel_0  = property(lambda _:(_.relocsym&0x01000000)>>24)
    value    = property(lambda _:(_.relocsym&0x00ffffff))
    # Generic
    # (each property reads the variant with suffix _0 or _1, depending on
    # 'scattered'; NB: no 'extern_1' is defined in this class)
    type   = property(lambda _:getattr(_,"type_%s"%_.scattered))
    extern = property(lambda _:getattr(_,"extern_%s"%_.scattered))
    length = property(lambda _:getattr(_,"length_%s"%_.scattered))
    pcrel  = property(lambda _:getattr(_,"pcrel_%s"%_.scattered))
    def symbolNumOrValue(self):
        # The whole second word if scattered, else its 24 lowest bits
        if self.scattered: return self.relocsym
        else:              return self.value
    symbolNumOrValue = property(symbolNumOrValue)
    def __repr__(self):
        # 'extern' is displayed for non-scattered entries only
        fields = [ "pcrel", "length" ]
        if not self.scattered:
            fields.append("extern")
        fields.extend(["type", "scattered", "symbolNumOrValue"])
        return "<" + self.__class__.__name__ + " " + " -- ".join([x + " " + hex(getattr(self,x)) for x in fields]) + ">"
298 | 
299 | # Relocation types used in a generic implementation.  Relocation entries for
300 | # normal things use the generic relocation as described above and their r_type
301 | # is GENERIC_RELOC_VANILLA (a value of zero).
302 | # (...)
303 | # The implementation is quite messy given the compatibility with the existing
304 | # relocation entry format. (...)
305 | ImportAll(
306 | GENERIC_RELOC_VANILLA        = 0, # generic relocation as described above
307 | GENERIC_RELOC_PAIR           = 1, # Only follows a GENERIC_RELOC_SECTDIFF
308 | GENERIC_RELOC_SECTDIFF       = 2,
309 | GENERIC_RELOC_PB_LA_PTR      = 3, # prebound lazy pointer */
310 | GENERIC_RELOC_LOCAL_SECTDIFF = 4,
311 | GENERIC_RELOC_TLV            = 5, # thread local variables */
312 | )
313 | 
314 | #### Source: /usr/include/mach-o/x86_64/reloc.h
315 | # Relocations for x86_64 are a bit different than for other architectures in
316 | # Mach-O: Scattered relocations are not used.  Almost all relocations produced
317 | # by the compiler are external relocations.  An external relocation has the
318 | # r_extern bit set to 1 and the r_symbolnum field contains the symbol table
319 | # index of the target label.
320 | # (...)
# Values of the relocation "type" field for x86_64 Mach-O files.
# NOTE(review): ImportAll is defined outside this excerpt; presumably it
# publishes each keyword argument as a module-level constant -- confirm.
ImportAll(
X86_64_RELOC_UNSIGNED    = 0, # for absolute addresses
X86_64_RELOC_SIGNED      = 1, # for signed 32-bit displacement
X86_64_RELOC_BRANCH      = 2, # a CALL/JMP instruction with 32-bit displacement
X86_64_RELOC_GOT_LOAD    = 3, # a MOVQ load of a GOT entry
X86_64_RELOC_GOT         = 4, # other GOT references
X86_64_RELOC_SUBTRACTOR  = 5, # must be followed by a X86_64_RELOC_UNSIGNED
X86_64_RELOC_SIGNED_1    = 6, # for signed 32-bit displacement with a -1 addend
X86_64_RELOC_SIGNED_2    = 7, # for signed 32-bit displacement with a -2 addend
X86_64_RELOC_SIGNED_4    = 8, # for signed 32-bit displacement with a -4 addend
X86_64_RELOC_TLV         = 9, # for thread local variables
)
333 | 


--------------------------------------------------------------------------------
/elfesteem/new_cstruct.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import struct
  4 | import re
  5 | 
# To be compatible with python 2 and python 3
# (struct.pack returns the binary string type of the running interpreter)
data_empty = struct.pack("")
data_null = struct.pack("B",0)

# type_size   : struct format char -> width in bits; further down it also
#               receives symbolic names ('u08', 's32', 'ptr', ...) mapped to
#               a format string, so its values are of mixed kinds.
# size2type   : width in bits -> unsigned struct format char
# size2type_s : width in bits -> signed struct format char
type_size = {}
size2type = {}
size2type_s = {}

# calcsize() is called without a byte-order prefix, i.e. with native sizes.
# The loop variables t and s remain defined at module level afterwards.
for t in 'B', 'H', 'I', 'Q':
    s = struct.calcsize(t)
    type_size[t] = s*8
    size2type[s*8] = t

for t in 'b', 'h', 'i', 'q':
    s = struct.calcsize(t)
    type_size[t] = s*8
    size2type_s[s*8] = t

# Symbolic unsigned type names -> struct format char of that width
type_size['u08'] = size2type[8]
type_size['u16'] = size2type[16]
type_size['u32'] = size2type[32]
type_size['u64'] = size2type[64]

# Symbolic signed type names -> struct format char of that width
type_size['s08'] = size2type_s[8]
type_size['s16'] = size2type_s[16]
type_size['s32'] = size2type_s[32]
type_size['s64'] = size2type_s[64]

# Formats that map to themselves.  The 'q' assignment replaces the bit width
# stored for 'q' by the signed loop above.
type_size['d'] = 'd'
type_size['f'] = 'f'
type_size['q'] = 'q'
# 'ptr' is a placeholder: fix_size() and real_fmt() replace it by
# size2type[wsize] before it is used as a struct format.
type_size['ptr'] = 'ptr'

# Endianness flag (the "_sex" argument of CStruct) -> struct byte-order prefix
sex_types = {0:'<', 1:'>'}
 40 | 
 41 | def fix_size(fields, wsize):
 42 |     out = []
 43 |     for name, v in fields:
 44 |         if v.endswith("s"):
 45 |             pass
 46 |         elif v == "ptr":
 47 |             v = size2type[wsize]
 48 |         elif not v in type_size:
 49 |             raise ValueError("unkown Cstruct type", v)
 50 |         else:
 51 |             v = type_size[v]
 52 |         out.append((name, v))
 53 |     fields = out
 54 |     return fields
 55 | 
 56 | def real_fmt(fmt, wsize):
 57 |     if fmt == "ptr":
 58 |         v = size2type[wsize]
 59 |     elif fmt in type_size:
 60 |         v = type_size[fmt]
 61 |     else:
 62 |         v = fmt
 63 |     return v
 64 | 
 65 | all_cstructs = {}
 66 | class Cstruct_Metaclass(type):
 67 |     _prefix = "_field_"
 68 |     def __new__(cls, name, bases, dct):
 69 |         if name == 'CStructBase':
 70 |             o = type.__new__(cls, name, bases, dct)
 71 |             return o
 72 |         for fields in dct['_fields']:
 73 |             fname = fields[0]
 74 |             if fname in ['parent', 'parent_head']:
 75 |                 raise ValueError('field name will confuse internal structs',
 76 |                                  repr(fname))
 77 |             dct[fname] = property(dct.pop("get_"+fname,
 78 |                                           lambda self,fname=fname: getattr(self,cls._prefix+fname)),
 79 |                                   dct.pop("set_"+fname,
 80 |                                           lambda self,v,fname=fname: setattr(self,cls._prefix+fname,v)),
 81 |                                   dct.pop("del_"+fname, None))
 82 | 
 83 | 
 84 | 
 85 |         o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct)
 86 |         if name != "CStruct":
 87 |             all_cstructs[name] = o
 88 |         return o
 89 | 
 90 |     def unpack_l(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
 91 |         if _sex is None and _wsize is None:
 92 |             # get sex and size from parent
 93 |             if parent_head:
 94 |                 _sex = parent_head._sex
 95 |                 _wsize = parent_head._wsize
 96 |             else:
 97 |                 _sex = 0
 98 |                 _wsize = 32
 99 |         c = cls(_sex = _sex, _wsize = _wsize)
100 |         if parent_head is None:
101 |             parent_head = c
102 |         c.parent_head = parent_head
103 | 
104 |         of1 = off
105 |         for field in c._fields:
106 |             cpt = None
107 |             of2 = of1
108 |             if len(field) == 2:
109 |                 fname, ffmt = field
110 |             elif len(field) == 3:
111 |                 fname, ffmt, cpt = field
112 |             if ffmt in type_size or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)):
113 |                 # basic types
114 |                 if cpt:
115 |                     value = []
116 |                     i = 0
117 |                     while i < cpt(c):
118 |                         fmt = real_fmt(ffmt, _wsize)
119 |                         of2 = of1+struct.calcsize(fmt)
120 |                         value.append(struct.unpack(c.sex+fmt, s[of1:of2])[0])
121 |                         of1 = of2
122 |                         i+=1
123 |                 else:
124 |                     fmt = real_fmt(ffmt, _wsize)
125 |                     of2 = of1+struct.calcsize(fmt)
126 |                     value = struct.unpack(c.sex+fmt, s[of1:of2])[0]
127 |             elif ffmt == "sz": # null terminated special case
128 |                 of2 = s.find(data_null, of1)
129 |                 if of2 == -1:
130 |                     raise ValueError('no null char in string!')
131 |                 of2 += 1
132 |                 value = s[of1:of2-1]
133 |             elif ffmt in all_cstructs:
134 |                 # sub structures
135 |                 if cpt:
136 |                     value = []
137 |                     i = 0
138 |                     while i < cpt(c):
139 |                         v, l = all_cstructs[ffmt].unpack_l(s, of1, parent_head, _sex, _wsize)
140 |                         v.parent = c
141 |                         value.append(v)
142 |                         of2 = of1 + l
143 |                         of1 = of2
144 |                         i += 1
145 |                 else:
146 |                     value, l = all_cstructs[ffmt].unpack_l(s, of1, parent_head, _sex, _wsize)
147 |                     value.parent = c
148 |                     of2 = of1 + l
149 |             elif isinstance(ffmt, tuple):
150 |                 f_get, f_set = ffmt
151 |                 value, of2 = f_get(c, s, of1)
152 |             else:
153 |                 raise ValueError('unknown class', ffmt)
154 |             of1 = of2
155 |             setattr(c, CStruct._prefix+fname, value)
156 | 
157 |         return c, of2-off
158 | 
159 |     def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
160 |         c, l = cls.unpack_l(s, off = off,
161 |                             parent_head = parent_head, _sex=_sex, _wsize=_wsize)
162 |         return c
163 | 
# Root class created by calling the metaclass directly, a syntax accepted by
# both python 2 and python 3.
CStructBase = Cstruct_Metaclass('CStructBase', (object,), {})
class CStruct(CStructBase):
    """Base class of the declarative binary structures.

    Subclasses list their content in ``_fields``; each entry is
    ``(name, format)`` or ``(name, format, count_function)``.  A non-empty
    ``_packformat`` forces the struct byte-order prefix of the class,
    whatever the requested endianness.
    """
    _packformat = ""
    _fields = []

    def __init__(self, parent_head = None, _sex = None, _wsize = None, **kargs):
        """Create a structure with every field set to None.

        parent_head -- object providing ``_sex``/``_wsize`` when neither is given
        _sex        -- endianness flag, a key of ``sex_types``
        _wsize      -- word size in bits, used to resolve "ptr" fields
        kargs       -- initial field values, by field name
        """
        self.parent_head = parent_head
        # if neither sex nor size is given: get the ones of the parent
        if _sex is None and _wsize is None:
            if parent_head:
                _sex = parent_head._sex
                _wsize = parent_head._wsize
            else:
                # else default sex & size
                _sex = 0
                _wsize = 32
        self.wsize = _wsize
        if self._packformat:
            self.sex = self._packformat
        else:
            self.sex = sex_types[_sex]
        for f in self._fields:
            setattr(self, CStruct._prefix+f[0], None)
        # Values are stored directly in the instance dictionary, without
        # going through the field properties.
        for k, v in kargs.items():
            self.__dict__[CStruct._prefix+k] = v

    def pack(self):
        """Return the binary representation of the structure.

        A basic-type field, or an element of a basic-type array, that is
        None is emitted as null bytes of the size of its type.

        Raises ValueError for a field format that cannot be interpreted.
        """
        out = data_empty
        for field in self._fields:
            cpt = None
            if len(field) == 2:
                fname, ffmt = field
            elif len(field) == 3:
                fname, ffmt, cpt = field

            value = getattr(self, CStruct._prefix+fname)
            if ffmt in type_size or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)):
                # basic types
                fmt = real_fmt(ffmt, self.wsize)
                if cpt is None:
                    if value is None:
                        o = struct.calcsize(fmt)*data_null
                    else:
                        o = struct.pack(self.sex+fmt, value)
                else:
                    o = data_empty
                    for v in value:
                        # test the element: testing the whole array here
                        # could never succeed
                        if v is None:
                            o += struct.calcsize(fmt)*data_null
                        else:
                            o += struct.pack(self.sex+fmt, v)

            elif ffmt == "sz": # null terminated special case
                o = value+data_null
            elif ffmt in all_cstructs:
                # sub structures
                if cpt is None:
                    o = value.pack()
                else:
                    o = data_empty
                    for v in value:
                        o += v.pack()
            elif isinstance(ffmt, tuple):
                # custom (getter, setter) pair: the setter serializes the value
                f_get, f_set = ffmt
                o = f_set(self, value)

            else:
                raise ValueError('unknown class', ffmt)
            out += o

        return out

    def __len__(self):
        """Size in bytes of the packed structure."""
        return len(self.pack())

    def __str__(self):
        """Always raise: the binary form is obtained with pack()."""
        raise AttributeError("Use pack() instead of str()")

    def __repr__(self):
        """Return ``<ClassName=v1/v2/...>`` with the repr of each field value."""
        shown = [repr(getattr(self, f[0])) for f in self._fields]
        return "<%s=%s>" % (self.__class__.__name__, "/".join(shown))

    def __getitem__(self, item): # to work with format strings
        return getattr(self, item)
251 | 
# Self-test, run when the module is executed as a script: declares a few
# example structures, then checks that unpack() followed by pack() gives
# back the original data.
# NOTE(review): several inputs below concatenate struct.pack() output with
# plain string literals and compare elements of the data with "\x00"; under
# python 3 struct.pack() returns bytes, so these parts look python 2 only
# -- confirm before running with python 3.
if __name__ == "__main__":

    """
    Classic C struct
    """
    class c1(CStruct):
        _fields = [("c1_field1", "u16"),
                   ("c1_field2", "u16"),
                   ("c1_field3", "u32"),
                   ]

    """
    Struct with a sub structure as field (here, c1)
    """
    class c2(CStruct):
        _fields = [("c2_field1", "u16"),
                   ("c2_field2", "u16"),
                   ("c2_field3", "u32"),
                   ("c2_c", "c1"),
                   ]

    """
    b field is an array of 2 u16
    c field is an array of a count u16
    """
    # In the declaration below the element type of "c" is the c1 structure
    # and its count is the value of field "a".
    class c3(CStruct):
        _fields = [("a", "u16"),
                   ("b", "u16", lambda x:2),
                   ("c", "c1", lambda c:c.a),
                   ("d", "u16"),
                   ]

    """
    e field has its own packing/unpacking custom functions
    """
    class c4(CStruct):
        _fields = [("d", "u16"),
                   ("e", (lambda c, s, of:c.gets(s, of),
                          lambda c, value:c.sets(value))),
                   ("f", "u16"),
                   ]
        # gets returns the value and the offset following its terminator
        def gets(self, s, of):
            i = 0
            while s[of+i] != "\x00":
                i+=1
            return s[of:of+i], of+i+1
        def sets(self, value):
            return str(value)+'\x00'

    """
    h field is a 4 len string
    """
    class c5(CStruct):
        _fields = [("g", "u16"),
                   ("h", "4s"),
                   ]

    """
    j field is a nul terminated string
    """
    class c6(CStruct):
        _fields = [("i", "u16"),
                   ("j", "sz"),
                   ("k", "u16"),
                   ]

    # Every class declared above has been registered by the metaclass
    print(all_cstructs)

    # c1: plain structure, round trip
    s1 = struct.pack('HHI', 1111, 2222, 333333333)
    c = c1.unpack(s1)
    print(repr(c))
    assert len(c) == 8
    s2 = c.pack()
    assert s1 == s2
    print(repr(s2))
    print(repr(c1.unpack(s2)))

    # c2: structure embedding a c1
    s3 = struct.pack('HHI', 4444, 5555, 666666666)+s2
    print(repr(s3))
    assert len(s3) == 16
    c = c2.unpack(s3)
    print(repr(c))
    s4 = c.pack()
    print("%r %r"%(s3,s4))
    assert s3 == s4
    assert c.c2_c.parent_head == c


    # c3: a == 2, so the data holds two u16 for b, two c1 for c, then d
    s5 = struct.pack('HHH', 2, 5555, 6666)+s1*2+struct.pack('H', 9999)
    c = c3.unpack(s5)
    assert len(c) == 24
    print(repr(c))
    print(c.b)
    print(c.c)
    print(c.c[0].c1_field1)

    s6 = c.pack()
    print("%r %r"%(s5,s6))
    assert s5 == s6

    # c1 built field by field instead of unpacked
    c = c1()
    c.c1_field1 = 1111
    c.c1_field2 = 2222
    c.c1_field3 = 333333333
    assert c.pack() == s1

    # c4: field with custom get/set functions
    s7 = struct.pack('H', 8888)+"fffff\x00"+struct.pack('H', 9999)
    c = c4.unpack(s7)
    print(repr(c))
    print(repr(c.e))
    print(repr(c.f))

    print(repr(s7))
    print(repr(c.pack()))
    assert s7 == c.pack()

    # c5: fixed-length string
    s8 = struct.pack('H4s', 8888, "abcd")
    c = c5.unpack(s8)
    print(repr(c))
    assert s8 == c.pack()


    # c6: null-terminated string
    s9 = struct.pack('H', 9999)+ "toto\x00" + struct.pack('H', 1010)
    print(repr(s9))
    c = c6.unpack(s9)
    print("%r %r"%(c,c.pack()))
    assert s9 == c.pack()
380 | 


--------------------------------------------------------------------------------
/elfesteem/minidump_init.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | """
  3 | High-level abstraction of Minidump file
  4 | """
  5 | import sys, os
  6 | sys.path.insert(1, os.path.abspath(sys.path[0]+'/..'))
  7 | 
  8 | from elfesteem.strpatchwork import StrPatchwork
  9 | from elfesteem import minidump as mp
 10 | 
 11 | if sys.version_info[0:2] == (2, 3):
 12 |     from elfesteem.compatibility_python23 import sorted
 13 | 
 14 | class MemorySegment(object):
 15 |     """Stand for a segment in memory with additionnal information"""
 16 | 
 17 |     def __init__(self, offset, memory_desc, module=None, memory_info=None):
 18 |         self.offset = offset
 19 |         self.memory_desc = memory_desc
 20 |         self.module = module
 21 |         self.memory_info = memory_info
 22 |         self.minidump = self.memory_desc.parent_head
 23 | 
 24 |     def address(self):
 25 |         return self.memory_desc.StartOfMemoryRange
 26 |     address = property(address)
 27 | 
 28 |     def size(self):
 29 |         if isinstance(self.memory_desc, mp.MemoryDescriptor64):
 30 |             return self.memory_desc.DataSize
 31 |         elif isinstance(self.memory_desc, mp.MemoryDescriptor):
 32 |             return self.memory_desc.Memory.DataSize
 33 |         raise TypeError
 34 |     size = property(size)
 35 | 
 36 |     def name(self):
 37 |         if self.module:
 38 |             return self.module.ModuleName
 39 |         return ""
 40 |     name = property(name)
 41 | 
 42 |     def content(self):
 43 |         return self.minidump._content[self.offset:self.offset + self.size]
 44 |     content = property(content)
 45 | 
 46 |     def protect(self):
 47 |         if self.memory_info:
 48 |             return self.memory_info.Protect
 49 |         return None
 50 |     protect = property(protect)
 51 | 
 52 |     def pretty_protect(self):
 53 |         if self.protect is None:
 54 |             return "UNKNOWN"
 55 |         return mp.memProtect[self.protect]
 56 |     pretty_protect = property(pretty_protect)
 57 | 
 58 |     def dump(self):
 59 |         return mp.data_str(self.content)
 60 | 
 61 | 
class Minidump(object):
    """Stand for a Minidump file

    Here is a few limitation:
     - only < 4GB Minidump are supported (LocationDescriptor handling)
     - only Stream relative to memory mapping are implemented

    Official description is available on MSDN:
    https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx
    """

    # Endianness flag and word size handed to the structure parser: this
    # object is passed as "parent_head" to every unpack() call below.
    _sex = 0
    _wsize = 32

    def entrypoint(self):
        """Program counter of the first thread, or -1 when there is no
        thread or when the architecture is neither x86 nor AMD64."""
        # NOTE(review): self.threads stays None when the dump has no
        # SystemInfo or no ThreadList stream; this line then raises
        # AttributeError -- confirm whether -1 is expected instead.
        if not len(self.threads.Threads): return -1
        pc_reg = ()
        if self.systeminfo.ProcessorArchitecture == \
                mp.processorArchitecture.PROCESSOR_ARCHITECTURE_X86:
            pc_reg = self.threads.Threads[0].ThreadContext.Eip
        if self.systeminfo.ProcessorArchitecture == \
                mp.processorArchitecture.PROCESSOR_ARCHITECTURE_AMD64:
            pc_reg = self.threads.Threads[0].ThreadContext.Rip
        # The register value is a sequence; its first element is returned
        if not len(pc_reg): return -1
        return pc_reg[0]
    # Drops the first 23 characters of the architecture name, presumably the
    # "PROCESSOR_ARCHITECTURE_" prefix -- confirm against SystemInfo.
    architecture = property(lambda _:_.systeminfo.pretty_processor_architecture[23:])
    entrypoint = property(entrypoint)
    sections = property(lambda _:_.memory.values())
    symbols = ()
    dynsyms = ()

    def __init__(self, minidump_str):
        """Parse *minidump_str*, the raw content of a Minidump file."""
        self._content = StrPatchwork(minidump_str)

        # Specific streams
        self.modulelist = None
        self.memory64list = None
        self.memorylist = None
        self.memoryinfolist = None
        self.systeminfo = None

        # Get information
        self.streams = []
        self.threads = None
        self.parse_content()

        # Memory information
        self.memory = {} # base address (virtual) -> Memory information
        self.build_memory()
        self.virt = ContentVirtual(self)

    def parse_content(self):
        """Build structures corresponding to current content"""

        # Header
        offset = 0
        self.minidumpHDR = mp.MinidumpHDR.unpack(self._content, offset, self)
        # 0x504d444d is "MDMP" read as a little-endian 32-bit integer
        assert self.minidumpHDR.Magic == 0x504d444d

        # Streams
        base_offset = self.minidumpHDR.StreamDirectoryRva.rva
        # An empty directory entry is built only to know the packed size of
        # one entry
        empty_stream = mp.StreamDirectory(StreamType=0,
                                          Location=mp.LocationDescriptor(DataSize=0,
                                                                         Rva=mp.Rva(rva=0)
                                          )
        )
        streamdir_size = len(empty_stream)
        for i in range(self.minidumpHDR.NumberOfStreams):
            stream_offset = base_offset + i * streamdir_size
            stream = mp.StreamDirectory.unpack(self._content, stream_offset, self)
            self.streams.append(stream)

            # Launch specific action depending on the stream
            datasize = stream.Location.DataSize
            offset = stream.Location.Rva.rva
            # For the list streams below, the stream is parsed a second time
            # with the "WithPadding" structure when its size equals
            # 8 + count * entry size.
            # NOTE(review): presumably 4 bytes of count followed by 4 bytes
            # of padding -- confirm against the structure definitions.
            if stream.StreamType == mp.streamType.ModuleListStream:
                self.modulelist = mp.ModuleList.unpack(self._content, offset, self)
                if datasize == 8+self.modulelist.NumberOfModules*108:
                    self.modulelist = mp.ModuleListWithPadding.unpack(self._content, offset, self)
            elif stream.StreamType == mp.streamType.MemoryListStream:
                self.memorylist = mp.MemoryList.unpack(self._content, offset, self)
                if datasize == 8+self.memorylist.NumberOfMemoryRanges*16:
                    self.memorylist = mp.MemoryListWithPadding.unpack(self._content, offset, self)
            elif stream.StreamType == mp.streamType.Memory64ListStream:
                self.memory64list = mp.Memory64List.unpack(self._content, offset, self)
            elif stream.StreamType == mp.streamType.MemoryInfoListStream:
                self.memoryinfolist = mp.MemoryInfoList.unpack(self._content, offset, self)
            elif stream.StreamType == mp.streamType.SystemInfoStream:
                self.systeminfo = mp.SystemInfo.unpack(self._content, offset, self)
            # The attributes set by the three branches below exist only when
            # the corresponding stream is present; dump() tests them with
            # hasattr().
            elif stream.StreamType == mp.streamType.MiscInfoStream:
                self.miscinfo = mp.MiscInfo.unpack(self._content, offset, self)
            # Breakpad extension types
            elif stream.StreamType == mp.MDminidumpType.MD_ASSERTION_INFO_STREAM:
                self.breakpad_assertion = mp.BreakpadAssertion.unpack(self._content, offset, self)
            elif stream.StreamType == mp.MDminidumpType.MD_BREAKPAD_INFO_STREAM:
                self.breakpad_info = mp.BreakpadRawInfo.unpack(self._content, offset, self)

        # Some streams need the SystemInfo stream to work
        if self.systeminfo is None:
            return
        # Second pass over the directory, for the streams skipped above
        for stream in self.streams:
            datasize = stream.Location.DataSize
            offset = stream.Location.Rva.rva
            if stream.StreamType == mp.streamType.ThreadListStream:
                self.threads = mp.ThreadList.unpack(self._content, offset, self)
                if datasize == 8+self.threads.NumberOfThreads*48:
                    self.threads = mp.ThreadListWithPadding.unpack(self._content, offset, self)
            elif stream.StreamType == mp.streamType.ExceptionStream:
                self.exception = mp.Exception.unpack(self._content, offset, self)


    def build_memory(self):
        """Build an easier to use memory view based on ModuleList and
        Memory64List streams"""

        # Lookup tables keyed by base address, to attach the module and the
        # memory information to each range
        addr2module = {}
        if self.modulelist:
            for module in self.modulelist.Modules:
                addr2module[module.BaseOfImage] = module
        addr2meminfo = {}
        if self.memoryinfolist:
            for memory in self.memoryinfolist.MemoryInfos:
                addr2meminfo[memory.BaseAddress] = memory

        # The header flag selects which memory list is used
        mode64 = self.minidumpHDR.Flags & mp.minidumpType.MiniDumpWithFullMemory

        # NOTE(review): the selected list is used without checking that the
        # stream has been found (memory64list / memorylist may still be
        # None) -- confirm that every supported dump provides it.
        if mode64:
            # One starting offset for the whole list; it is advanced by the
            # size of each range in the loop below
            offset = self.memory64list.BaseRva
            memranges = self.memory64list.MemoryRanges
        else:
            memranges = self.memorylist.MemoryRanges

        for memory in memranges:
            if not mode64:
                # Each descriptor carries its own location
                offset = memory.Memory.Rva.rva

            # Create a MemorySegment with augmented information
            base_address = memory.StartOfMemoryRange
            module = addr2module.get(base_address, None)
            meminfo = addr2meminfo.get(base_address, None)
            self.memory[base_address] = MemorySegment(offset, memory,
                                                      module, meminfo)

            if mode64:
                offset += memory.DataSize

        # Sanity check
        # (every module base address must be the start of a memory range)
        if mode64:
            assert not False in [addr in self.memory for addr in addr2module]

    def get(self, virt_start, virt_stop):
        """Return the content at the (virtual addresses)
        [virt_start:virt_stop]"""

        # Find the corresponding memory segment
        # NOTE(review): the test selects a segment whose *base address* lies
        # in [virt_start, virt_stop], and the shift computed below is
        # addr - virt_start; a request starting inside a segment is
        # therefore not matched -- confirm that this is intended.
        for addr in self.memory:
            if virt_start <= addr <= virt_stop:
                break
        else:
            return ""

        memory = self.memory[addr]
        shift = addr - virt_start
        last = virt_stop - addr
        if last > memory.size:
            raise RuntimeError("Multi-page not implemented")

        return self._content[memory.offset + shift:memory.offset + last]

    def dump(self):
        """
        Same output as minidump_dump from
        https://chromium.googlesource.com/breakpad/breakpad
        """
        # NOTE(review): threads, modulelist, memorylist and systeminfo are
        # used unconditionally below; each is None when its stream is
        # missing.
        res = [ self.minidumpHDR.dump() ]
        streams_by_type = {} # Duplicates will not be shown
        for i, s in enumerate(self.streams):
            streams_by_type[s.StreamType] = (i, s)
            res.extend(["", "mDirectory[%d]"%i, s.dump()])
        res.append("\nStreams:")
        for t in sorted(streams_by_type.keys()):
            i, s = streams_by_type[t]
            res.append("  stream type %s at index %d" % (s.type_with_name, i))
        res.extend(["",
            "MinidumpThreadList",
            "  thread_count = %d" % self.threads.NumberOfThreads])
        for i, t in enumerate(self.threads.Threads):
            res.extend(["",
                "thread[%d]"%i,
                t.dump(),
                "",
                t.ThreadContext.dump(),
                "",
                "Stack",
                self.memory[t.Stack.StartOfMemoryRange].dump(),
                ])
        res.extend(["",
            "MinidumpModuleList",
            "  module_count = %d" % self.modulelist.NumberOfModules])
        for i, m in enumerate(self.modulelist.Modules):
            res.extend(["",
                "module[%d]"%i,
                m.dump(),
                m.dump_other(),
                ])
        res.extend(["",
            "MinidumpMemoryList",
            "  region_count = %d" % self.memorylist.NumberOfMemoryRanges])
        for i, m in enumerate(self.memorylist.MemoryRanges):
            res.extend(["",
                "region[%d]"%i,
                m.dump(),
                "Memory",
                self.memory[m.StartOfMemoryRange].dump(),
                ])
        # Optional streams: the attribute exists only if the stream was found
        if hasattr(self, 'exception'):
            res.extend(["",
                self.exception.dump(),
                "",
                self.exception.ThreadContext.dump(),
                ])
        if hasattr(self, 'breakpad_assertion'):
            res.extend(["",self.breakpad_assertion.dump()])
        res.extend(["",self.systeminfo.dump(),""])
        if hasattr(self, 'miscinfo'):
            res.extend([self.miscinfo.dump(),""])
        if hasattr(self, 'breakpad_info'):
            res.extend([self.breakpad_info.dump(),""])
        return '\n'.join(res)
291 | 
class ContentVirtual(object):
    """Minimal view of the virtual memory of a Minidump (stub for binary.py)"""
    def __init__(self, minidump):
        self.parent = minidump
    def max_addr(self):
        """Highest end address (address + size) of the memory segments of
        the parent, -1 when there is no segment."""
        ends = [-1]
        for segment in self.parent.memory.values():
            ends.append(segment.address + segment.size)
        return max(ends)
301 | 
if __name__ == "__main__":
    # Print the dump of every file named on the command line; each dump is
    # preceded by the file name when more than one file is given.
    show_names = len(sys.argv) > 2
    for path in sys.argv[1:]:
        if show_names: print("File: %s"%path)
        handle = open(path, 'rb')
        try:
            raw = handle.read()
        finally:
            handle.close()
        print(Minidump(raw).dump())
312 | 


--------------------------------------------------------------------------------
/tests/test_elf_manipulation.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import os
  4 | __dir__ = os.path.dirname(__file__)
  5 | 
  6 | from test_all import run_tests, assertion, hashlib, open_read
  7 | from elfesteem.strpatchwork import StrPatchwork
  8 | from elfesteem.elf_init import ELF, log
  9 | from elfesteem import elf
 10 | 
 11 | import struct
 12 | 
# We want to be able to verify warnings in non-regression test
# log.warning and log.error are replaced by recorders: each call appends a
# ('warn' or 'error', args, kargs) tuple to log_history.
log_history = []
log.warning = lambda *args, **kargs: log_history.append(('warn',args,kargs))
log.error = lambda *args, **kargs: log_history.append(('error',args,kargs))
 17 | 
 18 | def test_ELF_empty(assertion):
 19 |     e = ELF()
 20 |     d = e.pack()
 21 |     assertion('0ddf18391c150850c72257b3f3caa67b',
 22 |               hashlib.md5(d).hexdigest(),
 23 |               'Creation of a standard empty ELF')
 24 |     assertion(0,
 25 |               len(e.symbols),
 26 |               'Empty ELF has no symbols')
 27 |     d = ELF(d).pack()
 28 |     assertion('0ddf18391c150850c72257b3f3caa67b',
 29 |               hashlib.md5(d).hexdigest(),
 30 |               'Creation of a standard empty ELF; fix point')
 31 |     assertion(True,
 32 |               e.has_relocatable_sections(),
 33 |               'Standard empty ELF is relocatable')
 34 | 
 35 | def test_ELF_invalid(assertion):
 36 |     try:
 37 |         e = ELF(open_read(__dir__+'/binary_input/README.txt'))
 38 |         assertion(0,1, 'Not an ELF')
 39 |     except ValueError:
 40 |         pass
 41 | 
 42 | def test_ELF_creation(assertion):
 43 |     e = ELF(
 44 |         e_type    = elf.ET_REL, # Default value
 45 |         e_machine = elf.EM_386, # Default value
 46 |         sections = ['.text', '.text.startup', '.group',
 47 |                     '.data', '.rodata.str1.4', '.rodata.cst4',
 48 |                     '.bss', '.eh_frame', '.comment', '.note.GNU-stack',
 49 |                     ],
 50 |         relocs = ['.text'], # These sections will have relocs
 51 |         )
 52 |     d = e.pack()
 53 |     assertion('dc3f17080d002ba0bfb3aec9f3bec8b2',
 54 |               hashlib.md5(d).hexdigest(),
 55 |               'Creation of an ELF with a given list of sections')
 56 | 
def test_ELF_small32(assertion):
    """
    Non-regression checks on the sample 'elf_small.out'.

    Covers: identical re-packing, display of program/section headers and
    symbols, section lookup by name, type and address, slicing of the raw
    file and of the virtual memory, writes through e.virt, the deprecated
    len(e.virt), and pattern search.

    'assertion' is called as assertion(reference, computed, message).
    Most outputs are compared through their md5 rather than verbatim.
    """
    global log_history
    elf_small = open_read(__dir__+'/binary_input/elf_small.out')
    assertion('d5284d5f438e25ef5502a0c1de97d84f',
              hashlib.md5(elf_small).hexdigest(),
              'Reading elf_small.out')
    e = ELF(elf_small)
    d = e.pack()
    assertion('d5284d5f438e25ef5502a0c1de97d84f',
              hashlib.md5(d).hexdigest(),
              'Packing after reading elf_small.out')
    # Packed file is identical :-)
    # Text renderings are encoded to bytes before being hashed
    d = repr(e.ph).encode('latin1')
    assertion('ab4b1e52e7532789592878872910a2a1',
              hashlib.md5(d).hexdigest(),
              'Display Program Headers')
    d = repr(e.sh).encode('latin1')
    assertion('ddf01165114eb70bd27910e4c5b03c09',
              hashlib.md5(d).hexdigest(),
              'Display Section Headers (repr)')
    d = e.sh.readelf_display().encode('latin1')
    assertion('08da11fa164d7013561db398c068ac71',
              hashlib.md5(d).hexdigest(),
              'Display Section Headers (readelf)')
    d = e.getsectionbyname('.symtab').readelf_display().encode('latin1')
    assertion('943434f4cde658b1659b7d8db39d9e60',
              hashlib.md5(d).hexdigest(),
              'Display Symbol Table')
    # A symbol table can be indexed by symbol name or by position
    assertion('    49: 0804a01c     0 NOTYPE  GLOBAL DEFAULT  ABS _edata',
              e.getsectionbyname('.symtab')['_edata'].readelf_display(),
              'Get symbol by name, found')
    assertion('     2: 00000000     0 FUNC    GLOBAL DEFAULT  UND __stack_chk_fail',
              e.getsectionbyname('.dynsym')[2].readelf_display(),
              'Get symbol by index, found')
    d = e.getsectionbytype(elf.SHT_SYMTAB).pack()
    assertion('4ed5a808faff1ca7c6a766ae45ebf377',
              hashlib.md5(d).hexdigest(),
              'Get existing section by type')
    d = e.getsectionbyname('.text').pack()
    assertion('7149c6e4b8baaab8beebfeb818585638',
              hashlib.md5(d).hexdigest(),
              'Get existing section by name')
    # Same reference md5 as for '.text' above: the address lookup is
    # expected to return that very section
    d = e.getsectionbyvad(0x080483d0+0x100).pack()
    assertion('7149c6e4b8baaab8beebfeb818585638',
              hashlib.md5(d).hexdigest(),
              'Get existing section by address')
    # Failed lookups return None instead of raising
    d = e.getsectionbyname('no_sect')
    assertion(None, d, 'Get non-existing section by name')
    d = e.getsectionbyvad(0x1000)
    assertion(None, d, 'Get non-existing section by address')
    # Slicing the ELF object itself extracts raw data
    d = e[0x100:0x120]
    assertion('5e94f899265a799826a46ec86a293e16',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from raw data')
    # e.virt is addressed with virtual addresses
    assertion(True,
              e.virt.is_addr_in(0x080483d0),
              'Address in mapped virtual memory')
    assertion(False,
              e.virt.is_addr_in(0x08048000),
              'Address not in mapped virtual memory')
    d = e.virt[0x080483d0:0x080483e0]
    assertion('9d225ebfd0f9562b74b17c5a4653dc6f',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory, in a section')
    try:
        e.virt[0x08040000:0x08040020]
        # Not reached if slicing non-mapped memory raises ValueError
        assertion(0,1, 'Extract chunk from non-mapped memory')
    except ValueError:
        pass
    assertion(e.virt[0x080483d0:0x080483e0],
              e.virt(0x080483d0,0x080483e0),
              'Extract chunk from mapped memory, old API')
    # Writing back what was just read must not change the packed file
    e.virt[0x080483d0:0x080483e0] = e.virt[0x080483d0:0x080483e0]
    d = e.pack()
    assertion('d5284d5f438e25ef5502a0c1de97d84f',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (interval)')
    e.virt[0x080483d0] = e.virt[0x080483d0:0x080483e0]
    d = e.pack()
    assertion('d5284d5f438e25ef5502a0c1de97d84f',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (address)')
    # len(e.virt) still works, and a warning is expected in the log history
    assertion(0x804a028, len(e.virt), 'Max virtual address')
    assertion([('warn', ('__len__ deprecated',), {})],
              log_history,
              '__len__ deprecated (logs)')
    # Reset the log history, now that the expected warning has been checked
    log_history = []
    # Find leave; ret
    assertion(0x8048481,
              e.virt.find(struct.pack('BB', 0xc9, 0xc3)),
              'Find pattern (existing)')
    # -1 is expected when the pattern is absent
    assertion(-1,
              e.virt.find(struct.pack('BBBB', 1,2,3,4)),
              'Find pattern (not existing)')
151 | 
def test_ELF_small64(assertion):
    """
    Non-regression checks on the sample 'elf64_small.out': the file must
    be re-packed identically, and the readelf-like displays of the section
    headers, of the symbol table and of the '.rela.dyn' relocations must
    keep their reference md5.
    """
    def md5hex(data):
        return hashlib.md5(data).hexdigest()
    def display_md5(obj):
        # md5 of the readelf-like text rendering of 'obj'
        return md5hex(obj.readelf_display().encode('latin1'))
    file_md5 = 'dc21d928bb6a3a0fa59b17fafe803d50'
    raw = open_read(__dir__+'/binary_input/elf64_small.out')
    assertion(file_md5, md5hex(raw), 'Reading elf64_small.out')
    e = ELF(raw)
    # Packing must give back the very same bytes
    assertion(file_md5, md5hex(e.pack()),
              'Packing after reading elf64_small.out')
    assertion('0454c8b5354b3eda58fce252d5d48621',
              display_md5(e.sh),
              'Display Section Headers (readelf, 64bit)')
    assertion('452e64fb0f2dad5c0e44d83e57b9d82b',
              display_md5(e.getsectionbyname('.symtab')),
              'Display Symbol Table (elf64)')
    assertion('650cf3f99117d39d63fae73232e09acf',
              display_md5(e.getsectionbyname('.rela.dyn')),
              'Display Reloc Table (elf64)')
175 | 
def test_ELF_group(assertion):
    """
    Non-regression checks on the object file 'elf_cpp.o': identical
    re-packing, and display of its '.group' section.
    """
    def md5hex(data):
        return hashlib.md5(data).hexdigest()
    file_md5 = '57fed5de9474bc0600173a1db5ee6327'
    raw = open_read(__dir__+'/binary_input/elf_cpp.o')
    assertion(file_md5, md5hex(raw), 'Reading elf_cpp.o')
    e = ELF(raw)
    # Packing must give back the very same bytes
    assertion(file_md5, md5hex(e.pack()), 'Packing after reading elf_cpp.o')
    group_text = e.getsectionbyname('.group').readelf_display()
    assertion('5c80b11a64a32e7aaee8ef378da4ccef',
              md5hex(group_text.encode('latin1')),
              'Display Group Section')
191 | 
def test_ELF_TMP320C6x(assertion):
    """
    Non-regression checks on the sample 'notle-tesla-dsp.xe64T': identical
    re-packing, and readelf-like display of its section headers.
    """
    def md5hex(data):
        return hashlib.md5(data).hexdigest()
    file_md5 = 'fb83ed8d809f394e70f5d84d0c8e593f'
    raw = open_read(__dir__+'/binary_input/notle-tesla-dsp.xe64T')
    assertion(file_md5, md5hex(raw), 'Reading notle-tesla-dsp.xe64T')
    e = ELF(raw)
    # Packing must give back the very same bytes
    assertion(file_md5, md5hex(e.pack()),
              'Packing after reading notle-tesla-dsp.xe64T')
    headers_text = e.sh.readelf_display()
    assertion('ecf169c765d29175177528e24601f1be',
              md5hex(headers_text.encode('latin1')),
              'Display Section Headers (TMP320C6x)')
207 | 
def test_ELF_invalid_entsize(assertion):
    """
    Parsing an ELF whose symbol table section declares an entsize of 24
    must log the error 'SymTable has invalid entsize %d instead of %d'
    with the values 24 and 16.
    """
    global log_history
    # Some various ways for an ELF to be detected as invalid
    e = ELF()
    e.symbols.sh.entsize = 24
    # Re-parse the packed bytes: the error is expected at parsing time
    e = ELF(e.pack())
    assertion([('error', ('SymTable has invalid entsize %d instead of %d', 24, 16), {})],
              log_history,
              'Invalid entsize for symbols (logs)')
    # Leave an empty log history for the next checks
    log_history = []
218 | 
def test_ELF_invalid_shstrndx(assertion):
    """
    Parsing an ELF whose header has shstrndx=20 must log the error
    'No section of index shstrndx=20'; the parsed header is still
    expected to have shoff equal to 88.
    """
    global log_history
    e = ELF()
    e.Ehdr.shstrndx = 20
    # Re-parse the packed bytes: the error is expected at parsing time
    e = ELF(e.pack())
    assertion([('error', ('No section of index shstrndx=20',), {})],
              log_history,
              'Invalid shstrndx (logs)')
    assertion(88,
              e.Ehdr.shoff,
              'Normal e.Ehdr.shoff')
    # Leave an empty log history for the next checks
    log_history = []
231 | 
def test_ELF_offset_to_sections(assertion):
    """
    Patch offsets in the packed bytes of a default ELF so that they point
    outside of the file, and check the errors logged when parsing.

    The patches are cumulative: 'data' is modified in place and parsed
    again after each modification.
    """
    global log_history
    data = StrPatchwork(ELF().pack())
    # 88 is the value asserted as the normal e.Ehdr.shoff of a default ELF in
    # test_ELF_invalid_shstrndx; presumably 88+20 is the size field of
    # section header 0 (consistent with the logged message) -- TODO confirm
    data[88+20] = struct.pack("<I", 0x1000)
    ELF(data)
    assertion([('error', ('Offset to end of section %d after end of file', 0), {}),
                    ("error", ('Section offset overlap for ' + \
                            "[                00000000 001000 00000000 0] " + \
                            '[.text           00000034 000000 00000000 6]',), {})
                ],
              log_history,
              'Section offset+size too far away (logs)')
    log_history = []
    # Presumably the offset field of section header 0 -- TODO confirm
    data[88+16] = struct.pack("<I", 0x1000)
    ELF(data)
    assertion([('error', ('Offset to section %d after end of file', 0), {})],
              log_history,
              'Section offset very far away (logs)')
    log_history = []
    # Section headers starting inside the file, but ending after its end
    data[32] = struct.pack("<I", 100) # e.Ehdr.shoff
    ELF(data)
    assertion([('error', ('Offset to end of section headers after end of file',), {}),
               ('error', ('No section of index shstrndx=2',), {})],
              log_history,
              'SH offset too far away (logs)')
    log_history = []
    # Section headers starting after the end of the file
    data[32] = struct.pack("<I", 0x2000) # e.Ehdr.shoff
    ELF(data)
    assertion([('error', ('Offset to section headers after end of file',), {}),
               ('error', ('No section of index shstrndx=2',), {})],
              log_history,
              'SH offset very far away (logs)')
    log_history = []
265 | 
def test_ELF_wordsize_endianess(assertion):
    """
    Overwrite one byte at offset 4, then at offset 5, of the packed bytes
    of a default ELF, and check the error logged when parsing: an invalid
    word size for the former, an invalid endianess for the latter.
    """
    global log_history
    data = StrPatchwork(ELF().pack())
    # The value 4 is reported as a wordsize of 128 by the parser
    data[4] = struct.pack("B", 4)
    ELF(data)
    assertion([('error', ('Invalid ELF, wordsize defined to %d', 128), {})],
              log_history,
              'Invalid ELF word size (logs)')
    log_history = []
    # Start again from a fresh default ELF, to test one defect at a time
    data = StrPatchwork(ELF().pack())
    data[5] = struct.pack("B", 0)
    ELF(data)
    assertion([('error', ('Invalid ELF, endianess defined to %d', 0), {})],
              log_history,
              'Invalid ELF endianess (logs)')
    log_history = []
282 | 
def test_ELF_tiny84(assertion):
    """
    Non-regression checks on 'tiny84.bin': parsing logs the warning
    'No section (e.g. core file)' and packing gives back the same bytes.
    """
    global log_history
    elf_tiny = open_read(__dir__+'/binary_input/tiny84.bin')
    assertion('90f9fa06566389883d82b9cda016b10d',
              hashlib.md5(elf_tiny).hexdigest(),
              'Reading tiny84')
    e = ELF(elf_tiny)
    # The logs are checked and reset before packing
    assertion([('warn', ('No section (e.g. core file)',), {})],
              log_history,
              'tiny84 (logs)')
    log_history = []
    d = e.pack()
    # Same md5 as the input file
    assertion('90f9fa06566389883d82b9cda016b10d',
              hashlib.md5(d).hexdigest(),
              'Packing after reading tiny84')
298 | 
def test_ELF_tiny76(assertion):
    """
    Non-regression checks on 'tiny76.bin': packing gives back the same
    bytes, and the only log of parsing followed by packing is the warning
    'No section (e.g. core file)'.
    """
    global log_history
    elf_tiny = open_read(__dir__+'/binary_input/tiny76.bin')
    assertion('3a5753c93c492d2d1d3fc6c227baec7a',
              hashlib.md5(elf_tiny).hexdigest(),
              'Reading tiny76')
    e = ELF(elf_tiny)
    d = e.pack()
    # Same md5 as the input file
    assertion('3a5753c93c492d2d1d3fc6c227baec7a',
              hashlib.md5(d).hexdigest(),
              'Packing after reading tiny76')
    # Here the logs are checked after packing: they cover both steps
    assertion([('warn', ('No section (e.g. core file)',), {})],
              log_history,
              'tiny76 (logs)')
    log_history = []
314 | 
def test_ELF_tiny64(assertion):
    """
    Non-regression checks on 'tiny64.bin': parsing logs the warning
    'No section (e.g. core file)', and packing gives bytes that differ
    from the input file (another reference md5 is expected).
    """
    global log_history
    elf_tiny = open_read(__dir__+'/binary_input/tiny64.bin')
    assertion('0dd8a6325f7cf633ed8c527add5dc634',
              hashlib.md5(elf_tiny).hexdigest(),
              'Reading tiny64')
    e = ELF(elf_tiny)
    # The logs are checked and reset before packing
    assertion([('warn', ('No section (e.g. core file)',), {})],
              log_history,
              'tiny64 (logs)')
    log_history = []
    d = e.pack()
    # Not identical, it is an invalid ELF, with invalid section headers
    assertion('05ab778ceccbbf67840d5d35bcd84ed9',
              hashlib.md5(d).hexdigest(),
              'Packing after reading tiny64')
331 | 
def test_ELF_tiny52(assertion):
    """
    Non-regression checks on 'tiny52.bin': parsing logs three errors
    (endianess, offset to section headers, Ehdr version) and packing
    gives back the same bytes.
    """
    global log_history
    elf_tiny = open_read(__dir__+'/binary_input/tiny52.bin')
    assertion('18ddd4966cb003b80862735d19ddbeb7',
              hashlib.md5(elf_tiny).hexdigest(),
              'Reading tiny52')
    e = ELF(elf_tiny)
    # The errors are expected in this exact order
    assertion([('error', ('Invalid ELF, endianess defined to %d', 0), {}),
               ('error', ('Offset to section headers after end of file',), {}),
               ('error', ('Ehdr version is 65568 instead of 1',), {})],
              log_history,
              'tiny52 (logs)')
    log_history = []
    d = e.pack()
    # Same md5 as the input file
    assertion('18ddd4966cb003b80862735d19ddbeb7',
              hashlib.md5(d).hexdigest(),
              'Packing after reading tiny52')
349 | 
def test_ELF_tiny45(assertion):
    """
    Non-regression checks on 'tiny45.bin': parsing logs the same three
    errors as for tiny52, and packing produces the bytes of tiny52
    (same reference md5 as in test_ELF_tiny52).
    """
    global log_history
    elf_tiny = open_read(__dir__+'/binary_input/tiny45.bin')
    assertion('44023f74799f2e009a1400c74de50cdd',
              hashlib.md5(elf_tiny).hexdigest(),
              'Reading tiny45')
    e = ELF(elf_tiny)
    # The errors are expected in this exact order
    assertion([('error', ('Invalid ELF, endianess defined to %d', 0), {}),
               ('error', ('Offset to section headers after end of file',), {}),
               ('error', ('Ehdr version is 65568 instead of 1',), {})],
              log_history,
              'tiny45 (logs)')
    log_history = []
    d = e.pack()
    # packing tiny45 generates tiny52 :-)
    assertion('18ddd4966cb003b80862735d19ddbeb7',
              hashlib.md5(d).hexdigest(),
              'Packing after reading tiny45')
    # NOTE(review): log_history was reset just before pack(), therefore this
    # only checks that pack() logged nothing, although the message reads
    # like a check over all the tests -- confirm the intent
    assertion([],
              log_history,
              'No non-regression test created unwanted log messages')
371 | 
def run_test(assertion):
    """
    Call, with 'assertion' as sole argument, every global of this module
    whose name starts with 'test_'.
    """
    # Work on a copy of the globals, taken before any test is run
    snapshot = dict(globals())
    for name in snapshot:
        if not name.startswith('test_'):
            continue
        snapshot[name](assertion)
376 | 
# When executed as a script, hand the test collector 'run_test' over to
# 'run_tests' (which is not defined in this part of the file).
if __name__ == "__main__":
    run_tests(run_test)
379 | 


--------------------------------------------------------------------------------
/elfesteem/cstruct.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import struct, re
  4 | 
  5 | # To be compatible with python 2 and python 3
  6 | data_empty = struct.pack("")
  7 | data_null = struct.pack("B",0)
  8 | 
  9 | import sys
 10 | if sys.version_info[0] < 3:
 11 |     bytes_to_name = lambda s: s
 12 |     name_to_bytes = lambda s: s
 13 | else:
 14 |     bytes_to_name = lambda s: s.decode(encoding="latin1")
 15 |     name_to_bytes = lambda s: s.encode(encoding="latin1")
 16 | 
 17 | class CBase(object):
 18 |     """
 19 |     This is the base class, used to define CString, CStruct, CArray
 20 | 
 21 |     Functions to manipulate a CBase object
 22 |       unpack(): two args (c, o) the bytestring and the starting offset
 23 |       pack():   creates a byte string from the object content
 24 |       bytelen:  length of this byte string
 25 |       pprint(): representation of the object, that can be used by pprint
 26 |       update(): named args, that change the object content
 27 | 
 28 |     Parameters used to create a CBase object from a bytestring:
 29 |       parent:  parent object (mandatory)
 30 |       content: binary stream to initialize the object
 31 |       start:   offset where to start parsing the content
 32 |       sex and wsize: endianess and wordsize
 33 |     """
 34 |     def __init__(self, *args, **kargs):
 35 |         if not 'parent' in kargs:
 36 |             # Old API of elfesteem
 37 |             # e.g. used by miasm2's example/jitter/unpack_upx.py
 38 |             kargs['parent'] = args[0]
 39 |         self._parent_parse(kargs)
 40 |         self._initialize()
 41 |         if 'content' in kargs:
 42 |             if not 'start' in kargs: kargs['start'] = 0
 43 |             if 'count' in kargs:
 44 |                 self.count = lambda c=kargs['count']: c
 45 |                 del kargs['count']
 46 |             self.unpack(kargs['content'], kargs['start'])
 47 |             del kargs['content']
 48 |             del kargs['start']
 49 |         self.update(**kargs)
 50 |     def _parent_parse(self, kargs):
 51 |         self.parent = kargs['parent']
 52 |         if not 'sex'   in kargs: kargs['sex']   = self.parent.sex
 53 |         if not 'wsize' in kargs: kargs['wsize'] = self.parent.wsize
 54 |         self.sex   = kargs['sex']
 55 |         self.wsize = kargs['wsize']
 56 |         del kargs['parent']
 57 |         del kargs['sex']
 58 |         del kargs['wsize']
 59 |     def _initialize(self):
 60 |         # For default values
 61 |         pass
 62 |     def update(self, **kargs):
 63 |         pass
 64 | 
 65 |     def __len__(self):
 66 |         # We don't use __len__ for the length in bytes, because we want to be able
 67 |         # to use it for the number of elements of a CArray
 68 |         raise AttributeError("__len__ not defined for '%s'"%self.__class__.__name__)
 69 |     def bytelen(self):
 70 |         return self._size
 71 |     bytelen = property(bytelen)
 72 | 
 73 |     def _size_align(self, o):
 74 |         s = o._size
 75 |         if hasattr(self, '_align'):
 76 |             s += ((self._align - s % self._align) % self._align)
 77 |         return s
 78 |     def _pack_align(self, o):
 79 |         s = o.pack()
 80 |         if hasattr(self, '_align'):
 81 |             s += '\0' * ((self._align - o._size % self._align) % self._align)
 82 |         return s
 83 | 
 84 | class CString(CBase):
 85 |     def set_value(self, s):
 86 |         self.X = s
 87 |         self._size = len(self.X) + 1
 88 |     def unpack(self, c, o):
 89 |         self.set_value(c[o:c.find(data_null,o)])
 90 |         self._off = o
 91 |     def update(self, **kargs):
 92 |         # If 's' is an argument, then the string value is set to s
 93 |         if 's' in kargs:
 94 |             self.set_value(kargs['s'])
 95 |     def _initialize(self):
 96 |         self.set_value(data_empty)
 97 |     def pack(self):
 98 |         return self.X + data_null
 99 |     def __str__(self):
100 |         return bytes_to_name(self.X)
101 |     def __repr__(self):
102 |         return '<CString %r>' % self.X
103 |     def pprint(self):
104 |         return self.X
105 | 
106 | from elfesteem.strpatchwork import StrPatchwork
class CData(object):
    # Generic class to be used at the end of a CStruct, to implement common
    # cases implemented in C as     struct s { ...; char data[]; }
    # CData(f) does not create a CData object: it returns a new subclass of
    # CBase, whose objects call f(parent) to know their size in bytes.
    # We use StrPatchwork because the data may be very long, and we want to
    # be able to modify it very efficiently.
    def __new__(cls, f):
        class CDataInstance(CBase):
            def _initialize(self, f=f):
                # 'f' has been bound as default value when the class
                # was created
                self._size = f(self.parent)
                self._data = StrPatchwork()
            def unpack(self, c, o):
                end = o + self._size
                self._data[0] = c[o:end]
            def pack(self):
                return self._data.pack()
            def __str__(self):
                raw = self.pack()
                return raw.decode('latin1')
            # Indexing is delegated to the underlying StrPatchwork
            def __getitem__(self, item):
                return self._data[item]
            def __setitem__(self, item, value):
                self._data[item] = value
        return CDataInstance
128 | 
# type_size maps a struct format character to its size in bits, and a
# CStruct type name ('u08' ... 's64') to a struct format character.
# size2type (resp. size2type_s) maps a size in bits to the unsigned
# (resp. signed) struct format character of that size.
type_size = {}
size2type = {}
size2type_s = {}

for t in ('B', 'H', 'I', 'Q'):
    s = struct.calcsize(t)
    type_size[t] = 8*s
    size2type[8*s] = t

for t in ('b', 'h', 'i', 'q'):
    s = struct.calcsize(t)
    type_size[t] = 8*s
    size2type_s[8*s] = t

type_size.update({
    'u08': size2type[8],
    'u16': size2type[16],
    'u32': size2type[32],
    'u64': size2type[64],
    's08': size2type_s[8],
    's16': size2type_s[16],
    's32': size2type_s[32],
    's64': size2type_s[64],
    })
152 | 
def convert_size2type(ftype, wsize):
    """
    Convert the type of a CStruct field to a piece of struct format.

    A type that is not a string gives an empty format; a type starting
    with digits followed by 's' (e.g. '16s') is kept as is; 'ptr' is
    converted according to the word size; the other types are looked up
    in type_size.  An unknown type raises ValueError.
    """
    if not isinstance(ftype, str):
        return ''
    if re.match(r'\d+s', ftype):
        return ftype
    if ftype == "ptr":
        return size2type[wsize]
    if ftype in type_size:
        return type_size[ftype]
    raise ValueError("unkown CStruct type", ftype)
164 | 
class CStruct_metaclass(type):
    """
    metaclass, with a syntax compatible with python2 and python3

    For each pair of the '_fields' of the class being created, a property
    with the name of the field is added to the class; reading it calls
    getf(fname) and writing it calls setf(fname, value).
    """
    _prefix = "_field_" # To avoid namespace collisions
    def __new__(cls, name, bases, dct):
        def field_property(fname):
            # One call per field, such that each property is bound to its
            # own value of 'fname'
            def read(self):
                return self.getf(fname)
            def write(self, v):
                self.setf(fname, v)
            return property(read, write, None)
        for fname, _ in dct.get('_fields', ()):
            dct[fname] = field_property(fname)
        return type.__new__(cls, name, bases, dct)
178 | 
# The base class is created by a direct call to the metaclass: this syntax
# is valid with python2 and python3.
CStruct_base = CStruct_metaclass('CStruct_base', (CBase,), {})
class CStruct(CStruct_base):
    """
    The class CStruct is inherited by classes that simply
    represent a concatenation of typed fields

    How to create a CStruct class:
      _fields list the pairs (field_name, field_type)
      if the last fields are (field_name, class), they are optional
      _align: an optional integer value for alignment of optional fields

    How to create a CStruct object:
      the keywords not used by CBase initialise the object fields

    How to use a CStruct object:
      in addition to the CBase interface, the fields can be modified

    Field types:
      basic types with fixed size (u08, ..., 16s)
      wsize-dependent type (ptr)
    """

    # The value of a field is stored in the attribute '_0'+fname; the
    # properties created by CStruct_metaclass call getf and setf.
    def getf(self, fname):
        return getattr(self,'_0'+fname)
    def setf(self, fname, v):
        return setattr(self,'_0'+fname,v)

    # When not empty, it is used in the struct format instead of the
    # endianess 'sex' (see _parent_parse)
    _packformat = ""

    def _parent_parse(self, kargs):
        """
        In addition to CBase._parent_parse, compute the struct format
        string of the fixed part, and the lists of fixed fields (_names)
        and of optional fields (_opt).
        """
        CBase._parent_parse(self, kargs)
        if self._packformat:
            self.sex = ""
        self._format = {}
        pstr = []
        for fname, ftype in self._fields:
            # Fields whose type is a class are converted to '': they don't
            # contribute to the format string
            ftype = convert_size2type(ftype, self.wsize)
            self._format[fname] = ftype
            pstr.append(ftype)
        self._packstring =  self.sex + self._packformat+"".join(pstr)
        # Fixed fields have a type that is a string, optional ones a class
        self._names = [x[0] for x in self._fields if isinstance(x[1],str)]
        self._opt = [x for x in self._fields if not isinstance(x[1],str)]

    def unpack(self, c, o):
        """
        Parse the fields from the byte string c, starting at offset o.
        """
        self._size = struct.calcsize(self._packstring)
        s = c[o:o+self._size]
        # If the content is too short, it is completed with null bytes
        s += data_null*(self._size-len(s))
        disas = struct.unpack(self._packstring, s)
        for n,v in zip(self._names,disas):
            setattr(self, n, v)
        # If the last fields are optional data, their types are a class
        for fname, fclass in self._opt:
            # self._size grows at each iteration: each optional field is
            # parsed after the (aligned) end of the previous one
            v = fclass(parent=self, content=c, start=o+self._size)
            self._size += self._size_align(v)
            self.setf(fname, v)

    def _initialize(self):
        """
        Set the default value of each field, and the resulting size.
        """
        self._size = struct.calcsize(self._packstring)
        for f in self._names:
            # Default values
            if self._format[f].endswith('s'): self.setf(f,data_empty)
            else:                             self.setf(f,0)
        for fname, fclass in self._opt:
            v = fclass(parent=self)
            self._size += self._size_align(v)
            self.setf(fname, v)

    def update(self, **kargs):
        """
        Set the fixed fields named in kargs, then forward all the keywords
        to each optional field.
        """
        for f in [f for f in kargs if f in self._names]:
            self.setf(f,kargs[f])
        for fname, fclass in self._opt:
            v = self.getf(fname)
            # The size of the optional field may change: it is subtracted
            # before the update and added again afterwards
            self._size -= self._size_align(v)
            v.update(**kargs)
            self._size += self._size_align(v)

    def pack(self):
        """
        Byte string made of the fixed fields followed by the optional
        fields; ValueError is raised if its length is not bytelen.
        """
        fields = [getattr(self, x) for x in self._names]
        s = struct.pack(self._packstring, *fields)
        for fname, fclass in self._opt:
            s += self._pack_align(self.getf(fname))
        if self.bytelen != len(s):
            raise ValueError("Inconsistent size %d != %d for %r"
                % (self.bytelen,len(s), self.__class__.__name__))
        return s

    def __str__(self):
        raise AttributeError("Use pack() instead of str()")

    def pprint(self):
        """
        Pair made of the class name and of a dictionary of the field
        values; a value that has a pprint method is replaced by its result.
        """
        rep = { }
        for fname, _ in self._fields:
            rep[fname] = getattr(self, fname)
            if hasattr(rep[fname], 'pprint'):
                rep[fname] = rep[fname].pprint()
        return ( "<%s>" % self.__class__.__name__, rep )

    def __repr__(self):
        # Class name, then the repr of each field, separated by '/'
        return "<%s=%s>" % (self.__class__.__name__,
            "/".join(map(lambda x:repr(getattr(self,x[0])),self._fields)))

    def __getitem__(self, item): # to work with format strings
        return getattr(self, item)
282 | 
class CStructWithStrTable(CStruct):
    """
    CStruct with a 'name' attribute which is not stored in the structure:
    it is computed from an integer index 'name_idx' and a link to the
    string table 'strtab'.
    """
    def get_name(self):
        return self.strtab.get_name(self.name_idx)
    def set_name(self, name):
        if self.name_idx != 0:
            # There is already an entry in the string table: modify it
            self.strtab.mod_name(self.name_idx, name)
        else:
            # Index 0: add the name to the string table, and store the
            # index that it returns
            self.name_idx = self.strtab.add_name(name)
    name = property(get_name, set_name)
    def update(self, **kargs):
        CStruct.update(self, **kargs)
        # 'name' is not one of the fields, it is handled separately
        if 'name_idx' in self._names and 'name' in kargs:
            self.name = kargs['name']
298 | 
class CArray_metaclass(type):
    """
    metaclass, with a syntax compatible with python2 and python3

    It refuses to create a class that defines '_cls' neither in its own
    body nor in the body of one of its direct bases, unless the name of
    the class starts with 'CArray'.
    """
    def __new__(cls, name, bases, dct):
        namespaces = [dct]
        for base in bases:
            namespaces.append(base.__dict__)
        declared = [ns for ns in namespaces if '_cls' in ns]
        if not (declared or name.startswith('CArray')):
            raise ValueError("Class %r should define '_cls'"%name)
        return type.__new__(cls, name, bases, dct)
310 | 
# The base class is created by a direct call to the metaclass: this syntax
# is valid with python2 and python3.
CArray_base = CArray_metaclass('CArray_base', (CBase,), {})
class CArray(CArray_base):
    """
    The class CArray is inherited by classes that represent
    a variable length array of objects of variable length.

    How to create a CArray subclass:
      _cls: the class of the array elements
      count (optional): method that returns the number of elements

    How to use a CArray object:
      in addition to the CBase interface,
      [item] gives access to an element of the array
      len gives the number of elements
      append adds an element to the array
      _array is the whole array
      _last is the terminating element, if count is not defined
    """
    def _initialize(self):
        self._array = [] # Elements of the array
        self._size  = 0
        if not hasattr(self, 'count'):
            # Array end is decided by a terminating element
            # which is detected by 'stop', or by default by
            # comparing with the default value of an object
            # of class _cls
            self._last  = self._cls(parent=self)
            # The terminating element is part of the packed array
            self._size  += self._size_align(self._last)

    def pack(self):
        """
        Byte string made of the (aligned) elements followed by the
        terminating element, if any; ValueError is raised if its length
        is not the expected size.
        """
        s = data_empty.join([self._pack_align(o) for o in self._array])
        if hasattr(self, '_last'): s += self._pack_align(self._last)
        if self._size != len(s):
            raise ValueError("Inconsistent size %d != %d for %r"
                % (self._size,len(s), self.__class__.__name__))
        return s

    def stop(self, elt):
        # By default, the array ends with an element that packs to the
        # same bytes as the terminating element
        return elt.pack() == self._last.pack()

    def unpack(self, c, o):
        """
        Parse the elements from the byte string c, starting at offset o;
        nothing is done if o is None.
        """
        if o is None: return
        self._off = o
        if hasattr(self, 'count'):
            # self.count() is recomputed each time
            # This enables complicated conditions for array termination
            idx = 0
            while idx < self.count():
                # Stop when the end of the content is reached
                if o+self._size >= len(c):
                    break
                # self._size is the offset of the next element in the array
                elt = self._cls(parent=self, content=c, start=o+self._size)
                self._array.append(elt)
                self._size += self._size_align(elt)
                idx += 1
        else:
            # self._size already includes the terminating element: the
            # position in the content is counted separately, in pos
            pos = 0
            while True:
                if o+pos >= len(c):
                    break
                elt = self._cls(parent=self, content=c, start=o+pos)
                # The element for which stop() is true is not stored
                if self.stop(elt):
                    break
                self._array.append(elt)
                pos += self._size_align(elt)
            self._size += pos

    def __getitem__(self, item):
        return self._array[item]

    def __len__(self):
        # Number of elements, the terminating element is not counted
        return len(self._array)

    def append(self, obj):
        """
        Add obj at the end of the array, update the size, and return obj.
        """
        self._array.append(obj)
        self._size += self._size_align(self._array[-1])
        return obj

    def pprint(self):
        return ("<%s>"%self.__class__.__name__,
                [x.pprint() for x in self._array],
               )

    def __repr__(self):
        return "<%s of length %d>" % (self.__class__.__name__, len(self))
395 | 
# Method that defines constants (as in .h headers) and tables that
# can recover the constant's name from its value.
def Constants(globs = None, table = None,
              name = None, prefix = None,
              no_name = (), **kargs):
    """
    Define the constants given as keywords and register their names.

    Parameters:
      globs:   dictionary where each constant is stored (globs[key] = value)
      table:   dictionary of tables; table[name] maps each value to the
               name of the constant, without its prefix
      name:    name of the table; by default the prefix, without its
               trailing '_'; if it is '' no table is filled
      prefix:  prefix removed from the names of the constants; by default
               the longest prefix common to all the keywords
      no_name: constants that are defined in globs, but not added to the table
      kargs:   the constants

    A message is printed when a value is registered twice in a table; the
    last constant wins.
    """
    if prefix is None:
        # Use the prefix common to all value names
        keys = list(kargs)
        if keys:
            prefix = keys[0]
            for k in keys[1:]:
                while not k.startswith(prefix):
                    prefix = prefix[:-1]
        else:
            # No constant at all: there is no common prefix; without this
            # default, prefix stayed None and prefix.endswith below raised
            # AttributeError.
            prefix = ''
    if name is None:
        if prefix.endswith('_'): name = prefix[:-1]
        else:                    name = prefix
    if name != '' and not name in table: table[name] = {}
    for k in kargs:
        globs[k] = kargs[k]
        if name != '':
            # Only the constants with the right prefix are in the table
            if k.startswith(prefix) and not k in no_name:
                if kargs[k] in table[name]:
                    print("Duplicate at %s[%s]=%s; %s"%(name,kargs[k],table[name][kargs[k]],k))
                table[name][kargs[k]] = k[len(prefix):]
420 | 


--------------------------------------------------------------------------------
/examples/readelf.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | import sys, os
  3 | 
  4 | if sys.version_info[0] == 2 and sys.version_info[1] < 5:
  5 |     sys.stderr.write("python version older than 2.5 is not supported\n")
  6 |     exit(1)
  7 | 
  8 | sys.path.insert(1, os.path.abspath(sys.path[0]+'/..'))
  9 | from elfesteem import elf_init, elf
 10 | 
 11 | import subprocess
 12 | def popen_read_out_err(cmd):
 13 |     p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 14 |     p.wait()
 15 |     p.stdin.close()
 16 |     return p.stdout.read() + p.stderr.read()
 17 | 
 18 | import re
 19 | def get_readelf_version():
 20 |     readelf_v = popen_read_out_err(["readelf", "--version"])
 21 |     if type(readelf_v) != str: readelf_v = str(readelf_v, encoding='latin1')
 22 |     r = re.search(r'GNU readelf .* (\d+\.\d+)', readelf_v)
 23 |     if r:
 24 |         sys.stderr.write("readelf version %s\n" % float(r.groups()[0]))
 25 |         return float(r.groups()[0])
 26 |     else:
 27 |         sys.stderr.write("Could not detect readelf version\n")
 28 |         sys.stderr.write(readelf_v)
 29 |         return None
 30 | 
# Text displayed for each known ELF file type (the e_type header field),
# looked up with expand_code() when printing the file header and the
# program headers.
et_strings = {
    elf.ET_REL: 'REL (Relocatable file)',
    elf.ET_EXEC: 'EXEC (Executable file)',
    elf.ET_DYN: 'DYN (Shared object file)',
    elf.ET_CORE: 'CORE (Core file)',
    }
 37 | def expand_code(table, val):
 38 |     if val in table: return table[val]
 39 |     return '<unknown>: %#x' % val
 40 | 
 41 | def is_pie(e):
 42 |     # binutils 2.37
 43 |     # 2021-06-15 https://github.com/bminor/binutils-gdb/commit/93df3340fd5ad32f784214fc125de71811da72ff
 44 |     for i, sh in enumerate(e.sh):
 45 |         if sh.sh.type != elf.SHT_DYNAMIC:
 46 |             continue
 47 |         if e.wsize == 32:
 48 |             dyntab = sh.dyntab[:-2]
 49 |         elif e.wsize == 64:
 50 |             dyntab = sh.dyntab[:-1]
 51 |         for d in dyntab:
 52 |             if d.type == elf.DT_FLAGS_1 and d.name & elf.DF_1_PIE:
 53 |                 return True
 54 |     return False
 55 | 
 56 | def display_headers(e):
 57 |     print("ELF Header:")
 58 |     import struct
 59 |     ident = struct.unpack('16B', e.Ehdr.ident)
 60 |     print("  Magic:   %s "%' '.join(['%02x'%_ for _ in ident]))
 61 |     print("  Class:                             %s"%expand_code({
 62 |         elf.ELFCLASS32: 'ELF32',
 63 |         elf.ELFCLASS64: 'ELF64',
 64 |         }, ident[elf.EI_CLASS]))
 65 |     print("  Data:                              %s"%expand_code({
 66 |         elf.ELFDATA2LSB: "2's complement, little endian",
 67 |         elf.ELFDATA2MSB: "2's complement, big endian",
 68 |         }, ident[elf.EI_DATA]))
 69 |     print("  Version:                           %s"%expand_code({
 70 |         1: '1 (current)',
 71 |         }, ident[elf.EI_VERSION]))
 72 |     print("  OS/ABI:                            %s"%expand_code({
 73 |         0: 'UNIX - System V',
 74 |         }, ident[elf.EI_OSABI]))
 75 |     print("  ABI Version:                       %d"%ident[elf.EI_ABIVERSION])
 76 |     elf_file_type = expand_code(et_strings, e.Ehdr.type)
 77 |     if e.Ehdr.type == elf.ET_DYN and elf.is_pie(e):
 78 |         elf_file_type = 'DYN (Position-Independent Executable file)'
 79 |     print("  Type:                              %s"%elf_file_type)
 80 |     machine_code = dict(elf.constants['EM'])
 81 |     # Same textual output as readelf, from readelf.c
 82 |     machine_code[elf.EM_M32]            = 'ME32100'
 83 |     machine_code[elf.EM_SPARC]          = 'Sparc'
 84 |     machine_code[elf.EM_386]            = 'Intel 80386'
 85 |     machine_code[elf.EM_68K]            = 'MC68000'
 86 |     machine_code[elf.EM_88K]            = 'MC88000'
 87 |     machine_code[elf.EM_486]            = 'Intel 80486'
 88 |     machine_code[elf.EM_860]            = 'Intel 80860'
 89 |     machine_code[elf.EM_MIPS]           = 'MIPS R3000'
 90 |     machine_code[elf.EM_S370]           = 'IBM System/370'
 91 |     machine_code[elf.EM_MIPS_RS3_LE]    = 'MIPS R4000 big-endian'
 92 |     machine_code[elf.EM_PARISC]         = 'HPPA'
 93 |     machine_code[elf.EM_SPARC32PLUS]    = 'Sparc v8+'
 94 |     machine_code[elf.EM_960]            = 'Intel 80960'
 95 |     machine_code[elf.EM_PPC]            = 'PowerPC'
 96 |     machine_code[elf.EM_PPC64]          = 'PowerPC64'
 97 |     machine_code[elf.EM_V800]           = 'NEC V800'
 98 |     machine_code[elf.EM_FR20]           = 'Fujitsu FR20'
 99 |     machine_code[elf.EM_RH32]           = 'TRW RH32'
100 |     machine_code[elf.EM_ARM]            = 'ARM'
101 |     machine_code[elf.EM_FAKE_ALPHA]     = 'Digital Alpha (old)'
102 |     machine_code[elf.EM_SH]             = 'Renesas / SuperH SH'
103 |     machine_code[elf.EM_SPARCV9]        = 'Sparc v9'
104 |     machine_code[elf.EM_TRICORE]        = 'Siemens Tricore'
105 |     machine_code[elf.EM_ARC]            = 'ARC'
106 |     machine_code[elf.EM_H8_300]         = 'Renesas H8/300'
107 |     machine_code[elf.EM_H8_300H]        = 'Renesas H8/300H'
108 |     machine_code[elf.EM_H8S]            = 'Renesas H8S'
109 |     machine_code[elf.EM_H8_500]         = 'Renesas H8/500'
110 |     machine_code[elf.EM_IA_64]          = 'Intel IA-64'
111 |     machine_code[elf.EM_MIPS_X]         = 'Stanford MIPS-X'
112 |     machine_code[elf.EM_COLDFIRE]       = 'Motorola Coldfire'
113 |     machine_code[elf.EM_X86_64]         = 'Advanced Micro Devices X86-64'
114 |     print("  Machine:                           %s"%expand_code(machine_code, e.Ehdr.machine))
115 |     print("  Version:                           %#x"%e.Ehdr.version)
116 |     print("  Entry point address:               %#x"%e.Ehdr.entry)
117 |     print("  Start of program headers:          %d (bytes into file)"%e.Ehdr.phoff)
118 |     print("  Start of section headers:          %d (bytes into file)"%e.Ehdr.shoff)
119 |     print("  Flags:                             %#x"%e.Ehdr.flags)
120 |     print("  Size of this header:               %d (bytes)"%e.Ehdr.ehsize)
121 |     print("  Size of program headers:           %d (bytes)"%e.Ehdr.phentsize)
122 |     print("  Number of program headers:         %d"%e.Ehdr.phnum)
123 |     print("  Size of section headers:           %d (bytes)"%e.Ehdr.shentsize)
124 |     print("  Number of section headers:         %d"%e.Ehdr.shnum)
125 |     print("  Section header string table index: %d"%e.Ehdr.shstrndx)
126 | 
def display_program_headers(e):
    """Print the program headers of ``e`` and the section-to-segment mapping.

    The layout follows ``readelf -l``.  Nothing but a one-line notice is
    printed when the file has no program header; the mapping is omitted
    when the file has no section.
    """
    # Output format similar to readelf -l
    if len(e.ph.phlist) == 0:
        print("\nThere are no program headers in this file.")
        return
    print("\nElf file type is %s" % expand_code(et_strings, e.Ehdr.type))
    print("Entry point 0x%x" % e.Ehdr.entry)
    print("There are %d program headers, starting at offset %d" % (e.Ehdr.phnum, e.Ehdr.phoff))
    print("\nProgram Headers:")
    if e.wsize == 32:
        title = "  Type           Offset   VirtAddr   PhysAddr   FileSiz MemSiz  Flg Align"
        row = "  %-14s 0x%06x 0x%08x 0x%08x 0x%05x 0x%05x %-3s 0x%x"
    elif e.wsize == 64:
        title = "  Type           Offset             VirtAddr           PhysAddr\n                FileSiz            MemSiz              Flags  Align"
        row = "  %-14s 0x%016x 0x%016x 0x%016x\n                0x%016x 0x%016x  %-3s    %x"
    print(title)
    # Permission bits, in the order they are displayed
    permissions = ((4, 'R'), (2, 'W'), (1, 'E'))
    for p in e.ph:
        ph = p.ph
        flags = ''.join([letter if ph.flags & bit else ' '
                         for bit, letter in permissions])
        print(row%(elf.constants['PT'][ph.type],
                   ph.offset, ph.vaddr, ph.paddr,
                   ph.filesz, ph.memsz, flags,
                   ph.align))
        if ph.type == elf.PT_INTERP:
            # The interpreter path is the content of the segment's first section
            s = p.shlist[0]
            interp = e[s.sh.offset:s.sh.offset+s.sh.size].strip('\0')
            print('      [Requesting program interpreter: %s]' % interp)
    if len(e.sh.shlist) == 0:
        return
    print("\n Section to Segment mapping:")
    print("  Segment Sections...")
    for i, p in enumerate(e.ph):
        names = "".join([s.sh.name + " " for s in p.shlist])
        print("   %02d     " % i + names)
164 | 
def display_dynamic(e):
    """Print every dynamic section of ``e``, formatted like ``readelf -d``.

    Tag names are looked up first among the tags specific to the file's
    machine, then among the generic tags.  The way a value is rendered
    (library name, byte count, address, flag list, raw value) depends on
    the tag name.
    """
    title = "  Tag        Type                         Name/Value"
    size_tags = ('STRSZ','SYMENT','RELSZ','RELENT','PLTRELSZ','RELASZ')
    address_tags = ('PLTGOT','HASH','STRTAB','SYMTAB','INIT','FINI','REL',
                    'JMPREL','DEBUG','RELA',
                    'CHECKSUM','VERNEED',
                    'GNU_HASH',
                    'MIPS_BASE_ADDRESS','MIPS_LIBLIST','MIPS_GOTSYM',
                    'MIPS_HIDDEN_GOTIDX','MIPS_PROTECTED_GOTIDX',
                    'MIPS_LOCAL_GOTIDX','MIPS_LOCALPAGE_GOTIDX',
                    'MIPS_SYMBOL_LIB','MIPS_MSYM','MIPS_CONFLICT',
                    'MIPS_RLD_MAP','MIPS_OPTIONS',
                    'MIPS_INTERFACE','MIPS_INTERFACE_SIZE')
    # Names of the MIPS_FLAGS bits, least significant bit first
    mips_flags = ('QUICKSTART', 'NOTPOT', 'NO_LIBRARY_REPLACEMENT',
                  'NO_MOVE', 'SGI_ONLY', 'GUARANTEE_INIT',
                  'DELTA_C_PLUS_PLUS', 'GUARANTEE_START_INIT',
                  'PIXIE', 'DEFAULT_DELAY_LOAD', 'REQUICKSTART',
                  'REQUICKSTARTED', 'CORD', 'NO_UNRES_UNDEF',
                  'RLD_ORDER_SAFE')
    machine = elf.constants['EM'][e.Ehdr.machine]
    for sh in e.sh:
        if sh.sh.type != elf.SHT_DYNAMIC:
            continue
        if e.wsize == 32:
            row = "%#010x %-28s  %s"
            dyntab = sh.dyntab[:-2]
        elif e.wsize == 64:
            row = "%#018x %-20s  %s"
            dyntab = sh.dyntab[:-1]
        print("\nDynamic section at offset %#x contains %d entries:" % (sh.sh.offset, len(dyntab)))
        print(title)
        for d in dyntab:
            specific = elf.constants['DT'].get(machine, {}).get(d.type, None)
            if specific is None:
                tag = elf.constants['DT'].get(d.type, None)
            else:
                tag = machine + '_' + specific
            if tag == 'NEEDED':
                value = 'Shared library: [%s]' % d.name
            elif tag in size_tags:
                value = '%d (bytes)' % d.name
            elif tag in address_tags:
                value = '%#x' % d.name
            elif tag == 'PLTREL':
                value = elf.constants['DT'].get(d.name, d.name)
            elif tag == 'MIPS_FLAGS':
                if d.name == 0:
                    value = 'NONE'
                else:
                    bits = reversed(bin(d.name)[2:])
                    value = ' '.join([ flag for (flag, bit)
                                       in zip(mips_flags, bits)
                                       if bit == '1' ])
            else:
                value = d.name
            print(row%(d.type, '(%s)'%tag, value))
218 | 
219 | 
def display_symbols(sections):
    """Print the ``readelf_display()`` text of each section, preceded by a blank line."""
    for section in sections:
        text = section.readelf_display()
        print("\n" + text)
223 | 
224 | 
225 | 
# Command-line entry point: a partial re-implementation of GNU readelf.
# Each display option appends a keyword to args.options; for every input
# file the selected parts are then printed in a fixed order.
if __name__ == '__main__':
    import argparse
    # -h is used for --file-header, as in readelf, so the automatic help
    # option is disabled and re-registered as -H
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-H', '--help', action='help', default=argparse.SUPPRESS, help='Display this information')
    parser.add_argument('-h', '--file-header', dest='options', action='append_const', const='headers',  help='Display the ELF file header')
    parser.add_argument('-S', '--section-headers', '--sections', dest='options', action='append_const', const='sections', help="Display the sections' header")
    parser.add_argument('-r', '--relocs', dest='options', action='append_const', const='reltab',   help='Display the relocations (if present)')
    parser.add_argument('-s', '--syms', '--symbols', dest='options', action='append_const', const='symtab',   help='Display the symbol table')
    parser.add_argument('--dyn-syms', dest='options', action='append_const', const='dynsym',   help='Display the dynamic symbol table')
    parser.add_argument('-d', '--dynamic', dest='options', action='append_const', const='dynamic',  help='Display the dynamic section (if present)')
    parser.add_argument('-l', '--program-headers', '--segments', dest='options', action='append_const', const='program',  help='Display the program headers')
    parser.add_argument('-g', '--section-groups', dest='options', action='append_const', const='groups',   help='Display the section groups')
    parser.add_argument('--readelf', dest='readelf_version', action='append', help='Simulate the output of a given version of readelf')
    parser.add_argument('file', nargs='+', help='ELF file(s)')
    args = parser.parse_args()
    if args.options is None:
        args.options = []

    # Default: never report a file as PIE; overridden below for readelf >= 2.37
    elf.is_pie = lambda _: False
    if args.readelf_version:
        # When --readelf is repeated, the last occurrence wins
        for readelf in args.readelf_version:
            if 'native' in readelf:
                readelf_version = get_readelf_version()
            else:
                readelf_version = float(readelf)
        if readelf_version is None:
            # get_readelf_version() could not detect the installed version.
            # Comparing None with a float raises TypeError on Python 3, so
            # fall back to a version older than every threshold below
            # (which is how Python 2 ordered None).
            readelf_version = 0.0
        if True:
            # TODO: readelf has a different output if "do_section_details" or "do_wide"
            elf.Shdr.header64 = ["  [Nr] Name              Type             Address           Offset",
                                 "       Size              EntSize          Flags  Link  Info  Align"]
            elf.Shdr.format64 = ("  [%(idx)2d] %(name17)-17s %(type_txt)-15s  %(addr)016x  %(offset)08x\n"
                                 "       %(size)016x  %(entsize)016x %(flags_txt)3s      %(link)2d    %(info)2d     %(addralign)d")
        # The version checks below are cumulative: each later match replaces
        # the footer installed by an earlier one.  The lambdas read the
        # module-level variable e, which is bound in the file loop below.
        if readelf_version >= 2.26:
            # 2016-01-20 https://github.com/bminor/binutils-gdb/commit/9fb71ee49fc37163697e4f34e16097928eb83d66
            elf.Shdr.footer = property(lambda _: [
                "Key to Flags:",
                "  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),",
                "  L (link order), O (extra OS processing required), G (group), T (TLS),",
                "  C (compressed), x (unknown), o (OS specific), E (exclude),",
                "  %sp (processor specific)" % (
                    "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else
                    "y (noread), " if e.Ehdr.machine == elf.EM_ARM else
                    "" ),
                ])
        if readelf_version >= 2.27:
            # 2016-07-05 https://github.com/bminor/binutils-gdb/commit/f0728ee368f217f2473798ad7ccfe9feae4412ce
            elf.Shdr.footer = property(lambda _: [
                "Key to Flags:",
                "  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),",
                "  L (link order), O (extra OS processing required), G (group), T (TLS),",
                "  C (compressed), x (unknown), o (OS specific), E (exclude),",
                "  %sp (processor specific)" % (
                    "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else
                    "y (purecode), " if e.Ehdr.machine == elf.EM_ARM else
                    "" ),
                ])
        if readelf_version >= 2.29: # more precisely 2.29.1
            # 2017-09-05 https://github.com/bminor/binutils-gdb/commit/83eef883581525d04df3a8e53a82c01d0d12b56a
            elf.Shdr.footer = property(lambda _: [
                "Key to Flags:",
                "  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),",
                "  L (link order), O (extra OS processing required), G (group), T (TLS),",
                "  C (compressed), x (unknown), o (OS specific), E (exclude),",
                "  %sp (processor specific)" % (
                    "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else
                    "y (purecode), " if e.Ehdr.machine == elf.EM_ARM else
                    "v (VLE), " if e.Ehdr.machine == elf.EM_PPC else
                    "" ),
                ])
        if readelf_version >= 2.36: # more precisely 2.36.1
            # 2021-02-02 https://github.com/bminor/binutils-gdb/commit/5424d7ed94cf5a7ca24636ab9f4e6d5c353fc0d3
            elf.Shdr.footer = property(lambda _: [
                "Key to Flags:",
                "  W (write), A (alloc), X (execute), M (merge), S (strings), I (info),",
                "  L (link order), O (extra OS processing required), G (group), T (TLS),",
                "  C (compressed), x (unknown), o (OS specific), E (exclude),",
                "  %s%sp (processor specific)" % (
                    "R (retain), D (mbind), " if e.Ehdr.ident[elf.EI_OSABI] in (elf.ELFOSABI_GNU, elf.ELFOSABI_FREEBSD) else
                    "D (mbind), " if e.Ehdr.ident[elf.EI_OSABI] == elf.ELFOSABI_NONE else
                    ""
                    ,
                    "l (large), " if e.Ehdr.machine in (elf.EM_X86_64, elf.EM_L10M, elf.EM_K10M) else
                    "y (noread), " if e.Ehdr.machine == elf.EM_ARM else
                    "" ),
                ])
        if readelf_version >= 2.35:
            # 2020-07-02 https://github.com/bminor/binutils-gdb/commit/0942c7ab94e554657c3e11ab85ae7f15373ee80d
            elf.Shdr.name17 = property(lambda _: _.name[:12]+"[...]" if len(_.name) > 17 else _.name)
        if readelf_version >= 2.37:
            # 2021-06-15 https://github.com/bminor/binutils-gdb/commit/93df3340fd5ad32f784214fc125de71811da72ff
            elf.is_pie = is_pie


    for file in args.file:
        if len(args.file) > 1:
            print("\nFile: %s" % file)
        fd = open(file, 'rb')
        try:
            raw = fd.read()
        finally:
            fd.close()
        e = elf_init.ELF(raw)
        if 'headers' in args.options:
            display_headers(e)
        if 'sections' in args.options:
            print(e.sh.readelf_display())
        if 'reltab' in args.options:
            for sh in e.sh:
                # Only sections exposing a 'rel' attribute are displayed
                if not 'rel' in dir(sh): continue
                print("\n" + sh.readelf_display())
        # The dynamic symbol table is shown for --syms as well as --dyn-syms
        if 'symtab' in args.options or 'dynsym' in args.options:
            display_symbols(e.getsectionsbytype(elf.SHT_DYNSYM))
        if 'symtab' in args.options:
            display_symbols(e.getsectionsbytype(elf.SHT_SYMTAB))
        if 'dynamic' in args.options:
            display_dynamic(e)
        if 'program' in args.options:
            display_program_headers(e)
        if 'groups' in args.options:
            for sh in e.sh:
                if not sh.sh.type == elf.SHT_GROUP: continue
                print(sh.readelf_display())
347 | 


--------------------------------------------------------------------------------
/examples/otool.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import sys, os
  4 | import platform
  5 | 
  6 | sys.path.insert(1, os.path.abspath(sys.path[0]+'/..'))
  7 | from elfesteem import macho_init, macho
  8 | 
  9 | def print_header(e, **fargs):
 10 |     print("Mach header")
 11 |     print("      magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags")
 12 |     print(" 0x%08x %7d %10d  0x%02x  %10u %5u %10u 0x%08x" %(e.Mhdr.magic,e.Mhdr.cputype ,e.Mhdr.cpusubtype & (0xffffffff ^ macho.CPU_SUBTYPE_MASK),(e.Mhdr.cpusubtype & macho.CPU_SUBTYPE_MASK) >> 24,e.Mhdr.filetype,e.Mhdr.ncmds,e.Mhdr.sizeofcmds,e.Mhdr.flags))
 13 | 
 14 | import subprocess
 15 | def popen_read_out_err(cmd):
 16 |     p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 17 |     p.wait()
 18 |     p.stdin.close()
 19 |     return p.stdout.read() + p.stderr.read()
 20 | 
 21 | import re
 22 | def get_otool_version():
 23 |     otool_v = popen_read_out_err(["otool", "--version"])
 24 |     if type(otool_v) != str: otool_v = str(otool_v, encoding='latin1')
 25 |     r = re.search(r' LLVM version (\d+)', otool_v)
 26 |     if r:
 27 |         return int(r.groups()[0])
 28 |     else:
 29 |         sys.stderr.write("Could not detect otool version\n")
 30 |         sys.stderr.write(otool_v)
 31 |         return None
 32 | 
 33 | def split_integer(v, nbits, ndigits, truncate=None):
 34 |     mask = (1<<nbits)-1
 35 |     res = []
 36 |     while ndigits > 0:
 37 |         res.insert(0, v & mask)
 38 |         v = v >> nbits
 39 |         ndigits -= 1
 40 |     res[0] += v << nbits
 41 |     if truncate is not None:
 42 |         while len(res) > truncate and res[-1] == 0:
 43 |             res = res[:-1]
 44 |     return ".".join(["%u"%_ for _ in res])
 45 | 
 46 | def print_lc(e, llvm=False, **fargs):
 47 |     for i, lc in enumerate(e.load):
 48 |         print("Load command %u" %i)
 49 |         print("\n".join(lc.otool(llvm=llvm)))
 50 | 
 51 | 
 52 | 
 53 | def print_symbols(e, **fargs):
 54 |     for sect in e.sect:
 55 |         if type(sect) != macho_init.SymbolTable:
 56 |             continue
 57 |         print("%-35s %-15s %-4s %-10s %s"%("Symbol","Section","Type","Value","Description"))
 58 |         for symbol in sect.symbols:
 59 |             print(symbol.otool())
 60 | 
 61 | def print_dysym(e, **fargs):
 62 |     # Display indirect symbol tables
 63 |     for sect in e.sect:
 64 |         if getattr(sect, 'type', None) is None:
 65 |             continue
 66 |         elif sect.type == 'indirectsym':
 67 |             print("Indirect symbols [%d entries]"%len(sect))
 68 |             print("%5s %s"%("index","name"))
 69 |             for entry in sect:
 70 |                 entry = entry.index
 71 |                 if   entry == macho.INDIRECT_SYMBOL_LOCAL:
 72 |                     print("%5s" % "LOCAL")
 73 |                 elif entry == macho.INDIRECT_SYMBOL_ABS:
 74 |                     print("%5s" % "ABSOLUTE")
 75 |                 elif 0 <= entry < len(e.symbols.symbols):
 76 |                     print("%5s %s" % (entry,e.symbols.symbols[entry].name))
 77 |                 else:
 78 |                     print("INVALID(%d)" % entry)
 79 |         elif sect.type == 'locrel':
 80 |             print("Local relocations [%d entries]"%len(sect))
 81 |             for entry in sect:
 82 |                 print(repr(entry))
 83 |         elif sect.type == 'extrel':
 84 |             print("External relocations [%d entries]"%len(sect))
 85 |             for entry in sect:
 86 |                 print(repr(entry))
 87 | 
 88 | def print_indirect(e, **fargs):
 89 |     # Find section with indirect symbols and indirect symbols table
 90 |     indirectsym_table = None
 91 |     indirectsym_section = []
 92 |     for s in e.sect:
 93 |         if getattr(s, 'type', None) == 'indirectsym':
 94 |             if indirectsym_table is not None:
 95 |                 raise ValueError("Only one IndirectSymbolTable per Mach-O file")
 96 |             indirectsym_table = s
 97 |         if not hasattr(s, 'sh'): continue
 98 |         if s.sh.type in [
 99 |                 macho.S_SYMBOL_STUBS,
100 |                 macho.S_LAZY_SYMBOL_POINTERS,
101 |                 macho.S_NON_LAZY_SYMBOL_POINTERS,
102 |                 macho.S_LAZY_DYLIB_SYMBOL_POINTERS,
103 |                 ]:
104 |             indirectsym_section.append(s)
105 |     # Display
106 |     verbose = False # Exactly the same output as 'otool -Iv'
107 |     import struct
108 |     idx = 0
109 |     for s in indirectsym_section:
110 |         print("Indirect symbols for (%s,%s) %u entries"
111 |            % (s.sh.segname, s.sh.sectname, len(s)))
112 |         if e.wsize == 64:
113 |             header = "%-18s %5s"
114 |             format = "0x%016x %5s"
115 |             valfmt = e.sex+"Q"
116 |         if e.wsize == 32:
117 |             header = "%-10s %5s"
118 |             format = "0x%08x %5s"
119 |             valfmt = e.sex+"I"
120 |         if s.sh.type == macho.S_SYMBOL_STUBS:
121 |             # First two bytes are 0xff 0x25
122 |             valfmt = e.sex+"HI"
123 |         address = s.addr
124 |         data = [ "address", "index", " name" ]
125 |         if verbose:
126 |             # The value read in the table is not output by otool
127 |             # it may be useless ???
128 |             header += "%-20s "
129 |             format += "%-20s "
130 |             data += "value"
131 |         header += "%s"
132 |         format += "%s"
133 |         print(header % tuple(data))
134 |         for entry in s:
135 |             if verbose: content = struct.unpack(valfmt,entry.content)[-1]
136 |             index = indirectsym_table.entries[idx].index
137 |             name = ''
138 |             if   index == macho.INDIRECT_SYMBOL_LOCAL: index = "LOCAL"
139 |             elif index == macho.INDIRECT_SYMBOL_ABS:   index = "ABSOLUTE"
140 |             else:  name = ' '+e.symbols.symbols[index].name
141 |             data = [ address, index, name ]
142 |             if verbose: data.append(content)
143 |             print(format % tuple(data))
144 |             idx += 1
145 |             address += entry.bytelen
146 | 
def print_relocs(e, **fargs):
    """Print the relocation entries of every section of ``e`` that has a ``reloclist``.

    For scattered entries the extern column shows 'n/a' and the last column
    is an 8-digit hexadecimal value; otherwise it is a decimal symbol number.
    Extra keyword arguments are accepted and ignored.
    """
    row = "%08x %-5u %-6u %-6s %-7d %-9d %s"
    for section in e.sect:
        if hasattr(section, 'reloclist'):
            hdr = section.sh
            print("Relocation information (%s,%s) %u entries"
               % (hdr.segname, hdr.sectname, hdr.nreloc))
            print("address  pcrel length extern type    scattered symbolnum/value")
            for rel in section.reloclist:
                if rel.scattered:
                    extern = 'n/a'
                    target = '0x%08x' % rel.symbolNumOrValue
                else:
                    extern = rel.extern
                    target = '%u' % rel.symbolNumOrValue
                print(row % (rel.address, rel.pcrel, rel.length,
                             extern, rel.type, rel.scattered, target))
158 | 
def print_opcodes(e, **fargs):
    """Print the rebase, bind, weak-bind and lazy-bind opcode streams of ``e``.

    For each kind, the matching sections are looked up by their ``type``
    attribute.  A "no compressed ... info" line is printed when there is
    none; otherwise the opcodes of each section are printed up to and
    including the first one whose opcode equals the terminating value.
    Extra keyword arguments are accepted and ignored.
    """
    # (section type, terminating opcode, title, message when absent)
    kinds = (
        ('rebase_', macho.REBASE_OPCODE_DONE,
         'rebase opcodes:', 'no compressed rebase info'),
        ('bind_', macho.BIND_OPCODE_DONE,
         'binding opcodes:', 'no compressed binding info'),
        ('weak_bind_', macho.BIND_OPCODE_DONE,
         'weak binding opcodes:', 'no compressed weak binding info'),
        # -1: presumably matches no opcode, so the whole stream is
        # printed -- TODO confirm
        ('lazy_bind_', -1,
         'lazy binding opcodes:', 'no compressed lazy binding info'),
        )
    for kind, done, title, missing in kinds:
        found = [ sect for sect in e.sect
                  if getattr(sect, 'type', None) == kind ]
        if not found:
            print(missing)
            continue
        if len(found) > 1:
            print("ERROR: many sections with %s"%kind[:-1])
        for sect in found:
            print(title)
            for op in sect._array:
                print(op)
                if op.opcode == done:
                    break
183 | 
def print_rebase(e, **fargs):
    """Print the ``info`` entries of every section of ``e`` whose type is 'rebase_'.

    Nothing is printed when there is no such section.  Extra keyword
    arguments are accepted and ignored.
    """
    for sect in e.sect:
        if getattr(sect, 'type', None) == 'rebase_':
            print("rebase information (from compressed dyld info):")
            print("segment section          address     type")
            for item in sect.info:
                print(item)
190 | 
def print_bind(e, **fargs):
    """Print the ``info`` entries of the first section of ``e`` whose type is 'bind_'.

    Prints "no compressed binding info" when there is no such section.
    Extra keyword arguments are accepted and ignored.
    """
    candidates = (sect for sect in e.sect
                  if getattr(sect, 'type', None) == 'bind_')
    found = next(candidates, None)
    if found is None:
        print("no compressed binding info")
        return
    print("bind information:")
    print("segment section          address        type    addend dylib            symbol")
    for item in found.info:
        print(item)
200 | 
def print_weak_bind(e, **fargs):
    """Print the ``info`` entries of the first section of ``e`` whose type is 'weak_bind_'.

    Prints "no weak binding" when there is no such section.  Extra keyword
    arguments are accepted and ignored.
    """
    candidates = (sect for sect in e.sect
                  if getattr(sect, 'type', None) == 'weak_bind_')
    found = next(candidates, None)
    if found is None:
        print("no weak binding")
        return
    print("weak binding information:")
    print("segment section          address       type     addend symbol")
    for item in found.info:
        print(item)
210 | 
def print_lazy_bind(e, **fargs):
    """Print the ``info`` entries of the first section of ``e`` whose type is 'lazy_bind_'.

    Prints "no compressed lazy binding info" when there is no such section.
    Extra keyword arguments are accepted and ignored.
    """
    candidates = (sect for sect in e.sect
                  if getattr(sect, 'type', None) == 'lazy_bind_')
    found = next(candidates, None)
    if found is None:
        print("no compressed lazy binding info")
        return
    print("lazy binding information (from lazy_bind part of dyld info):")
    print("segment section          address    index  dylib            symbol")
    for item in found.info:
        print(item)
220 | 
def print_export(e, **fargs):
    """Print, sorted by ``addr``, the ``info`` entries of the first section of ``e`` whose type is 'export_'.

    Prints "no compressed export info" when there is no such section.
    Extra keyword arguments are accepted and ignored.
    """
    candidates = (sect for sect in e.sect
                  if getattr(sect, 'type', None) == 'export_')
    found = next(candidates, None)
    if found is None:
        print("no compressed export info")
        return
    print("export information (from trie):")
    by_address = sorted(found.info, key=lambda item: item.addr)
    for item in by_address:
        print(item)
229 | 
# Architecture name for each known (cputype, cpusubtype) pair; the
# cpusubtype is taken with the CPU_SUBTYPE_MASK bits cleared (see arch_name).
archi = {
    (macho.CPU_TYPE_MC680x0,   macho.CPU_SUBTYPE_MC680x0_ALL):  'm68k',
    (macho.CPU_TYPE_MC680x0,   macho.CPU_SUBTYPE_MC68030_ONLY): 'm68030',
    (macho.CPU_TYPE_MC680x0,   macho.CPU_SUBTYPE_MC68040):      'm68040',
    (macho.CPU_TYPE_MC88000,   macho.CPU_SUBTYPE_MC88000_ALL):  'm88k',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_I386_ALL):     'i386',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_486):          'i486',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_486SX):        'i486SX',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENT):         'pentium',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTPRO):      'pentpro',
    #macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTIUM_4):    'pentium4',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTII_M3):    'pentIIm3',
    (macho.CPU_TYPE_I386,      macho.CPU_SUBTYPE_PENTII_M5):    'pentIIm5',
    (macho.CPU_TYPE_X86_64,    macho.CPU_SUBTYPE_X86_64_ALL):   'x86_64',
    (macho.CPU_TYPE_X86_64,    macho.CPU_SUBTYPE_X86_64_H):     'x86_64h',
    (macho.CPU_TYPE_I860,      macho.CPU_SUBTYPE_I860_ALL):     'i860',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_ALL):  'ppc',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_601):  'ppc601',
    # This entry used to be keyed on CPU_SUBTYPE_POWERPC_603, so it was
    # silently overwritten by the 'ppc603' entry below.  The fallback 2 is
    # the value of CPU_SUBTYPE_POWERPC_602 in Apple's <mach/machine.h>, used
    # in case the macho module does not define that constant.
    (macho.CPU_TYPE_POWERPC,   getattr(macho, 'CPU_SUBTYPE_POWERPC_602', 2)): 'ppc602',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_603):  'ppc603',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_603e): 'ppc603e',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_603ev):'ppc603ev',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_604):  'ppc604',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_604e): 'ppc604e',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_620):  'ppc620',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_750):  'ppc750',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_7400): 'ppc7400',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_7450): 'ppc7450',
    (macho.CPU_TYPE_POWERPC,   macho.CPU_SUBTYPE_POWERPC_970):  'ppc970',
    (macho.CPU_TYPE_POWERPC64, macho.CPU_SUBTYPE_POWERPC64_ALL):'ppc64',
    (macho.CPU_TYPE_POWERPC64, macho.CPU_SUBTYPE_POWERPC_970):  'ppc970-64',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_ALL):      'veo',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_1):        'veo1',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_2):        'veo2',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_3):        'veo3',
    (macho.CPU_TYPE_VEO,       macho.CPU_SUBTYPE_VEO_4):        'veo4',
    (macho.CPU_TYPE_HPPA,      macho.CPU_SUBTYPE_HPPA_ALL):     'hppa',
    (macho.CPU_TYPE_HPPA,      macho.CPU_SUBTYPE_HPPA_7100LC):  'hppa7100LC',
    (macho.CPU_TYPE_SPARC,     macho.CPU_SUBTYPE_SPARC_ALL):    'sparc',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_ALL):      'arm',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V4T):      'armv4t',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V5TEJ):    'armv5',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_XSCALE):   'xscale',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V6):       'armv6',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V6M):      'armv6m',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7):       'armv7',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7F):      'armv7f',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7S):      'armv7s',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7K):      'armv7k',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7M):      'armv7m',
    (macho.CPU_TYPE_ARM,       macho.CPU_SUBTYPE_ARM_V7EM):     'armv7em',
    (macho.CPU_TYPE_ARM64,     macho.CPU_SUBTYPE_ARM64_ALL):    'arm64',
    (macho.CPU_TYPE_ARM64,     macho.CPU_SUBTYPE_ARM64_V8):     'arm64v8',
    }
284 | 
def arch_name(e):
    """Return the name registered in ``archi`` for the Mach-O object ``e``.

    The lookup key is the pair made of the header's ``cputype`` and of its
    ``cpusubtype`` with the ``macho.CPU_SUBTYPE_MASK`` bits cleared.
    A ``KeyError`` is raised when that pair is not present in ``archi``.
    """
    header = e.Mhdr
    kept_bits = 0xffffffff ^ macho.CPU_SUBTYPE_MASK
    key = (header.cputype, header.cpusubtype & kept_bits)
    return archi[key]
288 | 
if __name__ == '__main__':
    # Command-line entry point.  The options mimic a subset of 'otool' and
    # 'dyldinfo': each selected option adds one print_* function to
    # 'functions', and these functions are then run on every input file.
    import argparse
    # add_help=False because '-h' is redefined below as "print the mach header".
    parser = argparse.ArgumentParser(add_help=False)
    # Simulates 'otool'
    parser.add_argument('-arch', dest='arch_type', action='append', help='select architecture')
    parser.add_argument('-h', dest='options', action='append_const', const='header', help='print the mach header')
    parser.add_argument('-l', dest='options', action='append_const', const='load', help='print the load commands')
    parser.add_argument('--symbols', dest='options', action='append_const', const='symbols', help='print the symbols')
    parser.add_argument('--dysym', dest='options', action='append_const', const='dysym', help='print dynamic symbols')
    parser.add_argument('-r', dest='options', action='append_const', const='reloc', help='Display the relocation entries')
    parser.add_argument('-I', dest='options', action='append_const', const='indirect', help='Display the indirect symbol table')
    parser.add_argument('--llvm', dest='llvm_version', action='append', help='Simulate the output of a given version of llvm-otool')
    # Simulates 'dyldinfo'
    parser.add_argument('-opcodes', dest='options', action='append_const', const='opcodes', help='opcodes used to generate the rebase and binding information')
    parser.add_argument('-rebase', dest='options', action='append_const', const='rebase', help='addresses dyld will adjust if file not loaded at preferred address')
    parser.add_argument('-bind', dest='options', action='append_const', const='bind', help='addresses dyld will set based on symbolic lookups')
    parser.add_argument('-weak_bind', dest='options', action='append_const', const='weak_bind', help='symbols which dyld must coalesce')
    parser.add_argument('-lazy_bind', dest='options', action='append_const', const='lazy_bind', help='addresses dyld will lazily set on first use')
    parser.add_argument('-export', dest='options', action='append_const', const='export', help='addresses of all symbols this file exports')
    parser.add_argument('file', nargs='*', help='object file')
    args = parser.parse_args()
    # 'options' is None when none of the append_const flags was given.
    if args.options is None:
        args.options = []
    # Without any input file the usage text is shown; execution continues,
    # but the loop over args.file below then has nothing to iterate over.
    if len(args.file) == 0:
        parser.print_help()
    functions = []
    fargs = {}
    dyldinfo_simulation = False
    if args.llvm_version:
        # Hypothesis: the major number of the version of Xcode is sufficient
        # to determine what the output format of llvm-otool is.
        # When --llvm is repeated, the last value overwrites the previous ones.
        for llvm in args.llvm_version:
            if 'native' in llvm:
                fargs['llvm'] = get_otool_version()
            else:
                fargs['llvm'] = int(llvm)
    # The print functions are queued in the fixed order of the tests below,
    # independently of the order of the flags on the command line.
    if 'header' in args.options:
        functions.append(print_header)
    if 'load' in args.options:
        # For llvm versions 8 to 11 (8 is assumed when --llvm is absent), the
        # header is printed before the load commands even if -h was not given.
        if fargs.get('llvm',8) in (8, 9, 10, 11) and not 'header' in args.options:
            functions.append(print_header)
        functions.append(print_lc)
    if 'symbols' in args.options:
        functions.append(print_symbols)
    if 'dysym' in args.options:
        functions.append(print_dysym)
    if 'reloc' in args.options:
        functions.append(print_relocs)
    if 'indirect' in args.options:
        functions.append(print_indirect)
    # Each of the options below also switches the output to the 'dyldinfo'
    # layout (see how 'dyldinfo_simulation' is used when printing file names).
    if 'rebase' in args.options:
        functions.append(print_rebase)
        dyldinfo_simulation = True
    if 'bind' in args.options:
        functions.append(print_bind)
        dyldinfo_simulation = True
    if 'weak_bind' in args.options:
        functions.append(print_weak_bind)
        dyldinfo_simulation = True
    if 'lazy_bind' in args.options:
        functions.append(print_lazy_bind)
        dyldinfo_simulation = True
    if 'export' in args.options:
        functions.append(print_export)
        dyldinfo_simulation = True
    if 'opcodes' in args.options:
        functions.append(print_opcodes)
        dyldinfo_simulation = True

    for file in args.file:
        # Read the whole file as bytes; the descriptor is closed even if
        # read() fails.
        fd = open(file, 'rb')
        try:
            raw = fd.read()
        finally:
            fd.close()
        # NOTE(review): 'filesize' is not read anywhere in this block; it is a
        # module-level name, so confirm no other function uses it before
        # removing it.
        filesize = os.path.getsize(file)
        try:
            e = macho_init.MACHO(raw,
                parseSymbols = False)
        except ValueError as err:
            # Unparsable input: report the error under the file name and go
            # on with the next file.
            print("%s:" %file)
            print("    %s" % err)
            continue
        # Architecture selection.  An object with an 'Fhdr' attribute exposes
        # several architectures through 'e.arch'; after this section 'e' is
        # either one object with an 'Mhdr' attribute or a (possibly empty)
        # list of such objects.
        if args.arch_type is None:
            if hasattr(e, 'Fhdr'):
                # Select the current architecture, if present
                current = platform.machine()
                for _ in e.arch:
                    if current == arch_name(_):
                        e = _
                        break
                else:
                    # Display all architectures
                    e = [ _ for _ in e.arch ]
        elif 'all' in args.arch_type:
            if hasattr(e, 'Fhdr'):
                # Display all architectures
                e = [ _ for _ in e.arch ]
        elif len(args.arch_type) == 1:
            if hasattr(e, 'Fhdr'):
                # Display one architecture
                current = args.arch_type[0]
                for _ in e.arch:
                    if current == arch_name(_):
                        e = _
                        break
                else:
                    sys.stderr.write("error: otool: file: %s does not contain architecture: %s\n" % (file, current))
                    e = []
            else:
                # Display if it is the architecture
                current = args.arch_type[0]
                if current != arch_name(e):
                    e = []
        else:
            if hasattr(e, 'Fhdr'):
                # Display some architectures, in the order appearing in the args
                f = []
                for current in args.arch_type:
                    for _ in e.arch:
                        if current == arch_name(_):
                            f.append(_)
                            break
                    else:
                        sys.stderr.write("error: otool: file: %s does not contain architecture: %s\n" % (file, current))
                e = f
            else:
                # Display if one is the architecture
                for current in args.arch_type:
                    if current == arch_name(e):
                        break
                else:
                    e = []

        # In dyldinfo mode the file name is only printed when several files
        # were given.
        if dyldinfo_simulation and len(args.file) > 1:
            print("\n%s:" %file)
        if hasattr(e, 'Mhdr'):
            # Single object.  The file name line is skipped in dyldinfo mode
            # and when the header is the only thing being printed.
            if not dyldinfo_simulation and functions != [ print_header ]:
                print("%s:" %file)
            for f in functions:
                f(e, **fargs)
        else:
            # 'e' is a list of objects, one per selected architecture.
            for _ in e:
                # NOTE(review): 't0' and 't1' are not read anywhere in this
                # block.
                t0 = _.Mhdr.cputype
                t1 = _.Mhdr.cpusubtype & (0xffffffff ^ macho.CPU_SUBTYPE_MASK)
                if dyldinfo_simulation:
                    print("for arch %s:" % arch_name(_))
                else:
                    if functions != [ print_header ]:
                        print("%s (architecture %s):" %(file, arch_name(_)))
                for f in functions:
                    f(_, **fargs)
441 | 


--------------------------------------------------------------------------------
/elfesteem/jclass_init.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import struct
  4 | from new_cstruct import CStruct
  5 | import logging
  6 | log = logging.getLogger("classparse")
  7 | console_handler = logging.StreamHandler()
  8 | console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
  9 | log.addHandler(console_handler)
 10 | log.setLevel(logging.WARN)
 11 | 
 12 | 
 13 | 
 14 | 
 15 | def gensapce(lvl):
 16 |     return '    '*lvl
 17 | 
 18 | def out_attrs(o, lvl =  None):
 19 |     if lvl is None:
 20 |         lvl = 0
 21 |     out = ""
 22 |     if not isinstance(o, list):
 23 |         return gensapce(lvl)+repr(o)+'\n'
 24 |     for f, v in o:
 25 |         out += gensapce(lvl)+repr(f)
 26 |         if isinstance(v, list):
 27 |             out +='\n'
 28 |             for x in v:
 29 |                 out += out_attrs(x, lvl+1)
 30 |         else:
 31 |             out += " "+repr(v)
 32 |         out +="\n"
 33 |     return out
 34 | 
 35 | 
 36 | 
 37 | 
 38 | class CPUtf8(CStruct):
 39 |     _packformat = ">"
 40 |     _fields = [ ("tag", "u08"),
 41 |                 ("length", "u16"),
 42 |                 ("value", (lambda c, s, of:c.gets(s, of),
 43 |                            lambda c, value:c.sets(value)))
 44 |                 ]
 45 |     def gets(self, s, of):
 46 |         v = s[of:of+self.length]
 47 |         return v, of+self.length
 48 |     def sets(self, value):
 49 |         return str(value)
 50 | 
 51 |     def set_str(self, s):
 52 |         self.length = len(s)
 53 |         self.value = s
 54 |     def pp(self):
 55 |         return "%r"%(self.value)
 56 | 
 57 | class CPInteger(CStruct):
 58 |     _packformat = ">"
 59 |     _fields = [ ("tag", "u08"),
 60 |                 ("value", "u32")]
 61 | 
 62 | class CPFloat(CStruct):
 63 |     _packformat = ">"
 64 |     _fields = [ ("tag", "u08"),
 65 |                 ("value", "f")]
 66 | 
 67 | class CPLong(CStruct):
 68 |     _packformat = ">"
 69 |     _fields = [ ("tag", "u08"),
 70 |                 ("value", "q")]
 71 | 
 72 | class CPDouble(CStruct):
 73 |     _packformat = ">"
 74 |     _fields = [ ("tag", "u08"),
 75 |                 ("value", "d")]
 76 | 
 77 | class CPClass(CStruct):
 78 |     _packformat = ">"
 79 |     _fields = [ ("tag", "u08"),
 80 |                 ("name", "u16")]
 81 | 
 82 |     def get_name(self):
 83 |         return self.parent_head.get_constant_pool_by_index(self.name_value).value
 84 |     def pp(self):
 85 |         return "%r"%(self.name)
 86 | 
 87 | class CPString(CStruct):
 88 |     _packformat = ">"
 89 |     _fields = [ ("tag", "u08"),
 90 |                 ("value", "u16")]
 91 | 
 92 |     def get_value(self):
 93 |         return self.parent_head.get_constant_pool_by_index(self.value_value).value
 94 |     def set_value(self, v):
 95 |         self.parent_head.get_constant_pool_by_index(self.value_value).set_str(v)
 96 |     def pp(self):
 97 |         s = self.value
 98 |         """
 99 |         if len(s) > 40:
100 |             s = str(s)[:40]+'...'
101 |         """
102 |         return "%r"%(s)
103 | 
class CPFieldref(CStruct):
    """Constant-pool entry registered under tag 9 in ``CONSTANT_TYPES``.

    Layout: a ``u08`` tag followed by two ``u16`` constant-pool indexes,
    ``name`` and ``type``.
    """
    _packformat = ">"
    _fields = [
        ("tag", "u08"),
        ("name", "u16"),
        ("type", "u16"),
    ]

    def get_name(self):
        """Return the ``name`` of the pool entry designated by the raw name index."""
        owner = self.parent_head.get_constant_pool_by_index(self.name_value)
        return owner.name

    def get_type(self):
        """Return the pool entry designated by the raw type index."""
        return self.parent_head.get_constant_pool_by_index(self.type_value)

    def pp(self):
        """Return the owner name followed by the readable field declaration."""
        owner = self.name
        name_and_type = self.type
        declaration = parse_field_descriptor(name_and_type.type,
                                             name_and_type.name)
        return "%r %r" % (owner, declaration)
116 | 
class CPMethodref(CStruct):
    """Constant-pool entry registered under tag 10 in ``CONSTANT_TYPES``.

    Layout: a ``u08`` tag followed by two ``u16`` constant-pool indexes,
    ``name`` and ``type``.
    """
    _packformat = ">"
    _fields = [
        ("tag", "u08"),
        ("name", "u16"),
        ("type", "u16"),
    ]

    def get_name(self):
        """Return the ``name`` of the pool entry designated by the raw name index."""
        owner = self.parent_head.get_constant_pool_by_index(self.name_value)
        return owner.name

    def get_type(self):
        """Return the pool entry designated by the raw type index."""
        return self.parent_head.get_constant_pool_by_index(self.type_value)

    def pp(self):
        """Return the readable method signature built by ``demangle_java_name``."""
        owner = self.name
        name_and_type = self.type
        signature = demangle_java_name(owner,
                                       name_and_type.type,
                                       name_and_type.name)
        return "%r" % signature
130 | 
131 | 
# From hachoir project
# One-letter descriptor codes and the type names they are rendered as.
code_to_type_name = dict(
    B="byte",
    C="char",
    D="double",
    F="float",
    I="int",
    J="long",
    S="short",
    Z="boolean",
    V="void",
)
144 | 
145 | 
def demangle_java_name(c_name, c_typetype, c_typename):
    """Return a readable signature for a method reference.

    ``c_name`` is the class name with '/' separators, ``c_typetype`` the
    method descriptor and ``c_typename`` the method name.  The descriptor is
    rendered by ``parse_method_descriptor`` with ``<class>-><method>`` as the
    displayed name, where the class name uses '.' separators.
    """
    dotted_class = c_name.replace('/', '.')
    qualified = dotted_class + '->' + c_typename
    return parse_method_descriptor(c_typetype, qualified)
149 | 
150 | 
def eat_descriptor(descr):
    """
    Read head of a field/method descriptor.  Returns a pair of strings, where
    the first one is a human-readable string representation of the first found
    type, and the second one is the tail of the parameter.

    Leading '[' characters each add one "[]" suffix to the type.  An 'L'
    introduces a class name that runs up to the next ';'; any other first
    character is looked up in ``code_to_type_name``.

    Raises ValueError when the descriptor is empty (possibly after its
    leading '['), when an 'L' type has no terminating ';', or when the type
    code is unknown.
    """
    array_dim = 0
    rest = descr
    # Slicing (rather than indexing) keeps an empty string from raising
    # IndexError, so that it is reported below as an invalid descriptor.
    while rest[:1] == '[':
        array_dim += 1
        rest = rest[1:]
    if not rest:
        raise ValueError("Not a valid descriptor string: %s" % descr)
    if rest[0] == 'L':
        # str.find returns -1 instead of raising: check it explicitly,
        # otherwise a missing ';' would silently produce a truncated name.
        end = rest.find(';')
        if end == -1:
            raise ValueError("Not a valid descriptor string: " + rest)
        type_name = rest[1:end]
        # Keep the ';' as first character: it is dropped by the final [1:].
        rest = rest[end:]
    else:
        try:
            type_name = code_to_type_name[rest[0]]
        except KeyError:
            raise ValueError("Not a valid descriptor string: %s" % rest)
    return (type_name.replace("/", ".") + array_dim * "[]", rest[1:])
173 | 
def parse_field_descriptor(descr, name=None):
    """
    Turn a field descriptor (exactly one type) into its human-readable form.

    The descriptor must be non-empty and must be entirely consumed by
    ``eat_descriptor``; both conditions are checked with ``assert``.  When
    ``name`` is given and non-empty, the result is "<type> <name>", otherwise
    just "<type>".
    """
    assert descr
    type_name, leftover = eat_descriptor(descr)
    assert not leftover
    if not name:
        return type_name
    return type_name + " " + name
186 | 
def parse_method_descriptor(descr, name=None):
    """
    Turn a method descriptor (parameter types, then return type) into its
    human-readable form.

    The descriptor must start with '(' and nothing may follow the return
    type; both conditions are checked with ``assert``.  The result is
    "<return> <name>(<params>)" when ``name`` is given and non-empty, and
    "<return> (<params>)" otherwise, parameters being separated by ", ".
    """
    assert descr and (descr[0] == '(')
    remaining = descr[1:]
    params_list = []
    # Consume one parameter type at a time until the closing parenthesis.
    while remaining[0] != ')':
        param, remaining = eat_descriptor(remaining)
        params_list.append(param)
    # What follows the ')' is the return type.
    return_type, leftover = eat_descriptor(remaining[1:])
    assert not leftover
    params = ", ".join(params_list)
    if not name:
        return "%s (%s)" % (return_type, params)
    return "%s %s(%s)" % (return_type, name, params)
205 | 
206 | 
207 | 
class CPInterfaceMethodref(CStruct):
    """Constant-pool entry registered under tag 11 in ``CONSTANT_TYPES``.

    Layout: a ``u08`` tag followed by two ``u16`` constant-pool indexes,
    ``name`` and ``type``.
    """
    _packformat = ">"
    _fields = [
        ("tag", "u08"),
        ("name", "u16"),
        ("type", "u16"),
    ]

    def get_name(self):
        """Return the ``name`` of the pool entry designated by the raw name index."""
        owner = self.parent_head.get_constant_pool_by_index(self.name_value)
        return owner.name

    def get_type(self):
        """Return the pool entry designated by the raw type index."""
        return self.parent_head.get_constant_pool_by_index(self.type_value)

    def pp(self):
        """Return the dotted owner name, then the name and type of the ``type`` entry."""
        dotted_owner = self.name.replace('/', '.')
        name_and_type = self.type
        return "%r %r %r" % (dotted_owner,
                             name_and_type.name,
                             name_and_type.type)
220 | 
class CPNameandType(CStruct):
    """Constant-pool entry registered under tag 12 in ``CONSTANT_TYPES``.

    Layout: a ``u08`` tag followed by two ``u16`` constant-pool indexes,
    ``name`` and ``type``.
    """
    _packformat = ">"
    _fields = [
        ("tag", "u08"),
        ("name", "u16"),
        ("type", "u16"),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value

    def get_type(self):
        """Return the ``value`` of the pool entry designated by the raw type index."""
        type_entry = self.parent_head.get_constant_pool_by_index(self.type_value)
        return type_entry.value

    def pp(self):
        """Return the type followed by the name, both in ``repr`` form."""
        return "%r %r" % (self.type, self.name)
235 | 
236 | 
# Constant-pool tag -> class used to parse an entry carrying that tag.
CONSTANT_TYPES = {
    1: CPUtf8,
    3: CPInteger,
    4: CPFloat,
    5: CPLong,
    6: CPDouble,
    7: CPClass,
    8: CPString,
    9: CPFieldref,
    10: CPMethodref,
    11: CPInterfaceMethodref,
    12: CPNameandType,
}

# Reverse mapping: entry class -> constant-pool tag.
CONSTANT_TYPES_inv = dict((klass, tag) for tag, klass in CONSTANT_TYPES.items())
252 | 
253 | 
class CPoolfield(CStruct):
    """Dispatcher that parses one constant-pool entry of any known kind.

    The item at the given offset is read as the tag and the matching class
    of ``CONSTANT_TYPES`` does the actual unpacking.
    """
    _packformat = ">"
    _fields = [("tag", "u08")]

    @classmethod
    def unpack_l(cls, s, off = 0, parent_head = None, _sex=1, _wsize=32):
        """Return ``(entry, length)`` as given by the tag-specific ``unpack_l``.

        Raises ValueError when the tag is not a key of ``CONSTANT_TYPES``.
        """
        tag = ord(s[off])
        entry_class = CONSTANT_TYPES.get(tag)
        if entry_class is None:
            raise ValueError('unknown type', hex(tag))
        entry, length = entry_class.unpack_l(s, off, parent_head, _sex, _wsize)
        return entry, length

    @classmethod
    def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
        """Same as ``unpack_l``, but only the parsed entry is returned."""
        entry, _length = cls.unpack_l(s, off = off,
                                      parent_head = parent_head,
                                      _sex=_sex, _wsize=_wsize)
        return entry
270 | 
271 | 
class CException_table(CStruct):
    """One entry of the ``exception_table`` of ``CAttribute_code``.

    Layout: four ``u16`` fields, ``start_pc``, ``end_pc``, ``handler_pc`` and
    ``catch_type``.
    """
    _packformat = ">"
    _fields = [
        ("start_pc", "u16"),
        ("end_pc", "u16"),
        ("handler_pc", "u16"),
        ("catch_type", "u16"),
    ]
279 | 
class CAttribute_code(CStruct):
    """Attribute parsed when the attribute name is "Code" (see ``CAttributeInfo``).

    ``code`` holds ``code_length`` items of the input taken verbatim; it is
    followed by ``exception_table_length`` ``CException_table`` entries and
    ``attributes_count`` nested ``CAttributeInfo`` entries.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
        ("max_stack", "u16"),
        ("max_locals", "u16"),
        ("code_length", "u32"),
        ("code", (lambda c, s, of: c.getcode(s, of),
                  lambda c, value: c.setcode(value))),
        ("exception_table_length", "u16"),
        ("exception_table", "CException_table",
         lambda c: c.exception_table_length),
        ("attributes_count", "u16"),
        ("attributes", "CAttributeInfo",
         lambda c: c.attributes_count),
    ]

    def getcode(self, s, of):
        """Return ``(code, next_offset)`` for the ``code_length`` items of ``s`` at ``of``."""
        end = of + self.code_length
        return s[of:end], end

    def setcode(self, value):
        """Return ``value`` converted with ``str`` for packing."""
        return str(value)

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value
302 | 
class LineNumberTableEntry(CStruct):
    """One entry of the ``line_number_table`` of ``CLineNumberTable``.

    Layout: two ``u16`` fields, ``start_pc`` and ``line_number``.
    """
    _packformat = ">"
    _fields = [
        ("start_pc", "u16"),
        ("line_number", "u16"),
    ]
308 | 
class CLineNumberTable(CStruct):
    """Attribute parsed when the attribute name is "LineNumberTable".

    Holds ``line_number_table_length`` ``LineNumberTableEntry`` entries.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
        ("line_number_table_length", "u16"),
        ("line_number_table", "LineNumberTableEntry",
         lambda c: c.line_number_table_length),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value
318 | 
319 | 
class CException(CStruct):
    """Attribute parsed when the attribute name is "Exceptions".

    Holds ``exceptions_count`` ``u16`` entries in ``exceptions``.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
        ("exceptions_count", "u16"),
        ("exceptions", "u16", lambda c: c.exceptions_count),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value
329 | 
class CClass(CStruct):
    """One entry of the ``classes`` table of ``CInnerClasses``.

    Layout: four ``u16`` fields; the first three are constant-pool indexes.

    The getters read the raw indexes through the ``<field>_value``
    attributes, which is the naming used by every other structure of this
    module (``name_value``, ``sourcefile_value``, ``descriptor_value``...).
    """
    _packformat = ">"
    _fields = [ ("inner_class_info", "u16"),
                ("outer_class_info", "u16"),
                ("inner_name", "u16"),
                ("inner_class_access_flags", "u16"),
                ]

    def get_inner_class_info(self):
        """Return the ``name`` of the class entry designated by ``inner_class_info``."""
        entry = self.parent_head.get_constant_pool_by_index(self.inner_class_info_value)
        return entry.name

    def get_outer_class_info(self):
        """Return the ``name`` of the class entry designated by ``outer_class_info``.

        Returns None when the index designates no entry; the class file
        format uses index 0 when the class is not a member of another class.
        """
        entry = self.parent_head.get_constant_pool_by_index(self.outer_class_info_value)
        if entry is None:
            return None
        return entry.name

    def get_inner_name(self):
        """Return the simple name designated by ``inner_name``.

        The index designates a Utf8 entry, whose text is in ``value``.
        Returns None when the index designates no entry; the class file
        format uses index 0 for anonymous classes.
        """
        entry = self.parent_head.get_constant_pool_by_index(self.inner_name_value)
        if entry is None:
            return None
        return entry.value
344 | 
class CInnerClasses(CStruct):
    """Attribute parsed when the attribute name is "InnerClasses".

    Holds ``classes_count`` ``CClass`` entries in ``classes``.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
        ("classes_count", "u16"),
        ("classes", "CClass", lambda c: c.classes_count),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value
355 | 
class CSourceFile(CStruct):
    """Attribute parsed when the attribute name is "SourceFile".

    ``sourcefile`` is a ``u16`` constant-pool index.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
        ("sourcefile", "u16"),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value

    def get_sourcefile(self):
        """Return the ``value`` of the pool entry designated by the raw sourcefile index."""
        file_entry = self.parent_head.get_constant_pool_by_index(self.sourcefile_value)
        return file_entry.value
366 | 
class CSynthetic(CStruct):
    """Attribute parsed when the attribute name is "Synthetic".

    It only has the ``name`` index and the ``attribute_length`` fields.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value
375 | 
class CAttributeInfo_default(CStruct):
    """Fallback used by ``CAttributeInfo`` for attribute names it does not know.

    The ``attribute_length`` items following the header are kept verbatim in
    ``attribute``.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
        ("attribute_length", "u32"),
        ("attribute", (lambda c, s, of: c.getcode(s, of),
                       lambda c, value: c.setcode(value))),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value

    def getcode(self, s, of):
        """Return ``(payload, next_offset)`` for the ``attribute_length`` items at ``of``."""
        end = of + self.attribute_length
        return s[of:end], end

    def setcode(self, value):
        """Return ``value`` converted with ``str`` for packing."""
        return str(value)
391 | 
392 | 
class CAttributeInfo(CStruct):
    """Dispatcher that parses one attribute according to its name.

    The first two bytes are a big-endian constant-pool index; the pool entry
    it designates must be a ``CPUtf8`` and its value selects the class doing
    the actual unpacking.
    """
    _packformat = ">"
    _fields = [
        ("name", "u16"),
    ]

    @classmethod
    def unpack_l(cls, s, off = 0, parent_head = None, _sex=1, _wsize=32):
        """Return ``(attribute, length)`` as given by the selected ``unpack_l``.

        Unknown attribute names are logged as a warning and parsed with
        ``CAttributeInfo_default``.  Raises ValueError when the name index
        does not designate a ``CPUtf8`` entry.
        """
        (tag,) = struct.unpack('>H', s[off:off+2])
        pool_entry = parent_head.get_constant_pool_by_index(tag)
        if not isinstance(pool_entry, CPUtf8):
            raise ValueError('Error in parsing, should be string', hex(tag))
        name = pool_entry.value
        known_attributes = (
            ("Code", CAttribute_code),
            ("LineNumberTable", CLineNumberTable),
            ("Exceptions", CException),
            ("InnerClasses", CInnerClasses),
            ("SourceFile", CSourceFile),
            ("Synthetic", CSynthetic),
        )
        for known_name, attribute_class in known_attributes:
            if name == known_name:
                break
        else:
            log.warning("unsupported attribute, skipping:\n%r" % pool_entry)
            attribute_class = CAttributeInfo_default
        attribute, length = attribute_class.unpack_l(s, off, parent_head,
                                                     _sex, _wsize)
        return attribute, length

    @classmethod
    def unpack(cls, s, off = 0, parent_head = None, _sex=None, _wsize=None):
        """Same as ``unpack_l``, but only the parsed attribute is returned."""
        attribute, _length = cls.unpack_l(s, off = off,
                                          parent_head = parent_head,
                                          _sex=_sex, _wsize=_wsize)
        return attribute
426 | 
class CFieldInfo(CStruct):
    """One entry of the ``fields`` table of ``Jclass_description``.

    Holds ``attributes_count`` ``CAttributeInfo`` entries in ``attributes``.
    """
    _packformat = ">"
    _fields = [
        ("access_flags", "u16"),
        ("name", "u16"),
        ("descriptor", "u16"),
        ("attributes_count", "u16"),
        ("attributes", "CAttributeInfo", lambda c: c.attributes_count),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value
438 | 
class CMethods(CStruct):
    """One entry of the ``methods`` table of ``Jclass_description``.

    Holds ``attributes_count`` ``CAttributeInfo`` entries in ``attributes``.
    """
    _packformat = ">"
    _fields = [
        ("access_flags", "u16"),
        ("name", "u16"),
        ("descriptor", "u16"),
        ("attributes_count", "u16"),
        ("attributes", "CAttributeInfo", lambda c: c.attributes_count),
    ]

    def get_name(self):
        """Return the ``value`` of the pool entry designated by the raw name index."""
        name_entry = self.parent_head.get_constant_pool_by_index(self.name_value)
        return name_entry.value

    def get_descriptor(self):
        """Return the ``value`` of the pool entry designated by the raw descriptor index."""
        descriptor_entry = self.parent_head.get_constant_pool_by_index(self.descriptor_value)
        return descriptor_entry.value
451 | 
class Jclass_hdr(CStruct):
    """First part of a class file: magic, versions, constant pool and the
    ``bitmask``, ``this`` and ``super`` fields.

    ``constants_pool`` is a Python list in which an entry with tag 5 or 6 is
    followed by a ``None`` placeholder.
    """
    _packformat = ">"
    _fields = [
        ("magic", "u32"),
        ("minor_version", "u16"),
        ("major_version", "u16"),
        ("constants_pool_count", "u16"),
        ("constants_pool", (lambda c, s, of: c.gets(s, of),
                            lambda c, value: c.sets(value))),
        ("bitmask", "u16"),
        ("this", "u16"),
        ("super", "u16"),
    ]

    def gets(self, s, of):
        """Parse the constant pool found in ``s`` at offset ``of``.

        Entries are read until the list holds ``constants_pool_count - 1``
        slots.  Returns ``(pool, next_offset)``.
        """
        pool = []
        cursor = of
        while len(pool) < self.constants_pool_count - 1:
            entry, length = CPoolfield.unpack_l(s, cursor, self.parent_head)
            pool.append(entry)
            cursor += length
            if entry.tag in (5, 6):
                # An entry with tag 5 or 6 is followed by a supplementary
                # slot, represented here by None.
                pool.append(None)
        return pool, cursor

    def sets(self, value):
        """Return the concatenation of ``str`` of every slot, placeholders excluded."""
        return "".join(str(slot) for slot in value if slot != None)
478 | 
class Jclass_description(CStruct):
    """Second part of a class file: interfaces, fields, methods and attributes.

    Each table is preceded by its ``u16`` element count.
    """
    _packformat = ">"
    _fields = [
        ("interface_count", "u16"),
        ("interfaces", "u16", lambda c: c.interface_count),
        ("fields_count", "u16"),
        ("fields", "CFieldInfo", lambda c: c.fields_count),
        ("methods_count", "u16"),
        ("methods", "CMethods", lambda c: c.methods_count),
        ("attributes_count", "u16"),
        ("attributes", "CAttributeInfo", lambda c: c.attributes_count),
    ]

    def get_interfaces(self):
        """Return the ``name`` of the pool entry designated by each raw interface index."""
        lookup = self.parent_head.get_constant_pool_by_index
        names = []
        for index in self.interfaces_value:
            names.append(lookup(index).name)
        return names
494 | 
495 | 
class JCLASS(object):
    """Parsed representation of a Java class file.

    ``content`` is the raw input, ``hdr`` the parsed ``Jclass_hdr`` (which
    owns the constant pool) and ``description`` the parsed
    ``Jclass_description``.  Indexing the object indexes ``content``.

    The ``add_*`` methods append entries to the constant pool and return the
    1-based index of the entry that was added last.
    """
    def __getitem__(self, item):
        return self.content[item]
    def __setitem__(self, item, data):
        self.content.__setitem__(item, data)
        return

    def __init__(self, pestr = None):
        self._sex = 0
        self._wsize = 32
        self.content = pestr
        self.parse_content()

    def get_constant_pool_by_index(self, index):
        """Return the pool slot with 1-based ``index``, or None if out of range.

        The returned slot may itself be None: it is then the placeholder
        that follows an entry with tag 5 or 6.
        """
        index -=1
        if 0 <= index < len(self.hdr.constants_pool):
            return self.hdr.constants_pool[index]
        return None

    def parse_content(self):
        """Parse ``content`` into ``hdr``, then ``description`` right after it."""
        self.hdr, l = Jclass_hdr.unpack_l(self.content, 0, self, self)
        self.description = Jclass_description.unpack(self.content, l, self, self)

    def __str__(self):
        out = ''
        out += str(self.hdr)
        out += str(self.description)
        return out


    def add_constant(self, c):
        """Append ``c`` to the constant pool and return its 1-based index.

        When ``c`` has tag 5 or 6, a None placeholder is appended after it,
        exactly as ``Jclass_hdr.gets`` does when parsing: such constants use
        two pool slots, so the indexes of the following entries and
        ``constants_pool_count`` must account for the second one.
        """
        pool = self.hdr.constants_pool
        pool.append(c)
        index = len(pool)
        if getattr(c, 'tag', None) in (5, 6):
            pool.append(None)
        self.hdr.constants_pool_count = len(pool) + 1
        return index

    def _add_tagged(self, c):
        # Set the tag matching the class of 'c', then register the entry.
        c.tag = CONSTANT_TYPES_inv[c.__class__]
        return self.add_constant(c)

    def add_integer(self, i):
        """Add a ``CPInteger`` entry of value ``i``."""
        return self._add_tagged(CPInteger(parent_head = self, value = i))

    def add_float(self, i):
        """Add a ``CPFloat`` entry of value ``i``."""
        return self._add_tagged(CPFloat(parent_head = self, value = i))

    def add_long(self, i):
        """Add a ``CPLong`` entry of value ``i`` (two pool slots are used)."""
        return self._add_tagged(CPLong(parent_head = self, value = i))

    def add_double(self, i):
        """Add a ``CPDouble`` entry of value ``i`` (two pool slots are used)."""
        return self._add_tagged(CPDouble(parent_head = self, value = i))

    def add_utf8(self, i):
        """Add a ``CPUtf8`` entry holding the string ``i``."""
        return self._add_tagged(CPUtf8(parent_head = self, length = len(i), value = i))

    def add_string(self, i):
        """Add a ``CPUtf8`` entry for ``i`` and a ``CPString`` entry designating it."""
        x = self.add_utf8(i)
        return self._add_tagged(CPString(parent_head = self, value = x))

    def add_nameandtype(self, name, t):
        """Add two ``CPUtf8`` entries (name, then type) and a ``CPNameandType`` designating them."""
        namei = self.add_utf8(name)
        typei = self.add_utf8(t)
        return self._add_tagged(CPNameandType(parent_head = self, name = namei, type = typei))

    def add_class(self, i):
        """Add a ``CPUtf8`` entry for ``i`` and a ``CPClass`` entry designating it."""
        x = self.add_utf8(i)
        return self._add_tagged(CPClass(parent_head = self, name = x))

    def add_methodref(self, name, typetype, typename):
        """Add the class and name-and-type entries, then a ``CPMethodref`` designating them."""
        namei = self.add_class(name)
        typei = self.add_nameandtype(typename, typetype)
        return self._add_tagged(CPMethodref(parent_head = self, name = namei, type = typei))

    def add_fieldref(self, name, typetype, typename):
        """Add the class and name-and-type entries, then a ``CPFieldref`` designating them."""
        namei = self.add_class(name)
        typei = self.add_nameandtype(typename, typetype)
        return self._add_tagged(CPFieldref(parent_head = self, name = namei, type = typei))
588 | 
589 | 
590 | 
if __name__ == "__main__":
    # Command-line entry point: parse the file named by the first argument.
    import sys
    # The input is binary data: open it in binary mode, so that its bytes
    # are read untouched (no text decoding, no newline translation).
    fd = open(sys.argv[1], 'rb')
    try:
        data = fd.read()
    finally:
        # try/finally rather than 'with', to stay usable with old pythons.
        fd.close()
    e = JCLASS(data)
599 | 


--------------------------------------------------------------------------------
/tests/test_pe_manipulation.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | 
  3 | import os
  4 | __dir__ = os.path.dirname(__file__)
  5 | 
  6 | from test_all import run_tests, assertion, hashlib, open_read
  7 | from elfesteem.pe_init import log, PE, COFF, Coff
  8 | from elfesteem.strpatchwork import StrPatchwork
  9 | from elfesteem import pe
 10 | import struct
 11 | 
# We want to be able to verify warnings in non-regression tests.
# The logger's warning() and error() methods are replaced by hooks that
# record each call as a (level, args, kargs) tuple in 'log_history'.
# The hooks look up the global name 'log_history' when they are called,
# therefore tests can reset the history by rebinding it to a new list.
log_history = []
log.warning = lambda *args, **kargs: log_history.append(('warn',args,kargs))
log.error = lambda *args, **kargs: log_history.append(('error',args,kargs))
 16 | 
 17 | def test_PE_addsections_32(assertion):
 18 |     global log_history
 19 |     e = PE()
 20 |     d = e.pack()
 21 |     assertion('901e6383ee161b569af1d35d3f77b038',
 22 |               hashlib.md5(d).hexdigest(),
 23 |               'Creation of a standard empty PE')
 24 |     e.SHList.add_section(name = 'new', rawsize = 0x1000)
 25 |     d = e.pack()
 26 |     assertion('15aefbcc8f4b39e9484df8b1ed277c75',
 27 |               hashlib.md5(d).hexdigest(),
 28 |               'Adding a section to an empty PE')
 29 |     e.SHList.add_section(name = 'nxt', rawsize = 0x1000)
 30 |     d = e.virt[0x401000:0x402000]
 31 |     assertion('620f0b67a91f7f74151bc5be745b7110',
 32 |               hashlib.md5(d).hexdigest(),
 33 |               'Extract chunk from mapped memory, across multiple sections')
 34 |     for _ in range(89):
 35 |         e.SHList.add_section(name = 'nxt', rawsize = 0x1000)
 36 |     assertion([('error', ('Cannot add section %s: not enough space for section list', 'nxt'), {})],
 37 |               log_history,
 38 |               'Add too many sections (logs)')
 39 |     log_history = []
 40 |     assertion(90, # Should be 91 if the last section could been added
 41 |               len(e.SHList),
 42 |               'Add too many sections')
 43 | 
 44 | def test_PE_empty64(assertion):
 45 |     e = PE(wsize=64)
 46 |     d = e.pack()
 47 |     assertion('863bf62f521b0cad3209e42cff959eed',
 48 |               hashlib.md5(d).hexdigest(),
 49 |               'Creation of a standard empty PE+')
 50 | 
def test_PE_manipulate(assertion):
    """Non-regression test manipulating binary_input/pe_mingw.exe.

    The steps are, in order: packing (and re-packing) the parsed file,
    display of sections and COFF symbols, section lookups, raw and virtual
    memory reads and writes, pattern search, section alignment, addition
    of sections, imports and exports.

    Results are compared with reference MD5 digests or literal values by
    calling assertion(expected, computed, message).  Messages recorded in
    the global 'log_history' are checked, then the history is reset, so
    the order of the statements matters.
    """
    global log_history
    pe_mingw = open_read(__dir__+'/binary_input/pe_mingw.exe')
    e = PE(pe_mingw)
    # The packed file is not identical to the input :-(
    # The following are missing:
    # - the data between the end of DOS header and the start of PE header
    # - the padding after the list of sections, before the first section
    # - many parts of directories
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Packing after reading pe_mingw.exe')
    # Parsing the packed output and packing again gives the same bytes.
    d = PE(d).pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Packing after reading pe_mingw.exe; fix point')
    d = e.SHList.display().encode('latin1')
    assertion('ba631f3f172712b6526e284269c1ecbb',
              hashlib.md5(d).hexdigest(),
              'Display Sections from PE')
    d = e.Symbols.display().encode('latin1')
    assertion('1ee89dc3dc2104190734747d148b7511',
              hashlib.md5(d).hexdigest(),
              'Display COFF Symbols')
    assertion('__gnu_exception_handler@4',
              e.Symbols.getbyindex(2).name,
              'Get symbol by index, found')
    assertion(None,
              e.Symbols.getbyindex(2000),
              'Get symbol by index, not existing')
    # The same section (same digest) is found by name, by file offset
    # and by virtual address.
    d = e.getsectionbyname('.text').pack()
    assertion('ad0d51a670cb6cd2015499840ffefb8f',
              hashlib.md5(d).hexdigest(),
              'Get existing section by name')
    d = e.getsectionbyoff(0x400+0x100).pack()
    assertion('ad0d51a670cb6cd2015499840ffefb8f',
              hashlib.md5(d).hexdigest(),
              'Get existing section by offset')
    d = e.getsectionbyvad(0x400000+0x1000+0x100).pack()
    assertion('ad0d51a670cb6cd2015499840ffefb8f',
              hashlib.md5(d).hexdigest(),
              'Get existing section by address')
    # Lookups that match no section return None.
    d = e.getsectionbyname('no_sect')
    assertion(None, d, 'Get non-existing section by name')
    d = e.getsectionbyoff(0x80000)
    assertion(None, d, 'Get non-existing section by offset')
    d = e.getsectionbyvad(0x1000)
    assertion(None, d, 'Get non-existing section by address')
    # Indexing 'e' reads the raw file; indexing 'e.virt' reads the
    # mapped virtual memory.
    d = e[0x100:0x120]
    assertion('6b8897a89909959320f8adfc1d81c9ee',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from raw data')
    assertion(True,
              e.virt.is_addr_in(0x401000),
              'Address in mapped virtual memory')
    assertion(False,
              e.virt.is_addr_in(0x201000),
              'Address not in mapped virtual memory')
    d = e.virt[0x401000]
    assertion('4c614360da93c0a041b22e537de151eb',
              hashlib.md5(d).hexdigest(),
              'Extract byte from mapped memory, in a section')
    d = e.virt[0x400100]
    assertion('93b885adfe0da089cdf634904fd59f71',
              hashlib.md5(d).hexdigest(),
              'Extract byte from mapped memory, in no section')
    # Same digest as e[0x100:0x120] above.
    d = e.virt[0x400100:0x400120]
    assertion('6b8897a89909959320f8adfc1d81c9ee',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory, in headers')
    d = e.virt[0x401000:0x401020]
    assertion('21ac18c2564a3b408b31aae0af19d502',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from mapped memory, in a section')
    # Not mapped: a warning is logged, and the reference digest below
    # is the MD5 of empty data.
    d = e.virt[0x100:0x200]
    assertion([('warn', ('unknown rva address! -3fff00',), {})],
              log_history,
              'Extract chunk from non-mapped memory (logs)')
    log_history = []
    assertion('d41d8cd98f00b204e9800998ecf8427e',
              hashlib.md5(d).hexdigest(),
              'Extract chunk from non-mapped memory')
    # Calling e.virt with two addresses gives the same result as slicing.
    assertion(e.virt[0x401000:0x401020],
              e.virt(0x401000,0x401020),
              'Extract chunk from mapped memory, old API')
    # Writing back what has just been read does not change the packed file.
    e[0x100:0x120] = e[0x100:0x120]
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Writing in raw data')
    e.virt[0x401100:0x401120] = e.virt[0x401100:0x401120]
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (interval)')
    e.virt[0x401100] = e.virt[0x401100:0x401120]
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Writing in memory (address)')
    # Writing in the headers through the virtual view is refused,
    # with a warning.
    e.virt[0x400100:0x400120] = e.virt[0x400100:0x400120]
    assertion([('warn', ('Cannot write at RVA %s', slice(256, 288, None)), {})],
              log_history,
              'Writing at invalid RVA (logs)')
    log_history = []
    # len(e.virt) logs a deprecation warning, checked just below.
    assertion(0x468e71, len(e.virt), 'Max virtual address')
    assertion([('warn', ('__len__ deprecated',), {})],
              log_history,
              '__len__ deprectated (logs)')
    log_history = []
    # Find leave; ret
    assertion(0x401294,
              e.virt.find(struct.pack('BB', 0xc9, 0xc3)),
              'Find pattern (from the start)')
    assertion(0x4014B4,
              e.virt.rfind(struct.pack('BB', 0xc9, 0xc3)),
              'Find pattern (from the end)')
    # Aligning the sections does not change the packed file.
    e.SHList.align_sections()
    d = e.pack()
    assertion('2f08b8315c4e0a30d51a8decf104345c',
              hashlib.md5(d).hexdigest(),
              'Align sections')
    # Remove the Bound Import directory.
    # Usually, its content is not stored in any section... a future
    # version of elfesteem will need to manage this specific directory
    # in a specific way.
    e.NThdr.optentries[pe.DIRECTORY_ENTRY_BOUND_IMPORT].rva = 0
    e.NThdr.optentries[pe.DIRECTORY_ENTRY_BOUND_IMPORT].size = 0
    # Create new sections with all zero content
    s_redir = e.SHList.add_section(name = "redir", size = 0x1000)
    s_test  = e.SHList.add_section(name = "test",  size = 0x1000)
    s_rel   = e.SHList.add_section(name = "rel",   size = 0x5000)
    d = e.pack()
    assertion('439f6c698d3d5238d88c5ccef99761e2',
              hashlib.md5(d).hexdigest(),
              'Adding sections')
    d = PE(d).pack()
    assertion('439f6c698d3d5238d88c5ccef99761e2',
              hashlib.md5(d).hexdigest(),
              'Adding sections; fix point')
    # Start again from the original file; 's_test' still refers to the
    # section that was added to the previous PE object.
    e = PE(pe_mingw)
    # Delete the last sections => OK
    # (done by hand: the section array, the size of the section list and
    # the section count of the COFF header are all updated)
    for _ in range(2):
        del e.SHList._array[-1]
        e.SHList._size -= 40
        e.COFFhdr.numberofsections -= 1
    # Add two Descriptors in the Import Directory
    e.DirImport.add_dlldesc(
          [({"name":"kernel32.dll",
             "firstthunk":s_test.addr},
            ["CreateFileA",
             "SetFilePointer",
             "WriteFile",
             "CloseHandle",
             ]
            ),
           ({"name":"USER32.dll",
             "firstthunk":None},
            ["SetDlgItemInt",
             "GetMenu",
             "HideCaret",
             ]
            )
           ]
          )
    # The Import Directory is stored in a new section, sized to fit it.
    s_myimp = e.SHList.add_section(name="myimp", rawsize=len(e.DirImport))
    e.DirImport.set_rva(s_myimp.addr)
    assertion(0x4050a8,
              e.DirImport.get_funcvirt('KERNEL32.dll','ExitProcess'),
              'Import ExitProcess')
    assertion(None,
              e.DirImport.get_funcvirt(None,'LoadStringW'),
              'Import LoadStringW')
    assertion(None,
              e.DirExport.get_funcvirt('SetUserGeoID'),
              'Export SetUserGeoID')
    d = e.pack()
    assertion('8a3a1c8c9aa2db211e1d34c7efbb8473',
              hashlib.md5(d).hexdigest(),
              'Adding new imports')
    # Parsing the packed file logs a warning about a non-aligned section
    # size, but packing it again gives the same bytes.
    d = PE(d).pack()
    assertion([('warn', ('Section %d size %#x not aligned to %#x', 5, 294, 512), {})],
              log_history,
              'Adding new imports (logs)')
    log_history = []
    assertion('8a3a1c8c9aa2db211e1d34c7efbb8473',
              hashlib.md5(d).hexdigest(),
              'Adding new imports; fix point')
    # Add an export
    if e.DirExport.expdesc is None:
        e.DirExport.create(['coco'])
    assertion(0x40703e,
              e.DirExport.get_funcvirt('coco'),
              'Export: get_funcvirt')
    # 'eval' avoids warnings with python2.3
    assertion({1: eval("0xdeedc0fe"), 'coco': eval("0xdeedc0fe")},
              e.export_funcs(),
              'Export: export_funcs')
    d = e.pack()
    assertion('47a864481296d88f908126fb822ded59',
              hashlib.md5(d).hexdigest(),
              'Adding new exports')
    d = PE(d).pack()
    assertion([('warn', ('Section %d size %#x not aligned to %#x', 5, 294, 512), {})],
              log_history,
              'Adding new exports (logs)')
    log_history = []
    assertion('47a864481296d88f908126fb822ded59',
              hashlib.md5(d).hexdigest(),
              'Adding new exports; fix point')
    # Add a new Descriptor in the Import Directory
    e.DirImport.add_dlldesc([ ({"name":"MyDLL.dll"}, ["MyFunc"]) ])
    e.DirImport.set_rva(None)
    # NOTE(review): 'd' is not recomputed after the two calls above, so
    # this assertion compares the digest of the previous packed data; it
    # only shows that the calls did not raise -- confirm that this is
    # intended.
    assertion('47a864481296d88f908126fb822ded59',
              hashlib.md5(d).hexdigest(),
              'Adding imports, no specified section')
268 | 
def test_PE_dll(assertion):
    """Check packing and directory display for binary_input/pe_vstudio.dll.

    The packed file, the display() output of the IMPORT, EXPORT, RESOURCE
    and BASERELOC directories, and the repr() of every relocation are
    compared with reference MD5 digests.
    """
    global log_history

    def digest(data):
        return hashlib.md5(data).hexdigest()

    # Small DLL created with Visual Studio
    dll = PE(open_read(__dir__+'/binary_input/pe_vstudio.dll'))
    assertion('19028e1a1bde785fb4a58aeacf56007b',
              digest(dll.pack()),
              'Packing after reading pe_vstudio.dll')
    # Test the display() functions, one directory after the other
    display_checks = (
        ('DirImport', 'e9f925c32ed91f889a2b57e73360d444', 'Display Directory IMPORT'),
        ('DirExport', '2d262c4d834e58b17d4c7f2359d1f6f1', 'Display Directory EXPORT'),
        ('DirRes',    'a794e58acca2f6b2d9628e64008ad6d8', 'Display Directory RESOURCE'),
        ('DirReloc',  '33af05a3215689dec4cdae3656c63af0', 'Display Directory BASERELOC'),
        )
    for attr, expected, message in display_checks:
        text = getattr(dll, attr).display()
        assertion(expected, digest(text.encode('latin1')), message)
    # One line per relocation, for each entry of the relocation directory
    lines = []
    for reldir in dll.DirReloc:
        for rel in reldir.rels:
            lines.append(repr(rel))
    assertion('87951bfbb3c09dec8c54d41f72cc4263',
              digest('\n'.join(lines).encode('latin1')),
              'Display all relocations')
300 | 
def test_PE_ange(assertion):
    """Parse the files of binary_input/Ange and check the logged messages.

    Each file is parsed with PE(); the messages recorded in the global
    'log_history' are then compared with the expected list.  The history
    is reset only after a file that is expected to log something, which
    is why a file expected to log nothing is compared with [].
    Finally, the display of the resource directory of resourceloop.exe
    is compared with a reference MD5 digest.
    """
    global log_history
    # Parse some ill-formed PE made by Ange Albertini
    PE(open_read(__dir__+'/binary_input/Ange/resourceloop.exe'))
    # The same warning is expected 212 times.
    assertion([('warn', ('Resource tree too deep',), {})]*212,
              log_history,
              'Ange/resourceloop.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/namedresource.exe'))
    assertion([],
              log_history,
              'Ange/namedresource.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/weirdsord.exe'))
    assertion([('warn', ('Section %d offset %#x not aligned to %#x', 0, 513, 16384), {}), ('warn', ('Section %d size %#x not aligned to %#x', 0, 270, 16384), {})],
              log_history,
              'Ange/weirdsord.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/nosectionW7.exe'))
    assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 16, 0), {})],
              log_history,
              'Ange/nosectionW7.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/imports_relocW7.exe'))
    assertion([],
              log_history,
              'Ange/imports_relocW7.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/imports_tinyXP.exe'))
    assertion([],
              log_history,
              'Ange/imports_tinyXP.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/bottomsecttbl.exe'))
    assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 16, 696), {})],
              log_history,
              'Ange/bottomsecttbl.exe (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/delayfake.exe'))
    assertion([],
              log_history,
              'Ange/delayfake.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/exportobf.exe'))
    assertion([],
              log_history,
              'Ange/exportobf.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/dllbound-ld.exe'))
    assertion([],
              log_history,
              'Ange/dllbound-ld.exe (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/d_tiny.dll'))
    assertion([('warn', ('Opthdr magic %#x', 31074), {}),
               ('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 0, 13864), {}),
               ('warn', ('Windows 8 needs at least 13 directories, %d found', 0), {}),
               ('warn', ('Too many symbols: %d', 541413408), {}),
               ('warn', ('File too short for StrTable -0x61746127 != 0x0',), {})],
              log_history,
              'Ange/d_tiny.dll (logs)')
    log_history = []
    PE(open_read(__dir__+'/binary_input/Ange/dllfw.dll'))
    assertion([],
              log_history,
              'Ange/dllfw.dll (logs)')
    PE(open_read(__dir__+'/binary_input/Ange/tinydllXP.dll'))
    assertion([('warn', ('Number of rva %d does not match sizeofoptionalheader %d', 0, 0), {}),
               ('warn', ('Windows 8 needs at least 13 directories, %d found', 0), {}),
               ('warn', ('File too short for StrTable 0x55 != 0xc258016a',), {})],
              log_history,
              'Ange/tinydllXP.dll (logs)')
    log_history = []
    # Parse resourceloop.exe again, this time keeping the object, and
    # discard the warnings that were already checked at the start.
    e = PE(open_read(__dir__+'/binary_input/Ange/resourceloop.exe'))
    log_history = []
    d = e.DirRes.display().encode('latin1')
    assertion('98701be30b09759a64340e5245e48195',
              hashlib.md5(d).hexdigest(),
              'Display Directory RESOURCE that is too deep')
374 | 
def test_PE_invalids(assertion):
    """Check that corrupted PE data makes PE() raise ValueError.

    Two corruptions are tried: a modified NT signature, and a 'lfanew'
    value of 0x200000 written at offset 60 of the packed data.  If PE()
    does not raise, the failing call assertion(0, 1, ...) is reached.
    """
    # Some various ways for a PE to be detected as invalid
    image = PE()
    # Packed before any corruption; patched for the second check
    patched = StrPatchwork(image.pack())
    try:
        image.NTsig.signature = 0x2000
        image = PE(image.pack())
        assertion(0, 1, 'Not a PE, invalid NTsig')
    except ValueError:
        pass
    try:
        image.DOShdr.lfanew = 0x200000
        lfanew_bytes = struct.pack("<I", image.DOShdr.lfanew)
        patched[60] = lfanew_bytes
        image = PE(patched)
        assertion(0, 1, 'Not a PE, NTsig offset after eof')
    except ValueError:
        pass
392 | 
def test_COFF_invalid(assertion):
    """Check that Coff() raises ValueError for binary_input/README.txt.

    If no ValueError is raised, the failing call assertion(0, 1, ...)
    is reached.
    """
    # Now, we parse COFF files
    readme_path = __dir__+'/binary_input/README.txt'
    try:
        # Not COFF: OptHdr size too big
        Coff(open_read(readme_path))
        assertion(0, 1, 'Not COFF')
    except ValueError:
        pass
401 | 
def test_COFF_valid(assertion):
    """Parse binary_input/coff_mingw.obj and check address conversions.

    PE() must refuse the file with ValueError, while Coff() accepts it;
    rva2off(), off2virt() and virt2off() are then checked on the result.
    """
    raw = open_read(__dir__+'/binary_input/coff_mingw.obj')
    try:
        PE(raw)
        assertion(0, 1, 'Not PE')
    except ValueError:
        pass
    obj = Coff(raw)
    assertion(0x8c+0x10,
              obj.rva2off(0x10, section='.text'),
              'rva2off in a .obj')
    assertion(None,
              obj.off2virt(0x10),
              'Invalid RVA cannot be converted')
    assertion(None,
              obj.virt2off(0x10),
              'No virt for .obj')
416 | 
def test_COFF_tms320(assertion):
    """Check the display of the sections of C28346_Load_Program_to_Flash.out."""
    coff = Coff(open_read(__dir__+'/binary_input/C28346_Load_Program_to_Flash.out'))
    shown = coff.SHList.display().encode('latin1')
    assertion('a63cf686186105b83e49509f213b20ea',
              hashlib.md5(shown).hexdigest(),
              'Display Sections from COFF')
424 | 
def test_COFF_ckermit(assertion):
    """Parse the C-Kermit binaries found in binary_input with Coff().

    The repr() of the OSF/1 symbols of the first binary is compared with
    a reference MD5 digest; the other binaries are only parsed.
    """
    # C-Kermit binary for OSF1
    osf1 = Coff(open_read(__dir__+'/binary_input/cku200.dec-osf-1.3a'))
    symbols = repr(osf1.OSF1Symbols).encode('latin1')
    assertion('c7df867846612e6fc1c52a8042f706cc',
              hashlib.md5(symbols).hexdigest(),
              'Display OSF/1 Symbols')
    parse_only = (
        # C-Kermit binary for Clipper CLIX
        'cku196.clix-3.1',
        # C-Kermit binary for Apollo
        'cku193a05.apollo-sr10-s5r3',
        # C-Kermit XCOFF32 binary for AIX
        'cku190.rs6aix32c-3.2.4',
        # C-Kermit eCOFF32 binary for MIPS, big endian
        'cku192.irix40',
        # C-Kermit eCOFF32 binary for MIPS, little endian
        'cku192.ultrix43c-mips3',
        )
    for filename in parse_only:
        Coff(open_read(__dir__+'/binary_input/'+filename))
443 | 
def test_COFF_invalidity(assertion):
    """Patch binary_input/coff_mingw.obj and check how COFF() reacts.

    Invalid 16-bit values written at offset 2, then an invalid 32-bit
    value written at offset 8, must make COFF() raise ValueError; if it
    does not, the failing call assertion(0, 1, ...) is reached.  A last
    patch, at offset 436, must only log a warning about the StrTable.
    """
    global log_history
    # Some various ways for a COFF to be detected as invalid
    blob = StrPatchwork(open_read(__dir__+'/binary_input/coff_mingw.obj'))
    # The unmodified file is parsed without error
    COFF(blob)
    bad_counts = (
        (0,      'COFF cannot have no section'),
        (0x2000, 'Too many sections in COFF'),
        (0x100,  'Too many sections in COFF, past end of file'),
        )
    for count, message in bad_counts:
        try:
            blob[2] = struct.pack("<H", count)
            COFF(blob)
            assertion(0, 1, message)
        except ValueError:
            pass
    try:
        blob[2] = struct.pack("<H", 3)
        blob[8] = struct.pack("<I", 0x100000)
        COFF(blob)
        assertion(0, 1, 'COFF invalid ptr to symbol table')
    except ValueError:
        pass
    blob[8] = struct.pack("<I", 220)
    blob[436] = struct.pack("<I", 10000)
    COFF(blob)
    expected_logs = [('warn', ('File too short for StrTable 0x4 != 0x2710',), {})]
    assertion(expected_logs,
              log_history,
              'File too short for StrTable (logs)')
    log_history = []
    # The history has just been reset when this last check is made
    assertion([],
              log_history,
              'No non-regression test created unwanted log messages')
485 | 
def run_test(assertion):
    """Call every module-level object whose name starts with 'test_'.

    Each one is called with `assertion` as its only argument.  The loop
    works on a copy of globals(), taken before the first call.
    """
    snapshot = dict(globals())
    for name in snapshot:
        if not name.startswith('test_'):
            continue
        snapshot[name](assertion)
491 | 
# When executed as a script, hand run_test over to the run_tests() driver
# imported from test_all.
if __name__ == "__main__":
    run_tests(run_test)
494 | 


--------------------------------------------------------------------------------