├── hachoir ├── wx │ ├── __init__.py │ ├── resource │ │ ├── __init__.py │ │ └── resource.py │ ├── __main__.py │ ├── hex_view │ │ ├── __init__.py │ │ ├── hex_view_setup.py │ │ └── file_cache.py │ ├── tree_view │ │ ├── __init__.py │ │ └── tree_view_setup.py │ ├── frame_view │ │ ├── __init__.py │ │ ├── frame_view.py │ │ ├── frame_view_fwd.py │ │ ├── frame_view_setup.py │ │ └── frame_view_imp.py │ ├── field_view │ │ ├── __init__.py │ │ ├── core_type_menu_fwd.py │ │ ├── core_type_menu.py │ │ ├── field_split_menu_fwd.py │ │ ├── field_menu.py │ │ ├── format.py │ │ ├── field_view_setup.py │ │ ├── field_split_menu.py │ │ ├── field_view_fwd.py │ │ ├── core_type_menu_imp.py │ │ ├── mutator.py │ │ ├── field_menu_fwd.py │ │ ├── field_split_menu_imp.py │ │ ├── stubs.py │ │ ├── field_menu_imp.py │ │ ├── field_view.py │ │ └── field_menu_setup.py │ ├── unicode.py │ ├── dialogs.py │ ├── dispatcher.py │ └── main.py ├── core │ ├── __init__.py │ ├── endian.py │ ├── error.py │ ├── event_handler.py │ ├── language.py │ ├── config.py │ ├── profiler.py │ ├── cmd_line.py │ ├── text_handler.py │ ├── timeout.py │ └── memory.py ├── subfile │ ├── __init__.py │ ├── __main__.py │ ├── data_rate.py │ ├── output.py │ └── pattern.py ├── metadata │ ├── qt │ │ ├── __init__.py │ │ └── dialog.ui │ ├── __main__.py │ ├── config.py │ ├── __init__.py │ ├── formatter.py │ ├── safe.py │ ├── timezone.py │ ├── file_system.py │ ├── filter.py │ ├── cr2.py │ └── csv.py ├── parser │ ├── common │ │ ├── __init__.py │ │ ├── tracker.py │ │ ├── deflate.py │ │ └── msdos.py │ ├── network │ │ └── __init__.py │ ├── video │ │ └── __init__.py │ ├── game │ │ ├── __init__.py │ │ └── spider_man_video.py │ ├── __init__.py │ ├── container │ │ └── __init__.py │ ├── misc │ │ ├── common.py │ │ ├── __init__.py │ │ ├── ole2_util.py │ │ └── hlp.py │ ├── file_system │ │ └── __init__.py │ ├── program │ │ ├── __init__.py │ │ └── prc.py │ ├── audio │ │ └── __init__.py │ ├── image │ │ ├── __init__.py │ │ ├── common.py │ │ ├── psd.py │ │ ├── 
pcx.py │ │ └── tiff.py │ ├── archive │ │ ├── __init__.py │ │ ├── prs_pak.py │ │ ├── ar.py │ │ ├── mar.py │ │ └── mozilla_ar.py │ └── template.py ├── stream │ ├── stream.py │ ├── __init__.py │ └── input_helper.py ├── __init__.py ├── test.py ├── field │ ├── field_set.py │ ├── character.py │ ├── enum.py │ ├── vector.py │ ├── fragment.py │ ├── parser.py │ ├── bit_field.py │ ├── integer.py │ ├── static_field_set.py │ ├── helper.py │ ├── byte_field.py │ ├── fake_array.py │ ├── sub_file.py │ └── __init__.py ├── editor │ ├── __init__.py │ └── field.py └── regex │ └── __init__.py ├── doc ├── .gitignore ├── images │ ├── wx.png │ └── urwid.png ├── hacking.rst ├── examples │ ├── editor_gzip.py │ ├── editor_zip.py │ ├── editor_add_extra.py │ └── metadata.py ├── install.rst ├── editor.rst ├── gen_parser_list.py ├── contact.rst ├── wx.rst ├── list.rst ├── grep.rst ├── strip.rst ├── index.rst ├── urwid.rst └── authors.rst ├── tests ├── files │ ├── cd_0008_5C48_1m53s.cda │ ├── 7zip.chm │ ├── eula.exe │ ├── gps.jpg │ ├── hero.tga │ ├── swat.blp │ ├── vim.lnk │ ├── 10min.mkv │ ├── 25min.aifc │ ├── 32bpp.tga │ ├── andorra.map │ ├── archive.7z │ ├── cercle.exe │ ├── cross.xcf │ ├── dell8.fat16 │ ├── dontyou.xm │ ├── firstrun.rm │ ├── georgia.cab │ ├── globe.wmf │ ├── indiana.mid │ ├── kino14s.laf │ ├── my60k.ext2 │ ├── ocr10.laf │ ├── paktest.pak │ ├── pikachu.wmf │ ├── radpoor.doc │ ├── sample.tif │ ├── sample.ts │ ├── test.txt.gz │ ├── types.ext2 │ ├── weka.model │ ├── article01.bmp │ ├── canon.raw.cr2 │ ├── example2.arj │ ├── flashmob.mkv │ ├── india_map.gif │ ├── kde_click.wav │ ├── nitrodir.nds │ ├── quicktime.mp4 │ ├── test_file.fit │ ├── twunk_16.exe │ ├── verdan32.exe │ ├── breakdance.flv │ ├── green_fire.jpg │ ├── hachoir-core.ace │ ├── hachoir-core.rar │ ├── hachoir.org.sxw │ ├── ladouce_1h15.wav │ ├── lara_croft.pcx │ ├── logo-kubuntu.png │ ├── macos_10.5.macho │ ├── marc_kravetz.mp3 │ ├── minimal_xcf1.xcf │ ├── minimal_xcf3.xcf │ ├── usa_railroad.jpg │ ├── yellowdude.3ds │ 
├── 08lechat_hq_fr.mp3 │ ├── ReferenceMap.class │ ├── cacert_class3.der │ ├── claque-beignet.swf │ ├── linux_swap_9pages │ ├── macos_10.12.macho │ ├── mbr_linux_and_ext │ ├── mev.32bit.big.elf │ ├── mev.64bit.big.elf │ ├── minimal_xcf10.xcf │ ├── pentax_320x240.mov │ ├── satellite_one.s3m │ ├── sheep_on_drugs.mp3 │ ├── steganography.mp3 │ ├── angle-bear-48x48.ani │ ├── deja_vu_serif-2.7.ttf │ ├── example4_chapters.arj │ ├── ftp-0.17-537.i586.rpm │ ├── grasslogo_vector.emf │ ├── hotel_california.flac │ ├── kde_haypo_corner.bmp │ ├── matrix_ping_pong.wmv │ ├── reiserfs_v3_332k.bin │ ├── smallville.s03e02.avi │ ├── wormux_32x32_16c.ico │ ├── anti-arpeggio_tune.ptm │ ├── default_mount_opts.ext2 │ ├── jpeg.exif.photoshop.jpg │ ├── ping_20020927-3ubuntu2 │ ├── usa_railroad.jpg.0.zlib │ ├── usa_railroad.jpg.6.zlib │ ├── Panasonic_AG_HMC_151.MTS │ ├── arp_dns_ping_dns.tcpdump │ ├── bsize-1024-isize-1024.ext2 │ ├── bsize-2048-isize-512.ext2 │ ├── bsize-4096-isize-128.ext2 │ ├── interlude_david_aubrun.ogg │ ├── png_331x90x8_truncated.png │ ├── pyc_example_1.5.2_pyc.bin │ ├── pyc_example_2.2.3_pyc.bin │ ├── pyc_example_2.5c1_pyc.bin │ ├── python.cpython-312.pyc.bin │ ├── python.cpython-37.pyc.bin │ ├── audio_8khz_8bit_ulaw_4s39.au │ ├── free-software-song.midi.bz2 │ ├── get-versions.64bit.little.elf │ ├── 1024x1024-better-compression.xcf │ ├── com.apple.pkg.BaseSystemResources.bom │ ├── debian-31r4-i386-binary-1.iso.torrent │ └── pack-31c691f659cbc7841ca55a26a342fdaf0b89c533.pack ├── test_editor.py ├── test_strip.py ├── test_grep.py ├── regex_regression.rst └── test_doc.py ├── hachoir-list ├── hachoir-urwid ├── hachoir-metadata ├── hachoir-subfile ├── hachoir-metadata-csv ├── hachoir-metadata-qt ├── hachoir-metadata-gtk ├── hachoir-strip ├── hachoir-grep ├── hachoir-wx ├── tools ├── flake8.sh ├── fuzzer │ ├── tools.py │ └── mangle.py ├── flv_extractor.py ├── find_deflate.py └── entropy.py ├── .gitignore ├── .readthedocs.yaml ├── .hgignore ├── MANIFEST.in ├── tox.ini ├── 
.github └── workflows │ └── build.yml ├── benchmark.sh ├── README.rst └── setup.py /hachoir/wx/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hachoir/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hachoir/subfile/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hachoir/metadata/qt/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hachoir/parser/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | parser_list.rst 2 | -------------------------------------------------------------------------------- /hachoir/wx/resource/__init__.py: -------------------------------------------------------------------------------- 1 | from .resource import * # noqa 2 | -------------------------------------------------------------------------------- /hachoir/stream/stream.py: -------------------------------------------------------------------------------- 1 | class StreamError(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /hachoir/wx/__main__.py: -------------------------------------------------------------------------------- 1 | from hachoir.wx.main import main 2 | main() 3 | -------------------------------------------------------------------------------- 
/tests/files/cd_0008_5C48_1m53s.cda: -------------------------------------------------------------------------------- 1 | RIFF$CDDAfmt H\L;!4 5 -------------------------------------------------------------------------------- /hachoir/metadata/__main__.py: -------------------------------------------------------------------------------- 1 | from hachoir.metadata.main import main 2 | main() 3 | -------------------------------------------------------------------------------- /doc/images/wx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/doc/images/wx.png -------------------------------------------------------------------------------- /hachoir-list: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.listtool import main 3 | main() 4 | -------------------------------------------------------------------------------- /hachoir-urwid: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.urwid import main 3 | main() 4 | -------------------------------------------------------------------------------- /hachoir/metadata/config.py: -------------------------------------------------------------------------------- 1 | MAX_STR_LENGTH = 300 # characters 2 | RAW_OUTPUT = False 3 | -------------------------------------------------------------------------------- /doc/images/urwid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/doc/images/urwid.png -------------------------------------------------------------------------------- /hachoir/__init__.py: -------------------------------------------------------------------------------- 1 | VERSION = (3, 3, 0) 2 | __version__ = ".".join(map(str, VERSION)) 3 | 
-------------------------------------------------------------------------------- /tests/files/7zip.chm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/7zip.chm -------------------------------------------------------------------------------- /tests/files/eula.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/eula.exe -------------------------------------------------------------------------------- /tests/files/gps.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/gps.jpg -------------------------------------------------------------------------------- /tests/files/hero.tga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hero.tga -------------------------------------------------------------------------------- /tests/files/swat.blp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/swat.blp -------------------------------------------------------------------------------- /tests/files/vim.lnk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/vim.lnk -------------------------------------------------------------------------------- /hachoir-metadata: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.metadata.main import main 3 | main() 4 | -------------------------------------------------------------------------------- /hachoir-subfile: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.subfile.main import main 3 | main() 4 | -------------------------------------------------------------------------------- /tests/files/10min.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/10min.mkv -------------------------------------------------------------------------------- /tests/files/25min.aifc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/25min.aifc -------------------------------------------------------------------------------- /tests/files/32bpp.tga: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/32bpp.tga -------------------------------------------------------------------------------- /tests/files/andorra.map: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/andorra.map -------------------------------------------------------------------------------- /tests/files/archive.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/archive.7z -------------------------------------------------------------------------------- /tests/files/cercle.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/cercle.exe -------------------------------------------------------------------------------- /tests/files/cross.xcf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/cross.xcf -------------------------------------------------------------------------------- /tests/files/dell8.fat16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/dell8.fat16 -------------------------------------------------------------------------------- /tests/files/dontyou.xm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/dontyou.xm -------------------------------------------------------------------------------- /tests/files/firstrun.rm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/firstrun.rm -------------------------------------------------------------------------------- /tests/files/georgia.cab: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/georgia.cab -------------------------------------------------------------------------------- /tests/files/globe.wmf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/globe.wmf -------------------------------------------------------------------------------- /tests/files/indiana.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/indiana.mid -------------------------------------------------------------------------------- /tests/files/kino14s.laf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/kino14s.laf 
-------------------------------------------------------------------------------- /tests/files/my60k.ext2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/my60k.ext2 -------------------------------------------------------------------------------- /tests/files/ocr10.laf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ocr10.laf -------------------------------------------------------------------------------- /tests/files/paktest.pak: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/paktest.pak -------------------------------------------------------------------------------- /tests/files/pikachu.wmf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pikachu.wmf -------------------------------------------------------------------------------- /tests/files/radpoor.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/radpoor.doc -------------------------------------------------------------------------------- /tests/files/sample.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/sample.tif -------------------------------------------------------------------------------- /tests/files/sample.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/sample.ts -------------------------------------------------------------------------------- /tests/files/test.txt.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/test.txt.gz -------------------------------------------------------------------------------- /tests/files/types.ext2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/types.ext2 -------------------------------------------------------------------------------- /tests/files/weka.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/weka.model -------------------------------------------------------------------------------- /hachoir-metadata-csv: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.metadata.csv import main 3 | main() 4 | -------------------------------------------------------------------------------- /hachoir-metadata-qt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.metadata.qt.main import main 3 | main() 4 | -------------------------------------------------------------------------------- /hachoir/parser/network/__init__.py: -------------------------------------------------------------------------------- 1 | from hachoir.parser.network.tcpdump import TcpdumpFile # noqa 2 | -------------------------------------------------------------------------------- /tests/files/article01.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/article01.bmp -------------------------------------------------------------------------------- /tests/files/canon.raw.cr2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/canon.raw.cr2 -------------------------------------------------------------------------------- /tests/files/example2.arj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/example2.arj -------------------------------------------------------------------------------- /tests/files/flashmob.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/flashmob.mkv -------------------------------------------------------------------------------- /tests/files/india_map.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/india_map.gif -------------------------------------------------------------------------------- /tests/files/kde_click.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/kde_click.wav -------------------------------------------------------------------------------- /tests/files/nitrodir.nds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/nitrodir.nds -------------------------------------------------------------------------------- /tests/files/quicktime.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/quicktime.mp4 -------------------------------------------------------------------------------- /tests/files/test_file.fit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/test_file.fit 
-------------------------------------------------------------------------------- /tests/files/twunk_16.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/twunk_16.exe -------------------------------------------------------------------------------- /tests/files/verdan32.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/verdan32.exe -------------------------------------------------------------------------------- /hachoir/subfile/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.subfile.main import main 3 | main() 4 | -------------------------------------------------------------------------------- /tests/files/breakdance.flv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/breakdance.flv -------------------------------------------------------------------------------- /tests/files/green_fire.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/green_fire.jpg -------------------------------------------------------------------------------- /tests/files/hachoir-core.ace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hachoir-core.ace -------------------------------------------------------------------------------- /tests/files/hachoir-core.rar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hachoir-core.rar 
-------------------------------------------------------------------------------- /tests/files/hachoir.org.sxw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hachoir.org.sxw -------------------------------------------------------------------------------- /tests/files/ladouce_1h15.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ladouce_1h15.wav -------------------------------------------------------------------------------- /tests/files/lara_croft.pcx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/lara_croft.pcx -------------------------------------------------------------------------------- /tests/files/logo-kubuntu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/logo-kubuntu.png -------------------------------------------------------------------------------- /tests/files/macos_10.5.macho: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/macos_10.5.macho -------------------------------------------------------------------------------- /tests/files/marc_kravetz.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/marc_kravetz.mp3 -------------------------------------------------------------------------------- /tests/files/minimal_xcf1.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/minimal_xcf1.xcf 
-------------------------------------------------------------------------------- /tests/files/minimal_xcf3.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/minimal_xcf3.xcf -------------------------------------------------------------------------------- /tests/files/usa_railroad.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/usa_railroad.jpg -------------------------------------------------------------------------------- /tests/files/yellowdude.3ds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/yellowdude.3ds -------------------------------------------------------------------------------- /tests/files/08lechat_hq_fr.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/08lechat_hq_fr.mp3 -------------------------------------------------------------------------------- /tests/files/ReferenceMap.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ReferenceMap.class -------------------------------------------------------------------------------- /tests/files/cacert_class3.der: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/cacert_class3.der -------------------------------------------------------------------------------- /tests/files/claque-beignet.swf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/claque-beignet.swf 
-------------------------------------------------------------------------------- /tests/files/linux_swap_9pages: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/linux_swap_9pages -------------------------------------------------------------------------------- /tests/files/macos_10.12.macho: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/macos_10.12.macho -------------------------------------------------------------------------------- /tests/files/mbr_linux_and_ext: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/mbr_linux_and_ext -------------------------------------------------------------------------------- /tests/files/mev.32bit.big.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/mev.32bit.big.elf -------------------------------------------------------------------------------- /tests/files/mev.64bit.big.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/mev.64bit.big.elf -------------------------------------------------------------------------------- /tests/files/minimal_xcf10.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/minimal_xcf10.xcf -------------------------------------------------------------------------------- /tests/files/pentax_320x240.mov: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pentax_320x240.mov 
-------------------------------------------------------------------------------- /tests/files/satellite_one.s3m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/satellite_one.s3m -------------------------------------------------------------------------------- /tests/files/sheep_on_drugs.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/sheep_on_drugs.mp3 -------------------------------------------------------------------------------- /tests/files/steganography.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/steganography.mp3 -------------------------------------------------------------------------------- /tests/files/angle-bear-48x48.ani: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/angle-bear-48x48.ani -------------------------------------------------------------------------------- /tests/files/deja_vu_serif-2.7.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/deja_vu_serif-2.7.ttf -------------------------------------------------------------------------------- /tests/files/example4_chapters.arj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/example4_chapters.arj -------------------------------------------------------------------------------- /tests/files/ftp-0.17-537.i586.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ftp-0.17-537.i586.rpm 
-------------------------------------------------------------------------------- /tests/files/grasslogo_vector.emf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/grasslogo_vector.emf -------------------------------------------------------------------------------- /tests/files/hotel_california.flac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hotel_california.flac -------------------------------------------------------------------------------- /tests/files/kde_haypo_corner.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/kde_haypo_corner.bmp -------------------------------------------------------------------------------- /tests/files/matrix_ping_pong.wmv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/matrix_ping_pong.wmv -------------------------------------------------------------------------------- /tests/files/reiserfs_v3_332k.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/reiserfs_v3_332k.bin -------------------------------------------------------------------------------- /tests/files/smallville.s03e02.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/smallville.s03e02.avi -------------------------------------------------------------------------------- /tests/files/wormux_32x32_16c.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/wormux_32x32_16c.ico -------------------------------------------------------------------------------- /hachoir-metadata-gtk: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.metadata.gtk import MetadataGtk 3 | MetadataGtk().main() 4 | -------------------------------------------------------------------------------- /hachoir-strip: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.strip import main 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /tests/files/anti-arpeggio_tune.ptm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/anti-arpeggio_tune.ptm -------------------------------------------------------------------------------- /tests/files/default_mount_opts.ext2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/default_mount_opts.ext2 -------------------------------------------------------------------------------- /tests/files/jpeg.exif.photoshop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/jpeg.exif.photoshop.jpg -------------------------------------------------------------------------------- /tests/files/ping_20020927-3ubuntu2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ping_20020927-3ubuntu2 -------------------------------------------------------------------------------- /tests/files/usa_railroad.jpg.0.zlib: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/usa_railroad.jpg.0.zlib -------------------------------------------------------------------------------- /tests/files/usa_railroad.jpg.6.zlib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/usa_railroad.jpg.6.zlib -------------------------------------------------------------------------------- /hachoir-grep: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.grep import main 3 | 4 | if __name__ == "__main__": 5 | main() 6 | -------------------------------------------------------------------------------- /hachoir-wx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.wx.main import main 3 | 4 | if __name__ == "__main__": 5 | main() 6 | -------------------------------------------------------------------------------- /tests/files/Panasonic_AG_HMC_151.MTS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/Panasonic_AG_HMC_151.MTS -------------------------------------------------------------------------------- /tests/files/arp_dns_ping_dns.tcpdump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/arp_dns_ping_dns.tcpdump -------------------------------------------------------------------------------- /tests/files/bsize-1024-isize-1024.ext2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/bsize-1024-isize-1024.ext2 
-------------------------------------------------------------------------------- /tests/files/bsize-2048-isize-512.ext2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/bsize-2048-isize-512.ext2 -------------------------------------------------------------------------------- /tests/files/bsize-4096-isize-128.ext2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/bsize-4096-isize-128.ext2 -------------------------------------------------------------------------------- /tests/files/interlude_david_aubrun.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/interlude_david_aubrun.ogg -------------------------------------------------------------------------------- /tests/files/png_331x90x8_truncated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/png_331x90x8_truncated.png -------------------------------------------------------------------------------- /tests/files/pyc_example_1.5.2_pyc.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pyc_example_1.5.2_pyc.bin -------------------------------------------------------------------------------- /tests/files/pyc_example_2.2.3_pyc.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pyc_example_2.2.3_pyc.bin -------------------------------------------------------------------------------- /tests/files/pyc_example_2.5c1_pyc.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pyc_example_2.5c1_pyc.bin -------------------------------------------------------------------------------- /tests/files/python.cpython-312.pyc.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/python.cpython-312.pyc.bin -------------------------------------------------------------------------------- /tests/files/python.cpython-37.pyc.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/python.cpython-37.pyc.bin -------------------------------------------------------------------------------- /hachoir/wx/hex_view/__init__.py: -------------------------------------------------------------------------------- 1 | from .hex_view import hex_view_t # noqa 2 | from .hex_view_setup import setup_hex_view # noqa 3 | -------------------------------------------------------------------------------- /tests/files/audio_8khz_8bit_ulaw_4s39.au: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/audio_8khz_8bit_ulaw_4s39.au -------------------------------------------------------------------------------- /tests/files/free-software-song.midi.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/free-software-song.midi.bz2 -------------------------------------------------------------------------------- /hachoir/wx/tree_view/__init__.py: -------------------------------------------------------------------------------- 1 | from .tree_view import tree_view_t # noqa 2 | from .tree_view_setup import setup_tree_view # noqa 3 | -------------------------------------------------------------------------------- 
/tests/files/get-versions.64bit.little.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/get-versions.64bit.little.elf -------------------------------------------------------------------------------- /tests/files/1024x1024-better-compression.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/1024x1024-better-compression.xcf -------------------------------------------------------------------------------- /tests/files/com.apple.pkg.BaseSystemResources.bom: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/com.apple.pkg.BaseSystemResources.bom -------------------------------------------------------------------------------- /tests/files/debian-31r4-i386-binary-1.iso.torrent: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/debian-31r4-i386-binary-1.iso.torrent -------------------------------------------------------------------------------- /tests/files/pack-31c691f659cbc7841ca55a26a342fdaf0b89c533.pack: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pack-31c691f659cbc7841ca55a26a342fdaf0b89c533.pack -------------------------------------------------------------------------------- /hachoir/wx/frame_view/__init__.py: -------------------------------------------------------------------------------- 1 | from .frame_view import frame_view_t # noqa 2 | from .frame_view_setup import setup_frame_view # noqa 3 | from .frame_view_imp import frame_view_imp_t # noqa 4 | -------------------------------------------------------------------------------- /tools/flake8.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e -x 3 | cd $(dirname "$0")/.. 4 | # use /bin/sh to support "*.py" 5 | # FIXME: add hachoir-wx (currently broken) 6 | flake8 hachoir/ tests/ runtests.py setup.py doc/examples/*.py 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | *.swp 3 | MANIFEST 4 | build/ 5 | dist/ 6 | hachoir-metadata/hachoir_metadata/qt/dialog_ui.py 7 | 8 | # generated by tox 9 | .tox/ 10 | hachoir.egg-info/ 11 | 12 | # Mac files 13 | .DS_Store 14 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/__init__.py: -------------------------------------------------------------------------------- 1 | from .field_view_setup import setup_field_view # noqa 2 | from .field_view import field_view_t # noqa 3 | 4 | from .field_menu_setup import setup_field_menu # noqa 5 | from .field_menu import field_menu_t # noqa 6 | -------------------------------------------------------------------------------- /hachoir/test.py: -------------------------------------------------------------------------------- 1 | from hachoir.core import config 2 | from locale import setlocale, LC_ALL 3 | 4 | 5 | def setup_tests(): 6 | config.use_i18n = False # Don't use i18n 7 | config.quiet = True # Don't display warnings 8 | setlocale(LC_ALL, "C") 9 | -------------------------------------------------------------------------------- /hachoir/parser/video/__init__.py: -------------------------------------------------------------------------------- 1 | from hachoir.parser.video.asf import AsfFile # noqa 2 | from hachoir.parser.video.flv import FlvFile # noqa 3 | from hachoir.parser.video.mpeg_video import MPEGVideoFile # noqa 4 | from hachoir.parser.video.mpeg_ts import MPEG_TS # noqa 5 | 
-------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-lts-latest 8 | tools: 9 | python: "3" 10 | 11 | sphinx: 12 | configuration: doc/conf.py 13 | -------------------------------------------------------------------------------- /hachoir/parser/game/__init__.py: -------------------------------------------------------------------------------- 1 | from hachoir.parser.game.zsnes import ZSNESFile # noqa 2 | from hachoir.parser.game.spider_man_video import SpiderManVideoFile # noqa 3 | from hachoir.parser.game.laf import LafFile # noqa 4 | from hachoir.parser.game.blp import BLP1File, BLP2File # noqa 5 | -------------------------------------------------------------------------------- /doc/hacking.rst: -------------------------------------------------------------------------------- 1 | ++++++++++++ 2 | Hack Hachoir 3 | ++++++++++++ 4 | 5 | Run tests 6 | ========= 7 | 8 | Using tox 9 | --------- 10 | 11 | Install tox (``pip install tox``) and then run tox:: 12 | 13 | tox 14 | 15 | Manually 16 | -------- 17 | 18 | Run tests manually:: 19 | 20 | python3 runtests.py 21 | -------------------------------------------------------------------------------- /hachoir/core/endian.py: -------------------------------------------------------------------------------- 1 | """ 2 | Constant values about endian. 
# Maps a note number to a display name: number = octave * 12 + semitone,
# e.g. 0 -> "C (octave 0)" and 21 -> "A (octave 1)".
NOTE_NAME = {}
# The twelve semitones of the chromatic scale, in ascending order.  Each name
# must appear exactly once: a duplicated entry shifts every following note by
# one and makes the last entry of an octave collide with the next octave's C.
NOTES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
for octave in range(10):
    for index, note in enumerate(NOTES):
        # Stride by len(NOTES) so the table cannot drift out of step with
        # the tuple again.
        NOTE_NAME[octave * len(NOTES) + index] = (
            "%s (octave %s)" % (note, octave))
def setup_hex_view(parent, dispatcher):
    """Fetch the 'hex_view' child control of *parent* and register it.

    The control is passed, in this order, to the dispatcher's
    ``add_sender``, ``add`` and ``add_receiver`` methods, then returned.
    """
    print("[+] Setup hex view")
    view = get_child_control(parent, 'hex_view')
    # Same three registrations as before, looked up and called one by one.
    for method_name in ("add_sender", "add", "add_receiver"):
        getattr(dispatcher, method_name)(view)

    return view
class frame_view_t(wx.Frame):
    """Top-level application frame.

    The frame is constructed without arguments and publishes a
    'frame_view_ready' event through ``self.dispatcher`` when ``ready()`` is
    called.
    NOTE(review): ``dispatcher`` is not assigned in this class -- presumably
    it is attached when the frame is registered as a sender; confirm.
    """

    def __init__(self):
        super().__init__()
        # OnCreate required to avoid crashing wx
        self.Bind(wx.EVT_WINDOW_CREATE, self.OnCreate)

    def OnCreate(self, evt):
        """Deliberately empty handler for the window-create event."""
        return None

    def ready(self):
        """Announce that the frame is ready, passing the frame itself."""
        dispatcher = self.dispatcher
        dispatcher.trigger('frame_view_ready', self)
class core_type_menu_fwd_t:
    """Forward menu selections from the field menu to an implementation.

    *imp* must provide ``on_type_selected(id)``; a ``KeyError`` raised by it
    is treated as "not one of my ids" and the event is skipped instead.
    """

    def __init__(self, imp):
        self.imp = imp

    def on_field_menu_ready(self, dispatcher, view):
        """Bind menu events of *view* to ``on_type_selected``.

        *dispatcher* is accepted but not used here.
        """
        assert view is not None
        handler = self.on_type_selected
        view.Bind(wx.EVT_MENU, handler)

    def on_type_selected(self, event):
        """Pass the id of *event* to the implementation.

        When the implementation raises ``KeyError`` the event is skipped.
        """
        try:
            self.imp.on_type_selected(event.GetId())
        except KeyError:
            event.Skip()
        return None
def get_charset():
    """Return the name of the charset used to decode byte strings.

    The encoding reported by the default locale is preferred.  When the
    locale cannot be queried, or reports no encoding, Python's default
    string encoding (``sys.getdefaultencoding()``) is returned instead.
    """
    try:
        charset = locale.getdefaultlocale()[1]
    except (locale.Error, NameError, AttributeError, IndexError, ValueError):
        # Best effort only.  The name must still be bound here: with a bare
        # ``pass`` the check below raised UnboundLocalError instead of
        # falling back.  ValueError is what getdefaultlocale() raises for an
        # unrecognised locale name; AttributeError also covers Python
        # versions that no longer provide the (deprecated) function.
        charset = None

    if charset is None:
        charset = sys.getdefaultencoding()

    return charset
class frame_view_fwd_t:
    """Forward frame activation/show events to an implementation object.

    *imp* must provide ``on_activated()``; it is called both when the frame
    becomes active and when it is shown.
    """

    def __init__(self, imp):
        self.imp = imp

    def on_frame_view_ready(self, dispatcher, view):
        """Bind the activate and show events of *view* to this forwarder.

        *dispatcher* is accepted but not used here.
        """
        assert view is not None
        bindings = (
            (wx.EVT_ACTIVATE, self.on_activated),
            (wx.EVT_SHOW, self.on_shown),
        )
        for event_type, handler in bindings:
            view.Bind(event_type, handler)

    def on_activated(self, event):
        """Notify the implementation when *event* reports activation."""
        if not event.GetActive():
            return
        self.imp.on_activated()

    def on_shown(self, event):
        """Notify the implementation when ``event.Show`` is truthy."""
        if not event.Show:
            return
        self.imp.on_activated()
def file_open_dialog():
    """Create (without showing) an 'Open' file dialog.

    The dialog has no parent, starts in the current working directory with
    no preselected file, and only accepts files that already exist.
    """
    return wx.FileDialog(
        None,
        message='Open',
        defaultDir=os.getcwd(),
        defaultFile='',
        style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST,
    )
# Display names for the channel counts that have a conventional name.
NB_CHANNEL_NAME = {1: "mono", 2: "stereo"}


def humanAudioChannel(value):
    """Return "mono"/"stereo" for 1/2 channels, else ``str(value)``."""
    fallback = str(value)
    return NB_CHANNEL_NAME.get(value, fallback)


def humanFrameRate(value):
    """Format a numeric frame rate as "<x.y> fps".

    Values that are neither ``int`` nor ``float`` are returned unchanged.
    """
    if not isinstance(value, (int, float)):
        return value
    return "%.1f fps" % value


def humanComprRate(rate):
    """Format a compression rate with one decimal, e.g. "2.5x"."""
    text = "%.1fx" % rate
    return text


def humanAltitude(value):
    """Format an altitude with one decimal followed by " meters"."""
    text = "%.1f meters" % value
    return text


def humanPixelSize(value):
    """Append " pixels" to *value*."""
    text = "%s pixels" % value
    return text


def humanDPI(value):
    """Append " DPI" to *value*."""
    text = "%s DPI" % value
    return text
class core_type_menu_t:
    """Wrapper around a menu whose entries are field type names.

    Keeps a mapping from the menu item id to the type name it stands for.
    """

    def __init__(self, menu):
        self.menu = menu
        self.id_to_type = {}

        # Expose the wrapped menu's Bind directly on the wrapper so callers
        # can bind events on this object as if it were the menu itself.
        self.Bind = menu.Bind

    def add_type(self, type_name):
        """Append *type_name* to the menu under a freshly allocated id."""
        new_id = wx.NewId()
        self.id_to_type[new_id] = type_name
        self.menu.Append(new_id, type_name)

    def get_type_name(self, id):
        """Return the type name registered for *id*.

        Raises ``KeyError`` when *id* was not registered through add_type().
        """
        return self.id_to_type[id]

    def clear(self):
        """Delete every item currently reported by the menu."""
        for entry in self.menu.GetMenuItems():
            self.menu.Delete(entry)
class field_menu_t:
    """Pair a popup menu with the window that displays it."""

    def __init__(self, parent, menu):
        self.parent = parent
        self.menu = menu

        # forward this call because xrc doesn't allow menu
        # subclassing (as of 2.6.3)
        self.Bind = self.menu.Bind

    def show_opts(self):
        """Pop the menu up on the parent window."""
        self.parent.PopupMenu(self.menu)

    def ask_for_dump_file(self, title):
        """Ask the user for a file to save to.

        Shows a modal save dialog titled *title*.  Returns the chosen path,
        or ``None`` when the dialog is dismissed without confirming.
        """
        dump_dlog = file_save_dialog(title)
        try:
            if wx.ID_OK == dump_dlog.ShowModal():
                return dump_dlog.GetPath()
            return None
        finally:
            # A modal dialog is not released when ShowModal() returns; it
            # has to be destroyed explicitly or its native window leaks.
            dump_dlog.Destroy()
-------------------------------------------------------------------------------- 1 | .. _editor: 2 | 3 | +++++++++++++++++++++ 4 | hachoir.editor module 5 | +++++++++++++++++++++ 6 | 7 | Hachoir editor is a Python library based on Hachoir core used to edit binary 8 | files. 9 | 10 | Today, only one program uses it: :ref:`hachoir-strip <strip>` (remove "useless" 11 | information to make a file smaller). 12 | 13 | Example: gzip, remove filename 14 | ============================== 15 | 16 | .. literalinclude:: examples/editor_gzip.py 17 | 18 | Example: gzip, add extra 19 | ======================== 20 | 21 | .. literalinclude:: examples/editor_add_extra.py 22 | 23 | Example: zip, set comment 24 | ========================= 25 | 26 | .. literalinclude:: examples/editor_zip.py 27 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/format.py: -------------------------------------------------------------------------------- 1 | def format_addr_dec(addr): 2 | return "%08d.%01d" % divmod(addr, 8) 3 | 4 | 5 | def format_addr_hex(addr): 6 | return "%08x.%01d" % divmod(addr, 8) 7 | 8 | 9 | def format_size(size): 10 | return "%08u.%01d" % divmod(size, 8) 11 | 12 | 13 | def format_data(field): 14 | data = '' 15 | 16 | if field.hasValue(): 17 | data = field.display 18 | 19 | return data 20 | 21 | 22 | def format_name(field): 23 | name = field.name 24 | if field.is_field_set: 25 | name += '/' 26 | 27 | return name 28 | 29 | 30 | def format_desc(field): 31 | if field.description: 32 | return str(field.description) 33 | return '' 34 | -------------------------------------------------------------------------------- /hachoir/parser/image/__init__.py: -------------------------------------------------------------------------------- 1 | from hachoir.parser.image.bmp import BmpFile # noqa 2 | from hachoir.parser.image.gif import GifFile # noqa 3 | from hachoir.parser.image.ico import IcoFile # noqa 4 | from hachoir.parser.image.jpeg import
JpegFile # noqa 5 | from hachoir.parser.image.pcx import PcxFile # noqa 6 | from hachoir.parser.image.psd import PsdFile # noqa 7 | from hachoir.parser.image.png import PngFile # noqa 8 | from hachoir.parser.image.tga import TargaFile # noqa 9 | from hachoir.parser.image.tiff import TiffFile # noqa 10 | from hachoir.parser.image.wmf import WMF_File # noqa 11 | from hachoir.parser.image.xcf import XcfFile # noqa 12 | from hachoir.parser.image.cr2 import CR2File # noqa 13 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/field_view_setup.py: -------------------------------------------------------------------------------- 1 | from hachoir.wx.resource import get_child_control 2 | from .field_view_imp import field_view_imp_t 3 | from .field_view_fwd import field_view_fwd_t 4 | from .field_menu_setup import setup_field_menu 5 | 6 | 7 | def setup_field_view(parent, dispatcher): 8 | print("[+] Setup field view") 9 | field_view = get_child_control(parent, 'field_view') 10 | dispatcher.add_sender(field_view) 11 | 12 | field_view_imp = field_view_imp_t() 13 | dispatcher.add(field_view_imp) 14 | 15 | field_view_fwd = field_view_fwd_t(field_view_imp) 16 | dispatcher.add(field_view_fwd) 17 | 18 | setup_field_menu(field_view, dispatcher) 19 | 20 | return field_view 21 | -------------------------------------------------------------------------------- /doc/gen_parser_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os.path 3 | 4 | def writeReadme(out): 5 | from hachoir.parser.parser_list import HachoirParserList 6 | 7 | # Write parser list 8 | format = "rest" 9 | if format == "rest": 10 | print("Parser list", file=out) 11 | print("===========", file=out) 12 | print(file=out) 13 | HachoirParserList().print_(out=out, format=format) 14 | 15 | def main(): 16 | path = os.path.dirname(__file__) 17 | filename = os.path.join(path, 'parser_list.rst') 18 | 
with open(filename, 'w') as readme: 19 | writeReadme(readme) 20 | print("%s regenerated" % filename) 21 | 22 | if __name__ == "__main__": 23 | main() 24 | 25 | -------------------------------------------------------------------------------- /doc/contact.rst: -------------------------------------------------------------------------------- 1 | +++++++ 2 | Contact 3 | +++++++ 4 | 5 | Mailing list 6 | ============ 7 | 8 | Mailing list: ``hachoir AT lists.tuxfamily.org`` 9 | 10 | * `Read archives on mail-archives.com `_ 11 | * `Mailing list archives on Gmane.org 12 | `_: Read mailing list using 13 | HTTP, NNTP or RSS 14 | 15 | * To subscribe, send an email with subject ``subscribe`` (and empty body) to 16 | ``hachoir-request@lists.tux(...).org`` 17 | * To unsubscribe, send an email with subject ``unsubscribe`` (and empty body) 18 | to ``hachoir-request@lists.tux(...).org`` 19 | * You have to subscribe to post email. 20 | * Created October 22nd 2006 21 | 22 | -------------------------------------------------------------------------------- /doc/examples/metadata.py: -------------------------------------------------------------------------------- 1 | from hachoir.parser import createParser 2 | from hachoir.metadata import extractMetadata 3 | from sys import argv, stderr, exit 4 | 5 | if len(argv) != 2: 6 | print("usage: %s filename" % argv[0], file=stderr) 7 | exit(1) 8 | filename = argv[1] 9 | parser = createParser(filename) 10 | if not parser: 11 | print("Unable to parse file", file=stderr) 12 | exit(1) 13 | 14 | with parser: 15 | try: 16 | metadata = extractMetadata(parser) 17 | except Exception as err: 18 | print("Metadata extraction error: %s" % err) 19 | metadata = None 20 | if not metadata: 21 | print("Unable to extract metadata") 22 | exit(1) 23 | 24 | for line in metadata.exportPlaintext(): 25 | print(line) 26 | -------------------------------------------------------------------------------- /hachoir/wx/frame_view/frame_view_imp.py: 
-------------------------------------------------------------------------------- 1 | class frame_view_imp_t: 2 | def on_frame_view_ready(self, dispatcher, frame_view): 3 | assert frame_view is not None 4 | self.view = frame_view 5 | 6 | def on_file_ready(self, dispatcher, file): 7 | assert file is not None 8 | self.filename = file.name 9 | 10 | def on_filename_update(self, dispatcher, filename): 11 | self.filename = filename 12 | 13 | def format_title(self, field): 14 | field_path = field.path 15 | return self.filename + '/' + field_path[1:] 16 | 17 | def on_field_activated(self, dispatcher, field): 18 | self.view.SetTitle(self.format_title(field)) 19 | 20 | def on_activated(self): 21 | self.dispatcher.trigger('frame_activated', self.view) 22 | -------------------------------------------------------------------------------- /hachoir/core/error.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions to display an error (error, warning or information) message. 3 | """ 4 | 5 | from hachoir.core.log import log 6 | import sys 7 | import traceback 8 | 9 | 10 | def getBacktrace(empty="Empty backtrace."): 11 | """ 12 | Try to get backtrace as string. 13 | Returns "Error while trying to get backtrace" on failure. 14 | """ 15 | try: 16 | info = sys.exc_info() 17 | trace = traceback.format_exception(*info) 18 | if trace[0] != "None\n": 19 | return "".join(trace) 20 | except Exception: 21 | # No i18n here (imagine if i18n function calls error...) 
22 | return "Error while trying to get backtrace" 23 | return empty 24 | 25 | 26 | info = log.info 27 | warning = log.warning 28 | error = log.error 29 | -------------------------------------------------------------------------------- /hachoir/wx/dispatcher.py: -------------------------------------------------------------------------------- 1 | class dispatcher_t: 2 | def __init__(self): 3 | self.receivers = [] 4 | 5 | def add(self, who): 6 | self.add_sender(who) 7 | self.add_receiver(who) 8 | 9 | def add_sender(self, sender): 10 | sender.dispatcher = self 11 | 12 | def add_receiver(self, receiver): 13 | self.receivers.append(receiver) 14 | 15 | def trigger(self, event_name, *args): 16 | handler_name = 'on_' + event_name 17 | unhandled_name = 'on_unhandled' 18 | 19 | for receiver in self.receivers: 20 | if hasattr(receiver, handler_name): 21 | getattr(receiver, handler_name)(self, *args) 22 | elif hasattr(receiver, unhandled_name): 23 | getattr(receiver, unhandled_name)(self, event_name, *args) 24 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3, pep8, doc 3 | 4 | [testenv] 5 | basepython = python3 6 | commands= 7 | python -bb -Werror -X faulthandler runtests.py 8 | 9 | [testenv:pep8] 10 | allowlist_externals = sh 11 | deps = flake8 12 | commands = 13 | sh tools/flake8.sh 14 | 15 | [flake8] 16 | # E121 continuation line under-indented for hanging indent 17 | # hachoir/parser/network/ouid.py 18 | # E131 continuation line unaligned for hanging indent 19 | # parser/container/mp4.py 20 | # E501 line too long (88 > 79 characters) 21 | # W503 line break before binary operator 22 | # W504 line break after binary operator 23 | ignore = E121,E131,E501,W503,W504 24 | 25 | [testenv:doc] 26 | deps= 27 | sphinx 28 | allowlist_externals = make 29 | commands= 30 | python doc/gen_parser_list.py 31 | make -C doc html 32 | 33 | 
-------------------------------------------------------------------------------- /hachoir/core/event_handler.py: -------------------------------------------------------------------------------- 1 | class EventHandler(object): 2 | """ 3 | Class to connect events to event handlers. 4 | """ 5 | 6 | def __init__(self): 7 | self.handlers = {} 8 | 9 | def connect(self, event_name, handler): 10 | """ 11 | Connect an event handler to an event. Append it to handlers list. 12 | """ 13 | try: 14 | self.handlers[event_name].append(handler) 15 | except KeyError: 16 | self.handlers[event_name] = [handler] 17 | 18 | def raiseEvent(self, event_name, *args): 19 | """ 20 | Raise an event: call each handler for this event_name. 21 | """ 22 | if event_name not in self.handlers: 23 | return 24 | for handler in self.handlers[event_name]: 25 | handler(*args) 26 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/field_split_menu.py: -------------------------------------------------------------------------------- 1 | import wx 2 | 3 | 4 | class field_split_menu_t: 5 | def __init__(self, parent, menu): 6 | self.parent = parent 7 | self.menu = menu 8 | self.Bind = self.menu.Bind # see note in field_menu.py 9 | 10 | def ask_split(self, caption, min, max): 11 | # Note: we would prefer a NumberEntryDialog but this isn't currently wrapped 12 | # by wxPython Phoenix.
13 | res = None 14 | dlg = wx.TextEntryDialog(self.parent, 'Enter split offset:', '', 15 | caption, min, min, max) 16 | if dlg.ShowModal() == wx.ID_OK: 17 | try: 18 | res = int(dlg.GetValue()) 19 | except ValueError: 20 | res = None 21 | dlg.Destroy() 22 | return res 23 | -------------------------------------------------------------------------------- /hachoir/field/character.py: -------------------------------------------------------------------------------- 1 | """ 2 | Character field class: a 8-bit character 3 | """ 4 | 5 | from hachoir.field import Bits 6 | from hachoir.core.tools import makePrintable 7 | 8 | 9 | class Character(Bits): 10 | """ 11 | A 8-bit character using ASCII charset for display attribute. 12 | """ 13 | static_size = 8 14 | 15 | def __init__(self, parent, name, description=None): 16 | Bits.__init__(self, parent, name, self.static_size, description=description) 17 | 18 | def createValue(self): 19 | return chr(self._parent.stream.readBits( 20 | self.absolute_address, self.static_size, self.parent.endian)) 21 | 22 | def createRawDisplay(self): 23 | return str(Bits.createValue(self)) 24 | 25 | def createDisplay(self): 26 | return makePrintable(self.value, "ASCII", quote="'") 27 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/field_view_fwd.py: -------------------------------------------------------------------------------- 1 | import wx 2 | 3 | 4 | class field_view_fwd_t: 5 | def __init__(self, imp): 6 | self.imp = imp 7 | 8 | def on_field_view_ready(self, dispatcher, field_view): 9 | assert field_view is not None 10 | 11 | field_view.Bind(wx.EVT_COMMAND_RIGHT_CLICK, self.on_item_right_clicked) 12 | field_view.Bind(wx.EVT_RIGHT_UP, self.on_item_right_clicked) 13 | field_view.Bind(wx.EVT_LIST_ITEM_ACTIVATED, self.on_item_activated) 14 | field_view.Bind(wx.EVT_LIST_ITEM_SELECTED, self.on_item_selected) 15 | 16 | def on_item_activated(self, event): 17 | self.imp.on_item_activated() 18 | 19 | 
def on_item_selected(self, event): 20 | self.imp.on_item_selected() 21 | 22 | def on_item_right_clicked(self, event): 23 | self.imp.on_item_show_ops() 24 | -------------------------------------------------------------------------------- /hachoir/core/language.py: -------------------------------------------------------------------------------- 1 | import functools 2 | from hachoir.core.iso639 import ISO639_2 3 | 4 | 5 | @functools.total_ordering 6 | class Language: 7 | 8 | def __init__(self, code): 9 | code = str(code) 10 | if code not in ISO639_2: 11 | raise ValueError("Invalid language code: %r" % code) 12 | self.code = code 13 | 14 | def __eq__(self, other): 15 | if other.__class__ != Language: 16 | return NotImplemented 17 | return self.code == other.code 18 | 19 | def __lt__(self, other): 20 | if other.__class__ != Language: 21 | return NotImplemented 22 | return self.code < other.code 23 | 24 | def __str__(self): 25 | return ISO639_2[self.code] 26 | 27 | def __repr__(self): 28 | return "" % (str(self), self.code) 29 | -------------------------------------------------------------------------------- /hachoir/wx/resource/resource.py: -------------------------------------------------------------------------------- 1 | import os 2 | from wx.xrc import XmlResource, XRCID 3 | 4 | 5 | def get_resource(): 6 | filename = os.path.join(os.getcwd(), os.path.dirname(__file__), 'hachoir_wx.xrc') 7 | return XmlResource(filename) 8 | 9 | 10 | def get_frame(name): 11 | return get_resource().LoadFrame(None, name) 12 | 13 | 14 | def get_child_control(parent, child): 15 | # We do this instead of XRCCTRL to work around a bug in wxPython 3.0.3. 16 | # FindWindowById, FindWindowByName and XRCCTRL all seem to return the 17 | # first-created "child" instead of the proper one; only FindWindow behaves 18 | # as expected. 
19 | return parent.FindWindow(XRCID(child)) 20 | 21 | 22 | def get_menu_bar(name): 23 | return get_resource().LoadMenuBar(name) 24 | 25 | 26 | def get_menu(name): 27 | return get_resource().LoadMenu(name) 28 | -------------------------------------------------------------------------------- /hachoir/field/enum.py: -------------------------------------------------------------------------------- 1 | def Enum(field, enum, key_func=None): 2 | """ 3 | Enum is an adapter to another field: it will just change its display 4 | attribute. It uses a dictionary to associate a value to another. 5 | 6 | key_func is an optional function with prototype "def func(key)->key" 7 | which is called to transform key. 8 | """ 9 | display = field.createDisplay 10 | if key_func: 11 | def createDisplay(): 12 | try: 13 | key = key_func(field.value) 14 | return enum[key] 15 | except LookupError: 16 | return display() 17 | else: 18 | def createDisplay(): 19 | try: 20 | return enum[field.value] 21 | except LookupError: 22 | return display() 23 | field.createDisplay = createDisplay 24 | field.getEnum = lambda: enum 25 | return field 26 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/core_type_menu_imp.py: -------------------------------------------------------------------------------- 1 | from hachoir.field import available_types 2 | from hachoir.wx.field_view.mutator import convert_field 3 | from hachoir.wx.field_view.stubs import can_convert 4 | 5 | 6 | class core_type_menu_imp_t: 7 | def __init__(self): 8 | self.cur_field = None 9 | 10 | def on_core_type_menu_ready(self, dispatcher, view): 11 | assert view is not None 12 | self.view = view 13 | 14 | def on_type_selected(self, id): 15 | convert_field(self.cur_field, self.view.get_type_name(id)) 16 | self.dispatcher.trigger('field_modified', self.cur_field) 17 | 18 | def on_field_selected(self, dispatcher, field): 19 | self.cur_field = field 20 | 21 | self.view.clear() 22 | for type in 
available_types: 23 | if can_convert(field, type) and field.__class__ is not type: 24 | self.view.add_type(type.__name__) 25 | -------------------------------------------------------------------------------- /hachoir/metadata/safe.py: -------------------------------------------------------------------------------- 1 | from hachoir.core.error import warning 2 | 3 | 4 | def fault_tolerant(func, *args): 5 | def safe_func(*args, **kw): 6 | try: 7 | func(*args, **kw) 8 | except Exception as err: 9 | warning("Error when calling function %s(): %s" % ( 10 | func.__name__, err)) 11 | return safe_func 12 | 13 | 14 | def getFieldAttribute(fieldset, key, attrname): 15 | try: 16 | field = fieldset[key] 17 | if field.hasValue(): 18 | return getattr(field, attrname) 19 | except Exception as err: 20 | warning("Unable to get %s of field %s/%s: %s" % ( 21 | attrname, fieldset.path, key, err)) 22 | return None 23 | 24 | 25 | def getValue(fieldset, key): 26 | return getFieldAttribute(fieldset, key, "value") 27 | 28 | 29 | def getDisplay(fieldset, key): 30 | return getFieldAttribute(fieldset, key, "display") 31 | -------------------------------------------------------------------------------- /hachoir/wx/field_view/mutator.py: -------------------------------------------------------------------------------- 1 | from hachoir import field as field_module 2 | from hachoir.wx.field_view.stubs import has_static_size, convert_size 3 | 4 | 5 | def split_field(field, split_pos, split_name, split_t, size_func): 6 | split_name += '[]' 7 | 8 | subfields = [ 9 | split_t(field._parent, split_name, split_pos), 10 | split_t(field._parent, split_name, size_func(field) - split_pos)] 11 | 12 | field._parent.replaceField(field.name, subfields) 13 | 14 | 15 | def convert_field(field, new_type_name): 16 | field_set = field._parent 17 | new_type = getattr(field_module, new_type_name) 18 | 19 | if has_static_size(new_type): 20 | new_field = new_type(field_set, field.name, field._getDescription()) 21 | else: 
22 | new_field = new_type(field_set, field.name, convert_size(field, new_type), field._getDescription()) 23 | 24 | field_set.replaceField(field.name, [new_field]) 25 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest] 15 | python: ['3.11'] 16 | include: 17 | # Python 3.6 uses old Ubuntu 20.04 18 | - os: ubuntu-20.04 19 | python: '3.6' 20 | 21 | steps: 22 | # https://github.com/actions/checkout 23 | - uses: actions/checkout@v3 24 | - name: Setup Python 25 | # https://github.com/actions/setup-python 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python }} 29 | - name: Install Tox and any other packages 30 | run: pip install tox 31 | - name: Display the Python version 32 | run: python -VV 33 | - name: Run Tox 34 | run: tox 35 | -------------------------------------------------------------------------------- /hachoir/core/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration of Hachoir 3 | """ 4 | 5 | import os 6 | 7 | # UI: display options 8 | max_string_length = 40 # Max. length in characters of GenericString.display 9 | max_byte_length = 14 # Max. length in bytes of RawBytes.display 10 | max_bit_length = 256 # Max. length in bits of RawBits.display 11 | 12 | # Global options 13 | debug = False # Display lots of information useful for debugging 14 | verbose = False # Display more information 15 | quiet = False # Don't display warnings 16 | 17 | # Use internationalization and localization (gettext)?
18 | if os.name == "nt": 19 | # TODO: Remove this hack and make i18n works on Windows :-) 20 | use_i18n = False 21 | else: 22 | use_i18n = True 23 | 24 | # Parser global options 25 | autofix = True # Enable Autofix? see hachoir.field.GenericFieldSet 26 | check_padding_pattern = True # Check padding fields pattern? 27 | -------------------------------------------------------------------------------- /hachoir/wx/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from hachoir.wx.app import app_t 4 | from hachoir import __version__ 5 | from hachoir.core.cmd_line import getHachoirOptions, configureHachoir 6 | from optparse import OptionParser 7 | import sys 8 | 9 | 10 | def parseOptions(): 11 | parser = OptionParser(usage="%prog [options] [filename]") 12 | hachoir = getHachoirOptions(parser) 13 | parser.add_option_group(hachoir) 14 | 15 | values, arguments = parser.parse_args() 16 | if len(arguments) == 1: 17 | filename = arguments[0] 18 | elif not arguments: 19 | filename = None 20 | else: 21 | parser.print_help() 22 | sys.exit(1) 23 | return values, filename 24 | 25 | 26 | def main(): 27 | print("hachoir version %s" % __version__) 28 | print() 29 | values, filename = parseOptions() 30 | configureHachoir(values) 31 | app = app_t(filename) 32 | app.MainLoop() 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "x$PYTHON" = "x" ]; then 4 | PYTHON=`which python3` 5 | fi 6 | PYTHON=$PYTHON 7 | SRC=$(cd `dirname $0`; pwd) 8 | TESTCASE=tests/files/ 9 | export PYTHONPATH=$SRC/hachoir-core:$SRC/hachoir-parser:$SRC/hachoir-metadata:$PYTHONPATH 10 | 11 | function prepare_benchmark 12 | { 13 | echo 14 | echo "=== $1 ===" 15 | sync 16 | } 17 | 18 | HACHOIR_VERSION="unknown" 19 | 
PYTHON_VERSION=`$PYTHON -c 'from sys import version; print(version.split("\n")[0].split("(")[0].strip())' 2>&1` 20 | echo "Benchmark Hachoir version $HACHOIR_VERSION on Python $PYTHON_VERSION" 21 | 22 | prepare_benchmark "hachoir-grep: yellowcase" 23 | $PYTHON -OO $SRC/hachoir-grep --bench --all $TESTCASE/yellowdude.3ds 24 | 25 | prepare_benchmark "hachoir-metadata: set A (mp3, wav, png, au, mkv)" 26 | (cd $TESTCASE; $PYTHON -OO $SRC/hachoir-metadata \ 27 | --bench \ 28 | sheep_on_drugs.mp3 kde_click.wav logo-kubuntu.png \ 29 | audio_8khz_8bit_ulaw_4s39.au flashmob.mkv 10min.mkv) 30 | 31 | -------------------------------------------------------------------------------- /doc/wx.rst: -------------------------------------------------------------------------------- 1 | .. _wx: 2 | 3 | ++++++++++++++++++ 4 | hachoir-wx program 5 | ++++++++++++++++++ 6 | 7 | hachoir-wx is a graphical binary file explorer and hex viewer, which uses the 8 | Hachoir library to parse the files and the WxPython library to create the user 9 | interface. 10 | 11 | Before use, make sure to install the required dependencies with ``pip install 12 | hachoir[wx]``. On Mac OS and Windows, this will install WxPython. On Linux, you 13 | may need to install a version of WxPython using your distribution's package manager 14 | or from the `WxPython Download page `_. 15 | 16 | .. 
image:: images/wx.png 17 | :alt: hachoir-wx screenshot (MP3 audio file) 18 | 19 | Command line options 20 | ==================== 21 | 22 | * ``--preload=10``: Load 10 fields when loading a new field set 23 | * ``--path="/header/bpp"``: Open the specified path and focus on the field 24 | * ``--parser=PARSERID``: Force a parser (and skip parser validation) 25 | * ``--help``: Show all command line options 26 | -------------------------------------------------------------------------------- /hachoir/parser/archive/__init__.py: -------------------------------------------------------------------------------- 1 | from hachoir.parser.archive.ace import AceFile # noqa 2 | from hachoir.parser.archive.ar import ArchiveFile # noqa 3 | from hachoir.parser.archive.arj import ArjParser # noqa 4 | from hachoir.parser.archive.bomstore import BomFile # noqa 5 | from hachoir.parser.archive.bzip2_parser import Bzip2Parser # noqa 6 | from hachoir.parser.archive.cab import CabFile # noqa 7 | from hachoir.parser.archive.gzip_parser import GzipParser # noqa 8 | from hachoir.parser.archive.tar import TarFile # noqa 9 | from hachoir.parser.archive.zip import ZipFile # noqa 10 | from hachoir.parser.archive.rar import RarFile # noqa 11 | from hachoir.parser.archive.rpm import RpmFile # noqa 12 | from hachoir.parser.archive.sevenzip import SevenZipParser # noqa 13 | from hachoir.parser.archive.mar import MarFile # noqa 14 | from hachoir.parser.archive.mozilla_ar import MozillaArchive # noqa 15 | from hachoir.parser.archive.zlib import ZlibData # noqa 16 | from hachoir.parser.archive.prs_pak import PRSPakFile # noqa 17 | -------------------------------------------------------------------------------- /tools/fuzzer/tools.py: -------------------------------------------------------------------------------- 1 | from sys import platform 2 | 3 | if platform == 'win32': 4 | from win32process import (GetCurrentProcess, SetPriorityClass, 5 | BELOW_NORMAL_PRIORITY_CLASS) 6 | 7 | def beNice(): 8 | process 
= GetCurrentProcess() 9 | # FIXME: Not supported on Windows 95/98/Me/NT: ignore error? 10 | # which error? 11 | SetPriorityClass(process, BELOW_NORMAL_PRIORITY_CLASS) 12 | 13 | OS_ERRORS = (OSError, WindowsError) 14 | else: 15 | from os import nice 16 | 17 | def beNice(): 18 | nice(19) 19 | 20 | OS_ERRORS = OSError 21 | 22 | try: 23 | import sha 24 | 25 | def generateUniqueID(data): 26 | return sha.new(data).hexdigest() 27 | except ImportError: 28 | def generateUniqueID(data): 29 | generateUniqueID.sequence += 1 30 | return generateUniqueID.sequence 31 | generateUniqueID.sequence = 0 32 | 33 | 34 | def getFilesize(file): 35 | file.seek(0, 2) 36 | size = file.tell() 37 | file.seek(0, 0) 38 | return size 39 | -------------------------------------------------------------------------------- /hachoir/core/profiler.py: -------------------------------------------------------------------------------- 1 | from hotshot import Profile 2 | from hotshot.stats import load as loadStats 3 | from os import unlink 4 | 5 | 6 | def runProfiler(func, args=tuple(), kw={}, verbose=True, nb_func=25, 7 | sort_by=('cumulative', 'calls')): 8 | profile_filename = "/tmp/profiler" 9 | prof = Profile(profile_filename) 10 | try: 11 | if verbose: 12 | print("[+] Run profiler") 13 | result = prof.runcall(func, *args, **kw) 14 | prof.close() 15 | if verbose: 16 | print("[+] Stop profiler") 17 | print("[+] Process data...") 18 | stat = loadStats(profile_filename) 19 | if verbose: 20 | print("[+] Strip...") 21 | stat.strip_dirs() 22 | if verbose: 23 | print("[+] Sort data...") 24 | stat.sort_stats(*sort_by) 25 | if verbose: 26 | print() 27 | print("[+] Display statistics") 28 | print() 29 | stat.print_stats(nb_func) 30 | return result 31 | finally: 32 | unlink(profile_filename) 33 | -------------------------------------------------------------------------------- /hachoir/parser/common/deflate.py: -------------------------------------------------------------------------------- 1 | from hachoir.field 
class DeflateStream:
    """
    Callable wrapper around a zlib decompression object.

    Calling the instance with (size, data) feeds "data" to the decompressor,
    after any input left unconsumed by the previous call, and returns at
    most "size" bytes of decompressed data.
    """

    def __init__(self, stream, wbits=None):
        # "stream" is accepted but not used by this class.
        # A true "wbits" selects a negative window size (-MAX_WBITS),
        # otherwise the zlib defaults are used.
        self.gzip = decompressobj(-MAX_WBITS) if wbits else decompressobj()

    def __call__(self, size, data=None):
        pending = self.gzip.unconsumed_tail
        if data is not None:
            pending = pending + data
        return self.gzip.decompress(pending, size)
class TimezoneUTC(tzinfo):
    """UTC timezone"""
    ZERO = timedelta(0)

    def utcoffset(self, dt):
        return TimezoneUTC.ZERO

    def tzname(self, dt):
        return "UTC"

    def dst(self, dt):
        return TimezoneUTC.ZERO

    def __repr__(self):
        return "<TimezoneUTC delta=0, name='UTC'>"


class Timezone(TimezoneUTC):
    """
    Fixed offset in hour from UTC.

    The offset may be negative or fractional (eg. 5.5 for UTC+05:30).
    The timezone name has the form "+HHMM" or "-HHMM".
    """

    def __init__(self, offset):
        self._offset = timedelta(minutes=offset * 60)
        # Build the name from whole minutes so that the minutes part of a
        # fractional offset is kept ("+0530" and not "+0500")
        total_minutes = int(round(offset * 60))
        sign = "-" if total_minutes < 0 else "+"
        hours, minutes = divmod(abs(total_minutes), 60)
        self._name = "%s%02u%02u" % (sign, hours, minutes)

    def utcoffset(self, dt):
        return self._offset

    def tzname(self, dt):
        return self._name

    def __repr__(self):
        return "<Timezone delta=%s, name='%s'>" % (
            self._offset, self._name)


UTC = TimezoneUTC()


def createTimezone(offset):
    """
    Return the timezone for an offset in hours from UTC: the shared UTC
    object for a zero (or empty) offset, a new Timezone otherwise.
    """
    if offset:
        return Timezone(offset)
    else:
        return UTC
def main():
    """
    Extract the audio chunks of the FLV movie given on the command line.

    The chunks are copied, in order, into a file named like the input file
    with ".mp3" appended. Exit with status 1 on a usage error or when no
    parser can be created for the input file.
    """
    if len(argv) != 2:
        print("usage: %s video.flv" % argv[0], file=stderr)
        exit(1)

    # Open input video
    inputname = argv[1]
    parser = createParser(inputname)
    if parser is None:
        # Without a parser the lookups below would fail with a TypeError
        print("Unable to parse file: %s" % inputname, file=stderr)
        exit(1)
    if parser["audio[0]/codec"].value != AUDIO_CODEC_MP3:
        # Only a warning: the extraction is still attempted
        print("Unknown audio codec: %s" %
              parser["audio[0]/codec"].display, file=stderr)

    # Extract audio
    print("Extractor audio from: %s" % inputname)
    outputname = inputname + ".mp3"
    output = FileOutputStream(outputname)
    for chunk in parser.array("audio"):
        data = chunk["music_data"]
        output.copyBitsFrom(
            data.parent.stream, data.absolute_address, data.size, data.parent.endian)
    print("Write audio into: %s" % outputname)
class ISO9660_Metadata(RootMetadata):
    """
    Metadata extractor for ISO 9660 images: reads the identification
    strings and the two timestamps of the first volume descriptor.
    """

    # (metadata attribute, descriptor field) pairs, in assignment order.
    # "title" and "author" are each assigned twice.
    # NOTE(review): presumably the metadata object accumulates the values
    # rather than overwriting them -- confirm against RootMetadata.
    _TEXT_FIELDS = (
        ('title', 'volume_id'),
        ('title', 'vol_set_id'),
        ('author', 'publisher'),
        ('author', 'data_preparer'),
        ('producer', 'application'),
        ('copyright', 'copyright'),
    )

    # (metadata attribute, descriptor field) pairs holding a timestamp
    _DATE_FIELDS = (
        ('creation_date', 'creation_ts'),
        ('last_modification', 'modification_ts'),
    )

    def extract(self, iso):
        volume = iso['volume[0]/content']
        for attribute, field_name in self._TEXT_FIELDS:
            setattr(self, attribute, volume[field_name].value)
        for attribute, field_name in self._DATE_FIELDS:
            self.readTimestamp(attribute, volume[field_name].value)

    @fault_tolerant
    def readTimestamp(self, key, value):
        # A timestamp starting with "0000" is ignored
        if value.startswith("0000"):
            return
        # Digits layout: YYYY MM DD hh mm ss
        bounds = (0, 4, 6, 8, 10, 12, 14)
        parts = [int(value[start:stop])
                 for start, stop in zip(bounds, bounds[1:])]
        setattr(self, key, datetime(*parts))
DATARATE_UPDATE = 1.0   # Time slice (in second) for datarate computation


class DataRate:
    """
    Compute the average speed of a process from the offsets it reaches.

    The speed is expressed in offset units per second (bits per second when
    the offsets are bit positions).
    Store self.size data rates to compute good average speed.
    Don't compute average before self.min_size values are computed.
    """

    def __init__(self, offset, size=20, min_size=3):
        self.last_offset = offset
        self.last_time = time()
        self.datarates = []
        # Average data rate (None until min_size rates are stored)
        self.average = None
        # Number of stored value used to compute average data rate
        self.size = size
        self.min_size = min_size

    def update(self, offset):
        """
        Register the new offset. Nothing is done when less than
        DATARATE_UPDATE seconds elapsed since the last stored measure.
        """
        # Read the clock only once: the same instant closes this time slice
        # and opens the next one, so no time is lost between two slices
        now = time()
        difftime = now - self.last_time
        if difftime < DATARATE_UPDATE:
            # Only update each second
            return
        self.last_time = now

        # Compute data rate
        rate = float(offset - self.last_offset) / difftime
        self.last_offset = offset

        # Update statistics: keep only the last self.size rates
        self.datarates.append(rate)
        self.datarates = self.datarates[-self.size:]
        if self.min_size <= len(self.datarates):
            self.average = sum(self.datarates) / len(self.datarates)
def save_substream_to_disk(field, dest_path):
    """
    Copy the content of the sub-stream of "field" into the file "dest_path".

    The destination file is created (or truncated) in binary mode and is
    closed even if reading the sub-stream or copying the data fails.
    """
    with open(dest_path, 'wb') as dest_stream:
        f = FileFromInputStream(field.getSubIStream())
        copyfileobj(f, dest_stream)
class Output:
    """
    Store files found by search tool.

    Files are written into one directory, created on the first write, and
    default file names are numbered from "file-0001".
    """

    def __init__(self, directory):
        self.directory = directory
        # Becomes True once the directory creation has been attempted
        self.mkdir = False
        # Number used for the next generated file name
        self.file_id = 1

    def createDirectory(self):
        """Create the output directory; an existing directory is accepted."""
        try:
            mkdir(self.directory)
        except OSError as err:
            if err.errno != EEXIST:
                raise

    def createFilename(self, file_ext=None):
        """Return the next numbered file name, followed by file_ext if any."""
        name = "file-%04u" % self.file_id
        self.file_id += 1
        return name + file_ext if file_ext else name

    def _ensureDirectory(self):
        # Create directory (only on first call)
        if self.mkdir:
            return
        self.createDirectory()
        self.mkdir = True

    def writeFile(self, filename, stream, offset, size):
        """
        Copy size // 8 bytes of "stream", starting at "offset", into the
        file "filename" of the output directory. A stream error is reported
        with error() and not raised. Return the path of the written file.
        """
        self._ensureDirectory()

        # Create output file
        target = path.join(self.directory, filename)
        output = FileOutputStream(target)

        # Write output
        try:
            output.copyBytesFrom(stream, offset, size // 8)
        except StreamError as err:
            error("copyBytesFrom() error: %s" % err)
        return target
def getHachoirOptions(parser):
    """
    Build the "Hachoir library" option group (type optparse.OptionGroup)
    for the given option parser and return it.

    The group holds the boolean flags --verbose, --quiet and --debug
    (all false by default) and --log, which takes a file name and gives
    it to the Hachoir log as soon as the option is parsed.
    """
    def use_log_file(option, opt_str, value, option_parser):
        # optparse callback: "value" is the argument of --log
        log.setFilename(value)

    def add_flag(name, description):
        group.add_option(name, help=description,
                         default=False, action="store_true")

    group = OptionGroup(parser, "Hachoir library",
                        "Configure Hachoir library")
    add_flag("--verbose", "Verbose mode")
    group.add_option("--log", help="Write log in a file",
                     type="string", action="callback",
                     callback=use_log_file)
    add_flag("--quiet", "Quiet mode (don't display warning)")
    add_flag("--debug", "Debug mode")
    return group
def checksum(filename):
    """Return the SHA-1 hexadecimal digest of the start of a file."""
    # NOTE(review): a single read of at most 4096 bytes is hashed, so the
    # rest of a larger file does not change the result. The digests
    # expected by the test case below were presumably computed this way:
    # check them before hashing the whole file.
    with open(filename, 'rb') as stream:
        head = stream.read(4096)
    return hashlib.sha1(head).hexdigest()
class GenericVector(FieldSet):
    """
    Field set made of nb_items consecutive items of the same class.

    - nb_items: number of items, must be greater than zero
    - item_class: Field subclass with an integer static_size
    - item_name: name of an item without [] (each item is named
      item_name + "[]")

    The size of the field set is nb_items * item_class.static_size.
    """

    def __init__(self, parent, name, nb_items, item_class, item_name="item", description=None):
        # Sanity checks
        assert issubclass(item_class, Field)
        item_size = item_class.static_size
        assert isinstance(item_size, int)
        if not nb_items > 0:
            raise ParserError('Unable to create empty vector "%s" in %s'
                              % (name, parent.path))
        total_size = nb_items * item_size
        self.__nb_items = nb_items
        self._item_class = item_class
        self._item_name = item_name
        FieldSet.__init__(self, parent, name, description, size=total_size)

    def __len__(self):
        return self.__nb_items

    def createFields(self):
        item_factory = self._item_class
        field_name = self._item_name + "[]"
        for _ in range(len(self)):
            yield item_factory(self, field_name)
class FragmentGroup:
    """
    Ordered list of fragments which are read as one input stream.

    - items: the fragments, in the order they were added
    - parser: stored as the "class" tag of the created stream when it is
      not None
    - args: dictionary stored as the "args" tag of the created stream
    """

    def __init__(self, parser):
        self.items = []
        self.parser = parser
        self.args = {}

    def add(self, item):
        self.items.append(item)

    def createInputStream(self):
        # FIXME: Use lazy stream creation
        payload = b"".join(fragment["rawdata"].value
                           for fragment in self.items)

        stream_tags = {"args": self.args}
        if self.parser is not None:
            stream_tags["class"] = self.parser
        return StringInputStream(payload, "", tags=iter(stream_tags.items()))
class OLE2FragmentParser(HachoirParser, RootSeekableFieldSet):
    """
    Base parser for the fragments stored in a Microsoft Office document.

    A subclass which sets ENDIAN_CHECK to True must provide an "endian"
    field: its value selects the byte order and is checked by validate().
    """
    tags = {
        "description": "Microsoft Office document subfragments",
    }
    endian = LITTLE_ENDIAN

    ENDIAN_CHECK = False

    def __init__(self, stream, **args):
        RootSeekableFieldSet.__init__(
            self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)
        if self.ENDIAN_CHECK:
            if self["endian"].value == b"\xFF\xFE":
                self.endian = BIG_ENDIAN
            elif self["endian"].value == b"\xFE\xFF":
                self.endian = LITTLE_ENDIAN
            else:
                raise ParserError("OLE2: Invalid endian value")

    def validate(self):
        """
        Return True if the fragment looks valid, an error message otherwise.
        """
        if self.ENDIAN_CHECK:
            marker = self["endian"].value
            # The value is compared to bytes in __init__(): compare it to
            # bytes here too, a str literal would never match
            if marker not in (b"\xFF\xFE", b"\xFE\xFF"):
                # bytes has no encode() method: use hex() for the message
                return "Unknown endian value %s" % marker.hex()
        return True
class RGBA(RGB):
    """
    RGB color followed by an alpha component: four 8-bit fields
    (red, green, blue, alpha), 32 bits in total.
    """
    static_size = 32

    def createFields(self):
        components = (
            ("red", "Red"),
            ("green", "Green"),
            ("blue", "Blue"),
            ("alpha", "Alpha"),
        )
        for name, label in components:
            yield UInt8(self, name, label)

    def createDescription(self):
        color = RGB.createDescription(self)
        # Alpha value (0..255) as an integer percentage
        percent = self["alpha"].value * 100 // 255
        return "%s (opacity: %s%%)" % (color, percent)
29 | 30 | Case-sensitive substring search, displaying the string path:: 31 | 32 | $ hachoir-grep --path --case Car sheep_on_drugs.mp3 33 | 78:/id3v2/field[2]/content/text:Car music 34 | 35 | Other options: 36 | 37 | * ``--no-value``: Don't display the string value 38 | * ``--no-addr``: Don't display string address 39 | * ``--percent``: Display search percent (on stderr) 40 | * Get full option list using ``--help`` 41 | 42 | 43 | Comparison to grep and strings 44 | =============================== 45 | 46 | The differences between hachoir-grep and grep or strings are: 47 | 48 | * hachoir-grep doesn't parse non-string data 49 | * hachoir-grep cares about string charset (ISO-8859-1, UTF-8, UTF-16, etc.) 50 | but search is charset independent 51 | * hachoir-grep can display string address 52 | 53 | Drawbacks: 54 | 55 | * hachoir-grep is slow and might use a lot of memory 56 | * results depend on the quality of Hachoir parsers 57 | 58 | -------------------------------------------------------------------------------- /hachoir/parser/common/msdos.py: -------------------------------------------------------------------------------- 1 | """ 2 | MS-DOS structures.
class MSDOSFileAttr16(StaticFieldSet):
    """
    MSDOS 16-bit file attributes

    The value is a text made of the names of the attributes which are set.
    """
    format = _FIELDS + ((NullBits, "reserved[]", 1),)

    _text_keys = (
        # Sort attributes by importance
        "directory", "read_only", "compressed",
        "hidden", "system",
        "normal", "device",
        "temporary", "archive")

    def createValue(self):
        shown = []
        for key in self._text_keys:
            if not self[key].value:
                continue
            if len(shown) >= 4:
                # Four names are already listed: stop here
                shown.append("...")
                break
            shown.append(key)
        return ", ".join(shown) if shown else "(none)"
class FileEntry(FieldSet):
    """
    One archive member: a 56-byte NUL-truncated name, a 32-bit size,
    then the member data as a sub-file.
    """

    def createFields(self):
        yield String(self, "filename", 56, truncate="\0")
        yield filesizeHandler(UInt32(self, "size"))
        payload_size = self["size"].value
        member_name = self["filename"].value
        yield SubFile(self, "data", payload_size, filename=member_name)

    def createDescription(self):
        # The entry is described by the name of the file it stores.
        return self["filename"].value


class PRSPakFile(Parser):
    """
    Parallel Realities Starfighter .pak archive: a 'PACK' magic followed
    by file entries until the end of the stream.
    """

    PARSER_TAGS = {
        "id": "prs_pak",
        "category": "archive",
        "file_ext": ("pak",),
        "mime": (u"application/octet-stream",),
        "min_size": 4 * 8,  # just the identifier
        "magic": ((b'PACK', 0),),
        "description": "Parallel Realities Starfighter .pak archive",
    }

    endian = LITTLE_ENDIAN

    def validate(self):
        """
        Accept the stream when it starts with 'PACK' and the first entry
        has a non-negative size and a non-empty file name.
        """
        if self.stream.readBytes(0, 4) != b'PACK':
            return False
        if not (self["file[0]/size"].value >= 0):
            return False
        return len(self["file[0]/filename"].value) > 0

    def createFields(self):
        yield String(self, "magic", 4)

        # Everything after the magic is a sequence of file entries.
        while self.current_size < self._size:
            yield FileEntry(self, "file[]")
18 | In other words, Hachoir allows you to "browse" any binary stream just like you 19 | browse directories and files. 20 | 21 | A file is split into a tree of fields, where the smallest field is just one 22 | bit. Examples of field types: integers, strings, bits, padding types, floats, 23 | etc. Hachoir is the French word for a meat grinder (meat mincer), which is used 24 | by butchers to divide meat into long tubes; Hachoir is used by computer 25 | butchers to divide binary files into fields. 26 | 27 | * `Hachoir website <http://hachoir.readthedocs.io/>`_ (source code, bugs) 28 | * `Hachoir on GitHub (Source code, bug tracker) <https://github.com/vstinner/hachoir>`_ 29 | * License: GNU GPL v2 30 | 31 | Command line tools using Hachoir parsers: 32 | 33 | * hachoir-grep: find a text pattern in a binary file 34 | * hachoir-metadata: get metadata from binary files 35 | * hachoir-strip: modify a file to remove metadata 36 | * hachoir-urwid: display the content of a binary file in text mode 37 | 38 | Installation instructions: http://hachoir.readthedocs.io/en/latest/install.html 39 | 40 | Hachoir is written for Python 3.6 or newer.
#!/usr/bin/env python3
"""
Brute-force search for zlib streams inside an arbitrary binary file.

Every offset of the file is tried as the start of a zlib stream; for each
offset that decompresses, every possible length is then tried as well.
"""
from zlib import decompress, error as zlib_error
from sys import argv, stderr, exit
from time import time

# Offsets closer than this many bytes (plus one) to the end are not tried.
MIN_SIZE = 2


def canDeflate(compressed_data):
    """Return True if zlib.decompress() accepts compressed_data."""
    try:
        # Only success or failure matters; the decompressed bytes are dropped.
        decompress(compressed_data)
    except zlib_error:
        return False
    return True


def findDeflateBlocks(data):
    """
    Yield each offset of data from which the remaining bytes decompress.

    A progress line is printed on stdout at most once per second.
    """
    next_msg = time() + 1.0
    max_index = len(data) - MIN_SIZE - 1
    for index in range(max_index + 1):
        if next_msg < time():
            next_msg = time() + 1.0
            # max_index is 0 for a 3-byte input: don't divide by zero.
            percent = index * 100.0 / max_index if max_index else 100.0
            print("Progress: %.1f%% (offset %s/%s)" % (
                percent, index, max_index))
        if canDeflate(data[index:]):
            yield index


def guessDeflateSize(data, offset):
    """
    Yield, longest first, each size for which data[offset:offset + size]
    decompresses.
    """
    size = len(data) - offset
    while size:
        if canDeflate(data[offset:offset + size]):
            yield size
        size -= 1


def main():
    """Scan the file named on the command line and print what was found."""
    if len(argv) != 2:
        print("usage: %s filename" % argv[0], file=stderr)
        exit(1)
    # Close the input file as soon as it has been read.
    with open(argv[1], 'rb') as input_file:
        data = input_file.read()
    offsets = []
    for offset in findDeflateBlocks(data):
        print("Offset %s" % offset)
        offsets.append(offset)
    if offsets:
        for offset in offsets:
            for size in guessDeflateSize(data, offset):
                if size == (len(data) - offset):
                    size = "%s (until the end)" % size
                print("Offset %s -- size %s" % (offset, size))
    else:
        print("No deflate block found", file=stderr)
    exit(0)


if __name__ == "__main__":
    main()
class field_menu_imp_t:
    """
    Handlers behind the field context menu.

    Menu events are turned into dispatcher triggers; the field set, the
    menu view, the file and the selected field are remembered as they are
    announced.

    NOTE(review): self.dispatcher is never assigned in this class;
    presumably it is injected when the object is registered with the
    dispatcher -- confirm.
    """

    def on_field_set_ready(self, dispatcher, fields):
        """Store the new field set and forget the previous selection."""
        assert fields is not None
        self.selected = None
        self.fields = fields

    def on_field_menu_ready(self, dispatcher, view):
        """Store the menu view used to show the menu and ask for paths."""
        assert view is not None
        self.view = view

    def on_field_show_ops(self, dispatcher, field):
        """Pop up the menu of operations."""
        self.view.show_opts()

    def on_addr_rel(self, event):
        """Request relative addresses."""
        self.dispatcher.trigger('address_relative')

    def on_addr_abs(self, event):
        """Request absolute addresses."""
        self.dispatcher.trigger('address_absolute')

    def on_addr_hex(self, event):
        """Request hexadecimal addresses."""
        self.dispatcher.trigger('address_hexadecimal')

    def on_addr_dec(self, event):
        """Request decimal addresses."""
        self.dispatcher.trigger('address_decimal')

    def on_split_bits(self):
        """Request a bit-level split."""
        self.dispatcher.trigger('field_split_bits')

    def on_split_bytes(self):
        """Request a byte-level split."""
        self.dispatcher.trigger('field_split_bytes')

    def on_field_selected(self, dispatcher, field):
        """Remember which field is currently selected."""
        self.selected = field

    def on_file_ready(self, dispatcher, file):
        """Remember the opened file."""
        self.file = file

    def on_parse_substream(self, dispatcher):
        """Request parsing of the selected field as a sub-stream."""
        self.dispatcher.trigger('field_parse_substream', self.selected)

    def on_open_window_here(self, dispatcher):
        """Request a new window for the selected field."""
        self.dispatcher.trigger('field_open_window_here', self.selected)

    def on_dump_to_disk(self, event):
        """Ask for a destination path and save the selected field there."""
        field = self.selected
        title = 'Dump "' + field.path + '" To Disk...'
        dump_path = self.view.ask_for_dump_file(title)
        if dump_path is None:
            # The view returned no path: nothing to save.
            return
        save_substream_to_disk(field, dump_path)
Don't use it to create smaller files; you 9 | are better off recompressing your data :-) hachoir-strip can be used if you would 10 | like to remove spy information which can be used to know the origin of a file. 11 | 12 | Examples 13 | ======== 14 | 15 | Our victim:: 16 | 17 | $ hachoir-metadata KDE_Click.wav 18 | Common: 19 | - Creation date: 2001-02-21 <== here they are 20 | - Producer: Sound Forge 4.5 <== spy information :-) 21 | - MIME type: audio/x-wav 22 | - Endian: Little endian 23 | Audio: 24 | - Duration: 39 ms 25 | ... 26 | 27 | Clean up the file:: 28 | 29 | $ hachoir-strip KDE_Click.wav 30 | [+] Process file KDE_Click.wav 31 | Remove field /info 32 | Remove 56 bytes (3.1%) 33 | Save new file into KDE_Click.wav.new 34 | 35 | $ hachoir-metadata KDE_Click.wav.new 36 | Common: 37 | - MIME type: audio/x-wav 38 | - Endian: Little endian 39 | Audio: 40 | - Duration: 39 ms 41 | ... 42 | 43 | So hachoir-strip removed creation date (2001-02-21) and producer (software 44 | used to record/edit the sound: Sound Forge 4.5). The file is also 56 bytes 45 | smaller. 46 | 47 | 48 | Options 49 | ======= 50 | 51 | You can select field types to remove using --strip: 52 | 53 | * (default): remove all useless fields 54 | * ``--strip=useless``: remove really useless fields (e.g. padding) 55 | * ``--strip=metadata``: remove metadata like ID3 tags and EXIF and IPTC metadata 56 | * ``--strip=index``: remove video index 57 | 58 | You can combine options with a comma: ``--strip="useless,metadata"``.
class field_view_t(ListCtrl, ListCtrlAutoWidthMixin):
    """
    List control showing one row per field.

    Columns are created in post_init(), which is deferred until the
    window-create event has fired.
    """

    def __init__(self):
        # Maps a column name to its index in the control.
        self.cols = {}

        ListCtrl.__init__(self)
        self.Bind(EVT_WINDOW_CREATE, self.on_create)

    def post_init(self):
        """Create the columns and announce that the view is ready."""
        ListCtrlAutoWidthMixin.__init__(self)

        headings = ('address', 'name', 'type', 'size', 'data', 'description')
        for heading in headings:
            self.append_column(heading)
        # A column is never made narrower than its own heading.
        self.col_min_width = list(map(len, headings))

        self.Layout()
        self.dispatcher.trigger('field_view_ready', self)

    def on_create(self, event):
        """Run post_init() once, after the create event has been handled."""
        self.Unbind(EVT_WINDOW_CREATE)
        CallAfter(self.post_init)

    def append_column(self, name):
        """Add a column after the existing ones and record its index."""
        position = self.GetColumnCount()
        self.cols[name] = position
        self.InsertColumn(col=position, heading=name)

    def get_selected(self, name):
        """
        Return the text of the 'name' column of the focused row.

        The name argument is not used by the lookup.
        """
        focused_row = self.GetFocusedItem()
        name_column = self.cols['name']
        return self.GetItem(focused_row, name_column).GetText()

    def clear(self):
        """Remove every row."""
        self.DeleteAllItems()

    def register_callback(self, cbGetItemText):
        """Install the callable that OnGetItemText() delegates to."""
        self.OnGetItemText_imp = cbGetItemText

    def OnGetItemText(self, item, col):
        return self.OnGetItemText_imp(item, col)

    def get_col_index(self, name):
        """Return the index of the column called name."""
        return self.cols[name]

    def get_col_count(self):
        """Return the number of registered columns."""
        return len(self.cols)

    def resize_column(self, col_index, width):
        """
        Set a column width, given in characters.

        The width is raised to the heading length if needed, and one
        extra character is added.
        """
        chars = max(self.col_min_width[col_index], width) + 1
        self.SetColumnWidth(col_index, self.GetCharWidth() * chars)
3 | 4 | 5 | 6 | 0 7 | 0 8 | 441 9 | 412 10 | 11 | 12 | 13 | hachoir-metadata 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | Open 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 0 30 | 0 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | true 41 | 42 | 43 | false 44 | 45 | 46 | 0 47 | 48 | 49 | 0 50 | 51 | 52 | 53 | 54 | 55 | 56 | Quit 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /hachoir/field/parser.py: -------------------------------------------------------------------------------- 1 | from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN 2 | from hachoir.field import GenericFieldSet 3 | from hachoir.core.log import Logger 4 | import hachoir.core.config as config 5 | 6 | 7 | class Parser(GenericFieldSet): 8 | """ 9 | A parser is the root of all other fields. It create first level of fields 10 | and have special attributes and methods: 11 | - endian: Byte order (L{BIG_ENDIAN}, L{LITTLE_ENDIAN} or L{MIDDLE_ENDIAN}) of input data ; 12 | - stream: Data input stream (set in L{__init__()}) ; 13 | - size: Field set size will be size of input stream. 
def FileInputStream(filename, real_filename=None, **args):
    """
    Create an input stream of a file.

    filename is either a path string, which is opened in binary mode, or
    an already-open file object, which is used as-is.

    real_filename is optional. When given and filename is a string, it is
    the path actually opened, while filename is still the name used in
    the stream source and in error messages.

    The optional keyword arguments offset and size (multiplied by 8
    before use) make the function return an InputSubStream over the file
    instead of the whole-file InputIOStream. Remaining keyword arguments
    are passed to the stream constructors.

    Raises InputStreamError when the file cannot be opened.
    """
    is_path = isinstance(filename, str)
    if not real_filename:
        real_filename = filename if is_path else getattr(filename, 'name', '')

    if is_path:
        try:
            inputio = open(real_filename, 'rb')
        except IOError as err:
            errmsg = str(err)
            raise InputStreamError(
                "Unable to open file %s: %s" % (filename, errmsg))
    else:
        inputio = filename
        filename = getattr(filename, 'name', '')

    source = "file:" + filename
    offset = args.pop("offset", 0)
    size = args.pop("size", None)

    if not (offset or size):
        # Whole file: record the file name as a stream tag.
        args.setdefault("tags", []).append(("filename", filename))
        return InputIOStream(inputio, source=source, **args)

    if size:
        size = 8 * size
    stream = InputIOStream(inputio, source=source, **args)
    return InputSubStream(stream, 8 * offset, size, **args)


def guessStreamCharset(stream, address, size, default=None):
    """
    Guess the charset of the data at address in stream.

    size is capped at 1024 * 8 and divided by 8 to get the number of
    bytes read; the guess is delegated to guessBytesCharset().
    """
    capped_size = min(size, 1024 * 8)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)
11 | 12 | * `Hachoir website <http://hachoir.readthedocs.io/>`_ (this site) 13 | * `Hachoir at GitHub <https://github.com/vstinner/hachoir>`_ (source code, bugs) 14 | 15 | Command line tools using Hachoir parsers: 16 | 17 | * :ref:`hachoir-metadata <metadata>`: get metadata from binary files 18 | * :ref:`hachoir-urwid <urwid>`: display the content of a binary file in text mode 19 | * :ref:`hachoir-wx <wx>`: display the content of a binary file in GUI mode 20 | * :ref:`hachoir-grep <grep>`: find a text pattern in a binary file 21 | * :ref:`hachoir-strip <strip>`: modify a file to remove metadata 22 | * :ref:`hachoir-list <list>`: print decoded content of a binary file 23 | 24 | .. image:: images/urwid.png 25 | :alt: hachoir-urwid screenshot (MP3 audio file) 26 | 27 | 28 | User Guide 29 | ========== 30 | 31 | .. toctree:: 32 | :maxdepth: 1 33 | 34 | install 35 | metadata 36 | urwid 37 | wx 38 | subfile 39 | grep 40 | strip 41 | list 42 | 43 | 44 | Developer Guide 45 | =============== 46 | 47 | .. toctree:: 48 | :maxdepth: 1 49 | 50 | developer 51 | internals 52 | parser 53 | regex 54 | editor 55 | 56 | Other pages 57 | ============ 58 | 59 | ..
class Filter:
    """
    Range check applied only to values of selected types.

    Calling the filter returns False when the value is of one of the
    valid types and lies outside [min, max]; it returns True otherwise,
    including for values of any other type.
    """

    def __init__(self, valid_types, min=None, max=None):
        self.types = valid_types
        self.min = min
        self.max = max

    def __call__(self, value):
        # Values of other types are not this filter's business.
        if not isinstance(value, self.types):
            return True
        lower, upper = self.min, self.max
        if lower is not None and value < lower:
            return False
        # A missing bound means the range is open on that side.
        return upper is None or not (upper < value)


class NumberFilter(Filter):
    """Filter restricted to int and float values."""

    def __init__(self, min=None, max=None):
        super().__init__((int, float), min, max)
def setup_field_menu(parent, dispatcher):
    """
    Build the field context menu and register its handlers.

    The handler object and its forwarder are added to dispatcher, the
    two sub-menus are set up, and 'field_menu_ready' is triggered with
    the new menu.

    Returns the field_menu_t instance.
    """
    menu = get_menu('field_menu')
    field_menu = field_menu_t(parent, menu)

    handlers = field_menu_imp_t()
    dispatcher.add(handlers)
    dispatcher.add_receiver(field_menu_fwd_t(handlers))

    # Sub-menus hanging off the same menu resource.
    setup_core_type_menu(menu, dispatcher)
    setup_field_split_menu(parent, menu, dispatcher)

    dispatcher.trigger('field_menu_ready', field_menu)
    return field_menu
class ArchiveFileEntry(FieldSet):
    """
    One archive member: a header line followed by the member data.

    The header is split on whitespace; the member size is read from its
    next-to-last token.
    """

    def createFields(self):
        """
        Yield the header line, then the content when the size is positive.

        Raises ParserError when the header has fewer than three tokens
        or when the size token is not an integer.
        """
        yield UnixLine(self, "header", "Header")
        info = self["header"].value.split()
        if len(info) < 3:
            raise ParserError("Invalid file entry header: %r" % info)
        try:
            size = int(info[-2])
        except ValueError:
            # Report a corrupt size like any other malformed header
            # instead of leaking a bare ValueError from int().
            raise ParserError("Invalid file entry size: %r" % info[-2])
        if 0 < size:
            yield RawBytes(self, "content", size, "File data")

    def createDescription(self):
        # The first header token is shown in the description.
        return "File entry (%s)" % self["header"].value.split()[0]
class RawBits(Field):
    """
    Unknown content with a size in bits.
    """
    # Called with the constructor arguments that follow the parent:
    # (name, size, ...), so args[1] is the size.
    static_size = staticmethod(lambda *args, **kw: args[1])

    def __init__(self, parent, name, size, description=None):
        """
        Constructor: see L{Field.__init__} for parameter description
        """
        Field.__init__(self, parent, name, size, description)

    def hasValue(self):
        return True

    def createValue(self):
        """Read the field's bits from the parent stream."""
        owner = self._parent
        return owner.stream.readBits(
            self.absolute_address, self._size, owner.endian)

    def createDisplay(self):
        """
        Return the value as text, or a "<Class size=N>" placeholder when
        the field is config.max_bit_length bits or larger.
        """
        if config.max_bit_length <= self._size:
            return ("<%s size=%u>" %
                    (self.__class__.__name__, self._size))
        return str(self.value)
    createRawDisplay = createDisplay


class Bits(RawBits):
    """
    Positive integer with a size in bits

    @see: L{Bit}
    @see: L{RawBits}
    """
    pass


class Bit(RawBits):
    """
    Single bit: value can be False or True, and size is exactly one bit.

    @see: L{Bits}
    """
    static_size = 1

    def __init__(self, parent, name, description=None):
        """
        Constructor: see L{Field.__init__} for parameter description
        """
        RawBits.__init__(self, parent, name, 1, description=description)

    def createValue(self):
        """Read the bit and return it as a boolean."""
        owner = self._parent
        raw = owner.stream.readBits(self.absolute_address, 1, owner.endian)
        return 1 == raw

    def createRawDisplay(self):
        # Raw display is "0" or "1".
        return str(int(self.value))
class StaticFieldSet(FieldSet):
    """
    Static field set: format class attribute is a tuple of all fields
    in syntax like:
       format = (
          (TYPE1, ARG1, ARG2, ...),
          (TYPE2, ARG1, ARG2, ..., {KEY1=VALUE1, ...}),
          ...
       )

    Types with dynamic size are forbidden, eg. CString, PascalString8, etc.
    """
    format = None  # You have to redefine this class variable
    _class = None  # Name of the class static_size was last computed for

    def __new__(cls, *args, **kw):
        """
        Compute cls.static_size the first time a class is instantiated.

        _class records the name of the class the size was computed for,
        so a subclass that inherits it recomputes its own size.
        """
        assert cls.format is not None, "Class attribute 'format' is not set"
        # Compare names by value: whether two equal strings are the same
        # object is an implementation detail.
        if cls._class != cls.__name__:
            cls._class = cls.__name__
            cls.static_size = cls._computeStaticSize()
        return object.__new__(cls)

    @staticmethod
    def _computeItemSize(item):
        """
        Return the size of one format item.

        item[0] is the field class. Its static_size is either an int or
        a callable; a callable is given the remaining item values, with
        a trailing dict passed as keyword arguments.

        Raises ParserError when the field class has no static size.
        """
        item_class = item[0]
        if item_class.static_size is None:
            raise ParserError("Unable to get static size of field type: %s"
                              % item_class.__name__)
        if callable(item_class.static_size):
            if isinstance(item[-1], dict):
                return item_class.static_size(*item[1:-1], **item[-1])
            else:
                return item_class.static_size(*item[1:])
        else:
            assert isinstance(item_class.static_size, int)
            return item_class.static_size

    def createFields(self):
        """Instantiate each format item in order, with self as parent."""
        for item in self.format:
            if isinstance(item[-1], dict):
                yield item[0](self, *item[1:-1], **item[-1])
            else:
                yield item[0](self, *item[1:])

    @classmethod
    def _computeStaticSize(cls, *args):
        """Return the sum of the sizes of all format items."""
        return sum(cls._computeItemSize(item) for item in cls.format)

    # Initial value of static_size, it changes when first instance
    # is created (see __new__)
    static_size = _computeStaticSize
With direction keys, you can navigate in the field tree. The key 10 | 'h' will disable 'human display' and switch to 'raw display'. It's sometimes 11 | useful when you would like to compare hexadecimal data and Hachoir 12 | representation. 13 | 14 | **hachoir-urwid** is the most user-friendly interface based on Hachoir to 15 | explore a binary file. 16 | 17 | Before use, make sure to install the required dependencies with ``pip install 18 | hachoir[urwid]``. 19 | 20 | .. image:: images/urwid.png 21 | :alt: hachoir-urwid screenshot (MP3 audio file) 22 | 23 | Command line options 24 | ==================== 25 | 26 | * ``--preload=10``: Load 10 fields when loading a new field set 27 | * ``--path="/header/bpp"``: Open the specified path and focus on the field 28 | * ``--parser=PARSERID``: Force a parser (and skip parser validation) 29 | 30 | 31 | Useful keys 32 | ============ 33 | 34 | Move: 35 | 36 | * up/down: move up/down 37 | * home: go to parent 38 | * end: go to the last field of a field set 39 | * left/right: horizontal scrolling 40 | 41 | Setup display: 42 | 43 | * h: most important option :-) switch between human display (default) and raw value 44 | * v / d / s: show or hide field value / description / size 45 | * a: switch between relative (default) and absolute address 46 | * b: switch between address in decimal (default) and hexadecimal 47 | 48 | Interaction: 49 | 50 | * enter: on a field set, expand/collapse the children 51 | * space: parse a file/stream contained in the current field 52 | 53 | Application: 54 | 55 | * q: quit 56 | * < / >: previous / next tab 57 | * + / -: move separator vertically 58 | * esc or CTRL+W: close current tab 59 | * F1: display help 60 | 61 | 62 | Help 63 | ==== 64 | 65 | Command line options: use --help option. 66 | 67 | In hachoir-urwid, use F1 key to get help (keyboard keys).
class CR2Metadata(RootMetadata):
    """
    Metadata extractor for Canon CR2 images.

    Reads the entries of "ifd[0]" and a few fixed value slots of
    "ifd[0]" and "exif[0]".
    """
    # IFD tag display name -> metadata attribute it is stored in.
    key_to_attr = {
        "ImageWidth": "width",
        "ImageLength": "height",
        "ImageDescription": "comment",
        "DocumentName": "title",
        "XResolution": "width_dpi",
        "YResolution": "height_dpi",
    }

    def extract(self, tiff):
        """Fill the metadata from the parsed file."""
        if "ifd[0]" in tiff:
            ifd = tiff["ifd[0]"]
            self.useIFD(ifd)
            self.camera_manufacturer = ifd["value[4]"].value
            self.camera_model = ifd["value[5]"].value

        if "exif[0]" in tiff:
            exif = tiff["exif[0]"]
            self.date_time_original = exif["value[7]"].value
            self.date_time_digitized = exif["value[8]"].value

    def useIFD(self, ifd):
        """
        Process every entry of ifd, then set bits_per_pixel when both
        BitsPerSample and SamplesPerPixel were found.
        """
        collected = {}
        for entry in ifd.array("entry"):
            self.processIfdEntry(ifd, entry, collected)
        if {'BitsPerSample', 'SamplesPerPixel'} <= collected.keys():
            self.bits_per_pixel = (
                collected['BitsPerSample'] * collected['SamplesPerPixel'])

    @fault_tolerant
    def processIfdEntry(self, ifd, entry, attr):
        """
        Handle one IFD entry.

        BitsPerSample and SamplesPerPixel are stored in attr; tags listed
        in key_to_attr are set on self (resolutions are rounded); other
        tags are ignored.
        """
        tag = entry["tag"].display
        if tag in {"BitsPerSample", "SamplesPerPixel"}:
            attr[tag] = ifd.getEntryValues(entry)[0].value
            return

        attrname = self.key_to_attr.get(tag)
        if attrname is None:
            # Tag without a metadata attribute.
            return
        value = ifd.getEntryValues(entry)[0].value
        if tag in {"XResolution", "YResolution"}:
            value = round(value)
        setattr(self, attrname, value)
def hexadecimal(field):
    """
    Format the integer value of a field as lower-case hexadecimal.

    The number is zero-padded to the count of hexadecimal digits needed to
    display field.size bits (the size is rounded up to a multiple of 4).

    >>> hexadecimal(type("", (), dict(value=412, size=16)))
    '0x019c'
    >>> hexadecimal(type("", (), dict(value=0, size=32)))
    '0x00000000'
    """
    assert hasattr(field, "value") and hasattr(field, "size")
    # One hexadecimal digit displays 4 bits.
    digits = alignValue(field.size, 4) // 4
    template = "0x%%0%ux" % digits
    return template % field.value
26 | # TODO: Example ("bmp",) to parse the file "image.bmp" 27 | "file_ext": ("TODO",), 28 | "mime": ("TODO",), # TODO: Example: "image/png" 29 | # TODO: Minimum file size (x bits, or x*8 in bytes) 30 | "min_size": 0, 31 | "description": "TODO", # TODO: Example: "A bitmap picture" 32 | } 33 | 34 | # TODO: Choose between little or big endian 35 | # endian = LITTLE_ENDIAN 36 | # endian = BIG_ENDIAN 37 | 38 | def validate(self): 39 | # TODO: Check that file looks like your format 40 | # Example: check first two bytes 41 | # return (self.stream.readBytes(0, 2) == b'BM') 42 | return False 43 | 44 | def createFields(self): 45 | # TODO: Write your parser using this model: 46 | # yield UInt8(self, "name1", "description1") 47 | # yield UInt16(self, "name2", "description2") 48 | # yield UInt32(self, "name3", "description3") 49 | # yield String(self, "name4", 1, "description4") # TODO: add ", charset="ASCII")" 50 | # yield String(self, "name5", 1, "description5", charset="ASCII") 51 | # yield String(self, "name6", 1, "description6", charset="ISO-8859-1") 52 | 53 | # Read rest of the file (if any) 54 | # TODO: You may remove this code 55 | if self.current_size < self._size: 56 | yield self.seekBit(self._size, "end") 57 | -------------------------------------------------------------------------------- /hachoir/editor/field.py: -------------------------------------------------------------------------------- 1 | from hachoir.field import joinPath, MissingField 2 | 3 | 4 | class EditorError(Exception): 5 | pass 6 | 7 | 8 | class FakeField(object): 9 | """ 10 | This class have API looks similar to Field API, but objects don't contain 11 | any value: all values are _computed_ by parent methods. 12 | 13 | Example: FakeField(editor, "abc").size calls editor._getFieldSize("abc"). 
14 | """ 15 | is_field_set = False 16 | 17 | def __init__(self, parent, name): 18 | self._parent = parent 19 | self._name = name 20 | 21 | def _getPath(self): 22 | return joinPath(self._parent.path, self._name) 23 | path = property(_getPath) 24 | 25 | def _getName(self): 26 | return self._name 27 | name = property(_getName) 28 | 29 | def _getAddress(self): 30 | return self._parent._getFieldAddress(self._name) 31 | address = property(_getAddress) 32 | 33 | def _getSize(self): 34 | return self._parent.input[self._name].size 35 | size = property(_getSize) 36 | 37 | def _getValue(self): 38 | return self._parent.input[self._name].value 39 | value = property(_getValue) 40 | 41 | def createDisplay(self): 42 | # TODO: Returns new value if field is altered 43 | return self._parent.input[self._name].display 44 | display = property(createDisplay) 45 | 46 | def _getParent(self): 47 | return self._parent 48 | parent = property(_getParent) 49 | 50 | def hasValue(self): 51 | return self._parent.input[self._name].hasValue() 52 | 53 | def __getitem__(self, key): 54 | # TODO: Implement this function! 
class FileCache(object):
    """
    Chunk-level read cache in front of a seekable binary file object.

    The file is read in blocks of CHUNKSIZE bytes and the most recently used
    blocks are remembered, so repeated reads of nearby ranges do not hit the
    file again. The file position seen by other users of the file object is
    restored after every read.
    """
    # Size in bytes of one cached block; may be overridden per instance.
    CHUNKSIZE = 4096
    # Maximum number of blocks remembered by one FileCache instance.
    CACHE_CHUNKS = 100

    def __init__(self, file):
        self.file = file
        # The cache is created per instance: decorating the method itself
        # with lru_cache would give a single cache shared by all instances
        # that also keeps every instance (and its file object) alive.
        self._cached_read = lru_cache(maxsize=self.CACHE_CHUNKS)(
            self._read_chunk)

        self.update_file_size()

    def update_file_size(self):
        """Measure the file and store its size in bytes in self.filesize."""
        pos = self.file.tell()
        self.file.seek(0, 2)  # whence=2: seek relative to the end of file
        self.filesize = self.file.tell()
        self.file.seek(pos)

    def _read_chunk(self, cstart, size):
        """Read *size* bytes at offset *cstart*, preserving the position."""
        pos = self.file.tell()
        self.file.seek(cstart)
        try:
            return self.file.read(size)
        finally:
            self.file.seek(pos)

    def get_chunk(self, cstart):
        """
        Return the block starting at byte offset *cstart*.

        The block may be shorter than CHUNKSIZE (or empty) at the end of the
        file. The chunk size is part of the cache key, so changing CHUNKSIZE
        on an instance never returns blocks cached with the previous size.
        """
        return self._cached_read(cstart, self.CHUNKSIZE)

    def hint(self, s, e):
        '''Hint that the range [s, e) may be needed soon'''
        size = self.CHUNKSIZE
        for c in range(s // size, (e + size - 1) // size):
            self.get_chunk(c * size)

    def get(self, s, e):
        '''Obtain the file contents in the range [s, e)'''
        if e <= s:
            # Empty (or inverted) range: nothing to read.
            return b''
        size = self.CHUNKSIZE
        soff = s % size
        eoff = e % size
        sc = s // size
        ec = (e + size - 1) // size

        out = [self.get_chunk(c * size) for c in range(sc, ec)]

        # Trim the tail before the head: when the range lies inside a single
        # block both cuts apply to the same element.
        if eoff:
            out[-1] = out[-1][:eoff]
        if soff:
            out[0] = out[0][soff:]
        return b''.join(out)
instr = bytes(range(256)) 57 | sf = BytesIO(instr) 58 | fc = FileCache(sf) 59 | fc.CHUNKSIZE = blocksize 60 | 61 | import random 62 | random.seed(1) 63 | for iter in range(256): 64 | s = random.randrange(0, fc.filesize + 10) 65 | e = random.randrange(s, fc.filesize + 10) 66 | print("testing", s, e) 67 | got = fc.get(s, e) 68 | expected = instr[s:e] 69 | assert got == expected, "Failed to get %d, %d: got %r, expected %r" % (s, e, got, expected) 70 | 71 | 72 | if __name__ == '__main__': 73 | test() 74 | -------------------------------------------------------------------------------- /hachoir/field/helper.py: -------------------------------------------------------------------------------- 1 | from hachoir.field import (FieldError, 2 | RawBits, RawBytes, 3 | PaddingBits, PaddingBytes, 4 | NullBits, NullBytes, 5 | GenericString, GenericInteger) 6 | from hachoir.stream import FileOutputStream 7 | 8 | 9 | def createRawField(parent, size, name="raw[]", description=None): 10 | if size <= 0: 11 | raise FieldError("Unable to create raw field of %s bits" % size) 12 | if (size % 8) == 0: 13 | return RawBytes(parent, name, size // 8, description) 14 | else: 15 | return RawBits(parent, name, size, description) 16 | 17 | 18 | def createPaddingField(parent, nbits, name="padding[]", description=None): 19 | if nbits <= 0: 20 | raise FieldError("Unable to create padding of %s bits" % nbits) 21 | if (nbits % 8) == 0: 22 | return PaddingBytes(parent, name, nbits // 8, description) 23 | else: 24 | return PaddingBits(parent, name, nbits, description) 25 | 26 | 27 | def createNullField(parent, nbits, name="padding[]", description=None): 28 | if nbits <= 0: 29 | raise FieldError("Unable to create null padding of %s bits" % nbits) 30 | if (nbits % 8) == 0: 31 | return NullBytes(parent, name, nbits // 8, description) 32 | else: 33 | return NullBits(parent, name, nbits, description) 34 | 35 | 36 | def isString(field): 37 | return issubclass(field.__class__, GenericString) 38 | 39 | 40 | def 
isInteger(field): 41 | return issubclass(field.__class__, GenericInteger) 42 | 43 | 44 | def writeIntoFile(fieldset, filename): 45 | output = FileOutputStream(filename) 46 | with output: 47 | fieldset.writeInto(output) 48 | 49 | 50 | def createOrphanField(fieldset, address, field_cls, *args, **kw): 51 | """ 52 | Create an orphan field at specified address: 53 | field_cls(fieldset, *args, **kw) 54 | 55 | The field uses the fieldset properties but it isn't added to the 56 | field set. 57 | """ 58 | save_size = fieldset._current_size 59 | try: 60 | fieldset._current_size = address 61 | field = field_cls(fieldset, *args, **kw) 62 | finally: 63 | fieldset._current_size = save_size 64 | return field 65 | -------------------------------------------------------------------------------- /tests/test_grep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from hachoir.grep import Grep 3 | from hachoir.parser import createParser 4 | from hachoir.test import setup_tests 5 | import os.path 6 | import subprocess 7 | import sys 8 | import unittest 9 | 10 | DATADIR = os.path.join(os.path.dirname(__file__), "files") 11 | GEORGIA_CAB = os.path.join(DATADIR, 'georgia.cab') 12 | PROGRAM = os.path.join(os.path.dirname(__file__), "..", "hachoir-grep") 13 | 14 | 15 | class TestGrepClass(unittest.TestCase): 16 | 17 | def test_grep(self): 18 | fields = [] 19 | 20 | class TestGrep(Grep): 21 | 22 | def onMatch(self, field): 23 | fields.append(field) 24 | 25 | parser = createParser(GEORGIA_CAB) 26 | with parser: 27 | grep = TestGrep() 28 | grep.grep(parser) 29 | fields = [(field.absolute_address, field.path, field.value) 30 | for field in fields] 31 | self.assertEqual(fields, 32 | [(0, '/magic', 'MSCF'), 33 | (480, '/file[0]/filename', 'fontinst.inf'), 34 | (712, '/file[1]/filename', 'Georgiaz.TTF'), 35 | (944, '/file[2]/filename', 'Georgiab.TTF'), 36 | (1176, '/file[3]/filename', 'Georgiai.TTF'), 37 | (1408, '/file[4]/filename', 
class Entropy:
    """
    Accumulate a histogram of byte values and compute the Shannon entropy
    (in bits per byte) of everything read so far.
    """

    def __init__(self):
        # One counter per possible byte value, in increasing byte order.
        self.frequence = dict.fromkeys(range(256), 0)
        self.count = 0

    def readBytes(self, bytes):
        """Add the content of a bytes object to the histogram; return self."""
        histogram = self.frequence
        for octet in bytes:
            histogram[octet] += 1
        self.count += len(bytes)
        return self

    def compute(self):
        """Return the entropy of the data read so far (0 if nothing read)."""
        total = self.count
        entropy = 0
        for occurrences in self.frequence.values():
            if occurrences:
                probability = float(occurrences) / total
                entropy -= probability * log(probability, 2)
        return entropy
streamsize = stream.tell() 45 | if streamsize <= 0: 46 | raise ValueError("Empty stream") 47 | 48 | # Read stream content 49 | stream.seek(0, 0) 50 | next_msg = time() + self.progress_time 51 | while True: 52 | if next_msg <= time(): 53 | self.displayProgress(stream.tell() * 100.0 / streamsize) 54 | next_msg = time() + self.progress_time 55 | raw = stream.read(self.buffer_size) 56 | if not raw: 57 | break 58 | self.readBytes(raw) 59 | return self 60 | 61 | def readFile(self, filename): 62 | stream = open(filename, 'rb') 63 | self.readStream(stream) 64 | return self 65 | 66 | 67 | def main(): 68 | from sys import argv, exit 69 | if len(argv) != 2: 70 | print("usage: %s filename" % argv[0], file=stderr) 71 | exit(1) 72 | entropy = EntropyFile() 73 | entropy.readFile(argv[1]) 74 | print("Entropy: %.4f bit/byte" % entropy.compute()) 75 | exit(0) 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /doc/authors.rst: -------------------------------------------------------------------------------- 1 | +++++++++++++++ 2 | Hachoir Authors 3 | +++++++++++++++ 4 | 5 | Team: 6 | 7 | * Julien Muchembled 8 | * Victor Stinner 9 | * Robert Xiao aka nneonneo - improve SeekableFieldSet 10 | 11 | Packagers: 12 | 13 | * Arnaud Pithon aka bildoon - ArchLinux package (v0.5.2 and svn) 14 | * Emmanuel Garette aka GnunuX - ArchLinux package (v0.5.2) 15 | * Michael Scherer aka misc - Mandriva package (v0.5.2) 16 | * Michel Casabona aka plumbear - Debian package (v0.5.2) 17 | * Richard Demongeot - Debian package (v0.5.2) 18 | * Thomas de Grenier de Latour TGL - Gentoo ebuild 19 | 20 | Contributors 21 | 22 | * Alexandre Boeglin - PIFV parser 23 | * Aurélien Jacobs - AVI parser big contributor 24 | * Christophe Fergeau - Improve iTunesDB parser 25 | * Christophe Gisquet - Write RAR parser 26 | * Kirill Zorin - Author of hachoir-wx, 3DO and game parsers 27 | * Elie Roudninski aka adema - Started Gtk GUI 28 | * 
Feth Arezki - Fix hachoir-metadata-qt to save the current directory 29 | * Frédéric Weisbecker - Author of ReiserFS parser 30 | * Gottfried Ganßauge - Fix Win32 curses issues 31 | * Jason Gorski - Author of zsnes parser 32 | * Jean-Marc Libs - KDE plugin 33 | * Mickaël Kenikssi - Write CDDA parser 34 | * Mike Melanson - Write RealMedia and SpiderMan video parser 35 | * Olivier Schwab - Write 7-zip parser 36 | * Pierre Thierry - KDE plugin 37 | * Robert Xiao aka nneonneo - improve LNK parser 38 | * Romain Hérault - Author of iTunesDB parser 39 | * Sebastien Ponce - ActionScript (for SWF) parser 40 | * Thomas Pabst - Gnome plugin 41 | * Thomas de Grenier de Latour aka TGL - Java class parser 42 | -------------------------------------------------------------------------------- /tests/regex_regression.rst: -------------------------------------------------------------------------------- 1 | Regex regression (repeat) 2 | ========================= 3 | 4 | Factorisation of (a{n,p}){x,y}: 5 | ------------------------------- 6 | 7 | >>> from hachoir.regex import parse 8 | >>> parse("(a{2,3}){4,5}") 9 | 10 | >>> parse("(a{2,}){3,4}") 11 | 12 | >>> parse("(a{2,3})+") 13 | 14 | >>> parse("(a*){2,3}") 15 | 16 | >>> parse("(a+){2,3}") 17 | 18 | 19 | Factorisation of (a|b)*: 20 | ------------------------ 21 | 22 | >>> parse("(a*|b)*") 23 | 24 | >>> parse("(a+|b)*") 25 | 26 | >>> parse("(a{2,}|b)*") 27 | 28 | 29 | Factorisation of (a|b)+: 30 | ------------------------ 31 | 32 | >>> parse("(a*|b)+") 33 | 34 | >>> parse("(a+|b|)+") 35 | 36 | >>> parse("(a+|b)+") 37 | 38 | >>> parse("(a{5,}|b)+") 39 | 40 | 41 | Factorisation of (a|b){x,}: 42 | --------------------------- 43 | 44 | >>> parse("(a+|b){3,}") 45 | 46 | >>> parse("(a{2,}|b){3,}") 47 | 48 | 49 | Factorisation of (a|b){x,y}: 50 | ---------------------------- 51 | 52 | >>> parse("(a*|b|){4,5}") 53 | 54 | >>> parse("(a+|b|){4,5}") 55 | 56 | >>> parse("(a*|b){4,5}") 57 | 58 | 59 | Do not optimize: 60 | ---------------- 61 | 62 | 
>>> parse('(a*|b){3,}') 63 | 64 | >>> parse("(a{2,3}|b){3,}") 65 | 66 | >>> parse("(a{2,3}|b)*") 67 | 68 | >>> parse("(a{2,3}|b)+") 69 | 70 | >>> parse("(a+|b){4,5}") 71 | 72 | >>> parse("(a{2,}|b){4,5}") 73 | 74 | >>> parse("(a{2,3}|b){4,5}") 75 | 76 | 77 | 78 | Regex regression (b) 79 | ==================== 80 | 81 | >>> from hachoir.regex import parse 82 | >>> parse("(M(SCF|Thd)|B(MP4|Zh))") 83 | 84 | >>> parse("(FWS1|CWS1|FWS2|CWS2)") 85 | 86 | >>> parse("(abcdeZ|abZ)") 87 | 88 | >>> parse("(00t003|10t003|00[12]0[1-9].abc\0|1CD001)") 89 | 90 | 91 | -------------------------------------------------------------------------------- /hachoir/field/byte_field.py: -------------------------------------------------------------------------------- 1 | """ 2 | Very basic field: raw content with a size in byte. Use this class for 3 | unknown content. 4 | """ 5 | 6 | from hachoir.field import Field, FieldError 7 | from hachoir.core.tools import makePrintable 8 | from hachoir.core import config 9 | 10 | MAX_LENGTH = (2**64) 11 | 12 | 13 | class RawBytes(Field): 14 | """ 15 | Byte vector of unknown content 16 | 17 | @see: L{Bytes} 18 | """ 19 | static_size = staticmethod(lambda *args, **kw: args[1] * 8) 20 | 21 | def __init__(self, parent, name, length, description="Raw data"): 22 | assert issubclass(parent.__class__, Field) 23 | if not (0 < length <= MAX_LENGTH): 24 | raise FieldError("Invalid RawBytes length (%s)!" 
% length) 25 | Field.__init__(self, parent, name, length * 8, description) 26 | self._display = None 27 | 28 | def _createDisplay(self, human): 29 | max_bytes = config.max_byte_length 30 | try: 31 | display = makePrintable(self.value[:max_bytes], "ASCII") 32 | except Exception: 33 | if self._display is None: 34 | address = self.absolute_address 35 | length = min(self._size // 8, max_bytes) 36 | self._display = self._parent.stream.readBytes(address, length) 37 | display = makePrintable(self._display, "ASCII") 38 | truncated = (8 * len(display) < self._size) 39 | if human: 40 | if truncated: 41 | display += "(...)" 42 | return makePrintable(display, "latin-1", quote='"') 43 | else: 44 | if truncated: 45 | return '"%s(...)"' % display 46 | else: 47 | return '"%s"' % display 48 | 49 | def createDisplay(self): 50 | return self._createDisplay(True) 51 | 52 | def createRawDisplay(self): 53 | return self._createDisplay(False) 54 | 55 | def hasValue(self): 56 | return True 57 | 58 | def createValue(self): 59 | assert (self._size % 8) == 0 60 | if self._display: 61 | self._display = None 62 | return self._parent.stream.readBytes( 63 | self.absolute_address, self._size // 8) 64 | 65 | 66 | class Bytes(RawBytes): 67 | """ 68 | Byte vector: can be used for magic number or GUID/UUID for example. 69 | 70 | @see: L{RawBytes} 71 | """ 72 | pass 73 | -------------------------------------------------------------------------------- /hachoir/core/timeout.py: -------------------------------------------------------------------------------- 1 | """ 2 | limitedTime(): set a timeout in seconds when calling a function, 3 | raise a Timeout error if time exceed. 4 | """ 5 | from math import ceil 6 | 7 | IMPLEMENTATION = None 8 | 9 | 10 | class Timeout(RuntimeError): 11 | """ 12 | Timeout error, inherits from RuntimeError 13 | """ 14 | pass 15 | 16 | 17 | def signalHandler(signum, frame): 18 | """ 19 | Signal handler to catch timeout signal: raise Timeout exception. 
def fixTimeout(second):
    """
    Normalise a timeout to a whole number of seconds, at least 1.

    A float is rounded up to the next integer; any other value must already
    be an integer.
    """
    seconds = int(ceil(second)) if isinstance(second, float) else second
    assert isinstance(seconds, int)
    if seconds < 1:
        return 1
    return seconds
import Parser 9 | from hachoir.field import FieldSet, String, UInt32, SubFile 10 | from hachoir.core.endian import LITTLE_ENDIAN 11 | from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal 12 | 13 | MAX_NB_FILE = 100000 14 | 15 | 16 | class FileIndex(FieldSet): 17 | static_size = 68 * 8 18 | 19 | def createFields(self): 20 | yield String(self, "filename", 56, truncate="\0", charset="ASCII") 21 | yield filesizeHandler(UInt32(self, "filesize")) 22 | yield textHandler(UInt32(self, "crc32"), hexadecimal) 23 | yield UInt32(self, "offset") 24 | 25 | def createDescription(self): 26 | return "File %s (%s) at %s" % ( 27 | self["filename"].value, self["filesize"].display, self["offset"].value) 28 | 29 | 30 | class MarFile(Parser): 31 | MAGIC = b"MARC" 32 | PARSER_TAGS = { 33 | "id": "mar", 34 | "category": "archive", 35 | "file_ext": ("mar",), 36 | "min_size": 80 * 8, # At least one file index 37 | "magic": ((MAGIC, 0),), 38 | "description": "Microsoft Archive", 39 | } 40 | endian = LITTLE_ENDIAN 41 | 42 | def validate(self): 43 | if self.stream.readBytes(0, 4) != self.MAGIC: 44 | return "Invalid magic" 45 | if self["version"].value != 3: 46 | return "Invalid version" 47 | if not (1 <= self["nb_file"].value <= MAX_NB_FILE): 48 | return "Invalid number of file" 49 | return True 50 | 51 | def createFields(self): 52 | yield String(self, "magic", 4, "File signature (MARC)", charset="ASCII") 53 | yield UInt32(self, "version") 54 | yield UInt32(self, "nb_file") 55 | files = [] 56 | for index in range(self["nb_file"].value): 57 | item = FileIndex(self, "file[]") 58 | yield item 59 | if item["filesize"].value: 60 | files.append(item) 61 | files.sort(key=lambda item: item["offset"].value) 62 | for index in files: 63 | padding = self.seekByte(index["offset"].value) 64 | if padding: 65 | yield padding 66 | size = index["filesize"].value 67 | desc = "File %s" % index["filename"].value 68 | yield SubFile(self, "data[]", size, desc, 
filename=index["filename"].value) 69 | -------------------------------------------------------------------------------- /hachoir/parser/game/spider_man_video.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parser for an obscure FMV file format: bin files from the game 3 | "The Amazing Spider-Man vs. The Kingpin" (Sega CD) 4 | 5 | Author: Mike Melanson 6 | Creation date: 2006-09-30 7 | File samples: http://samples.mplayerhq.hu/game-formats/spiderman-segacd-bin/ 8 | """ 9 | 10 | from hachoir.parser import Parser 11 | from hachoir.field import FieldSet, UInt32, String, RawBytes 12 | from hachoir.core.endian import BIG_ENDIAN 13 | from hachoir.core.text_handler import textHandler, hexadecimal 14 | 15 | 16 | class Chunk(FieldSet): 17 | tag_info = { 18 | "CONF": ("conf[]", None, "Configuration header"), 19 | "AUDI": ("audio[]", None, "Audio chunk"), 20 | "SYNC": ("sync[]", None, "Start of video frame data"), 21 | "IVRA": ("ivra[]", None, "Vector codebook (?)"), 22 | "VRAM": ("video[]", None, "Video RAM tile pattern"), 23 | "CRAM": ("color[]", None, "Color RAM (palette)"), 24 | "CEND": ("video_end[]", None, "End of video data"), 25 | "MEND": ("end_file", None, "End of file"), 26 | } 27 | 28 | def __init__(self, *args): 29 | FieldSet.__init__(self, *args) 30 | self._size = self["length"].value * 8 31 | fourcc = self["fourcc"].value 32 | if fourcc in self.tag_info: 33 | self._name, self._parser, self._description = self.tag_info[fourcc] 34 | else: 35 | self._parser = None 36 | self._description = "Unknown chunk: fourcc %s" % self[ 37 | "fourcc"].display 38 | 39 | def createFields(self): 40 | yield String(self, "fourcc", 4, "FourCC", charset="ASCII") 41 | yield textHandler(UInt32(self, "length", "length"), hexadecimal) 42 | size = self["length"].value - 8 43 | if 0 < size: 44 | if self._parser: 45 | yield from self._parser(self, size) 46 | else: 47 | yield RawBytes(self, "data", size) 48 | 49 | 50 | class 
def mangle_special_value(data, offset):
    """
    Overwrite the bytes of *data* starting at *offset* with one randomly
    chosen entry of SPECIAL_VALUES.

    The chosen value is cut so that it never extends past the end of *data*;
    the length of *data* is therefore unchanged.
    """
    tlen = len(data) - offset
    text = random_choice(SPECIAL_VALUES)[:tlen]
    if isinstance(text, str):
        # array("B", <str>) raises TypeError on Python 3. The special values
        # only use code points 0-255, which latin-1 maps one-to-one to bytes.
        text = text.encode("latin-1")
    data[offset:offset + len(text)] = array("B", text)
def importModule(name):
    """
    Import the module with the dotted name *name* and return that module
    itself (for "a.b.c", the module c rather than the top-level package a).

    Raises ImportError when the module cannot be imported.
    """
    # Function-scope import keeps this helper self-contained.
    from importlib import import_module

    # import_module() returns the leaf module directly, replacing the manual
    # __import__() + getattr() walk over the name components.
    return import_module(name)
optionflags=doctest.ELLIPSIS, name=name) 31 | if failure: 32 | self.fail("error") 33 | if self.verbose: 34 | print("--- %s: End of tests" % filename) 35 | 36 | def check_module(self, name): 37 | if self.verbose: 38 | print("--- Test module %s" % name) 39 | module = importModule(name) 40 | failure, nb_test = doctest.testmod(module) 41 | if failure: 42 | self.fail("error") 43 | if self.verbose: 44 | print("--- End of test") 45 | 46 | def test_doc_directory(self): 47 | self.check_doc('developer.rst') 48 | self.check_doc('internals.rst') 49 | self.check_doc('regex.rst') 50 | 51 | def test_tests_directory(self): 52 | self.check_doc('regex_regression.rst', subdir='.') 53 | 54 | def test_hachoir_core(self): 55 | self.check_module("hachoir.core.bits") 56 | self.check_module("hachoir.core.dict") 57 | self.check_module("hachoir.core.i18n") 58 | self.check_module("hachoir.core.text_handler") 59 | self.check_module("hachoir.core.tools") 60 | 61 | def test_hachoir_metadata(self): 62 | self.check_module("hachoir.metadata.metadata") 63 | self.check_module("hachoir.metadata.setter") 64 | 65 | def test_hachoir_regex(self): 66 | self.check_module("hachoir.regex.parser") 67 | self.check_module("hachoir.regex.regex") 68 | self.check_module("hachoir.regex.pattern") 69 | 70 | 71 | if __name__ == "__main__": 72 | setup_tests() 73 | unittest.main() 74 | -------------------------------------------------------------------------------- /hachoir/field/fake_array.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from hachoir.field import MissingField 3 | 4 | 5 | class FakeArray: 6 | """ 7 | Simulate an array for GenericFieldSet.array(): fielset.array("item")[0] is 8 | equivalent to fielset.array("item[0]"). 9 | 10 | It's possible to iterate over the items using:: 11 | 12 | for element in fieldset.array("item"): 13 | ... 14 | 15 | And to get array size using len(fieldset.array("item")). 
16 | """ 17 | 18 | def __init__(self, fieldset, name): 19 | pos = name.rfind("/") 20 | if pos != -1: 21 | self.fieldset = fieldset[name[:pos]] 22 | self.name = name[pos + 1:] 23 | else: 24 | self.fieldset = fieldset 25 | self.name = name 26 | self._format = "%s[%%u]" % self.name 27 | self._cache = {} 28 | self._known_size = False 29 | self._max_index = -1 30 | 31 | def __bool__(self): 32 | "Is the array empty or not?" 33 | if self._cache: 34 | return True 35 | else: 36 | return (0 in self) 37 | 38 | def __len__(self): 39 | "Number of fields in the array" 40 | total = self._max_index + 1 41 | if not self._known_size: 42 | for index in itertools.count(total): 43 | try: 44 | self[index] 45 | total += 1 46 | except MissingField: 47 | break 48 | return total 49 | 50 | def __contains__(self, index): 51 | try: 52 | self[index] 53 | return True 54 | except MissingField: 55 | return False 56 | 57 | def __getitem__(self, index): 58 | """ 59 | Get a field of the array. Returns a field, or raise MissingField 60 | exception if the field doesn't exist. 61 | """ 62 | try: 63 | value = self._cache[index] 64 | except KeyError: 65 | try: 66 | value = self.fieldset[self._format % index] 67 | except MissingField: 68 | self._known_size = True 69 | raise 70 | self._cache[index] = value 71 | self._max_index = max(index, self._max_index) 72 | return value 73 | 74 | def __iter__(self): 75 | """ 76 | Iterate in the fields in their index order: field[0], field[1], ... 
class IndexEntry(FieldSet):
    """Index record of a MAR archive: offset, length, flags and file name."""

    def createFields(self):
        # Each field is created right before it is yielded: fields must be
        # built one at a time, in stream order.
        yield UInt32(self, "offset", "Offset in bytes relative to start of archive")
        length = UInt32(self, "length", "Length in bytes")
        yield filesizeHandler(length)
        flags = UInt32(self, "flags")
        yield displayHandler(flags, humanUnixAttributes)
        yield CString(self, "name", "Filename (byte array)")

    def createDescription(self):
        """Summarise the entry as 'File <name>, Size <length>, Mode <flags>'."""
        shown = tuple(self[key].display for key in ("name", "length", "flags"))
        return 'File %s, Size %s, Mode %s' % shown
class Config(FieldSet):
    """Resources section of a PSD file: a 32-bit size followed by 8BIM items."""

    def __init__(self, *args):
        FieldSet.__init__(self, *args)
        # Size in bits: the 4-byte "size" field plus the payload it announces.
        payload_bytes = self["size"].value
        self._size = 8 * (payload_bytes + 4)

    def createFields(self):
        yield UInt32(self, "size")
        # Keep emitting items until the announced size is consumed.
        while True:
            if self.eof:
                break
            yield Photoshop8BIM(self, "item[]")
color", 43 | 4: "CMYK color", 44 | 7: "Multichannel", 45 | 8: "Duotone", 46 | 9: "Lab Color", 47 | } 48 | COMPRESSION_NAME = { 49 | 0: "Raw data", 50 | 1: "RLE", 51 | } 52 | 53 | def validate(self): 54 | if self.stream.readBytes(0, 4) != b"8BPS": 55 | return "Invalid signature" 56 | return True 57 | 58 | def createFields(self): 59 | yield String(self, "signature", 4, "PSD signature (8BPS)", charset="ASCII") 60 | yield UInt16(self, "version") 61 | yield NullBytes(self, "reserved[]", 6) 62 | yield UInt16(self, "nb_channels") 63 | yield UInt32(self, "width") 64 | yield UInt32(self, "height") 65 | yield UInt16(self, "depth") 66 | yield Enum(UInt16(self, "color_mode"), self.COLOR_MODE) 67 | 68 | # Mode data 69 | yield UInt32(self, "mode_data_size") 70 | size = self["mode_data_size"].value 71 | if size: 72 | yield RawBytes(self, "mode_data", size) 73 | 74 | # Resources 75 | yield Config(self, "config") 76 | 77 | # Reserved 78 | yield UInt32(self, "reserved_data_size") 79 | size = self["reserved_data_size"].value 80 | if size: 81 | yield RawBytes(self, "reserved_data", size) 82 | 83 | yield Enum(UInt16(self, "compression"), self.COMPRESSION_NAME) 84 | 85 | size = (self.size - self.current_size) // 8 86 | if size: 87 | yield RawBytes(self, "end", size) 88 | -------------------------------------------------------------------------------- /hachoir/core/memory.py: -------------------------------------------------------------------------------- 1 | import gc 2 | 3 | # ---- Default implementation when resource is missing ---------------------- 4 | PAGE_SIZE = 4096 5 | 6 | 7 | def getMemoryLimit(): 8 | """ 9 | Get current memory limit in bytes. 10 | 11 | Return None on error. 12 | """ 13 | return None 14 | 15 | 16 | def setMemoryLimit(max_mem): 17 | """ 18 | Set memory limit in bytes. 19 | Use value 'None' to disable memory limit. 20 | 21 | Return True if limit is set, False on error. 
try:
    # ---- 'resource' implementation -----------------------------------
    from resource import getpagesize, getrlimit, setrlimit, RLIMIT_AS

    PAGE_SIZE = getpagesize()

    def getMemoryLimit():  # noqa
        """
        Get the current soft limit of the address space (RLIMIT_AS), in bytes.

        The raw rlimit value is returned, so "no limit" is reported with the
        same -1 value that setMemoryLimit() uses to disable the limit.

        Return None on error.
        """
        try:
            # RLIMIT_AS is expressed in bytes, not in pages: it must not be
            # multiplied by PAGE_SIZE. Otherwise the value handed back to
            # setMemoryLimit() would be PAGE_SIZE times too large.
            return getrlimit(RLIMIT_AS)[0]
        except (ValueError, OSError):
            return None

    def setMemoryLimit(max_mem):  # noqa
        """
        Set the soft limit of the address space (RLIMIT_AS) in bytes.
        Use value 'None' to disable memory limit.

        Return True if limit is set, False on error.
        """
        if max_mem is None:
            max_mem = -1
        try:
            setrlimit(RLIMIT_AS, (max_mem, -1))
            return True
        except (ValueError, OSError):
            # ValueError: limit refused (e.g. above the hard limit);
            # OSError: the underlying system call failed.
            return False
except ImportError:
    pass
class CompressedStream:
    """
    File-like reader returning decompressed data from a compressed stream.

    ``decompressor`` is called once with the stream and must return a
    callable ``step(size, data=None)`` producing decompressed bytes.
    """
    # Read position in the compressed stream. It grows by 8 per byte read,
    # so it presumably counts bits.
    offset = 0

    def __init__(self, stream, decompressor):
        self.stream = stream
        self.decompressor = decompressor(stream)
        # Decompressed bytes produced by a previous read() but not yet
        # returned to the caller.
        self._buffer = b''

    def read(self, size):
        """Return up to *size* decompressed bytes (fewer at end of stream)."""
        # 1. Serve what is left over from the previous call.
        chunk = self._buffer
        pieces = [chunk[:size]]
        remaining = size - len(chunk)
        if remaining > 0:
            # 2. Ask the decompressor for output it may still hold.
            chunk = self.decompressor(remaining)
            pieces.append(chunk[:remaining])
            remaining -= len(chunk)
            # 3. Feed it more compressed input until satisfied.
            source = self.stream
            while remaining > 0:
                span = 4096
                if source.size:
                    span = min(source.size - self.offset, span)
                    if not span:
                        # Compressed input exhausted.
                        break
                raw = source.read(self.offset, span)[1]
                self.offset += 8 * len(raw)
                chunk = self.decompressor(remaining, raw)
                pieces.append(chunk[:remaining])
                remaining -= len(chunk)
        # Keep the part of the last chunk that exceeded the request.
        self._buffer = chunk[remaining + len(chunk):]
        return b''.join(pieces)
3 | """ 4 | 5 | from hachoir.parser import Parser 6 | from hachoir.field import ( 7 | UInt8, UInt16, 8 | PaddingBytes, RawBytes, 9 | Enum) 10 | from hachoir.parser.image.common import PaletteRGB 11 | from hachoir.core.endian import LITTLE_ENDIAN 12 | 13 | 14 | class PcxFile(Parser): 15 | endian = LITTLE_ENDIAN 16 | PARSER_TAGS = { 17 | "id": "pcx", 18 | "category": "image", 19 | "file_ext": ("pcx",), 20 | "mime": ("image/x-pcx",), 21 | "min_size": 128 * 8, 22 | "description": "PC Paintbrush (PCX) picture" 23 | } 24 | compression_name = {1: "Run-length encoding (RLE)"} 25 | version_name = { 26 | 0: "Version 2.5 of PC Paintbrush", 27 | 2: "Version 2.8 with palette information", 28 | 3: "Version 2.8 without palette information", 29 | 4: "PC Paintbrush for Windows", 30 | 5: "Version 3.0 (or greater) of PC Paintbrush" 31 | } 32 | 33 | def validate(self): 34 | if self["id"].value != 10: 35 | return "Wrong signature" 36 | if self["version"].value not in self.version_name: 37 | return "Unknown format version" 38 | if self["bpp"].value not in (1, 2, 4, 8, 24, 32): 39 | return "Unknown bits/pixel" 40 | if self["reserved[0]"].value != b"\0": 41 | return "Invalid reserved value" 42 | return True 43 | 44 | def createFields(self): 45 | yield UInt8(self, "id", "PCX identifier (10)") 46 | yield Enum(UInt8(self, "version", "PCX version"), self.version_name) 47 | yield Enum(UInt8(self, "compression", "Compression method"), self.compression_name) 48 | yield UInt8(self, "bpp", "Bits / pixel") 49 | yield UInt16(self, "xmin", "Minimum X") 50 | yield UInt16(self, "ymin", "Minimum Y") 51 | yield UInt16(self, "xmax", "Width minus one") # value + 1 52 | yield UInt16(self, "ymax", "Height minus one") # value + 1 53 | yield UInt16(self, "horiz_dpi", "Horizontal DPI") 54 | yield UInt16(self, "vert_dpi", "Vertical DPI") 55 | yield PaletteRGB(self, "palette_4bits", 16, "Palette (4 bits)") 56 | yield PaddingBytes(self, "reserved[]", 1) 57 | yield UInt8(self, "nb_color_plan", "Number of color 
def getStrips(ifd):
    """
    Iterate over the image data chunks referenced by an IFD.

    Yield ``(offset, byte_count)`` pairs: first the strips
    (StripOffsets / StripByteCounts), then the tiles
    (TileOffsets / TileByteCounts). A pair of tags is skipped unless both
    tags are present.
    """
    # Index the entries by the display name of their tag; for a duplicated
    # tag the last entry wins.
    entries = {entry['tag'].display: entry for entry in ifd.array('entry')}

    tag_pairs = (
        ("StripOffsets", "StripByteCounts"),
        ("TileOffsets", "TileByteCounts"),
    )
    for offsets_tag, counts_tag in tag_pairs:
        if offsets_tag not in entries or counts_tag not in entries:
            continue
        offsets = ifd.getEntryValues(entries[offsets_tag])
        counts = ifd.getEntryValues(entries[counts_tag])
        for offset, count in zip(offsets, counts):
            yield offset.value, count.value
class TiffFile(RootSeekableFieldSet, Parser):
    """TIFF picture parser: the TIFF/IFD structures followed by the image data."""
    PARSER_TAGS = {
        "id": "tiff",
        "category": "image",
        "file_ext": ("tif", "tiff"),
        "mime": ("image/tiff",),
        "min_size": 8 * 8,
        "magic": ((b"II\x2A\0", 0), (b"MM\0\x2A", 0)),
        "description": "TIFF picture"
    }

    # Correct endian is set in constructor
    endian = LITTLE_ENDIAN

    def __init__(self, stream, **args):
        RootSeekableFieldSet.__init__(
            self, None, "root", stream, None, stream.askSize(self))
        # "MM" selects big endian; anything else keeps the class default.
        if self.stream.readBytes(0, 2) == b"MM":
            self.endian = BIG_ENDIAN
        Parser.__init__(self, stream, **args)

    def validate(self):
        """Return True for a valid header, else an error message."""
        endian = self.stream.readBytes(0, 2)
        if endian not in (b"MM", b"II"):
            return "Invalid endian (%r)" % endian
        if self["version"].value != 42:
            return "Unknown TIFF version"
        return True

    def createFields(self):
        """Yield the TIFF structures, then one ImageFile per IFD that has image data."""
        yield from TIFF(self)

        for ifd in self:
            if not isinstance(ifd, IFD):
                continue
            # An IFD without strip/tile entries has no image data to map.
            # min() on an empty sequence would raise ValueError, so such an
            # IFD is skipped instead.
            first_offset = min(
                (off for off, byte in getStrips(ifd)), default=None)
            if first_offset is None:
                continue
            self.seekByte(first_offset, relative=False)
            image = ImageFile(self, "image[]", "Image File", ifd)
            yield image
class Extractor:
    """
    Extract metadata fields from every ``*.doc`` file below a directory and
    write them, one line per file, into OUTPUT_FILENAME.
    """

    def __init__(self, directory, fields):
        self.directory = directory
        self.fields = fields        # metadata keys to export, in column order
        self.charset = "UTF-8"      # charset of the output file
        self.total = 0              # number of files processed
        self.invalid = 0            # number of files without usable metadata

    def main(self):
        """Process all matching files, write the result file, print a summary."""
        # The context manager closes (and flushes) the output file even when
        # processing a file raises an unexpected exception.
        with codecs.open(OUTPUT_FILENAME, "w", self.charset) as output:
            for filename in self.findFiles(self.directory, '*.doc'):
                self.total += 1
                line = self.processFile(filename)
                if line:
                    print(line, file=output)
                else:
                    self.invalid += 1
        self.summary()

    def summary(self):
        """Print the valid/invalid/total counters on stderr."""
        print(file=stderr)
        print("Valid files: %s" % (self.total - self.invalid), file=stderr)
        print("Invalid files: %s" % self.invalid, file=stderr)
        print("Total files: %s" % self.total, file=stderr)
        print(file=stderr)
        print("Result written into %s" % OUTPUT_FILENAME, file=stderr)

    def findFiles(self, directory, pattern):
        """
        Yield the path of every file below *directory* whose lower-cased
        name matches the fnmatch *pattern*.
        """
        for dirpath, dirnames, filenames in walk(directory):
            for filename in filenames:
                if not fnmatch(filename.lower(), pattern):
                    continue
                yield path_join(dirpath, filename)

    def processFile(self, filename):
        """
        Extract the metadata of one file.

        Return the output line ("filename; value; value; ..."), or None if
        the file can't be parsed or has no metadata.
        """
        print("[%s] Process file %s..." % (self.total, filename))
        parser = createParser(filename)
        if not parser:
            print("Unable to parse file", file=stderr)
            return None
        try:
            metadata = extractMetadata(parser)
        except Exception as err:
            # Best effort: one broken file must not abort the whole run.
            print("Metadata extraction error: %s" % str(err), file=stderr)
            return None
        if not metadata:
            print("Unable to extract metadata", file=stderr)
            return None

        filename = makePrintable(filename, self.charset)
        line = [filename]
        for field in self.fields:
            value = metadata.getText(field, '')
            value = makePrintable(value, self.charset)
            line.append(value)
        return '; '.join(line)
class ResourceHeader(FieldSet):
    """Header of one PRC resource: 4-byte name, 16-bit flags, 32-bit data offset."""
    # 4 (name) + 2 (flags) + 4 (offset) bytes, expressed in bits
    static_size = 8 * (4 + 2 + 4)

    def createFields(self):
        yield String(self, "name", 4, "Name of the resource")
        yield UInt16(self, "flags", "ID number of the resource")
        yield UInt32(self, "offset", "Pointer to the resource data")

    def createDescription(self):
        # NOTE(review): this formats the "name" field object itself, not its
        # .value or .display -- confirm this is the intended text.
        name_field = self["name"]
        return "Resource Header (%s)" % (name_field,)
62 | return False 63 | 64 | def createFields(self): 65 | # Parse header and program headers 66 | yield PRCHeader(self, "header", "Header") 67 | lens = [] 68 | firstOne = True 69 | poff = 0 70 | for index in range(self["header/num_records"].value): 71 | r = ResourceHeader(self, "res_header[]") 72 | if firstOne: 73 | firstOne = False 74 | else: 75 | lens.append(r["offset"].value - poff) 76 | poff = r["offset"].value 77 | yield r 78 | lens.append(self.size // 8 - poff) 79 | yield UInt16(self, "placeholder", "Place holder bytes") 80 | for i in range(len(lens)): 81 | yield RawBytes(self, "res[]", lens[i], '"' + self["res_header[" + str(i) + "]/name"].value + "\" Resource") 82 | 83 | def createDescription(self): 84 | return "Palm Resource file" 85 | -------------------------------------------------------------------------------- /hachoir/parser/misc/hlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Microsoft Windows Help (HLP) parser for Hachoir project. 3 | 4 | Documents: 5 | - Windows Help File Format / Annotation File Format / SHG and MRB File Format 6 | written by M. 
class FileEntry(FieldSet):
    """Internal file of a HLP file: space header followed by a B+ tree header."""

    def __init__(self, *args, **kw):
        FieldSet.__init__(self, *args, **kw)
        # The entry occupies its whole reserved space (bytes -> bits).
        reserved_bytes = self["res_space"].value
        self._size = reserved_bytes * 8

    def createFields(self):
        yield displayHandler(UInt32(self, "res_space", "Reserved space"), humanFilesize)
        yield displayHandler(UInt32(self, "used_space", "Used space"), humanFilesize)
        yield Bits(self, "file_flags", 8, "(=4)")

        yield textHandler(UInt16(self, "magic"), hexadecimal)
        yield Bits(self, "flags", 16)
        yield displayHandler(UInt16(self, "page_size", "Page size in bytes"), humanFilesize)
        yield String(self, "structure", 16, strip="\0", charset="ASCII")
        yield NullBytes(self, "zero", 2)
        yield UInt16(self, "nb_page_splits", "Number of page splits B+ tree has suffered")
        yield UInt16(self, "root_page", "Page number of B+ tree root page")
        # NOTE(review): the pattern is a str, not bytes -- confirm that
        # PaddingBytes accepts it.
        yield PaddingBytes(self, "one", 2, pattern="\xFF")
        tree_counters = (
            ("nb_page", "Number of B+ tree pages"),
            ("nb_level", "Number of levels of B+ tree"),
            ("nb_entry", "Number of entries in B+ tree"),
        )
        for counter_name, counter_text in tree_counters:
            yield UInt16(self, counter_name, counter_text)

        # Fill the rest of the reserved space (sizes are tracked in bits).
        unused_bytes = (self.size - self.current_size) // 8
        if unused_bytes:
            yield PaddingBytes(self, "reserved_space", unused_bytes)
if self["magic"].value != 0x00035F3F: 62 | return "Invalid magic" 63 | if self["filesize"].value != self.stream.size // 8: 64 | return "Invalid magic" 65 | return True 66 | 67 | def createFields(self): 68 | yield textHandler(UInt32(self, "magic"), hexadecimal) 69 | yield UInt32(self, "dir_start", "Directory start") 70 | yield Int32(self, "first_free_block", "First free block") 71 | yield UInt32(self, "filesize", "File size in bytes") 72 | 73 | yield self.seekByte(self["dir_start"].value) 74 | yield FileEntry(self, "file[]") 75 | 76 | size = (self.size - self.current_size) // 8 77 | if size: 78 | yield RawBytes(self, "end", size) 79 | -------------------------------------------------------------------------------- /hachoir/field/__init__.py: -------------------------------------------------------------------------------- 1 | # Field classes 2 | from hachoir.field.field import Field, FieldError, MissingField, joinPath # noqa 3 | from hachoir.field.bit_field import Bit, Bits, RawBits # noqa 4 | from hachoir.field.byte_field import Bytes, RawBytes # noqa 5 | from hachoir.field.sub_file import SubFile, CompressedField # noqa 6 | from hachoir.field.character import Character # noqa 7 | from hachoir.field.integer import (Int8, Int16, Int24, Int32, Int64, # noqa 8 | UInt8, UInt16, UInt24, UInt32, UInt64, 9 | GenericInteger) 10 | from hachoir.field.enum import Enum # noqa 11 | from hachoir.field.string_field import (GenericString, # noqa 12 | String, CString, UnixLine, 13 | PascalString8, PascalString16, 14 | PascalString32) 15 | from hachoir.field.padding import (PaddingBits, PaddingBytes, # noqa 16 | NullBits, NullBytes) 17 | 18 | # Functions 19 | from hachoir.field.helper import (isString, isInteger, # noqa 20 | createPaddingField, createNullField, 21 | createRawField, writeIntoFile, 22 | createOrphanField) 23 | 24 | # FieldSet classes 25 | from hachoir.field.fake_array import FakeArray # noqa 26 | from hachoir.field.basic_field_set import (BasicFieldSet, # noqa 27 | 
# Tuple of concrete field classes published by this package as
# "available_types". The generic base classes (GenericInteger,
# GenericString) are intentionally left out: see the commented entry at
# the end of the tuple.
available_types = (Bit, Bits, RawBits,
                   Bytes, RawBytes,
                   SubFile,
                   Character,
                   Int8, Int16, Int24, Int32, Int64,
                   UInt8, UInt16, UInt24, UInt32, UInt64,
                   String, CString, UnixLine,
                   PascalString8, PascalString16, PascalString32,
                   Float32, Float64,
                   PaddingBits, PaddingBytes,
                   NullBits, NullBytes,
                   TimestampUnix32, TimestampMac32, TimestampWin64,
                   TimedeltaMillisWin64,
                   DateTimeMSDOS32, TimeDateMSDOS32,
                   # GenericInteger, GenericString,
                   )
def main():
    """Collect the package metadata and hand it to setuptools.setup()."""
    # Imported here so that merely importing this module needs neither
    # setuptools nor the hachoir package.
    from setuptools import setup
    from setuptools import find_packages

    import hachoir

    # The context manager closes the file even if read() fails. The explicit
    # encoding makes the build independent of the locale
    # (NOTE(review): assumes README.rst is UTF-8/ASCII -- confirm).
    with open('README.rst', encoding='utf-8') as readme:
        long_description = readme.read()

    install_options = {
        "name": "hachoir",
        "version": hachoir.__version__,
        "url": 'http://hachoir.readthedocs.io/',
        "project_urls": {
            "Source": "https://github.com/vstinner/hachoir",
        },
        "author": "Hachoir team (see AUTHORS file)",
        "description": "Package of Hachoir parsers used to open binary files",
        "long_description": long_description,
        "classifiers": CLASSIFIERS,
        "license": 'GNU GPL v2',
        "packages": find_packages(),
        "package_data": {"hachoir.wx.resource": ['hachoir_wx.xrc']},
        "entry_points": ENTRY_POINTS,
        "extras_require": {
            "urwid": [
                "urwid"
            ],
            "wx": [
                "wxPython==4.*"
            ]
        },
        "zip_safe": True,
    }
    setup(**install_options)