├── hachoir
├── wx
│ ├── __init__.py
│ ├── resource
│ │ ├── __init__.py
│ │ └── resource.py
│ ├── __main__.py
│ ├── hex_view
│ │ ├── __init__.py
│ │ ├── hex_view_setup.py
│ │ └── file_cache.py
│ ├── tree_view
│ │ ├── __init__.py
│ │ └── tree_view_setup.py
│ ├── frame_view
│ │ ├── __init__.py
│ │ ├── frame_view.py
│ │ ├── frame_view_fwd.py
│ │ ├── frame_view_setup.py
│ │ └── frame_view_imp.py
│ ├── field_view
│ │ ├── __init__.py
│ │ ├── core_type_menu_fwd.py
│ │ ├── core_type_menu.py
│ │ ├── field_split_menu_fwd.py
│ │ ├── field_menu.py
│ │ ├── format.py
│ │ ├── field_view_setup.py
│ │ ├── field_split_menu.py
│ │ ├── field_view_fwd.py
│ │ ├── core_type_menu_imp.py
│ │ ├── mutator.py
│ │ ├── field_menu_fwd.py
│ │ ├── field_split_menu_imp.py
│ │ ├── stubs.py
│ │ ├── field_menu_imp.py
│ │ ├── field_view.py
│ │ └── field_menu_setup.py
│ ├── unicode.py
│ ├── dialogs.py
│ ├── dispatcher.py
│ └── main.py
├── core
│ ├── __init__.py
│ ├── endian.py
│ ├── error.py
│ ├── event_handler.py
│ ├── language.py
│ ├── config.py
│ ├── profiler.py
│ ├── cmd_line.py
│ ├── text_handler.py
│ ├── timeout.py
│ └── memory.py
├── subfile
│ ├── __init__.py
│ ├── __main__.py
│ ├── data_rate.py
│ ├── output.py
│ └── pattern.py
├── metadata
│ ├── qt
│ │ ├── __init__.py
│ │ └── dialog.ui
│ ├── __main__.py
│ ├── config.py
│ ├── __init__.py
│ ├── formatter.py
│ ├── safe.py
│ ├── timezone.py
│ ├── file_system.py
│ ├── filter.py
│ ├── cr2.py
│ └── csv.py
├── parser
│ ├── common
│ │ ├── __init__.py
│ │ ├── tracker.py
│ │ ├── deflate.py
│ │ └── msdos.py
│ ├── network
│ │ └── __init__.py
│ ├── video
│ │ └── __init__.py
│ ├── game
│ │ ├── __init__.py
│ │ └── spider_man_video.py
│ ├── __init__.py
│ ├── container
│ │ └── __init__.py
│ ├── misc
│ │ ├── common.py
│ │ ├── __init__.py
│ │ ├── ole2_util.py
│ │ └── hlp.py
│ ├── file_system
│ │ └── __init__.py
│ ├── program
│ │ ├── __init__.py
│ │ └── prc.py
│ ├── audio
│ │ └── __init__.py
│ ├── image
│ │ ├── __init__.py
│ │ ├── common.py
│ │ ├── psd.py
│ │ ├── pcx.py
│ │ └── tiff.py
│ ├── archive
│ │ ├── __init__.py
│ │ ├── prs_pak.py
│ │ ├── ar.py
│ │ ├── mar.py
│ │ └── mozilla_ar.py
│ └── template.py
├── stream
│ ├── stream.py
│ ├── __init__.py
│ └── input_helper.py
├── __init__.py
├── test.py
├── field
│ ├── field_set.py
│ ├── character.py
│ ├── enum.py
│ ├── vector.py
│ ├── fragment.py
│ ├── parser.py
│ ├── bit_field.py
│ ├── integer.py
│ ├── static_field_set.py
│ ├── helper.py
│ ├── byte_field.py
│ ├── fake_array.py
│ ├── sub_file.py
│ └── __init__.py
├── editor
│ ├── __init__.py
│ └── field.py
└── regex
│ └── __init__.py
├── doc
├── .gitignore
├── images
│ ├── wx.png
│ └── urwid.png
├── hacking.rst
├── examples
│ ├── editor_gzip.py
│ ├── editor_zip.py
│ ├── editor_add_extra.py
│ └── metadata.py
├── install.rst
├── editor.rst
├── gen_parser_list.py
├── contact.rst
├── wx.rst
├── list.rst
├── grep.rst
├── strip.rst
├── index.rst
├── urwid.rst
└── authors.rst
├── tests
├── files
│ ├── cd_0008_5C48_1m53s.cda
│ ├── 7zip.chm
│ ├── eula.exe
│ ├── gps.jpg
│ ├── hero.tga
│ ├── swat.blp
│ ├── vim.lnk
│ ├── 10min.mkv
│ ├── 25min.aifc
│ ├── 32bpp.tga
│ ├── andorra.map
│ ├── archive.7z
│ ├── cercle.exe
│ ├── cross.xcf
│ ├── dell8.fat16
│ ├── dontyou.xm
│ ├── firstrun.rm
│ ├── georgia.cab
│ ├── globe.wmf
│ ├── indiana.mid
│ ├── kino14s.laf
│ ├── my60k.ext2
│ ├── ocr10.laf
│ ├── paktest.pak
│ ├── pikachu.wmf
│ ├── radpoor.doc
│ ├── sample.tif
│ ├── sample.ts
│ ├── test.txt.gz
│ ├── types.ext2
│ ├── weka.model
│ ├── article01.bmp
│ ├── canon.raw.cr2
│ ├── example2.arj
│ ├── flashmob.mkv
│ ├── india_map.gif
│ ├── kde_click.wav
│ ├── nitrodir.nds
│ ├── quicktime.mp4
│ ├── test_file.fit
│ ├── twunk_16.exe
│ ├── verdan32.exe
│ ├── breakdance.flv
│ ├── green_fire.jpg
│ ├── hachoir-core.ace
│ ├── hachoir-core.rar
│ ├── hachoir.org.sxw
│ ├── ladouce_1h15.wav
│ ├── lara_croft.pcx
│ ├── logo-kubuntu.png
│ ├── macos_10.5.macho
│ ├── marc_kravetz.mp3
│ ├── minimal_xcf1.xcf
│ ├── minimal_xcf3.xcf
│ ├── usa_railroad.jpg
│ ├── yellowdude.3ds
│ ├── 08lechat_hq_fr.mp3
│ ├── ReferenceMap.class
│ ├── cacert_class3.der
│ ├── claque-beignet.swf
│ ├── linux_swap_9pages
│ ├── macos_10.12.macho
│ ├── mbr_linux_and_ext
│ ├── mev.32bit.big.elf
│ ├── mev.64bit.big.elf
│ ├── minimal_xcf10.xcf
│ ├── pentax_320x240.mov
│ ├── satellite_one.s3m
│ ├── sheep_on_drugs.mp3
│ ├── steganography.mp3
│ ├── angle-bear-48x48.ani
│ ├── deja_vu_serif-2.7.ttf
│ ├── example4_chapters.arj
│ ├── ftp-0.17-537.i586.rpm
│ ├── grasslogo_vector.emf
│ ├── hotel_california.flac
│ ├── kde_haypo_corner.bmp
│ ├── matrix_ping_pong.wmv
│ ├── reiserfs_v3_332k.bin
│ ├── smallville.s03e02.avi
│ ├── wormux_32x32_16c.ico
│ ├── anti-arpeggio_tune.ptm
│ ├── default_mount_opts.ext2
│ ├── jpeg.exif.photoshop.jpg
│ ├── ping_20020927-3ubuntu2
│ ├── usa_railroad.jpg.0.zlib
│ ├── usa_railroad.jpg.6.zlib
│ ├── Panasonic_AG_HMC_151.MTS
│ ├── arp_dns_ping_dns.tcpdump
│ ├── bsize-1024-isize-1024.ext2
│ ├── bsize-2048-isize-512.ext2
│ ├── bsize-4096-isize-128.ext2
│ ├── interlude_david_aubrun.ogg
│ ├── png_331x90x8_truncated.png
│ ├── pyc_example_1.5.2_pyc.bin
│ ├── pyc_example_2.2.3_pyc.bin
│ ├── pyc_example_2.5c1_pyc.bin
│ ├── python.cpython-312.pyc.bin
│ ├── python.cpython-37.pyc.bin
│ ├── audio_8khz_8bit_ulaw_4s39.au
│ ├── free-software-song.midi.bz2
│ ├── get-versions.64bit.little.elf
│ ├── 1024x1024-better-compression.xcf
│ ├── com.apple.pkg.BaseSystemResources.bom
│ ├── debian-31r4-i386-binary-1.iso.torrent
│ └── pack-31c691f659cbc7841ca55a26a342fdaf0b89c533.pack
├── test_editor.py
├── test_strip.py
├── test_grep.py
├── regex_regression.rst
└── test_doc.py
├── hachoir-list
├── hachoir-urwid
├── hachoir-metadata
├── hachoir-subfile
├── hachoir-metadata-csv
├── hachoir-metadata-qt
├── hachoir-metadata-gtk
├── hachoir-strip
├── hachoir-grep
├── hachoir-wx
├── tools
├── flake8.sh
├── fuzzer
│ ├── tools.py
│ └── mangle.py
├── flv_extractor.py
├── find_deflate.py
└── entropy.py
├── .gitignore
├── .readthedocs.yaml
├── .hgignore
├── MANIFEST.in
├── tox.ini
├── .github
└── workflows
│ └── build.yml
├── benchmark.sh
├── README.rst
└── setup.py
/hachoir/wx/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hachoir/core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hachoir/subfile/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hachoir/metadata/qt/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hachoir/parser/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | parser_list.rst
2 |
--------------------------------------------------------------------------------
/hachoir/wx/resource/__init__.py:
--------------------------------------------------------------------------------
1 | from .resource import * # noqa
2 |
--------------------------------------------------------------------------------
/hachoir/stream/stream.py:
--------------------------------------------------------------------------------
1 | class StreamError(Exception):
2 | pass
3 |
--------------------------------------------------------------------------------
/hachoir/wx/__main__.py:
--------------------------------------------------------------------------------
1 | from hachoir.wx.main import main
2 | main()
3 |
--------------------------------------------------------------------------------
/tests/files/cd_0008_5C48_1m53s.cda:
--------------------------------------------------------------------------------
1 | RIFF$ CDDAfmt H\ L ;! 4 5
--------------------------------------------------------------------------------
/hachoir/metadata/__main__.py:
--------------------------------------------------------------------------------
1 | from hachoir.metadata.main import main
2 | main()
3 |
--------------------------------------------------------------------------------
/doc/images/wx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/doc/images/wx.png
--------------------------------------------------------------------------------
/hachoir-list:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.listtool import main
3 | main()
4 |
--------------------------------------------------------------------------------
/hachoir-urwid:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.urwid import main
3 | main()
4 |
--------------------------------------------------------------------------------
/hachoir/metadata/config.py:
--------------------------------------------------------------------------------
1 | MAX_STR_LENGTH = 300 # characters
2 | RAW_OUTPUT = False
3 |
--------------------------------------------------------------------------------
/doc/images/urwid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/doc/images/urwid.png
--------------------------------------------------------------------------------
/hachoir/__init__.py:
--------------------------------------------------------------------------------
1 | VERSION = (3, 3, 0)
2 | __version__ = ".".join(map(str, VERSION))
3 |
--------------------------------------------------------------------------------
/tests/files/7zip.chm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/7zip.chm
--------------------------------------------------------------------------------
/tests/files/eula.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/eula.exe
--------------------------------------------------------------------------------
/tests/files/gps.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/gps.jpg
--------------------------------------------------------------------------------
/tests/files/hero.tga:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hero.tga
--------------------------------------------------------------------------------
/tests/files/swat.blp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/swat.blp
--------------------------------------------------------------------------------
/tests/files/vim.lnk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/vim.lnk
--------------------------------------------------------------------------------
/hachoir-metadata:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.metadata.main import main
3 | main()
4 |
--------------------------------------------------------------------------------
/hachoir-subfile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.subfile.main import main
3 | main()
4 |
--------------------------------------------------------------------------------
/tests/files/10min.mkv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/10min.mkv
--------------------------------------------------------------------------------
/tests/files/25min.aifc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/25min.aifc
--------------------------------------------------------------------------------
/tests/files/32bpp.tga:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/32bpp.tga
--------------------------------------------------------------------------------
/tests/files/andorra.map:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/andorra.map
--------------------------------------------------------------------------------
/tests/files/archive.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/archive.7z
--------------------------------------------------------------------------------
/tests/files/cercle.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/cercle.exe
--------------------------------------------------------------------------------
/tests/files/cross.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/cross.xcf
--------------------------------------------------------------------------------
/tests/files/dell8.fat16:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/dell8.fat16
--------------------------------------------------------------------------------
/tests/files/dontyou.xm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/dontyou.xm
--------------------------------------------------------------------------------
/tests/files/firstrun.rm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/firstrun.rm
--------------------------------------------------------------------------------
/tests/files/georgia.cab:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/georgia.cab
--------------------------------------------------------------------------------
/tests/files/globe.wmf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/globe.wmf
--------------------------------------------------------------------------------
/tests/files/indiana.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/indiana.mid
--------------------------------------------------------------------------------
/tests/files/kino14s.laf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/kino14s.laf
--------------------------------------------------------------------------------
/tests/files/my60k.ext2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/my60k.ext2
--------------------------------------------------------------------------------
/tests/files/ocr10.laf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ocr10.laf
--------------------------------------------------------------------------------
/tests/files/paktest.pak:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/paktest.pak
--------------------------------------------------------------------------------
/tests/files/pikachu.wmf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pikachu.wmf
--------------------------------------------------------------------------------
/tests/files/radpoor.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/radpoor.doc
--------------------------------------------------------------------------------
/tests/files/sample.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/sample.tif
--------------------------------------------------------------------------------
/tests/files/sample.ts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/sample.ts
--------------------------------------------------------------------------------
/tests/files/test.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/test.txt.gz
--------------------------------------------------------------------------------
/tests/files/types.ext2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/types.ext2
--------------------------------------------------------------------------------
/tests/files/weka.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/weka.model
--------------------------------------------------------------------------------
/hachoir-metadata-csv:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.metadata.csv import main
3 | main()
4 |
--------------------------------------------------------------------------------
/hachoir-metadata-qt:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.metadata.qt.main import main
3 | main()
4 |
--------------------------------------------------------------------------------
/hachoir/parser/network/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.network.tcpdump import TcpdumpFile # noqa
2 |
--------------------------------------------------------------------------------
/tests/files/article01.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/article01.bmp
--------------------------------------------------------------------------------
/tests/files/canon.raw.cr2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/canon.raw.cr2
--------------------------------------------------------------------------------
/tests/files/example2.arj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/example2.arj
--------------------------------------------------------------------------------
/tests/files/flashmob.mkv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/flashmob.mkv
--------------------------------------------------------------------------------
/tests/files/india_map.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/india_map.gif
--------------------------------------------------------------------------------
/tests/files/kde_click.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/kde_click.wav
--------------------------------------------------------------------------------
/tests/files/nitrodir.nds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/nitrodir.nds
--------------------------------------------------------------------------------
/tests/files/quicktime.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/quicktime.mp4
--------------------------------------------------------------------------------
/tests/files/test_file.fit:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/test_file.fit
--------------------------------------------------------------------------------
/tests/files/twunk_16.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/twunk_16.exe
--------------------------------------------------------------------------------
/tests/files/verdan32.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/verdan32.exe
--------------------------------------------------------------------------------
/hachoir/subfile/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.subfile.main import main
3 | main()
4 |
--------------------------------------------------------------------------------
/tests/files/breakdance.flv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/breakdance.flv
--------------------------------------------------------------------------------
/tests/files/green_fire.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/green_fire.jpg
--------------------------------------------------------------------------------
/tests/files/hachoir-core.ace:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hachoir-core.ace
--------------------------------------------------------------------------------
/tests/files/hachoir-core.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hachoir-core.rar
--------------------------------------------------------------------------------
/tests/files/hachoir.org.sxw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hachoir.org.sxw
--------------------------------------------------------------------------------
/tests/files/ladouce_1h15.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ladouce_1h15.wav
--------------------------------------------------------------------------------
/tests/files/lara_croft.pcx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/lara_croft.pcx
--------------------------------------------------------------------------------
/tests/files/logo-kubuntu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/logo-kubuntu.png
--------------------------------------------------------------------------------
/tests/files/macos_10.5.macho:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/macos_10.5.macho
--------------------------------------------------------------------------------
/tests/files/marc_kravetz.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/marc_kravetz.mp3
--------------------------------------------------------------------------------
/tests/files/minimal_xcf1.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/minimal_xcf1.xcf
--------------------------------------------------------------------------------
/tests/files/minimal_xcf3.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/minimal_xcf3.xcf
--------------------------------------------------------------------------------
/tests/files/usa_railroad.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/usa_railroad.jpg
--------------------------------------------------------------------------------
/tests/files/yellowdude.3ds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/yellowdude.3ds
--------------------------------------------------------------------------------
/tests/files/08lechat_hq_fr.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/08lechat_hq_fr.mp3
--------------------------------------------------------------------------------
/tests/files/ReferenceMap.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ReferenceMap.class
--------------------------------------------------------------------------------
/tests/files/cacert_class3.der:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/cacert_class3.der
--------------------------------------------------------------------------------
/tests/files/claque-beignet.swf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/claque-beignet.swf
--------------------------------------------------------------------------------
/tests/files/linux_swap_9pages:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/linux_swap_9pages
--------------------------------------------------------------------------------
/tests/files/macos_10.12.macho:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/macos_10.12.macho
--------------------------------------------------------------------------------
/tests/files/mbr_linux_and_ext:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/mbr_linux_and_ext
--------------------------------------------------------------------------------
/tests/files/mev.32bit.big.elf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/mev.32bit.big.elf
--------------------------------------------------------------------------------
/tests/files/mev.64bit.big.elf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/mev.64bit.big.elf
--------------------------------------------------------------------------------
/tests/files/minimal_xcf10.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/minimal_xcf10.xcf
--------------------------------------------------------------------------------
/tests/files/pentax_320x240.mov:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pentax_320x240.mov
--------------------------------------------------------------------------------
/tests/files/satellite_one.s3m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/satellite_one.s3m
--------------------------------------------------------------------------------
/tests/files/sheep_on_drugs.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/sheep_on_drugs.mp3
--------------------------------------------------------------------------------
/tests/files/steganography.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/steganography.mp3
--------------------------------------------------------------------------------
/tests/files/angle-bear-48x48.ani:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/angle-bear-48x48.ani
--------------------------------------------------------------------------------
/tests/files/deja_vu_serif-2.7.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/deja_vu_serif-2.7.ttf
--------------------------------------------------------------------------------
/tests/files/example4_chapters.arj:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/example4_chapters.arj
--------------------------------------------------------------------------------
/tests/files/ftp-0.17-537.i586.rpm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ftp-0.17-537.i586.rpm
--------------------------------------------------------------------------------
/tests/files/grasslogo_vector.emf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/grasslogo_vector.emf
--------------------------------------------------------------------------------
/tests/files/hotel_california.flac:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/hotel_california.flac
--------------------------------------------------------------------------------
/tests/files/kde_haypo_corner.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/kde_haypo_corner.bmp
--------------------------------------------------------------------------------
/tests/files/matrix_ping_pong.wmv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/matrix_ping_pong.wmv
--------------------------------------------------------------------------------
/tests/files/reiserfs_v3_332k.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/reiserfs_v3_332k.bin
--------------------------------------------------------------------------------
/tests/files/smallville.s03e02.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/smallville.s03e02.avi
--------------------------------------------------------------------------------
/tests/files/wormux_32x32_16c.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/wormux_32x32_16c.ico
--------------------------------------------------------------------------------
/hachoir-metadata-gtk:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.metadata.gtk import MetadataGtk
3 | MetadataGtk().main()
4 |
--------------------------------------------------------------------------------
/hachoir-strip:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.strip import main
3 | if __name__ == "__main__":
4 | main()
5 |
--------------------------------------------------------------------------------
/tests/files/anti-arpeggio_tune.ptm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/anti-arpeggio_tune.ptm
--------------------------------------------------------------------------------
/tests/files/default_mount_opts.ext2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/default_mount_opts.ext2
--------------------------------------------------------------------------------
/tests/files/jpeg.exif.photoshop.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/jpeg.exif.photoshop.jpg
--------------------------------------------------------------------------------
/tests/files/ping_20020927-3ubuntu2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/ping_20020927-3ubuntu2
--------------------------------------------------------------------------------
/tests/files/usa_railroad.jpg.0.zlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/usa_railroad.jpg.0.zlib
--------------------------------------------------------------------------------
/tests/files/usa_railroad.jpg.6.zlib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/usa_railroad.jpg.6.zlib
--------------------------------------------------------------------------------
/hachoir-grep:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.grep import main
3 |
4 | if __name__ == "__main__":
5 | main()
6 |
--------------------------------------------------------------------------------
/hachoir-wx:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.wx.main import main
3 |
4 | if __name__ == "__main__":
5 | main()
6 |
--------------------------------------------------------------------------------
/tests/files/Panasonic_AG_HMC_151.MTS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/Panasonic_AG_HMC_151.MTS
--------------------------------------------------------------------------------
/tests/files/arp_dns_ping_dns.tcpdump:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/arp_dns_ping_dns.tcpdump
--------------------------------------------------------------------------------
/tests/files/bsize-1024-isize-1024.ext2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/bsize-1024-isize-1024.ext2
--------------------------------------------------------------------------------
/tests/files/bsize-2048-isize-512.ext2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/bsize-2048-isize-512.ext2
--------------------------------------------------------------------------------
/tests/files/bsize-4096-isize-128.ext2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/bsize-4096-isize-128.ext2
--------------------------------------------------------------------------------
/tests/files/interlude_david_aubrun.ogg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/interlude_david_aubrun.ogg
--------------------------------------------------------------------------------
/tests/files/png_331x90x8_truncated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/png_331x90x8_truncated.png
--------------------------------------------------------------------------------
/tests/files/pyc_example_1.5.2_pyc.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pyc_example_1.5.2_pyc.bin
--------------------------------------------------------------------------------
/tests/files/pyc_example_2.2.3_pyc.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pyc_example_2.2.3_pyc.bin
--------------------------------------------------------------------------------
/tests/files/pyc_example_2.5c1_pyc.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pyc_example_2.5c1_pyc.bin
--------------------------------------------------------------------------------
/tests/files/python.cpython-312.pyc.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/python.cpython-312.pyc.bin
--------------------------------------------------------------------------------
/tests/files/python.cpython-37.pyc.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/python.cpython-37.pyc.bin
--------------------------------------------------------------------------------
/hachoir/wx/hex_view/__init__.py:
--------------------------------------------------------------------------------
1 | from .hex_view import hex_view_t # noqa
2 | from .hex_view_setup import setup_hex_view # noqa
3 |
--------------------------------------------------------------------------------
/tests/files/audio_8khz_8bit_ulaw_4s39.au:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/audio_8khz_8bit_ulaw_4s39.au
--------------------------------------------------------------------------------
/tests/files/free-software-song.midi.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/free-software-song.midi.bz2
--------------------------------------------------------------------------------
/hachoir/wx/tree_view/__init__.py:
--------------------------------------------------------------------------------
1 | from .tree_view import tree_view_t # noqa
2 | from .tree_view_setup import setup_tree_view # noqa
3 |
--------------------------------------------------------------------------------
/tests/files/get-versions.64bit.little.elf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/get-versions.64bit.little.elf
--------------------------------------------------------------------------------
/tests/files/1024x1024-better-compression.xcf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/1024x1024-better-compression.xcf
--------------------------------------------------------------------------------
/tests/files/com.apple.pkg.BaseSystemResources.bom:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/com.apple.pkg.BaseSystemResources.bom
--------------------------------------------------------------------------------
/tests/files/debian-31r4-i386-binary-1.iso.torrent:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/debian-31r4-i386-binary-1.iso.torrent
--------------------------------------------------------------------------------
/tests/files/pack-31c691f659cbc7841ca55a26a342fdaf0b89c533.pack:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vstinner/hachoir/HEAD/tests/files/pack-31c691f659cbc7841ca55a26a342fdaf0b89c533.pack
--------------------------------------------------------------------------------
/hachoir/wx/frame_view/__init__.py:
--------------------------------------------------------------------------------
1 | from .frame_view import frame_view_t # noqa
2 | from .frame_view_setup import setup_frame_view # noqa
3 | from .frame_view_imp import frame_view_imp_t # noqa
4 |
--------------------------------------------------------------------------------
/tools/flake8.sh:
--------------------------------------------------------------------------------
#!/bin/sh
set -e -x
# Move to the parent of this script's directory. The command substitution is
# quoted so that a checkout path containing spaces stays a single argument.
cd "$(dirname "$0")/.."
# use /bin/sh to support "*.py"
# FIXME: add hachoir-wx (currently broken)
flake8 hachoir/ tests/ runtests.py setup.py doc/examples/*.py
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 | *.swp
3 | MANIFEST
4 | build/
5 | dist/
6 | hachoir-metadata/hachoir_metadata/qt/dialog_ui.py
7 |
8 | # generated by tox
9 | .tox/
10 | hachoir.egg-info/
11 |
12 | # Mac files
13 | .DS_Store
14 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/__init__.py:
--------------------------------------------------------------------------------
1 | from .field_view_setup import setup_field_view # noqa
2 | from .field_view import field_view_t # noqa
3 |
4 | from .field_menu_setup import setup_field_menu # noqa
5 | from .field_menu import field_menu_t # noqa
6 |
--------------------------------------------------------------------------------
/hachoir/test.py:
--------------------------------------------------------------------------------
1 | from hachoir.core import config
2 | from locale import setlocale, LC_ALL
3 |
4 |
def setup_tests():
    """Prepare Hachoir's configuration and the locale for running tests.

    Turns off i18n and warning output on ``hachoir.core.config`` and
    selects the "C" locale for all categories.
    """
    config.quiet = True  # Don't display warnings
    config.use_i18n = False  # Don't use i18n
    setlocale(LC_ALL, "C")
9 |
--------------------------------------------------------------------------------
/hachoir/parser/video/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.video.asf import AsfFile # noqa
2 | from hachoir.parser.video.flv import FlvFile # noqa
3 | from hachoir.parser.video.mpeg_video import MPEGVideoFile # noqa
4 | from hachoir.parser.video.mpeg_ts import MPEG_TS # noqa
5 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | version: 2
5 |
6 | build:
7 | os: ubuntu-lts-latest
8 | tools:
9 | python: "3"
10 |
11 | sphinx:
12 | configuration: doc/conf.py
13 |
--------------------------------------------------------------------------------
/hachoir/parser/game/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.game.zsnes import ZSNESFile # noqa
2 | from hachoir.parser.game.spider_man_video import SpiderManVideoFile # noqa
3 | from hachoir.parser.game.laf import LafFile # noqa
4 | from hachoir.parser.game.blp import BLP1File, BLP2File # noqa
5 |
--------------------------------------------------------------------------------
/doc/hacking.rst:
--------------------------------------------------------------------------------
1 | ++++++++++++
2 | Hack Hachoir
3 | ++++++++++++
4 |
5 | Run tests
6 | =========
7 |
8 | Using tox
9 | ---------
10 |
11 | Install tox (``pip install tox``) and then run tox::
12 |
13 | tox
14 |
15 | Manually
16 | --------
17 |
18 | Run tests manually::
19 |
20 | python3 runtests.py
21 |
--------------------------------------------------------------------------------
/hachoir/core/endian.py:
--------------------------------------------------------------------------------
1 | """
2 | Constant values about endian.
3 | """
4 |
# Each byte order is identified by a four-letter marker string.
BIG_ENDIAN = "ABCD"
LITTLE_ENDIAN = "DCBA"
MIDDLE_ENDIAN = "BADC"
# Network byte order is an alias of big endian.
NETWORK_ENDIAN = BIG_ENDIAN

# Human-readable name of each byte order marker.
endian_name = dict((
    (BIG_ENDIAN, "Big endian"),
    (LITTLE_ENDIAN, "Little endian"),
    (MIDDLE_ENDIAN, "Middle endian"),
))
15 |
--------------------------------------------------------------------------------
/hachoir/parser/common/tracker.py:
--------------------------------------------------------------------------------
1 | """
2 | Shared code for tracker parser.
3 | """
4 |
# Names of the twelve semitones of one octave, starting at C.
# (A thirteenth, duplicated "G#" entry used to shift every note from A
# upwards by one and made consecutive octaves overlap.)
NOTES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")

# Note number -> readable name, where the note number is
# ``octave * len(NOTES) + index of the note inside the octave``.
NOTE_NAME = {}
for octave in range(10):
    for index, note in enumerate(NOTES):
        number = octave * len(NOTES) + index
        NOTE_NAME[number] = "%s (octave %s)" % (note, octave)
10 |
--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
1 | syntax: glob
2 |
3 | # Generated files: .py => .pyc
4 | *.pyc
5 | *.pyo
6 | __pycache__
7 | hachoir-metadata/hachoir_metadata/qt/dialog_ui.py
8 |
9 | # Temporary files (vim backups)
10 | *.swp
11 |
12 | # build/ subdirectories
13 | build
14 |
15 | # build by the tox command
16 | .tox/
17 |
18 | # build by distutils
19 | hachoir.egg-info/
20 |
--------------------------------------------------------------------------------
/hachoir/field/field_set.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import BasicFieldSet, GenericFieldSet
2 |
3 |
# Field set that is built on the same stream as its parent field set.
class FieldSet(GenericFieldSet):

    def __init__(self, parent, name, *args, **kw):
        """Create the field set *name* inside *parent*.

        The class of *parent* must derive from BasicFieldSet. Its ``stream``
        is passed on to GenericFieldSet together with any extra positional
        and keyword arguments.
        """
        parent_type = parent.__class__
        assert issubclass(parent_type, BasicFieldSet)
        stream = parent.stream
        GenericFieldSet.__init__(self, parent, name, stream, *args, **kw)
10 |
--------------------------------------------------------------------------------
/doc/examples/editor_gzip.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser import createParser
2 | from hachoir.editor import createEditor
3 | from hachoir.field import writeIntoFile
4 |
# Example: delete the "filename" field of a gzip file, clear its
# "has_filename" flag and write the result to a new file.
parser = createParser("file.gz")
with parser:
    editor = createEditor(parser)
    del editor["filename"]
    filename_flag = editor["has_filename"]
    filename_flag.value = False
    writeIntoFile(editor, "noname.gz")
11 |
--------------------------------------------------------------------------------
/doc/examples/editor_zip.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser import createParser
2 | from hachoir.editor import createEditor
3 | from hachoir.field import writeIntoFile
4 |
# Example: change the "end_central_directory/comment" field of a zip file
# and write the result to a new file.
parser = createParser("file.zip")
with parser:
    editor = createEditor(parser)
    comment_field = editor["end_central_directory/comment"]
    comment_field.value = "new comment"
    writeIntoFile(editor, "file_comment.zip")
10 |
--------------------------------------------------------------------------------
/hachoir/wx/hex_view/hex_view_setup.py:
--------------------------------------------------------------------------------
1 | from hachoir.wx.resource import get_child_control
2 |
3 |
def setup_hex_view(parent, dispatcher):
    """Fetch the 'hex_view' child control of *parent* and register it.

    The control is registered with *dispatcher* through ``add_sender``,
    ``add`` and ``add_receiver`` (in that order) and then returned.
    """
    print("[+] Setup hex view")
    view = get_child_control(parent, 'hex_view')

    dispatcher.add_sender(view)
    dispatcher.add(view)
    dispatcher.add_receiver(view)
    return view
12 |
--------------------------------------------------------------------------------
/hachoir/wx/tree_view/tree_view_setup.py:
--------------------------------------------------------------------------------
1 | from hachoir.wx.resource import get_child_control
2 |
3 |
def setup_tree_view(parent, dispatcher):
    """Fetch the 'tree_view' child control of *parent* and register it.

    The control is registered with *dispatcher* through ``add_sender``,
    ``add`` and ``add_receiver`` (in that order) and then returned.
    """
    print("[+] Setup tree view")
    view = get_child_control(parent, "tree_view")

    dispatcher.add_sender(view)
    dispatcher.add(view)
    dispatcher.add_receiver(view)
    return view
12 |
--------------------------------------------------------------------------------
/hachoir/wx/frame_view/frame_view.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class frame_view_t(wx.Frame):
    """wx frame that announces itself on a dispatcher once it is ready."""

    def __init__(self):
        wx.Frame.__init__(self)
        # A window-create handler must be bound, otherwise wx crashes.
        self.Bind(wx.EVT_WINDOW_CREATE, self.OnCreate)

    def OnCreate(self, evt):
        """Window-create handler; deliberately does nothing."""
        return None

    def ready(self):
        """Trigger 'frame_view_ready' on ``self.dispatcher`` with this frame.

        ``dispatcher`` is not assigned anywhere in this class, so it has to
        be set on the instance before this method is called.
        """
        self.dispatcher.trigger('frame_view_ready', self)
15 |
--------------------------------------------------------------------------------
/hachoir/parser/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.parser import ValidateError, HachoirParser, Parser # noqa
2 | from hachoir.parser.parser_list import ParserList, HachoirParserList # noqa
3 | from hachoir.parser.guess import QueryParser, guessParser, createParser # noqa
4 | from hachoir.parser import (archive, audio, container, # noqa
5 | file_system, image, game, misc, network, program,
6 | video)
7 |
--------------------------------------------------------------------------------
/hachoir/editor/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.editor.field import EditorError, FakeField # noqa
2 | from hachoir.editor.typed_field import (EditableField, EditableBits, # noqa
3 | EditableBytes, EditableInteger,
4 | EditableString, createEditableField)
5 | from hachoir.editor.fieldset import (EditableFieldSet, NewFieldSet, # noqa
6 | createEditor)
7 |
--------------------------------------------------------------------------------
/hachoir/parser/container/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.container.asn1 import ASN1File # noqa
2 | from hachoir.parser.container.mkv import MkvFile # noqa
3 | from hachoir.parser.container.ogg import OggFile, OggStream # noqa
4 | from hachoir.parser.container.riff import RiffFile # noqa
5 | from hachoir.parser.container.swf import SwfFile # noqa
6 | from hachoir.parser.container.realmedia import RealMediaFile # noqa
7 | from hachoir.parser.container.mp4 import MP4File # noqa
8 |
--------------------------------------------------------------------------------
/hachoir/parser/misc/common.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import StaticFieldSet, Float32
2 |
3 |
# Three consecutive Float32 fields named "x", "y" and "z".
class Vertex(StaticFieldSet):
    format = ((Float32, "x"), (Float32, "y"), (Float32, "z"))

    def createValue(self):
        """Return the values of the x, y and z fields as a 3-tuple."""
        return tuple(self[axis].value for axis in ("x", "y", "z"))
9 |
10 |
# Two consecutive Float32 fields named "u" and "v".
class MapUV(StaticFieldSet):
    format = ((Float32, "u"), (Float32, "v"))

    def createValue(self):
        """Return the values of the u and v fields as a 2-tuple."""
        return tuple(self[axis].value for axis in ("u", "v"))
16 |
--------------------------------------------------------------------------------
/hachoir/regex/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.regex.regex import (RegexEmpty, # noqa
2 | RegexString, createString,
3 | RegexRangeItem, RegexRangeCharacter, RegexRange, createRange,
4 | RegexAnd, RegexOr, RegexRepeat,
5 | RegexDot, RegexStart, RegexEnd, RegexWord)
6 | from hachoir.regex.parser import parse # noqa
7 | from hachoir.regex.pattern import PatternMatching # noqa
8 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/core_type_menu_fwd.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class core_type_menu_fwd_t:
    """Forward menu events of the field menu to an implementation object."""

    def __init__(self, imp):
        self.imp = imp

    def on_field_menu_ready(self, dispatcher, view):
        """Bind the wx.EVT_MENU events of *view* to on_type_selected."""
        assert view is not None
        handler = self.on_type_selected
        view.Bind(wx.EVT_MENU, handler)

    def on_type_selected(self, event):
        """Pass the id of *event* to the implementation.

        When the implementation raises KeyError, the event is skipped
        instead.
        """
        try:
            self.imp.on_type_selected(event.GetId())
        except KeyError:
            event.Skip()
17 |
--------------------------------------------------------------------------------
/doc/install.rst:
--------------------------------------------------------------------------------
1 | +++++++++++++++
2 | Install Hachoir
3 | +++++++++++++++
4 |
5 | To install Hachoir, type::
6 |
7 | python3 -m pip install -U hachoir
8 |
9 | To use hachoir-urwid, you will also need to install `urwid library
10 | <https://urwid.org/>`_::
11 |
12 | python3 -m pip install -U urwid
13 |
14 | To use hachoir-wx, you will also need to install `wxPython
15 | <https://www.wxpython.org/>`_::
16 |
17 | python3 -m pip install -U wxPython
18 |
19 | Hachoir requires Python 3.6 or newer.
20 |
--------------------------------------------------------------------------------
/hachoir/parser/file_system/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.file_system.ext2 import EXT2_FS # noqa
2 | from hachoir.parser.file_system.fat import FAT12, FAT16, FAT32 # noqa
3 | from hachoir.parser.file_system.mbr import MSDos_HardDrive # noqa
4 | from hachoir.parser.file_system.ntfs import NTFS # noqa
5 | from hachoir.parser.file_system.iso9660 import ISO9660 # noqa
6 | from hachoir.parser.file_system.reiser_fs import REISER_FS # noqa
7 | from hachoir.parser.file_system.linux_swap import LinuxSwapFile # noqa
8 |
--------------------------------------------------------------------------------
/hachoir/wx/unicode.py:
--------------------------------------------------------------------------------
1 | import locale
2 | import sys
3 |
4 |
def get_charset():
    """Return the name of the charset to use for decoding byte strings.

    The encoding reported for the default locale is used when available.
    When the lookup fails or reports no encoding, the result of
    ``sys.getdefaultencoding()`` is returned instead.
    """
    # Start from None so that a failed lookup falls through to the default
    # encoding instead of leaving the name unbound.
    charset = None
    try:
        charset = locale.getdefaultlocale()[1]
    except (locale.Error, NameError, AttributeError, IndexError, ValueError):
        # ValueError is raised for locale settings that cannot be parsed;
        # AttributeError also covers a Python without getdefaultlocale().
        pass

    if charset is None:
        charset = sys.getdefaultencoding()

    return charset
15 |
16 |
def force_unicode(name):
    """Return *name* as a ``str``.

    A ``str`` is returned unchanged; anything else is decoded with
    ``str(name, charset)`` using the charset from ``get_charset()``.
    """
    if isinstance(name, str):
        return name

    return str(name, get_charset())
23 |
--------------------------------------------------------------------------------
/hachoir/parser/program/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.program.elf import ElfFile # noqa
2 | from hachoir.parser.program.exe import ExeFile # noqa
3 | from hachoir.parser.program.macho import MachoFile, MachoFatFile # noqa
4 | from hachoir.parser.program.python import PythonCompiledFile # noqa
5 | from hachoir.parser.program.java import JavaCompiledClassFile # noqa
6 | from hachoir.parser.program.prc import PRCFile # noqa
7 | from hachoir.parser.program.nds import NdsFile # noqa
8 | from hachoir.parser.program.java_serialized import JavaSerializedFile # noqa
9 |
--------------------------------------------------------------------------------
/hachoir/wx/frame_view/frame_view_fwd.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class frame_view_fwd_t:
    """Forward activate/show events of the frame to an implementation."""

    def __init__(self, imp):
        self.imp = imp

    def on_frame_view_ready(self, dispatcher, view):
        """Bind the activate and show events of *view* to this object."""
        assert view is not None
        view.Bind(wx.EVT_ACTIVATE, self.on_activated)
        view.Bind(wx.EVT_SHOW, self.on_shown)

    def on_activated(self, event):
        """Notify the implementation when the event reports activation."""
        if not event.GetActive():
            return
        self.imp.on_activated()

    def on_shown(self, event):
        """Notify the implementation when ``event.Show`` is true."""
        if not event.Show:
            return
        self.imp.on_activated()
20 |
--------------------------------------------------------------------------------
/hachoir/metadata/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.metadata.metadata import extractMetadata # noqa
2 |
3 | # Just import the modules:
4 | # each module uses the registerExtractor() method
5 | import hachoir.metadata.archive # noqa
6 | import hachoir.metadata.audio # noqa
7 | import hachoir.metadata.file_system # noqa
8 | import hachoir.metadata.image # noqa
9 | import hachoir.metadata.jpeg # noqa
10 | import hachoir.metadata.misc # noqa
11 | import hachoir.metadata.program # noqa
12 | import hachoir.metadata.riff # noqa
13 | import hachoir.metadata.video # noqa
14 | import hachoir.metadata.cr2 # noqa
--------------------------------------------------------------------------------
/hachoir/wx/dialogs.py:
--------------------------------------------------------------------------------
1 | import wx
2 | import os
3 |
4 |
def file_open_dialog():
    """Create (without showing) an 'Open' file dialog.

    The dialog has no parent, starts in the current working directory with
    no file preselected, and uses the FD_OPEN | FD_FILE_MUST_EXIST style.
    """
    return wx.FileDialog(
        None,
        message='Open',
        defaultDir=os.getcwd(),
        defaultFile='',
        style=wx.FD_OPEN | wx.FD_FILE_MUST_EXIST)
14 |
15 |
def file_save_dialog(title):
    """Create (without showing) a save-file dialog with *title* as message.

    The dialog has no parent, starts in the current working directory with
    no file preselected, and uses the FD_SAVE style.
    """
    return wx.FileDialog(
        None,
        message=title,
        defaultDir=os.getcwd(),
        defaultFile='',
        style=wx.FD_SAVE)
25 |
--------------------------------------------------------------------------------
/hachoir/wx/frame_view/frame_view_setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .frame_view_imp import frame_view_imp_t
4 | from .frame_view_fwd import frame_view_fwd_t
5 |
6 | from hachoir.wx.resource import get_frame
7 |
8 |
def setup_frame_view(dispatcher):
    """Fetch the 'frame_view' frame and wire it up on *dispatcher*.

    The frame is added as a sender, a new frame_view_imp_t is added with
    ``dispatcher.add``, and a frame_view_fwd_t wrapping that implementation
    is added as a receiver. The frame is returned.
    """
    print('[+] Setup frame view')
    view = get_frame('frame_view')
    dispatcher.add_sender(view)

    imp = frame_view_imp_t()
    dispatcher.add(imp)

    fwd = frame_view_fwd_t(imp)
    dispatcher.add_receiver(fwd)

    return view
21 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include benchmark.sh
2 | include AUTHORS
3 | include COPYING
4 | include ChangeLog
5 | include INSTALL
6 | include MANIFEST.in
7 | include README.rst
8 | include TODO.rst
9 | include tox.ini
10 | include .travis.yml
11 | include runtests.py
12 | include hachoir-metadata-csv
13 | include run_testcase.py
14 |
15 | include doc/*.rst doc/conf.py doc/make.bat doc/Makefile doc/gen_parser_list.py
16 | include doc/examples/*.py
17 |
18 | include tests/*.py tests/*.rst tests/files/*
19 |
20 | # IGNORED files:
21 | #
22 | # hachoir-metadata-gtk
23 | # hachoir-metadata-qt
24 | # hachoir/metadata/qt/dialog.ui
25 | # tools/*.py
26 |
--------------------------------------------------------------------------------
/hachoir/metadata/formatter.py:
--------------------------------------------------------------------------------
# Channel counts that have a dedicated name.
NB_CHANNEL_NAME = {1: "mono", 2: "stereo"}


def humanAudioChannel(value):
    """Return the name for a channel count, or ``str(value)`` if unnamed."""
    fallback = str(value)
    return NB_CHANNEL_NAME.get(value, fallback)
6 |
7 |
def humanFrameRate(value):
    """Format an int or float as "<value with one decimal> fps".

    Any other kind of value is returned unchanged.
    """
    if not isinstance(value, (int, float)):
        return value
    return "%.1f fps" % value
13 |
14 |
def humanComprRate(rate):
    """Format *rate* with one decimal followed by an "x" suffix."""
    text = "%.1fx" % rate
    return text
17 |
18 |
def humanAltitude(value):
    """Format *value* with one decimal followed by " meters"."""
    text = "%.1f meters" % value
    return text
21 |
22 |
def humanPixelSize(value):
    """Format *value* followed by " pixels"."""
    text = "%s pixels" % value
    return text
25 |
26 |
def humanDPI(value):
    """Format *value* followed by " DPI"."""
    text = "%s DPI" % value
    return text
29 |
--------------------------------------------------------------------------------
/doc/examples/editor_add_extra.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser import createParser
2 | from hachoir.editor import createEditor
3 | from hachoir.field import writeIntoFile
4 | from hachoir.editor import EditableInteger, EditableBytes
5 |
# Example: set the "has_extra" flag of a gzip file, insert "extra_length"
# and "extra" fields after the "os" field and write the result to a new file.
parser = createParser("file.gz")
with parser:
    editor = createEditor(parser)
    extra = "abcd"
    editor["has_extra"].value = True
    length_field = EditableInteger(
        editor, "extra_length", False, 16, len(extra))
    data_field = EditableBytes(editor, "extra", extra)
    editor.insertAfter("os", length_field, data_field)
    writeIntoFile(editor, "file_extra.gz")
16 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/core_type_menu.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class core_type_menu_t:
    """Wrapper around a wx menu that lists type names."""

    def __init__(self, menu):
        self.menu = menu
        # menu item id -> type name, filled by add_type()
        self.id_to_type = {}

        self.Bind = self.menu.Bind  # see note in field_menu.py

    def add_type(self, type_name):
        """Append *type_name* to the menu under a newly allocated id."""
        new_id = wx.NewId()
        self.id_to_type[new_id] = type_name
        self.menu.Append(new_id, type_name)

    def get_type_name(self, id):
        """Return the type name registered for *id* (KeyError if unknown)."""
        return self.id_to_type[id]

    def clear(self):
        """Delete every item currently in the menu."""
        for entry in self.menu.GetMenuItems():
            self.menu.Delete(entry)
23 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_split_menu_fwd.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class field_split_menu_fwd_t:
    """Forward the split entries of the field menu to an implementation."""

    def __init__(self, imp):
        self.imp = imp

    def on_field_menu_ready(self, dispatcher, view):
        """Bind the two split menu entries of *view* to this object."""
        assert view is not None

        bytes_id = wx.xrc.XRCID('field_menu_split_bytes')
        view.Bind(wx.EVT_MENU, self.on_split_bytes, id=bytes_id)
        bits_id = wx.xrc.XRCID('field_menu_split_bits')
        view.Bind(wx.EVT_MENU, self.on_split_bits, id=bits_id)

    def on_split_bytes(self, event):
        """Handle the split-bytes entry; *event* itself is not used."""
        self.imp.on_split_bytes()

    def on_split_bits(self, event):
        """Handle the split-bits entry; *event* itself is not used."""
        self.imp.on_split_bits()
21 |
--------------------------------------------------------------------------------
/hachoir/stream/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN # noqa
2 | from hachoir.stream.stream import StreamError # noqa
3 | from hachoir.stream.input import (InputStreamError, # noqa
4 | InputStream, InputIOStream, StringInputStream,
5 | InputSubStream, InputFieldStream,
6 | FragmentedStream, ConcatStream)
7 | from hachoir.stream.input_helper import FileInputStream, guessStreamCharset # noqa
8 | from hachoir.stream.output import (OutputStreamError, # noqa
9 | FileOutputStream, StringOutputStream, OutputStream)
10 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_menu.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from hachoir.wx.dialogs import file_save_dialog
4 | import wx
5 |
6 |
class field_menu_t:
    """Wrapper pairing the field context menu with its parent window."""

    def __init__(self, parent, menu):
        self.parent = parent
        self.menu = menu

        # forward this call because xrc doesn't allow menu
        # subclassing (as of 2.6.3)
        self.Bind = self.menu.Bind

    def show_opts(self):
        """Pop the menu up on the parent window."""
        self.parent.PopupMenu(self.menu)

    def ask_for_dump_file(self, title):
        """Ask for a file to save to, using a dialog titled ``title``.

        Returns the dialog's path when it is closed with OK, None otherwise.
        """
        dump_dlog = file_save_dialog(title)
        if dump_dlog.ShowModal() != wx.ID_OK:
            return None
        return dump_dlog.GetPath()
23 |
--------------------------------------------------------------------------------
/hachoir/parser/audio/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.audio.aiff import AiffFile # noqa
2 | from hachoir.parser.audio.au import AuFile # noqa
3 | from hachoir.parser.audio.itunesdb import ITunesDBFile # noqa
4 | from hachoir.parser.audio.midi import MidiFile # noqa
5 | from hachoir.parser.audio.mpeg_audio import MpegAudioFile # noqa
6 | from hachoir.parser.audio.real_audio import RealAudioFile # noqa
7 | from hachoir.parser.audio.xm import XMModule # noqa
8 | from hachoir.parser.audio.s3m import S3MModule # noqa
9 | from hachoir.parser.audio.s3m import PTMModule # noqa
10 | from hachoir.parser.audio.mod import AmigaModule # noqa
11 | from hachoir.parser.audio.flac import FlacParser # noqa
12 |
--------------------------------------------------------------------------------
/doc/editor.rst:
--------------------------------------------------------------------------------
1 | .. _editor:
2 |
3 | +++++++++++++++++++++
4 | hachoir.editor module
5 | +++++++++++++++++++++
6 |
7 | Hachoir editor is a Python library based on Hachoir core used to edit binary
8 | files.
9 |
10 | Today, only one program uses it: :ref:`hachoir-strip <strip>` (remove "useless"
11 | information to make a file smaller).
12 |
13 | Example: gzip, remove filename
14 | ==============================
15 |
16 | .. literalinclude:: examples/editor_gzip.py
17 |
18 | Example: gzip, add extra
19 | ========================
20 |
21 | .. literalinclude:: examples/editor_add_extra.py
22 |
23 | Example: zip, set comment
24 | =========================
25 |
26 | .. literalinclude:: examples/editor_zip.py
27 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/format.py:
--------------------------------------------------------------------------------
def format_addr_dec(addr):
    """Format ``addr`` as ``<addr // 8>.<addr % 8>`` with a zero-padded decimal first part."""
    whole, remainder = divmod(addr, 8)
    return "%08d.%01d" % (whole, remainder)
3 |
4 |
def format_addr_hex(addr):
    """Format ``addr`` as ``<addr // 8>.<addr % 8>`` with a zero-padded hexadecimal first part."""
    whole, remainder = divmod(addr, 8)
    return "%08x.%01d" % (whole, remainder)
7 |
8 |
def format_size(size):
    """Format ``size`` as ``<size // 8>.<size % 8>`` with a zero-padded decimal first part."""
    whole, remainder = divmod(size, 8)
    return "%08u.%01d" % (whole, remainder)
11 |
12 |
def format_data(field):
    """Return ``field.display`` when the field has a value, '' otherwise."""
    return field.display if field.hasValue() else ''
20 |
21 |
def format_name(field):
    """Return the field's name, with a trailing '/' when it is a field set."""
    label = field.name
    return (label + '/') if field.is_field_set else label
28 |
29 |
def format_desc(field):
    """Return the field's description as a string, '' when it is falsy."""
    return str(field.description) if field.description else ''
34 |
--------------------------------------------------------------------------------
/hachoir/parser/image/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.image.bmp import BmpFile # noqa
2 | from hachoir.parser.image.gif import GifFile # noqa
3 | from hachoir.parser.image.ico import IcoFile # noqa
4 | from hachoir.parser.image.jpeg import JpegFile # noqa
5 | from hachoir.parser.image.pcx import PcxFile # noqa
6 | from hachoir.parser.image.psd import PsdFile # noqa
7 | from hachoir.parser.image.png import PngFile # noqa
8 | from hachoir.parser.image.tga import TargaFile # noqa
9 | from hachoir.parser.image.tiff import TiffFile # noqa
10 | from hachoir.parser.image.wmf import WMF_File # noqa
11 | from hachoir.parser.image.xcf import XcfFile # noqa
12 | from hachoir.parser.image.cr2 import CR2File # noqa
13 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_view_setup.py:
--------------------------------------------------------------------------------
1 | from hachoir.wx.resource import get_child_control
2 | from .field_view_imp import field_view_imp_t
3 | from .field_view_fwd import field_view_fwd_t
4 | from .field_menu_setup import setup_field_menu
5 |
6 |
def setup_field_view(parent, dispatcher):
    """Fetch the 'field_view' control of ``parent`` and register it.

    The control becomes a sender on ``dispatcher``; a fresh implementation
    object and its event forwarder are added to the dispatcher, then the
    field menu is set up on the control. Returns the control.
    """
    print("[+] Setup field view")
    view = get_child_control(parent, 'field_view')
    dispatcher.add_sender(view)

    imp = field_view_imp_t()
    dispatcher.add(imp)
    dispatcher.add(field_view_fwd_t(imp))

    setup_field_menu(view, dispatcher)

    return view
21 |
--------------------------------------------------------------------------------
/doc/gen_parser_list.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import os.path
3 |
def writeReadme(out):
    """Write a "Parser list" title followed by the parser list to ``out``."""
    from hachoir.parser.parser_list import HachoirParserList

    # Write parser list
    output_format = "rest"
    if output_format == "rest":
        # reST section title with its underline
        for line in ("Parser list", "==========="):
            print(line, file=out)
        print(file=out)
    HachoirParserList().print_(out=out, format=output_format)
14 |
def main():
    """Rewrite parser_list.rst in this script's directory and report it."""
    script_dir = os.path.dirname(__file__)
    filename = os.path.join(script_dir, 'parser_list.rst')
    with open(filename, 'w') as readme:
        writeReadme(readme)
    print("%s regenerated" % filename)
21 |
22 | if __name__ == "__main__":
23 | main()
24 |
25 |
--------------------------------------------------------------------------------
/doc/contact.rst:
--------------------------------------------------------------------------------
1 | +++++++
2 | Contact
3 | +++++++
4 |
5 | Mailing list
6 | ============
7 |
8 | Mailing list: ``hachoir AT lists.tuxfamily.org``
9 |
10 | * `Read archives on mail-archives.com `_
11 | * `Mailing list archives on Gmane.org
12 | `_: Read mailing list using
13 | HTTP, NNTP or RSS
14 |
15 | * To subscribe, send an email with subject ``subscribe`` (and empty body) to
16 | ``hachoir-request@lists.tux(...).org``
17 | * To unsubscribe, send an email with subject ``unsubscribe`` (and empty body)
18 | to ``hachoir-request@lists.tux(...).org``
19 | * You have to subscribe to post email.
20 | * Created October 22nd 2006
21 |
22 |
--------------------------------------------------------------------------------
/doc/examples/metadata.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser import createParser
2 | from hachoir.metadata import extractMetadata
3 | from sys import argv, stderr, exit
4 |
# Exactly one command-line argument is expected: the file to analyse.
if len(argv) != 2:
    print("usage: %s filename" % argv[0], file=stderr)
    exit(1)

filename = argv[1]
parser = createParser(filename)
if not parser:
    print("Unable to parse file", file=stderr)
    exit(1)

with parser:
    # Stays None when the extraction raises.
    metadata = None
    try:
        metadata = extractMetadata(parser)
    except Exception as err:
        print("Metadata extraction error: %s" % err)
if not metadata:
    print("Unable to extract metadata")
    exit(1)

# One line of plain text per print() call.
for line in metadata.exportPlaintext():
    print(line)
26 |
--------------------------------------------------------------------------------
/hachoir/wx/frame_view/frame_view_imp.py:
--------------------------------------------------------------------------------
class frame_view_imp_t:
    """Keeps track of the frame and file name and retitles the frame."""

    def on_frame_view_ready(self, dispatcher, frame_view):
        assert frame_view is not None
        self.view = frame_view

    def on_file_ready(self, dispatcher, file):
        assert file is not None
        self.filename = file.name

    def on_filename_update(self, dispatcher, filename):
        self.filename = filename

    def format_title(self, field):
        """Return the file name, '/', and ``field.path`` minus its first character."""
        return '/'.join((self.filename, field.path[1:]))

    def on_field_activated(self, dispatcher, field):
        title = self.format_title(field)
        self.view.SetTitle(title)

    def on_activated(self):
        # self.dispatcher is not assigned anywhere in this class; it has to
        # be set on the instance from outside before this is called.
        self.dispatcher.trigger('frame_activated', self.view)
22 |
--------------------------------------------------------------------------------
/hachoir/core/error.py:
--------------------------------------------------------------------------------
1 | """
2 | Functions to display an error (error, warning or information) message.
3 | """
4 |
5 | from hachoir.core.log import log
6 | import sys
7 | import traceback
8 |
9 |
def getBacktrace(empty="Empty backtrace."):
    """
    Try to get backtrace as string.

    Returns the formatted traceback of the exception currently being
    handled, ``empty`` when no exception is being handled, and
    "Error while trying to get backtrace" on failure.
    """
    try:
        info = sys.exc_info()
        # Outside of an "except" block exc_info() is (None, None, None).
        # Formatting that triple yields "NoneType: None" on Python 3, so it
        # has to be detected here rather than by inspecting the output.
        if info[0] is None:
            return empty
        return "".join(traceback.format_exception(*info))
    except Exception:
        # No i18n here (imagine if i18n function calls error...)
        return "Error while trying to get backtrace"
24 |
25 |
26 | info = log.info
27 | warning = log.warning
28 | error = log.error
29 |
--------------------------------------------------------------------------------
/hachoir/wx/dispatcher.py:
--------------------------------------------------------------------------------
class dispatcher_t:
    """Deliver named events to registered receivers, in registration order."""

    def __init__(self):
        self.receivers = []

    def add(self, who):
        """Register ``who`` as both a sender and a receiver."""
        self.add_sender(who)
        self.add_receiver(who)

    def add_sender(self, sender):
        # Senders reach the dispatcher through this attribute.
        sender.dispatcher = self

    def add_receiver(self, receiver):
        self.receivers.append(receiver)

    def trigger(self, event_name, *args):
        """Call ``on_<event_name>(self, *args)`` on each receiver having it.

        A receiver without that method gets
        ``on_unhandled(self, event_name, *args)`` instead, if it defines it.
        """
        missing = object()
        handler_name = 'on_' + event_name

        for receiver in self.receivers:
            handler = getattr(receiver, handler_name, missing)
            if handler is not missing:
                handler(self, *args)
                continue
            fallback = getattr(receiver, 'on_unhandled', missing)
            if fallback is not missing:
                fallback(self, event_name, *args)
24 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py3, pep8, doc
3 |
4 | [testenv]
5 | basepython = python3
6 | commands=
7 | python -bb -Werror -X faulthandler runtests.py
8 |
9 | [testenv:pep8]
10 | allowlist_externals = sh
11 | deps = flake8
12 | commands =
13 | sh tools/flake8.sh
14 |
15 | [flake8]
16 | # E121 continuation line under-indented for hanging indent
17 | # hachoir/parser/network/ouid.py
18 | # E131 continuation line unaligned for hanging indent
19 | # parser/container/mp4.py
20 | # E501 line too long (88 > 79 characters)
21 | # W503 line break before binary operator
22 | # W504 line break after binary operator
23 | ignore = E121,E131,E501,W503,W504
24 |
25 | [testenv:doc]
26 | deps=
27 | sphinx
28 | allowlist_externals = make
29 | commands=
30 | python doc/gen_parser_list.py
31 | make -C doc html
32 |
33 |
--------------------------------------------------------------------------------
/hachoir/core/event_handler.py:
--------------------------------------------------------------------------------
class EventHandler(object):
    """
    Registry mapping event names to lists of handlers.
    """

    def __init__(self):
        self.handlers = {}

    def connect(self, event_name, handler):
        """
        Register a handler for an event; it is appended after the handlers
        already connected to that event.
        """
        self.handlers.setdefault(event_name, []).append(handler)

    def raiseEvent(self, event_name, *args):
        """
        Raise an event: call each handler connected to event_name with
        args, in connection order. Does nothing for an unknown event.
        """
        for handler in self.handlers.get(event_name, ()):
            handler(*args)
26 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_split_menu.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class field_split_menu_t:
    """Wrapper around the split menu; also asks the user for a split offset."""

    def __init__(self, parent, menu):
        self.parent = parent
        self.menu = menu
        self.Bind = self.menu.Bind  # see note in field_menu.py

    @staticmethod
    def _parse_offset(text, min, max):
        """Return ``text`` as an int within [min, max], or None if it is not one."""
        try:
            value = int(text)
        except ValueError:
            return None
        if min <= value <= max:
            return value
        return None

    def ask_split(self, caption, min, max):
        """Ask for a split offset in a dialog titled ``caption``.

        Returns the entered offset as an int when the dialog is confirmed
        and the value lies within [min, max]; returns None otherwise.
        """
        # Note: we would prefer a NumberEntryDialog but this isn't currently wrapped
        # by wxPython Phoenix.
        # TextEntryDialog takes (parent, message, caption, value): it has no
        # min/max parameters, so the range is enforced after the fact.
        dlg = wx.TextEntryDialog(self.parent, 'Enter split offset:',
                                 caption, str(min))
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return None
            return self._parse_offset(dlg.GetValue(), min, max)
        finally:
            # Release the dialog even if one of the calls above raises.
            dlg.Destroy()
23 |
--------------------------------------------------------------------------------
/hachoir/field/character.py:
--------------------------------------------------------------------------------
1 | """
2 | Character field class: a 8-bit character
3 | """
4 |
5 | from hachoir.field import Bits
6 | from hachoir.core.tools import makePrintable
7 |
8 |
class Character(Bits):
    """
    8-bit field whose value is a one-character string; the display
    attribute is rendered with the ASCII charset.
    """
    static_size = 8

    def __init__(self, parent, name, description=None):
        Bits.__init__(self, parent, name, self.static_size,
                      description=description)

    def createValue(self):
        # Read the field's bits as an integer, then map it to a character.
        code = self._parent.stream.readBits(
            self.absolute_address, self.static_size, self.parent.endian)
        return chr(code)

    def createRawDisplay(self):
        # Raw display is the value computed by Bits, as text.
        raw = Bits.createValue(self)
        return str(raw)

    def createDisplay(self):
        return makePrintable(self.value, "ASCII", quote="'")
27 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_view_fwd.py:
--------------------------------------------------------------------------------
1 | import wx
2 |
3 |
class field_view_fwd_t:
    """Forward the field view's wx events to the implementation object."""

    def __init__(self, imp):
        self.imp = imp

    def on_field_view_ready(self, dispatcher, field_view):
        """Bind the view's click, activation and selection events."""
        assert field_view is not None

        bind = field_view.Bind
        # Both right-click flavours open the same operations menu.
        bind(wx.EVT_COMMAND_RIGHT_CLICK, self.on_item_right_clicked)
        bind(wx.EVT_RIGHT_UP, self.on_item_right_clicked)
        bind(wx.EVT_LIST_ITEM_ACTIVATED, self.on_item_activated)
        bind(wx.EVT_LIST_ITEM_SELECTED, self.on_item_selected)

    def on_item_right_clicked(self, event):
        self.imp.on_item_show_ops()

    def on_item_selected(self, event):
        self.imp.on_item_selected()

    def on_item_activated(self, event):
        self.imp.on_item_activated()
24 |
--------------------------------------------------------------------------------
/hachoir/core/language.py:
--------------------------------------------------------------------------------
1 | import functools
2 | from hachoir.core.iso639 import ISO639_2
3 |
4 |
@functools.total_ordering
class Language:
    """A language identified by a code found in ISO639_2.

    Instances compare, order and hash by their code; str() gives the
    ISO639_2 entry stored for the code.
    """

    def __init__(self, code):
        """Store ``str(code)``; raise ValueError if it is not in ISO639_2."""
        code = str(code)
        if code not in ISO639_2:
            raise ValueError("Invalid language code: %r" % code)
        self.code = code

    def __eq__(self, other):
        if other.__class__ != Language:
            return NotImplemented
        return self.code == other.code

    def __hash__(self):
        # Defining __eq__ removes the inherited __hash__; restore one that
        # agrees with equality so instances can be set members / dict keys.
        return hash(self.code)

    def __lt__(self, other):
        if other.__class__ != Language:
            return NotImplemented
        return self.code < other.code

    def __str__(self):
        return ISO639_2[self.code]

    def __repr__(self):
        return "<Language '%s', code=%r>" % (str(self), self.code)
29 |
--------------------------------------------------------------------------------
/hachoir/wx/resource/resource.py:
--------------------------------------------------------------------------------
1 | import os
2 | from wx.xrc import XmlResource, XRCID
3 |
4 |
def get_resource():
    """Return an XmlResource for hachoir_wx.xrc in this module's directory."""
    module_dir = os.path.dirname(__file__)
    # Joined onto the working directory, which only matters when
    # module_dir is a relative path.
    xrc_path = os.path.join(os.getcwd(), module_dir, 'hachoir_wx.xrc')
    return XmlResource(xrc_path)
8 |
9 |
def get_frame(name):
    """Load the frame called ``name`` from the XRC resource, without a parent."""
    resource = get_resource()
    return resource.LoadFrame(None, name)
12 |
13 |
def get_child_control(parent, child):
    """Return the control of ``parent`` whose XRC name is ``child``."""
    # We do this instead of XRCCTRL to work around a bug in wxPython 3.0.3.
    # FindWindowById, FindWindowByName and XRCCTRL all seem to return the
    # first-created "child" instead of the proper one; only FindWindow behaves
    # as expected.
    find_window = parent.FindWindow
    return find_window(XRCID(child))
20 |
21 |
def get_menu_bar(name):
    """Load the menu bar called ``name`` from the XRC resource."""
    resource = get_resource()
    return resource.LoadMenuBar(name)
24 |
25 |
def get_menu(name):
    """Load the menu called ``name`` from the XRC resource."""
    resource = get_resource()
    return resource.LoadMenu(name)
28 |
--------------------------------------------------------------------------------
/hachoir/field/enum.py:
--------------------------------------------------------------------------------
def Enum(field, enum, key_func=None):
    """
    Adapt a field so that its display comes from a lookup table.

    The field's createDisplay is replaced by a function returning
    enum[field.value]; when that lookup raises LookupError, the field's
    previous createDisplay is used instead. A getEnum() function returning
    the table is attached too. The same field object is returned.

    key_func is an optional function with prototype "def func(key)->key"
    which is called to transform the value before the lookup.
    """
    fallback_display = field.createDisplay

    def createDisplay():
        try:
            key = field.value
            if key_func:
                key = key_func(key)
            return enum[key]
        except LookupError:
            return fallback_display()

    field.createDisplay = createDisplay
    field.getEnum = lambda: enum
    return field
26 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/core_type_menu_imp.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import available_types
2 | from hachoir.wx.field_view.mutator import convert_field
3 | from hachoir.wx.field_view.stubs import can_convert
4 |
5 |
class core_type_menu_imp_t:
    """Fill the type menu for the selected field and apply the chosen type."""

    def __init__(self):
        self.cur_field = None

    def on_core_type_menu_ready(self, dispatcher, view):
        assert view is not None
        self.view = view

    def on_type_selected(self, id):
        """Convert the current field to the type behind menu-item ``id``."""
        type_name = self.view.get_type_name(id)
        convert_field(self.cur_field, type_name)
        self.dispatcher.trigger('field_modified', self.cur_field)

    def on_field_selected(self, dispatcher, field):
        """Remember ``field`` and list the types it can be converted to."""
        self.cur_field = field

        self.view.clear()
        for candidate in available_types:
            if not can_convert(field, candidate):
                continue
            # Offering the field's own class would be a no-op conversion.
            if field.__class__ is candidate:
                continue
            self.view.add_type(candidate.__name__)
25 |
--------------------------------------------------------------------------------
/hachoir/metadata/safe.py:
--------------------------------------------------------------------------------
1 | from hachoir.core.error import warning
2 |
3 |
def fault_tolerant(func, *args):
    """Wrap ``func`` so that any Exception it raises is logged as a warning.

    The wrapper passes its arguments through to ``func`` and always returns
    None: the result of ``func`` is not propagated. The extra positional
    arguments given to fault_tolerant() itself are not used.
    """
    def safe_func(*args, **kw):
        try:
            func(*args, **kw)
        except Exception as err:
            message = "Error when calling function %s(): %s" % (
                func.__name__, err)
            warning(message)
    return safe_func
12 |
13 |
def getFieldAttribute(fieldset, key, attrname):
    """Return attribute ``attrname`` of ``fieldset[key]``, or None.

    None is returned when the field has no value, and also when anything
    raises an Exception along the way (a warning is logged in that case).
    """
    try:
        field = fieldset[key]
        if not field.hasValue():
            return None
        return getattr(field, attrname)
    except Exception as err:
        warning("Unable to get %s of field %s/%s: %s" % (
            attrname, fieldset.path, key, err))
        return None


def getValue(fieldset, key):
    """Return the ``value`` attribute of ``fieldset[key]``, or None."""
    return getFieldAttribute(fieldset, key, attrname="value")


def getDisplay(fieldset, key):
    """Return the ``display`` attribute of ``fieldset[key]``, or None."""
    return getFieldAttribute(fieldset, key, attrname="display")
31 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/mutator.py:
--------------------------------------------------------------------------------
1 | from hachoir import field as field_module
2 | from hachoir.wx.field_view.stubs import has_static_size, convert_size
3 |
4 |
def split_field(field, split_pos, split_name, split_t, size_func):
    """Replace ``field`` in its parent by two ``split_t`` fields.

    Both new fields are named ``split_name + '[]'``. The first is built
    with size ``split_pos``, the second with ``size_func(field) - split_pos``.
    """
    parent = field._parent
    array_name = split_name + '[]'

    head = split_t(parent, array_name, split_pos)
    tail = split_t(parent, array_name, size_func(field) - split_pos)

    parent.replaceField(field.name, [head, tail])
13 |
14 |
def convert_field(field, new_type_name):
    """Replace ``field`` in its parent by a field of another type.

    ``new_type_name`` is looked up as an attribute of the hachoir field
    module. The new field keeps the name and description of ``field``;
    types without a static size also receive a size from convert_size().
    """
    field_set = field._parent
    new_type = getattr(field_module, new_type_name)

    ctor_args = [field_set, field.name]
    if not has_static_size(new_type):
        # Constructors of these types take the size right after the name.
        ctor_args.append(convert_size(field, new_type))
    ctor_args.append(field._getDescription())

    field_set.replaceField(field.name, [new_type(*ctor_args)])
25 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 | branches: [main]
8 |
9 | jobs:
10 | build:
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | matrix:
14 | os: [ubuntu-latest]
15 | python: ['3.11']
16 | include:
17 | # Python 3.6 uses old Ubuntu 20.04
18 | - os: ubuntu-20.04
19 | python: '3.6'
20 |
21 | steps:
22 | # https://github.com/actions/checkout
23 | - uses: actions/checkout@v3
24 | - name: Setup Python
25 | # https://github.com/actions/setup-python
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: ${{ matrix.python }}
29 | - name: Install Tox and any other packages
30 | run: pip install tox
31 | - name: Display the Python version
32 | run: python -VV
33 | - name: Run Tox
34 | run: tox
35 |
--------------------------------------------------------------------------------
/hachoir/core/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration of Hachoir
3 | """
4 |
5 | import os
6 |
# UI: display options
max_string_length = 40   # Max. length in characters of GenericString.display
max_byte_length = 14     # Max. length in bytes of RawBytes.display
max_bit_length = 256     # Max. length in bits of RawBits.display

# Global options
debug = False     # Display a lot of information useful for debugging
verbose = False   # Display more information
quiet = False     # Don't display warnings

# Use internationalization and localization (gettext)?
# TODO: Remove this hack and make i18n work on Windows :-)
use_i18n = os.name != "nt"

# Parser global options
autofix = True                 # Enable Autofix? see hachoir.field.GenericFieldSet
check_padding_pattern = True   # Check padding fields pattern?
27 |
--------------------------------------------------------------------------------
/hachoir/wx/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | from hachoir.wx.app import app_t
4 | from hachoir import __version__
5 | from hachoir.core.cmd_line import getHachoirOptions, configureHachoir
6 | from optparse import OptionParser
7 | import sys
8 |
9 |
def parseOptions():
    """Parse the command line.

    Returns ``(values, filename)``; ``filename`` is None when no positional
    argument is given. With more than one positional argument the help is
    printed and the program exits with status 1.
    """
    parser = OptionParser(usage="%prog [options] [filename]")
    parser.add_option_group(getHachoirOptions(parser))

    values, arguments = parser.parse_args()
    if len(arguments) > 1:
        parser.print_help()
        sys.exit(1)
    filename = arguments[0] if arguments else None
    return values, filename
24 |
25 |
def main():
    """Print the version banner, apply the options and run the wx app."""
    print("hachoir version %s" % __version__)
    print()
    options, filename = parseOptions()
    configureHachoir(options)
    application = app_t(filename)
    application.MainLoop()
33 |
34 |
35 | if __name__ == '__main__':
36 | main()
37 |
--------------------------------------------------------------------------------
/benchmark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "x$PYTHON" = "x" ]; then
4 | PYTHON=`which python3`
5 | fi
6 | PYTHON=$PYTHON
7 | SRC=$(cd `dirname $0`; pwd)
8 | TESTCASE=tests/files/
9 | export PYTHONPATH=$SRC/hachoir-core:$SRC/hachoir-parser:$SRC/hachoir-metadata:$PYTHONPATH
10 |
# Print a blank line and a "=== <name> ===" banner for benchmark $1,
# then run sync before the benchmark starts.
prepare_benchmark() {
    printf '\n=== %s ===\n' "$1"
    sync
}
17 |
18 | HACHOIR_VERSION="unknown"
19 | PYTHON_VERSION=`$PYTHON -c 'from sys import version; print(version.split("\n")[0].split("(")[0].strip())' 2>&1`
20 | echo "Benchmark Hachoir version $HACHOIR_VERSION on Python $PYTHON_VERSION"
21 |
22 | prepare_benchmark "hachoir-grep: yellowcase"
23 | $PYTHON -OO $SRC/hachoir-grep --bench --all $TESTCASE/yellowdude.3ds
24 |
25 | prepare_benchmark "hachoir-metadata: set A (mp3, wav, png, au, mkv)"
26 | (cd $TESTCASE; $PYTHON -OO $SRC/hachoir-metadata \
27 | --bench \
28 | sheep_on_drugs.mp3 kde_click.wav logo-kubuntu.png \
29 | audio_8khz_8bit_ulaw_4s39.au flashmob.mkv 10min.mkv)
30 |
31 |
--------------------------------------------------------------------------------
/doc/wx.rst:
--------------------------------------------------------------------------------
1 | .. _wx:
2 |
3 | ++++++++++++++++++
4 | hachoir-wx program
5 | ++++++++++++++++++
6 |
7 | hachoir-wx is a graphical binary file explorer and hex viewer, which uses the
8 | Hachoir library to parse the files and the WxPython library to create the user
9 | interface.
10 |
11 | Before use, make sure to install the required dependencies with ``pip install
12 | hachoir[wx]``. On Mac OS and Windows, this will install WxPython. On Linux, you
13 | may need to install a version of WxPython using your distribution's package manager
14 | or from the `WxPython Download page <https://wxpython.org/pages/downloads/>`_.
15 |
16 | .. image:: images/wx.png
17 | :alt: hachoir-wx screenshot (MP3 audio file)
18 |
19 | Command line options
20 | ====================
21 |
22 | * ``--preload=10``: Load 10 fields when loading a new field set
23 | * ``--path="/header/bpp"``: Open the specified path and focus on the field
24 | * ``--parser=PARSERID``: Force a parser (and skip parser validation)
25 | * ``--help``: Show all command line options
26 |
--------------------------------------------------------------------------------
/hachoir/parser/archive/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.archive.ace import AceFile # noqa
2 | from hachoir.parser.archive.ar import ArchiveFile # noqa
3 | from hachoir.parser.archive.arj import ArjParser # noqa
4 | from hachoir.parser.archive.bomstore import BomFile # noqa
5 | from hachoir.parser.archive.bzip2_parser import Bzip2Parser # noqa
6 | from hachoir.parser.archive.cab import CabFile # noqa
7 | from hachoir.parser.archive.gzip_parser import GzipParser # noqa
8 | from hachoir.parser.archive.tar import TarFile # noqa
9 | from hachoir.parser.archive.zip import ZipFile # noqa
10 | from hachoir.parser.archive.rar import RarFile # noqa
11 | from hachoir.parser.archive.rpm import RpmFile # noqa
12 | from hachoir.parser.archive.sevenzip import SevenZipParser # noqa
13 | from hachoir.parser.archive.mar import MarFile # noqa
14 | from hachoir.parser.archive.mozilla_ar import MozillaArchive # noqa
15 | from hachoir.parser.archive.zlib import ZlibData # noqa
16 | from hachoir.parser.archive.prs_pak import PRSPakFile # noqa
17 |
--------------------------------------------------------------------------------
/tools/fuzzer/tools.py:
--------------------------------------------------------------------------------
1 | from sys import platform
2 |
# beNice() lowers the priority of the current process; OS_ERRORS holds the
# exception type(s) defined for the running platform.
if platform != 'win32':
    from os import nice

    def beNice():
        nice(19)

    OS_ERRORS = OSError
else:
    from win32process import (GetCurrentProcess, SetPriorityClass,
                              BELOW_NORMAL_PRIORITY_CLASS)

    def beNice():
        current = GetCurrentProcess()
        # FIXME: Not supported on Windows 95/98/Me/NT: ignore error?
        # which error?
        SetPriorityClass(current, BELOW_NORMAL_PRIORITY_CLASS)

    OS_ERRORS = (OSError, WindowsError)
21 |
# generateUniqueID(data): SHA hex digest of the data when the "sha" module
# can be imported, otherwise a counter that ignores the data.
# NOTE(review): the "sha" module does not exist on Python 3, so the counter
# fallback is what runs there — confirm whether a digest is still wanted.
try:
    import sha
except ImportError:
    def generateUniqueID(data):
        # Counter kept as an attribute of the function itself.
        generateUniqueID.sequence += 1
        return generateUniqueID.sequence
    generateUniqueID.sequence = 0
else:
    def generateUniqueID(data):
        return sha.new(data).hexdigest()
32 |
33 |
def getFilesize(file):
    """Return the size of ``file``, measured by seeking to its end.

    The file is left positioned at offset 0, whatever its position was.
    """
    file.seek(0, 2)   # whence=2: offset is relative to the end
    end_offset = file.tell()
    file.seek(0, 0)   # whence=0: absolute offset, i.e. rewind
    return end_offset
39 |
--------------------------------------------------------------------------------
/hachoir/core/profiler.py:
--------------------------------------------------------------------------------
1 | from hotshot import Profile
2 | from hotshot.stats import load as loadStats
3 | from os import unlink
4 |
5 |
def runProfiler(func, args=tuple(), kw=None, verbose=True, nb_func=25,
                sort_by=('cumulative', 'calls')):
    """
    Run func(*args, **kw) under the profiler, print its statistics and
    return the result of the call.

    kw: keyword arguments for func (None means no keyword arguments).
    verbose: print progress messages around each step.
    nb_func: passed to print_stats().
    sort_by: sort keys passed to sort_stats().

    The profile log goes to a temporary file which is removed before
    returning, whether or not the call succeeded.

    NOTE(review): Profile and loadStats come from "hotshot", which does not
    exist on Python 3 — this module needs porting to be importable there.
    """
    import os
    import tempfile

    if kw is None:
        kw = {}
    # Uniquely named private file instead of a fixed, shared /tmp path.
    handle, profile_filename = tempfile.mkstemp(prefix="profiler")
    os.close(handle)
    try:
        prof = Profile(profile_filename)
        if verbose:
            print("[+] Run profiler")
        try:
            result = prof.runcall(func, *args, **kw)
        finally:
            # Close the log even when the profiled call raises.
            prof.close()
        if verbose:
            print("[+] Stop profiler")
            print("[+] Process data...")
        stat = loadStats(profile_filename)
        if verbose:
            print("[+] Strip...")
        stat.strip_dirs()
        if verbose:
            print("[+] Sort data...")
        stat.sort_stats(*sort_by)
        if verbose:
            print()
            print("[+] Display statistics")
            print()
        stat.print_stats(nb_func)
        return result
    finally:
        unlink(profile_filename)
33 |
--------------------------------------------------------------------------------
/hachoir/parser/common/deflate.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import CompressedField
2 |
try:
    from zlib import decompressobj, MAX_WBITS

    class DeflateStream:
        """Callable decompressor built on a zlib decompression object."""

        def __init__(self, stream, wbits=None):
            # A truthy wbits selects decompressobj(-MAX_WBITS); otherwise
            # the zlib defaults are used. ``stream`` is not used here.
            self.gzip = decompressobj(-MAX_WBITS) if wbits else decompressobj()

        def __call__(self, size, data=None):
            """Decompress, passing ``size`` as the output limit.

            Input left over from the previous call (unconsumed_tail) is
            fed again, followed by ``data`` when it is given.
            """
            pending = b'' if data is None else data
            return self.gzip.decompress(self.gzip.unconsumed_tail + pending,
                                        size)

    class DeflateStreamWbits(DeflateStream):
        """DeflateStream that always enables the wbits variant."""

        def __init__(self, stream):
            DeflateStream.__init__(self, stream, True)

    def Deflate(field, wbits=True):
        """Attach a deflate decompressor to ``field`` and return the field."""
        stream_class = DeflateStreamWbits if wbits else DeflateStream
        CompressedField(field, stream_class)
        return field
    has_deflate = True
except ImportError:
    def Deflate(field, wbits=True):
        """zlib is unavailable: return ``field`` unchanged."""
        return field
    has_deflate = False
35 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_menu_fwd.py:
--------------------------------------------------------------------------------
1 | from wx import EVT_MENU
2 | from wx.xrc import XRCID
3 |
4 |
class field_menu_fwd_t:
    """Bind the field menu's entries to the implementation's handlers."""

    def __init__(self, imp):
        self.imp = imp

    def on_field_view_ready(self, dispatcher, view):
        """Bind every menu entry of ``view`` to its handler on ``self.imp``."""
        assert view is not None

        # (handler attribute of imp, XRC name of the menu entry)
        bindings = (
            ('on_addr_rel', 'field_menu_address_relative'),
            ('on_addr_abs', 'field_menu_address_absolute'),
            ('on_addr_hex', 'field_menu_address_base_hex'),
            ('on_addr_dec', 'field_menu_address_base_dec'),
            ('on_dump_to_disk', 'field_menu_dump_to_disk'),
            ('on_parse_substream', 'field_menu_parse_substream'),
            ('on_open_window_here', 'field_menu_open_window_here'),
        )
        for handler_name, entry_name in bindings:
            view.Bind(EVT_MENU, getattr(self.imp, handler_name),
                      id=XRCID(entry_name))
26 |
--------------------------------------------------------------------------------
/hachoir/metadata/timezone.py:
--------------------------------------------------------------------------------
1 | from datetime import tzinfo, timedelta
2 |
3 |
class TimezoneUTC(tzinfo):
    """UTC timezone"""
    ZERO = timedelta(0)

    def utcoffset(self, dt):
        """Return a zero offset, whatever ``dt`` is."""
        return TimezoneUTC.ZERO

    def tzname(self, dt):
        """Return the fixed name "UTC"."""
        return "UTC"

    def dst(self, dt):
        """Return a zero daylight-saving adjustment."""
        return TimezoneUTC.ZERO

    def __repr__(self):
        # A non-empty repr keeps the object identifiable in logs/debuggers.
        return "<TimezoneUTC delta=0, name='UTC'>"


class Timezone(TimezoneUTC):
    """Fixed offset in hour from UTC."""

    def __init__(self, offset):
        """Create a timezone ``offset`` hours away from UTC.

        The name is the signed, zero-padded hour followed by "00"
        (for example ``+0200`` or ``-0500``).
        """
        self._offset = timedelta(minutes=offset * 60)
        self._name = "%+03u00" % offset

    def utcoffset(self, dt):
        """Return the fixed offset as a timedelta."""
        return self._offset

    def tzname(self, dt):
        """Return the name computed in the constructor."""
        return self._name

    def __repr__(self):
        # The format string must contain as many placeholders as arguments;
        # an empty format string raises TypeError.
        return "<Timezone delta=%s, name='%s'>" % (
            self._offset, self._name)


# Shared UTC instance, also returned by createTimezone() for a zero offset.
UTC = TimezoneUTC()


def createTimezone(offset):
    """Return a Timezone for a non-zero ``offset`` (in hours), else the UTC singleton."""
    if offset:
        return Timezone(offset)
    else:
        return UTC
47 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_split_menu_imp.py:
--------------------------------------------------------------------------------
1 | # from hachoir.wx.field_view.mutator import split_field
2 | from hachoir.field import RawBytes, RawBits
3 |
4 |
class field_split_menu_imp_t:
    """Handlers behind the "split field" menu.

    ``self.dispatcher`` is used but never assigned here; presumably it is
    injected by the dispatcher that registers this object (not visible in
    this file).
    """

    def on_field_split_menu_ready(self, dispatcher, view):
        """Remember the menu view, later used to ask for the split offset."""
        assert view is not None
        self.view = view

    def on_field_selected(self, dispatcher, field):
        """Remember the currently selected field."""
        self.field = field

    def on_split_bytes(self):
        """Ask for a byte offset and emit 'field_was_split_bytes' if one was given."""
        def size_in_bytes(field):
            return field.size // 8

        chosen = self.split_field('Split Bytes...', self.field, RawBytes, size_in_bytes)
        if chosen:
            self.dispatcher.trigger('field_was_split_bytes', self.field)

    def on_split_bits(self):
        """Ask for a bit offset and emit 'field_was_split_bits' if one was given."""
        def size_in_bits(field):
            return field.size

        chosen = self.split_field('Split Bits...', self.field, RawBits, size_in_bits)
        if chosen:
            self.dispatcher.trigger('field_was_split_bits', self.field)

    def split_field(self, caption, field, split_type, size_func):
        """Ask the view for a split offset between 1 and ``size_func(field) - 1``.

        Returns whatever the view's ``ask_split`` returns. ``split_type`` is
        currently unused because the actual split is disabled (see FIXME).
        """
        upper_bound = size_func(field) - 1
        offset = self.view.ask_split(caption, 1, upper_bound)
        # FIXME: code commented because of pep8 warning
        # if offset is not None:
        #    new_fields = split_field(field, offset, field.name, split_type, size_func)
        return offset
27 |
--------------------------------------------------------------------------------
/tools/flv_extractor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Extract audio from a FLV movie
4 |
5 | Author: Victor Stinner
6 | Creation date: 2006-11-06
7 | """
8 | from hachoir.parser import createParser
9 | from hachoir.stream import FileOutputStream
10 | from hachoir.parser.video.flv import AUDIO_CODEC_MP3
11 | from sys import stderr, exit, argv
12 |
13 |
def main():
    """Copy the audio chunks of the FLV file given on the command line to ``<input>.mp3``.

    Exits with status 1 on a usage error or when no parser can be created
    for the input file. A codec other than MP3 only produces a warning on
    stderr; extraction is still attempted.
    """
    if len(argv) != 2:
        print("usage: %s video.flv" % argv[0], file=stderr)
        exit(1)

    # Open input video
    inputname = argv[1]
    parser = createParser(inputname)
    if parser is None:
        # createParser() gives None when the file cannot be parsed; fail
        # with a message instead of a TypeError on the lookup below.
        print("Unable to parse file: %s" % inputname, file=stderr)
        exit(1)
    if parser["audio[0]/codec"].value != AUDIO_CODEC_MP3:
        print("Unknown audio codec: %s" %
              parser["audio[0]/codec"].display, file=stderr)

    # Extract audio
    print("Extractor audio from: %s" % inputname)
    outputname = inputname + ".mp3"
    output = FileOutputStream(outputname)
    for chunk in parser.array("audio"):
        data = chunk["music_data"]
        # Copy the raw bits of this chunk straight from the input stream.
        output.copyBitsFrom(
            data.parent.stream, data.absolute_address, data.size, data.parent.endian)
    print("Write audio into: %s" % outputname)
35 |
36 |
37 | main()
38 |
--------------------------------------------------------------------------------
/hachoir/metadata/file_system.py:
--------------------------------------------------------------------------------
1 | from hachoir.metadata.metadata import RootMetadata, registerExtractor
2 | from hachoir.metadata.safe import fault_tolerant
3 | from hachoir.parser.file_system import ISO9660
4 | from datetime import datetime
5 |
6 |
class ISO9660_Metadata(RootMetadata):
    """Metadata extractor registered for the ISO9660 parser."""

    def extract(self, iso):
        """Copy values from the first volume descriptor into metadata attributes.

        ``title`` and ``author`` are each assigned twice, in the order listed;
        how repeated assignment is handled is up to RootMetadata (not visible
        here).
        """
        desc = iso['volume[0]/content']

        # (metadata attribute, descriptor field), in assignment order.
        text_fields = (
            ('title', 'volume_id'),
            ('title', 'vol_set_id'),
            ('author', 'publisher'),
            ('author', 'data_preparer'),
            ('producer', 'application'),
            ('copyright', 'copyright'),
        )
        for attribute, field_name in text_fields:
            setattr(self, attribute, desc[field_name].value)

        timestamp_fields = (
            ('creation_date', 'creation_ts'),
            ('last_modification', 'modification_ts'),
        )
        for key, field_name in timestamp_fields:
            self.readTimestamp(key, desc[field_name].value)

    @fault_tolerant
    def readTimestamp(self, key, value):
        """Parse a ``YYYYMMDDhhmmss...`` string and store it as attribute ``key``.

        Values starting with "0000" are ignored.
        """
        if value.startswith("0000"):
            return
        # Slice bounds of year, month, day, hour, minute and second.
        bounds = ((0, 4), (4, 6), (6, 8), (8, 10), (10, 12), (12, 14))
        parts = [int(value[start:stop]) for start, stop in bounds]
        setattr(self, key, datetime(*parts))


registerExtractor(ISO9660, ISO9660_Metadata)
31 |
--------------------------------------------------------------------------------
/hachoir/parser/misc/__init__.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser.misc.file_3do import File3do # noqa
2 | from hachoir.parser.misc.file_3ds import File3ds # noqa
3 | from hachoir.parser.misc.torrent import TorrentFile # noqa
4 | from hachoir.parser.misc.ttf import TrueTypeFontFile # noqa
5 | from hachoir.parser.misc.chm import ChmFile # noqa
6 | from hachoir.parser.misc.lnk import LnkFile # noqa
7 | from hachoir.parser.misc.pcf import PcfFile # noqa
8 | from hachoir.parser.misc.ole2 import OLE2_File # noqa
9 | from hachoir.parser.misc.pdf import PDFDocument # noqa
10 | from hachoir.parser.misc.pifv import PIFVFile # noqa
11 | from hachoir.parser.misc.hlp import HlpFile # noqa
12 | from hachoir.parser.misc.gnome_keyring import GnomeKeyring # noqa
13 | from hachoir.parser.misc.bplist import BPList # noqa
14 | from hachoir.parser.misc.dsstore import DSStore # noqa
15 | from hachoir.parser.misc.word_doc import WordDocumentParser # noqa
16 | from hachoir.parser.misc.word_2 import Word2DocumentParser # noqa
17 | from hachoir.parser.misc.mstask import MSTaskFile # noqa
18 | from hachoir.parser.misc.mapsforge_map import MapsforgeMapFile # noqa
19 | from hachoir.parser.misc.fit import FITFile # noqa
20 | from hachoir.parser.misc.git_pack import GitPackFile # noqa
21 |
--------------------------------------------------------------------------------
/hachoir/subfile/data_rate.py:
--------------------------------------------------------------------------------
1 | from time import time
2 |
DATARATE_UPDATE = 1.0   # Time slice (in second) for datarate computation


class DataRate:
    """
    Compute average speed in bits per second of a function.
    Store self.size data rates to compute good average speed.
    Don't compute average before self.min_size values are computed.
    """

    def __init__(self, offset, size=20, min_size=3):
        """Start measuring from ``offset`` at the current time.

        ``size`` is the number of most recent samples kept; ``min_size`` is
        the number of samples required before ``average`` is computed.
        """
        self.last_offset = offset
        self.last_time = time()
        self.datarates = []
        # Average bit rate
        self.average = None
        # Number of stored value used to compute average data rate
        self.size = size
        self.min_size = min_size

    def update(self, offset):
        """Record a sample if at least DATARATE_UPDATE seconds have elapsed.

        Does nothing (returns None) when called sooner than that.
        """
        # Read the clock once: the same instant ends this slice and starts
        # the next one, so no elapsed time is lost between samples.
        now = time()
        difftime = now - self.last_time
        if difftime < DATARATE_UPDATE:
            # Only update each second
            return
        self.last_time = now

        # Compute data rate
        rate = float(offset - self.last_offset) / difftime
        self.last_offset = offset

        # Update statistics, keeping only the newest self.size samples
        self.datarates.append(rate)
        self.datarates = self.datarates[-self.size:]
        if self.min_size <= len(self.datarates):
            self.average = sum(self.datarates) / len(self.datarates)
40 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/stubs.py:
--------------------------------------------------------------------------------
1 | #
2 | # some stubs that could be in hachoir-core.
3 | #
4 |
5 | from hachoir.stream.input import FileFromInputStream
6 | from shutil import copyfileobj
7 |
8 |
def field_index(field_set, field):
    """Return the index under which ``field.name`` is stored in ``field_set._fields``."""
    fields = field_set._fields
    return fields.index(field.name)
11 |
12 |
def field_from_index(field_set, index):
    """Return the entry at position ``index`` of ``field_set._fields.values``."""
    values = field_set._fields.values
    return values[index]
15 |
16 |
def has_static_size(type):
    """Tell whether the ``static_size`` attribute of ``type`` is an integer."""
    static_size = type.static_size
    return isinstance(static_size, int)
19 |
20 |
def can_convert(from_field, to_type):
    """Tell whether ``from_field`` has the same size as the fixed-size ``to_type``.

    Always False when ``to_type`` has no integer ``static_size``. Otherwise
    the target's static size is compared with the source's static size when
    it has one, else with the source's actual ``size``.
    """
    source_is_static = has_static_size(from_field)
    target_is_static = has_static_size(to_type)
    if not target_is_static:
        return False
    if source_is_static:
        return from_field.static_size == to_type.static_size
    return from_field.size == to_type.static_size
28 |
29 |
def field_type_name(field):
    """Return the name of the class of ``field``."""
    cls = field.__class__
    return cls.__name__
32 |
33 |
def convert_size(from_field, to_type):
    """Return ``from_field.size`` rescaled for ``to_type``.

    A type counts as byte-based when its class name contains 'Byte'. The
    size is returned unchanged when source and target agree, multiplied by
    8 when only the source is byte-based, and floor-divided by 8 when only
    the target is.
    """
    source_is_bytes = 'Byte' in field_type_name(from_field)
    target_is_bytes = 'Byte' in to_type.__name__
    if source_is_bytes == target_is_bytes:
        return from_field.size
    if source_is_bytes:
        return from_field.size * 8
    return from_field.size // 8
41 |
42 |
def save_substream_to_disk(field, dest_path):
    """Write the content of the sub-stream of ``field`` to the file ``dest_path``.

    The destination is opened in binary write mode (truncating an existing
    file) and is closed even if reading the sub-stream or copying fails.
    """
    with open(dest_path, 'wb') as dest_stream:
        # Expose the input sub-stream as a file object so copyfileobj can read it.
        f = FileFromInputStream(field.getSubIStream())
        copyfileobj(f, dest_stream)
48 |
--------------------------------------------------------------------------------
/doc/list.rst:
--------------------------------------------------------------------------------
1 | .. _list:
2 |
3 | ++++++++++++++++++++
4 | hachoir-list program
5 | ++++++++++++++++++++
6 |
7 | hachoir-list uses Hachoir to decode a binary file and then
8 | prints out the decoded fields, so that the output can be processed
9 | by other text-oriented tools.
10 |
11 | Examples
12 | ========
13 |
14 | Print all decoded fields::
15 |
16 | $ hachoir-list cd_0008_5C48_1m53s.cda
17 | signature : "RIFF"
18 | filesize : 36 bytes
19 | type : "CDDA"
20 | cdda
21 | tag : "fmt "
22 | size : 24 bytes
23 | cda_version : 1
24 | track_no : 4
25 | disc_serial : 0008-5C48
26 | hsg_offset : 19477
27 | hsg_length : 8507
28 | rb_offset
29 | frame : 52
30 | second : 21
31 | minute : 4
32 | notused : "\0"
33 | rb_length
34 | frame : 32
35 | second : 53
36 | minute : 1
37 | notused : "\0"
38 |
39 | Other options:
40 |
41 | * ``--description``: Show description for each field
42 | * ``--indent-width``: Change (or disable) indentation
43 | * ``--hide-value``: Don't display the string value
44 | * ``--hide-size``: Don't display field size
45 | * Get full option list using ``--help``
46 |
--------------------------------------------------------------------------------
/hachoir/subfile/output.py:
--------------------------------------------------------------------------------
1 | from hachoir.stream import FileOutputStream, StreamError
2 | from hachoir.core.error import error
3 | from errno import EEXIST
4 | from os import mkdir, path
5 |
6 |
class Output:
    """
    Store files found by search tool.
    """

    def __init__(self, directory):
        self.directory = directory
        # True once createDirectory() has been called by writeFile()
        self.mkdir = False
        # Sequence number used for the next generated filename
        self.file_id = 1

    def createDirectory(self):
        """Create the output directory; an already existing one is not an error."""
        try:
            mkdir(self.directory)
        except OSError as err:
            if err.errno != EEXIST:
                raise

    def createFilename(self, file_ext=None):
        """Return the next ``file-NNNN`` name, with ``file_ext`` appended if given."""
        filename = "file-%04u" % self.file_id
        self.file_id += 1
        if not file_ext:
            return filename
        return filename + file_ext

    def writeFile(self, filename, stream, offset, size):
        """Copy ``size`` bits (``size // 8`` bytes) of ``stream`` from ``offset`` into a file.

        Returns the path of the output file inside the output directory.
        A StreamError during the copy is reported with error() and the path
        is still returned.
        """
        # Create directory (only on first call)
        if not self.mkdir:
            self.createDirectory()
            self.mkdir = True

        # Create output file
        filename = path.join(self.directory, filename)
        output = FileOutputStream(filename)

        # Write output
        byte_count = size // 8
        try:
            output.copyBytesFrom(stream, offset, byte_count)
        except StreamError as err:
            error("copyBytesFrom() error: %s" % err)
        return filename
49 |
--------------------------------------------------------------------------------
/tests/test_editor.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from io import BytesIO
3 | from hachoir.core.endian import BIG_ENDIAN
4 | from hachoir.editor import createEditor
5 | from hachoir.field import Parser, Bits
6 | from hachoir.stream import StringInputStream, OutputStream
7 | from hachoir.test import setup_tests
8 |
9 |
class TestEditor(unittest.TestCase):
    """Check that edited fields are written back at the right bit position."""

    def test_bit_alignment(self):
        raw = bytes([255, 255, 255, 254])
        editor = createEditor(TestParser(StringInputStream(raw)))

        # Cause a change in a non-byte-aligned field
        editor['flags[2]'].value -= 1

        # Generate output and verify operation
        buffer = BytesIO()
        editor.writeInto(OutputStream(buffer))
        written = int.from_bytes(buffer.getvalue(), 'big')
        output_bits = format(written, 'b')

        # X is the modified bit
        #                             .....,,,,,,,,,,,,,,,,..X,,,,,,,,
        self.assertEqual(output_bits, "11111111111111111111111011111110")
30 |
31 |
class TestParser(Parser):
    """Big-endian parser splitting 32 bits into four ``flags[]`` bit fields."""
    endian = BIG_ENDIAN

    def createFields(self):
        # Widths in bits; only the last field starts on a byte boundary.
        for width in (5, 16, 3, 8):
            yield Bits(self, 'flags[]', width)
40 |
41 |
42 | if __name__ == "__main__":
43 | setup_tests()
44 | unittest.main()
45 |
--------------------------------------------------------------------------------
/hachoir/core/cmd_line.py:
--------------------------------------------------------------------------------
1 | from optparse import OptionGroup
2 | from hachoir.core.log import log
3 | from hachoir import __version__
4 | import hachoir.core.config as config
5 | import sys
6 |
7 |
def displayVersion(*args):
    """Print the Hachoir version and exit with status 0; arguments are ignored."""
    message = "Hachoir version %s" % __version__
    print(message)
    sys.exit(0)
11 |
12 |
def getHachoirOptions(parser):
    """
    Create an option group (type optparse.OptionGroup) of Hachoir
    library options.

    The group holds --verbose, --log, --quiet and --debug, in this order.
    """
    def setLogFilename(option, opt_str, value, parser):
        # optparse callback: the third argument is the value given to --log
        log.setFilename(value)

    # Keyword arguments shared by the boolean switches.
    switch = {"default": False, "action": "store_true"}

    group = OptionGroup(parser, "Hachoir library",
                        "Configure Hachoir library")
    group.add_option("--verbose", help="Verbose mode", **switch)
    group.add_option("--log", help="Write log in a file",
                     type="string", action="callback",
                     callback=setLogFilename)
    group.add_option("--quiet", help="Quiet mode (don't display warning)",
                     **switch)
    group.add_option("--debug", help="Debug mode", **switch)
    return group
33 |
34 |
def configureHachoir(option):
    """Switch on the ``config`` flags whose matching attribute of ``option`` is true.

    ``option`` is the values object produced by optparse. Flags are only
    ever set to True here, never reset.
    """
    # Configure Hachoir using "option" (value from optparse)
    for flag in ("quiet", "verbose", "debug"):
        if getattr(option, flag):
            setattr(config, flag, True)
43 |
--------------------------------------------------------------------------------
/hachoir/subfile/pattern.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser import QueryParser
2 | from hachoir.regex import PatternMatching
3 |
4 |
5 | # XXX hachoir.regex uses str but the rest of hachoir uses bytes,
6 | # which means we have to convert bytes to str using latin1 encoding
7 | # (the closest "raw bytes" encoding) in order for matching to work.
class HachoirPatternMatching(PatternMatching):
    """Pattern matcher loaded with the "magic" strings and regexes of Hachoir parsers."""

    def __init__(self, categories=None, parser_ids=None):
        """Register the magic patterns of the parsers selected by the given filters.

        ``categories`` and ``parser_ids`` are optional iterables turned into
        ("category", ...) and ("id", ...) tags for QueryParser.
        """
        PatternMatching.__init__(self)

        # Load parser list
        tags = []
        if categories:
            tags.extend(("category", cat) for cat in categories)
        if parser_ids:
            tags.extend(("id", parser_id) for parser_id in parser_ids)
        if tags:
            tags.append(None)
        parser_list = QueryParser(tags)

        # Create string patterns
        for parser in parser_list:
            magics = parser.getParserTags().get("magic", ())
            for magic, offset in magics:
                self.addString(magic.decode('latin1'), (offset, parser))

        # Create regex patterns
        for parser in parser_list:
            regexes = parser.getParserTags().get("magic_regex", ())
            for regex, offset in regexes:
                self.addRegex(regex.decode('latin1'), (offset, parser))
        self.commit()

    def search(self, data):
        """Yield ``(parser, bit_address)`` for every pattern found in the bytes ``data``.

        The address is the match start converted to bits minus the offset
        stored with the pattern.
        """
        text = data.decode('latin1')
        for start, _stop, match in PatternMatching.search(self, text):
            yield (match.user[1], start * 8 - match.user[0])
37 |
--------------------------------------------------------------------------------
/tests/test_strip.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.test import setup_tests
3 | import hashlib
4 | import os.path
5 | import subprocess
6 | import sys
7 | import unittest
8 |
9 | DATADIR = os.path.join(os.path.dirname(__file__), "files")
10 | KDE_CLICK = os.path.join(DATADIR, 'kde_click.wav')
11 | PROGRAM = os.path.join(os.path.dirname(__file__), "..", "hachoir-strip")
12 |
13 |
def checksum(filename):
    """Return the hexadecimal SHA-1 digest of the whole content of ``filename``.

    The file is hashed in 4096-byte chunks until end of file. For files of
    at most 4096 bytes the result equals the digest of a single first chunk.
    """
    digest = hashlib.sha1()
    with open(filename, 'rb') as fp:
        # read() returns b'' at end of file, which stops the iteration.
        for chunk in iter(lambda: fp.read(4096), b''):
            digest.update(chunk)
    return digest.hexdigest()
20 |
21 |
class TestStripCommandLine(unittest.TestCase):
    """Run the hachoir-strip program on a sample file and check its output."""

    def test_strip_all(self):
        # Make sure the input sample is the expected one
        self.assertEqual(checksum(KDE_CLICK),
                         'dcafdef2048985aa925df5f86053bda5a87eb64b')

        # Remove the leftover of a previous run, if any
        newname = KDE_CLICK + ".new"
        if os.path.exists(newname):
            os.unlink(newname)

        command = [sys.executable, PROGRAM, KDE_CLICK]
        proc = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        raw_output = proc.communicate()[0]
        text = raw_output.decode('ascii', 'replace')
        self.assertIn('Save new file', text)

        self.assertEqual(checksum(newname),
                         '6456990d3931292a1c96c6e8f035e983cd84d477')

        os.unlink(newname)
44 |
45 |
46 | if __name__ == "__main__":
47 | setup_tests()
48 | unittest.main()
49 |
--------------------------------------------------------------------------------
/hachoir/field/vector.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import Field, FieldSet, ParserError
2 |
3 |
class GenericVector(FieldSet):
    """Field set made of ``nb_items`` consecutive items of one fixed-size field class."""

    def __init__(self, parent, name, nb_items, item_class, item_name="item", description=None):
        """Create the vector; its size is ``nb_items * item_class.static_size``.

        Raises ParserError when ``nb_items`` is not strictly positive.
        ``item_class`` must be a Field subclass with an integer ``static_size``.
        """
        # Sanity checks
        assert issubclass(item_class, Field)
        item_size = item_class.static_size
        assert isinstance(item_size, int)
        if not (0 < nb_items):
            raise ParserError('Unable to create empty vector "%s" in %s'
                              % (name, parent.path))
        self.__nb_items = nb_items
        self._item_class = item_class
        self._item_name = item_name
        FieldSet.__init__(self, parent, name, description,
                          size=nb_items * item_size)

    def __len__(self):
        """Return the number of items given to the constructor."""
        return self.__nb_items

    def createFields(self):
        """Yield ``len(self)`` items named ``<item_name>[]``."""
        item_class = self._item_class
        item_name = self._item_name + "[]"
        for _ in range(len(self)):
            yield item_class(self, item_name)
27 |
28 |
class UserVector(GenericVector):
    """
    Vector whose item class and item name come from class attributes.

    To implement:
    - item_name: name of a field without [] (eg. "color" becomes "color[0]"),
      default value is "item"
    - item_class: class of an item
    """
    item_class = None
    item_name = "item"

    def __init__(self, parent, name, nb_items, description=None):
        GenericVector.__init__(
            self, parent, name, nb_items,
            item_class=self.item_class,
            item_name=self.item_name,
            description=description)
42 |
--------------------------------------------------------------------------------
/hachoir/field/fragment.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import FieldSet, RawBytes
2 | from hachoir.stream import StringInputStream
3 |
4 |
class FragmentGroup:
    """Ordered collection of fragments that together form one input stream."""

    def __init__(self, parser):
        self.items = []
        # Parser passed as the "class" tag of the created stream (may be None)
        self.parser = parser
        # Passed as the "args" tag of the created stream
        self.args = {}

    def add(self, item):
        """Append ``item`` to the group."""
        self.items.append(item)

    def createInputStream(self):
        """Return a StringInputStream over the concatenated "rawdata" of all items."""
        # FIXME: Use lazy stream creation
        data = b"".join(item["rawdata"].value for item in self.items)

        tags = {"args": self.args}
        if self.parser is not None:
            tags["class"] = self.parser
        return StringInputStream(data, "", tags=iter(tags.items()))
27 |
28 |
class CustomFragment(FieldSet):
    """Field set holding one raw fragment that belongs to a FragmentGroup."""

    def __init__(self, parent, name, size, parser, description=None, group=None):
        """Create the fragment (``size`` in bits) and add it to ``group``.

        A new FragmentGroup built from ``parser`` is used when no group is given.
        """
        FieldSet.__init__(self, parent, name, description, size=size)
        self.field_size = size
        self.group = group or FragmentGroup(parser)
        self.group.add(self)

    def createFields(self):
        byte_count = self.field_size // 8
        yield RawBytes(self, "rawdata", byte_count)

    def _createInputStream(self, **args):
        # The stream covers the whole group, not only this fragment.
        return self.group.createInputStream()

    def createValue(self):
        raw = self["rawdata"]
        return raw.value

    def createDisplay(self):
        raw = self["rawdata"]
        return raw.display
50 |
--------------------------------------------------------------------------------
/hachoir/parser/misc/ole2_util.py:
--------------------------------------------------------------------------------
1 | from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN
2 | from hachoir.field import RawBytes, RootSeekableFieldSet, ParserError
3 | from hachoir.parser import HachoirParser
4 |
5 |
class OLE2FragmentParser(HachoirParser, RootSeekableFieldSet):
    """Base parser for the sub-fragments of a Microsoft Office document.

    Subclasses set ENDIAN_CHECK to True to have the byte order taken from
    their "endian" field.
    """
    tags = {
        "description": "Microsoft Office document subfragments",
    }
    endian = LITTLE_ENDIAN

    # When true, the "endian" field is read to choose and validate the byte order.
    ENDIAN_CHECK = False

    def __init__(self, stream, **args):
        """Initialise both base classes, then apply the optional endian check.

        Raises ParserError if ENDIAN_CHECK is set and the "endian" field
        holds neither of the two known markers.
        """
        RootSeekableFieldSet.__init__(
            self, None, "root", stream, None, stream.askSize(self))
        HachoirParser.__init__(self, stream, **args)
        if self.ENDIAN_CHECK:
            if self["endian"].value == b"\xFF\xFE":
                self.endian = BIG_ENDIAN
            elif self["endian"].value == b"\xFE\xFF":
                self.endian = LITTLE_ENDIAN
            else:
                raise ParserError("OLE2: Invalid endian value")

    def validate(self):
        """Return True, or an error message for an unknown endian marker."""
        if self.ENDIAN_CHECK:
            marker = self["endian"].value
            # The field value is bytes (see __init__), so it must be compared
            # with bytes literals and rendered with bytes.hex().
            if marker not in (b"\xFF\xFE", b"\xFE\xFF"):
                return "Unknown endian value %s" % marker.hex()
        return True
31 |
32 |
class RawParser(OLE2FragmentParser):
    """Fragment parser exposing its content as raw bytes plus optional slack space.

    ``self.datasize`` (in bytes) is not set in this class; presumably the
    creator of the parser provides it.
    """
    ENDIAN_CHECK = False
    OS_CHECK = False

    def createFields(self):
        yield RawBytes(self, "rawdata", self.datasize)
        # Bytes of the stream beyond the declared data size, if any
        total_bytes = self.size // 8
        if self.datasize < total_bytes:
            yield RawBytes(self, "slack_space", total_bytes - self.datasize)
41 |
--------------------------------------------------------------------------------
/hachoir/parser/image/common.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import FieldSet, UserVector, UInt8
2 |
3 |
class RGB(FieldSet):
    """24-bit colour made of three UInt8 components: red, green, blue."""
    # Names shown for a few well-known colours
    color_name = {
        (0, 0, 0): "Black",
        (255, 0, 0): "Red",
        (0, 255, 0): "Green",
        (0, 0, 255): "Blue",
        (255, 255, 255): "White",
    }
    static_size = 24

    def createFields(self):
        components = (("red", "Red"), ("green", "Green"), ("blue", "Blue"))
        for name, description in components:
            yield UInt8(self, name, description)

    def createDescription(self):
        """Describe the colour by its known name, else as ``#RRGGBB``."""
        rgb = tuple(self[name].value for name in ("red", "green", "blue"))
        name = self.color_name.get(rgb) or "#%02X%02X%02X" % rgb
        return "RGB color: " + name
25 |
26 |
class RGBA(RGB):
    """32-bit colour: the three RGB components followed by an alpha byte."""
    static_size = 32

    def createFields(self):
        # red, green and blue come from the parent class
        yield from RGB.createFields(self)
        yield UInt8(self, "alpha", "Alpha")

    def createDescription(self):
        """Return the RGB description followed by the opacity in percent."""
        opacity = self["alpha"].value * 100 // 255
        description = RGB.createDescription(self)
        return "%s (opacity: %s%%)" % (description, opacity)
40 |
41 |
class PaletteRGB(UserVector):
    """Vector of RGB items, each named ``color[]``."""
    item_class = RGB
    item_name = "color"

    def createDescription(self):
        color_count = len(self)
        return "Palette of %u RGB colors" % color_count
48 |
49 |
class PaletteRGBA(PaletteRGB):
    """Same as PaletteRGB, with RGBA items."""
    item_class = RGBA

    def createDescription(self):
        color_count = len(self)
        return "Palette of %u RGBA colors" % color_count
55 |
--------------------------------------------------------------------------------
/doc/grep.rst:
--------------------------------------------------------------------------------
1 | .. _grep:
2 |
3 | ++++++++++++++++++++
4 | hachoir-grep program
5 | ++++++++++++++++++++
6 |
7 | hachoir-grep is an experimental search program based on Hachoir. Its goal is
8 | to search for a substring in a binary file or to list all strings.
9 |
10 | Examples
11 | ========
12 |
13 | List all strings::
14 |
15 | $ .hachoir-grep --all sheep_on_drugs.mp3
16 | 0:ID3
17 | 10:TAL
18 | 17:Bilmusik vol 1. Stainless Steel Providers
19 | (...)
20 | 143:COM
21 | 150:eng
22 | 154:Stainless Steel Provider is compilated to the car of Twinstar.
23 |
24 | Search substring::
25 |
26 | $ hachoir-grep "il" sheep_on_drugs.mp3
27 | 17:Bilmusik vol 1. Stainless Steel Providers
28 | 154:Stainless Steel Provider is compilated to the car of Twinstar.
29 |
30 | Case-sensitive substring search, displaying the string path::
31 |
32 | $ hachoir-grep --path --case Car sheep_on_drugs.mp3
33 | 78:/id3v2/field[2]/content/text:Car music
34 |
35 | Other options:
36 |
37 | * ``--no-value``: Don't display the string value
38 | * ``--no-addr``: Don't display string address
39 | * ``--percent``: Display search percent (on stderr)
40 | * Get full option list using --help
41 |
42 |
43 | Comparison to grep and strings
44 | ==============================
45 |
46 | The differences between hachoir-grep and grep or strings are:
47 |
48 | * hachoir-grep doesn't parse non-string data
49 | * hachoir-grep cares about string charset (ISO-8859-1, UTF-8, UTF-16, etc.)
50 | but search is charset independent
51 | * hachoir-grep can display string address
52 |
53 | Drawbacks:
54 |
55 | * hachoir-grep is slow and might use a lot of memory
56 | * results depend on the quality of Hachoir parsers
57 |
58 |
--------------------------------------------------------------------------------
/hachoir/parser/common/msdos.py:
--------------------------------------------------------------------------------
1 | """
2 | MS-DOS structures.
3 |
4 | Documentation:
5 | - File attributes:
6 | http://www.cs.colorado.edu/~main/cs1300/include/ddk/winddk.h
7 | """
8 |
9 | from hachoir.field import StaticFieldSet
10 | from hachoir.field import Bit, NullBits
11 |
# Field layout shared by the 16-bit and 32-bit attribute sets below:
# 15 one-bit entries (14 named flags and one reserved bit). Each class
# appends its own trailing reserved bits to reach its full width.
_FIELDS = (
    (Bit, "read_only"),
    (Bit, "hidden"),
    (Bit, "system"),
    (NullBits, "reserved[]", 1),
    (Bit, "directory"),
    (Bit, "archive"),
    (Bit, "device"),
    (Bit, "normal"),
    (Bit, "temporary"),
    (Bit, "sparse_file"),
    (Bit, "reparse_file"),
    (Bit, "compressed"),
    (Bit, "offline"),
    (Bit, "dont_index_content"),
    (Bit, "encrypted"),
)
29 |
30 |
class MSDOSFileAttr16(StaticFieldSet):
    """
    MSDOS 16-bit file attributes
    """
    # The shared flags plus one trailing reserved bit
    format = _FIELDS + ((NullBits, "reserved[]", 1),)

    _text_keys = (
        # Sort attributes by importance
        "directory", "read_only", "compressed",
        "hidden", "system",
        "normal", "device",
        "temporary", "archive")

    def createValue(self):
        """Return the names of the set attributes, joined with ", ".

        At most four names are listed; if a fifth attribute is set, "..." is
        appended instead. Returns "(none)" when no listed attribute is set.
        """
        names = []
        for key in self._text_keys:
            if not self[key].value:
                continue
            if 4 <= len(names):
                names.append("...")
                break
            names.append(key)
        return ", ".join(names) if names else "(none)"
57 |
58 |
class MSDOSFileAttr32(MSDOSFileAttr16):
    """
    MSDOS 32-bit file attributes
    """
    # Same flags as the 16-bit variant, followed by 17 reserved bits
    # (instead of 1).
    format = _FIELDS + ((NullBits, "reserved[]", 17),)
64 |
--------------------------------------------------------------------------------
/hachoir/parser/archive/prs_pak.py:
--------------------------------------------------------------------------------
1 | """
2 | Parallel Realities Starfighter .pak file parser
3 |
4 | See http://www.parallelrealities.co.uk/projects/starfighter.php
5 | or svn://svn.debian.org/svn/pkg-games/packages/trunk/starfighter/
6 |
7 | Author: Oliver Gerlich
8 | """
9 |
10 | from hachoir.parser import Parser
11 | from hachoir.field import (UInt32, String, SubFile, FieldSet)
12 | from hachoir.core.endian import LITTLE_ENDIAN
13 | from hachoir.core.text_handler import filesizeHandler
14 |
15 |
class FileEntry(FieldSet):
    """One archive entry: a 56-byte file name, a 32-bit size, then the file data."""

    def createFields(self):
        yield String(self, "filename", 56, truncate="\0")
        yield filesizeHandler(UInt32(self, "size"))
        # The two header fields give the length and the name of the embedded file.
        data_size = self["size"].value
        data_name = self["filename"].value
        yield SubFile(self, "data", data_size, filename=data_name)

    def createDescription(self):
        name_field = self["filename"]
        return name_field.value
25 |
26 |
class PRSPakFile(Parser):
    """Parser for the .pak archives of Parallel Realities Starfighter."""

    PARSER_TAGS = {
        "id": "prs_pak",
        "category": "archive",
        "file_ext": ("pak",),
        "mime": (u"application/octet-stream",),
        "min_size": 4 * 8,  # just the identifier
        "magic": ((b'PACK', 0),),
        "description": "Parallel Realities Starfighter .pak archive",
    }

    endian = LITTLE_ENDIAN

    def validate(self):
        """Check the 'PACK' signature and the header of the first file entry."""
        if self.stream.readBytes(0, 4) != b'PACK':
            return False
        if self["file[0]/size"].value < 0:
            return False
        return len(self["file[0]/filename"].value) > 0

    def createFields(self):
        yield String(self, "magic", 4)

        # all remaining data must be file entries:
        while self.current_size < self._size:
            yield FileEntry(self, "file[]")
52 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | *******
2 | Hachoir
3 | *******
4 |
5 | .. image:: https://img.shields.io/pypi/v/hachoir.svg
6 | :alt: Latest release on the Python Cheeseshop (PyPI)
7 | :target: https://pypi.python.org/pypi/hachoir
8 |
9 | .. image:: https://github.com/vstinner/hachoir/actions/workflows/build.yml/badge.svg
10 | :alt: Build status of hachoir on GitHub Actions
11 | :target: https://github.com/vstinner/hachoir/actions
12 |
13 | .. image:: http://unmaintained.tech/badge.svg
14 | :target: http://unmaintained.tech/
15 | :alt: No Maintenance Intended
16 |
17 | Hachoir is a Python library to view and edit a binary stream field by field.
18 | In other words, Hachoir allows you to "browse" any binary stream just like you
19 | browse directories and files.
20 |
21 | A file is split into a tree of fields, where the smallest field is just one
22 | bit. Examples of fields types: integers, strings, bits, padding types, floats,
23 | etc. Hachoir is the French word for a meat grinder (meat mincer), which is used
24 | by butchers to divide meat into long tubes; Hachoir is used by computer
25 | butchers to divide binary files into fields.
26 |
27 | * `Hachoir website <http://hachoir.readthedocs.io/>`_ (source code, bugs)
28 | * `Hachoir on GitHub (Source code, bug tracker) <https://github.com/vstinner/hachoir>`_
29 | * License: GNU GPL v2
30 |
31 | Command line tools using Hachoir parsers:
32 |
33 | * hachoir-grep: find a text pattern in a binary file
34 | * hachoir-metadata: get metadata from binary files
35 | * hachoir-strip: modify a file to remove metadata
36 | * hachoir-urwid: display the content of a binary file in text mode
37 |
38 | Installation instructions: http://hachoir.readthedocs.io/en/latest/install.html
39 |
40 | Hachoir is written for Python 3.6 or newer.
41 |
--------------------------------------------------------------------------------
/tools/find_deflate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from zlib import decompress, error as zlib_error
3 | from sys import argv, stderr, exit
4 | from time import time
5 |
6 | MIN_SIZE = 2
7 |
8 |
def canDeflate(compressed_data):
    """Tell whether zlib can decompress ``compressed_data`` without error."""
    try:
        decompress(compressed_data)
    except zlib_error:
        return False
    else:
        return True
15 |
16 |
def findDeflateBlocks(data):
    """
    Yield each offset of data whose remainder, data[offset:], is accepted
    by canDeflate().

    Offsets 0 to len(data) - MIN_SIZE - 1 (inclusive) are tried in
    increasing order. While scanning, a progress line is printed once more
    than one second has elapsed since the previous one (or since the start).
    """
    last_offset = len(data) - MIN_SIZE - 1
    deadline = time() + 1.0
    for offset in range(last_offset + 1):
        if time() > deadline:
            deadline = time() + 1.0
            percent = offset * 100.0 / last_offset
            print("Progress: %.1f%% (offset %s/%s)" % (
                percent, offset, last_offset))
        if not canDeflate(data[offset:]):
            continue
        yield offset
27 |
28 |
def guessDeflateSize(data, offset):
    """
    Yield every size for which data[offset:offset + size] is accepted by
    canDeflate().

    Candidate sizes go from len(data) - offset (slice up to the end of
    data) down to 1. Nothing is yielded when offset is at or beyond the
    end of data.
    """
    # range() is empty for a non-positive start value, whereas the former
    # "while size: ... size -= 1" loop never ended when size started negative
    # (offset greater than len(data)).
    for size in range(len(data) - offset, 0, -1):
        if canDeflate(data[offset:offset + size]):
            yield size
35 |
36 |
def main():
    """
    Command line entry point.

    Expect exactly one argument, the name of the file to scan; otherwise
    print the usage on stderr and exit with status 1. Print each offset
    reported by findDeflateBlocks(), then, for each of these offsets, every
    size reported by guessDeflateSize(). When no offset was found, print a
    message on stderr. Exit with status 0 in both cases.
    """
    if len(argv) != 2:
        print("usage: %s filename" % argv[0], file=stderr)
        exit(1)
    # Context manager: the file is closed as soon as its content is read,
    # instead of being left to the garbage collector.
    with open(argv[1], 'rb') as fp:
        data = fp.read()
    offsets = []
    for offset in findDeflateBlocks(data):
        print("Offset %s" % offset)
        offsets.append(offset)
    if offsets:
        for offset in offsets:
            for size in guessDeflateSize(data, offset):
                # A block reaching the end of the file is flagged as such
                if size == (len(data) - offset):
                    size = "%s (until the end)" % size
                print("Offset %s -- size %s" % (offset, size))
    else:
        print("No deflate block found", file=stderr)
    exit(0)
55 |
56 | if __name__ == "__main__":
57 | main()
58 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_menu_imp.py:
--------------------------------------------------------------------------------
1 | from hachoir.wx.field_view.stubs import save_substream_to_disk
2 |
3 |
class field_menu_imp_t:
    """
    Event handlers of the field context menu.

    Handlers store what they receive (field set, view, selected field, file)
    and re-emit menu actions as dispatcher events.

    NOTE(review): self.dispatcher is never assigned in this class; it is
    presumably injected when the object is registered on the dispatcher.
    """

    # -- state received from other components ------------------------------

    def on_field_set_ready(self, dispatcher, fields):
        """Store the field set and reset the current selection."""
        assert fields is not None
        self.fields = fields
        self.selected = None

    def on_field_menu_ready(self, dispatcher, view):
        """Store the menu view."""
        assert view is not None
        self.view = view

    def on_field_selected(self, dispatcher, field):
        """Remember field as the selected field."""
        self.selected = field

    def on_file_ready(self, dispatcher, file):
        """Store the file object."""
        self.file = file

    def on_field_show_ops(self, dispatcher, field):
        """Ask the view to show its options (field itself is not used)."""
        self.view.show_opts()

    # -- address display ---------------------------------------------------

    def on_addr_rel(self, event):
        """Trigger the 'address_relative' event."""
        self.dispatcher.trigger('address_relative')

    def on_addr_abs(self, event):
        """Trigger the 'address_absolute' event."""
        self.dispatcher.trigger('address_absolute')

    def on_addr_hex(self, event):
        """Trigger the 'address_hexadecimal' event."""
        self.dispatcher.trigger('address_hexadecimal')

    def on_addr_dec(self, event):
        """Trigger the 'address_decimal' event."""
        self.dispatcher.trigger('address_decimal')

    # -- field split -------------------------------------------------------

    def on_split_bits(self):
        """Trigger the 'field_split_bits' event."""
        self.dispatcher.trigger('field_split_bits')

    def on_split_bytes(self):
        """Trigger the 'field_split_bytes' event."""
        self.dispatcher.trigger('field_split_bytes')

    # -- actions on the selected field -------------------------------------

    def on_parse_substream(self, dispatcher):
        """Trigger 'field_parse_substream' with the selected field."""
        self.dispatcher.trigger('field_parse_substream', self.selected)

    def on_open_window_here(self, dispatcher):
        """Trigger 'field_open_window_here' with the selected field."""
        self.dispatcher.trigger('field_open_window_here', self.selected)

    def on_dump_to_disk(self, event):
        """
        Ask the view for a dump file and save the selected field into it.

        Nothing is saved when the view returns None.
        """
        title = 'Dump "' + self.selected.path + '" To Disk...'
        dump_path = self.view.ask_for_dump_file(title)
        if dump_path is None:
            return
        save_substream_to_disk(self.selected, dump_path)
51 |
--------------------------------------------------------------------------------
/doc/strip.rst:
--------------------------------------------------------------------------------
1 | .. _strip:
2 |
3 | +++++++++++++++++++++
4 | hachoir-strip program
5 | +++++++++++++++++++++
6 |
7 | hachoir-strip is an experimental program based on the Hachoir library: it removes
8 | "useless" information from a file. Don't use it to create smaller files: you
9 | had better recompress your data :-) hachoir-strip can be used if you would
10 | like to remove spy information which can be used to know the origin of a file.
11 |
12 | Examples
13 | ========
14 |
15 | Our victim::
16 |
17 | $ hachoir-metadata KDE_Click.wav
18 | Common:
19 | - Creation date: 2001-02-21 <== here they are
20 | - Producer: Sound Forge 4.5 <== spy informations :-)
21 | - MIME type: audio/x-wav
22 | - Endian: Little endian
23 | Audio:
24 | - Duration: 39 ms
25 | ...
26 |
27 | Clean up the file::
28 |
29 | $ hachoir-strip KDE_Click.wav
30 | [+] Process file KDE_Click.wav
31 | Remove field /info
32 | Remove 56 bytes (3.1%)
33 | Save new file into KDE_Click.wav.new
34 |
35 | $ hachoir-metadata KDE_Click.wav.new
36 | Common:
37 | - MIME type: audio/x-wav
38 | - Endian: Little endian
39 | Audio:
40 | - Duration: 39 ms
41 | ...
42 |
43 | So hachoir-strip removed creation date (2001-02-21) and producer (software
44 | used to record/edit the sound: Sound Forge 4.5). The file is also 56 bytes
45 | smaller.
46 |
47 |
48 | Options
49 | =======
50 |
51 | You can select field types to remove using --strip:
52 |
53 | * (default): remove all useless fields
54 | * ``--strip=useless``: remove really useless fields (eg. padding)
55 | * ``--strip=metadata``: remove metadata like ID3 tags, EXIF and IPTC metadata
56 | * ``--strip=index``: remove video index
57 |
58 | You can combine options with comma: ``--strip="useless,metadata"``.
59 |
60 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_view.py:
--------------------------------------------------------------------------------
1 | from wx import ListCtrl, EVT_WINDOW_CREATE, CallAfter
2 | from wx.lib.mixins.listctrl import ListCtrlAutoWidthMixin
3 |
4 |
class field_view_t(ListCtrl, ListCtrlAutoWidthMixin):
    """
    List control displaying fields, one column per field attribute.

    The columns are created in post_init(), which is scheduled with
    CallAfter from the first EVT_WINDOW_CREATE event.

    NOTE(review): self.dispatcher is not assigned in this class; it is
    presumably injected from outside before post_init() runs.
    """

    def __init__(self):
        # Column name => column index, filled by append_column()
        self.cols = {}

        ListCtrl.__init__(self)
        self.Bind(EVT_WINDOW_CREATE, self.on_create)

    def on_create(self, event):
        """Unbind the creation event and schedule post_init()."""
        self.Unbind(EVT_WINDOW_CREATE)
        CallAfter(self.post_init)

    def post_init(self):
        """Create the columns, then trigger 'field_view_ready'."""
        ListCtrlAutoWidthMixin.__init__(self)

        headings = ('address', 'name', 'type', 'size', 'data', 'description')
        for heading in headings:
            self.append_column(heading)
        # Minimum width of a column (in characters): length of its heading
        self.col_min_width = list(map(len, headings))

        self.Layout()
        self.dispatcher.trigger('field_view_ready', self)

    def append_column(self, name):
        """Insert a column headed name after the existing ones."""
        position = self.GetColumnCount()
        self.cols[name] = position
        self.InsertColumn(col=position, heading=name)

    def get_col_index(self, name):
        """Return the index of the column called name."""
        return self.cols[name]

    def get_col_count(self):
        """Return the number of registered columns."""
        return len(self.cols)

    def get_selected(self, name):
        """
        Return the text of the 'name' column of the focused item.

        The name argument is not used.
        """
        row = self.GetFocusedItem()
        item = self.GetItem(row, self.cols['name'])
        return item.GetText()

    def clear(self):
        """Delete all items."""
        self.DeleteAllItems()

    def register_callback(self, cbGetItemText):
        """Set the callable used by OnGetItemText()."""
        self.OnGetItemText_imp = cbGetItemText

    def OnGetItemText(self, item, col):
        """Delegate to the callback set by register_callback()."""
        return self.OnGetItemText_imp(item, col)

    def resize_column(self, col_index, width):
        """
        Set the width of a column from a width in characters.

        The width is at least the column minimum width, plus one character.
        """
        chars = max(self.col_min_width[col_index], width) + 1
        self.SetColumnWidth(col_index, self.GetCharWidth() * chars)
53 |
--------------------------------------------------------------------------------
/hachoir/metadata/qt/dialog.ui:
--------------------------------------------------------------------------------
1 |
2 | Form
3 |
4 |
5 |
6 | 0
7 | 0
8 | 441
9 | 412
10 |
11 |
12 |
13 | hachoir-metadata
14 |
15 |
16 | -
17 |
18 |
-
19 |
20 |
21 | Open
22 |
23 |
24 |
25 | -
26 |
27 |
28 |
29 | 0
30 | 0
31 |
32 |
33 |
34 |
35 |
36 |
37 | -
38 |
39 |
40 | true
41 |
42 |
43 | false
44 |
45 |
46 | 0
47 |
48 |
49 | 0
50 |
51 |
52 |
53 | -
54 |
55 |
56 | Quit
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
--------------------------------------------------------------------------------
/hachoir/field/parser.py:
--------------------------------------------------------------------------------
1 | from hachoir.core.endian import BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN
2 | from hachoir.field import GenericFieldSet
3 | from hachoir.core.log import Logger
4 | import hachoir.core.config as config
5 |
6 |
class Parser(GenericFieldSet):
    """
    A parser is the root of all other fields. It creates the first level of
    fields and has special attributes and methods:
    - endian: Byte order (L{BIG_ENDIAN}, L{LITTLE_ENDIAN} or L{MIDDLE_ENDIAN})
      of input data, expected to be set before L{__init__()} is called ;
    - stream: Data input stream (set in L{__init__()}) ;
    - size: Field set size, as returned by stream.askSize().

    A parser can be used as a context manager: its stream is closed on exit.
    """

    def __init__(self, stream, description=None):
        """
        Parser constructor

        @param stream: Data input stream (see L{InputStream})
        @param description: (optional) String description
        """
        # The byte order has to be defined (eg. as a class attribute)
        valid_endians = (BIG_ENDIAN, LITTLE_ENDIAN, MIDDLE_ENDIAN)
        assert hasattr(self, "endian") and self.endian in valid_endians

        # The root field set has no parent and is called "root"
        size = stream.askSize(self)
        GenericFieldSet.__init__(
            self, None, "root", stream, description, size)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        """Close the input stream."""
        self.stream.close()

    def _logger(self):
        return Logger._logger(self)

    def _setSize(self, size):
        """Truncate the parser to size, then raise "field-resized"."""
        self._truncate(size)
        self.raiseEvent("field-resized", self)

    @property
    def size(self):
        """Size in bits"""
        return self._size

    @property
    def path(self):
        # The parser is the root of the field tree
        return "/"

    # dummy definition to prevent hachoir-core from depending on hachoir-parser
    @property
    def autofix(self):
        return config.autofix
52 |
--------------------------------------------------------------------------------
/hachoir/stream/input_helper.py:
--------------------------------------------------------------------------------
1 | from hachoir.core.i18n import guessBytesCharset
2 | from hachoir.stream import InputIOStream, InputSubStream, InputStreamError
3 |
4 |
def FileInputStream(filename, real_filename=None, **args):
    """
    Create an input stream from a file.

    filename is either a string (the file is then opened in binary mode)
    or an already opened file object (its 'name' attribute, or '' if it has
    none, is used as the file name).

    real_filename is optional: when set and filename is a string, it is the
    path which is really opened, whereas filename is only used for the
    stream source and the error message.

    The 'offset' and 'size' keyword arguments are removed from args; if one
    of them is set, an InputSubStream over the file is returned (both values
    are multiplied by 8, presumably to convert bytes into bits). Otherwise
    an InputIOStream tagged with the file name is returned.

    Raise InputStreamError if the file cannot be opened.
    """
    from_path = isinstance(filename, str)
    if not real_filename:
        if from_path:
            real_filename = filename
        else:
            real_filename = getattr(filename, 'name', '')
    try:
        if from_path:
            inputio = open(real_filename, 'rb')
        else:
            inputio = filename
            filename = getattr(filename, 'name', '')
    except IOError as err:
        raise InputStreamError(
            "Unable to open file %s: %s" % (filename, str(err)))

    source = "file:" + filename
    offset = args.pop("offset", 0)
    size = args.pop("size", None)

    if not (offset or size):
        # Whole file: record the file name in the stream tags
        args.setdefault("tags", []).append(("filename", filename))
        return InputIOStream(inputio, source=source, **args)

    if size:
        size = 8 * size
    stream = InputIOStream(inputio, source=source, **args)
    return InputSubStream(stream, 8 * offset, size, **args)
39 |
40 |
def guessStreamCharset(stream, address, size, default=None):
    """
    Guess the charset of the data stored at address in stream.

    At most 1024 * 8 units of size are considered; size // 8 bytes are read
    (so size is presumably in bits) and passed to guessBytesCharset()
    together with default.
    """
    limit = 1024 * 8
    if size > limit:
        size = limit
    raw = stream.readBytes(address, size // 8)
    return guessBytesCharset(raw, default)
45 |
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to Hachoir's documentation!
2 | ===================================
3 |
4 | *Hachoir* is a Python library to view and edit a binary stream field by field.
5 | In other words, Hachoir allows you to "browse" any binary stream just like you
6 | browse directories and files. A file is split into a tree of fields, where the
7 | smallest field is just one bit. Examples of fields types: integers,
8 | strings, bits, padding types, floats, etc. Hachoir is the French word for a
9 | meat grinder (meat mincer), which is used by butchers to divide meat into long
10 | tubes; Hachoir is used by computer butchers to divide binary files into fields.
11 |
12 | * `Hachoir website <http://hachoir.readthedocs.io/>`_ (this site)
13 | * `Hachoir at GitHub <https://github.com/vstinner/hachoir>`_ (source code, bugs)
14 |
15 | Command line tools using Hachoir parsers:
16 |
17 | * :ref:`hachoir-metadata <metadata>`: get metadata from binary files
18 | * :ref:`hachoir-urwid <urwid>`: display the content of a binary file in text mode
19 | * :ref:`hachoir-wx <wx>`: display the content of a binary file in GUI mode
20 | * :ref:`hachoir-grep <grep>`: find a text pattern in a binary file
21 | * :ref:`hachoir-strip <strip>`: modify a file to remove metadata
22 | * :ref:`hachoir-list <list>`: print decoded content of a binary file
23 |
24 | .. image:: images/urwid.png
25 | :alt: hachoir-urwid screenshot (MP3 audio file)
26 |
27 |
28 | User Guide
29 | ==========
30 |
31 | .. toctree::
32 | :maxdepth: 1
33 |
34 | install
35 | metadata
36 | urwid
37 | wx
38 | subfile
39 | grep
40 | strip
41 | list
42 |
43 |
44 | Developer Guide
45 | ===============
46 |
47 | .. toctree::
48 | :maxdepth: 1
49 |
50 | developer
51 | internals
52 | parser
53 | regex
54 | editor
55 |
56 | Other pages
57 | ============
58 |
59 | .. toctree::
60 | :maxdepth: 1
61 |
62 | contact
63 | hacking
64 | authors
65 | changelog
66 |
--------------------------------------------------------------------------------
/hachoir/metadata/filter.py:
--------------------------------------------------------------------------------
1 | from hachoir.metadata.timezone import UTC
2 | from datetime import date, datetime
3 |
4 | # Year in 1850..2030
5 | MIN_YEAR = 1850
6 | MAX_YEAR = 2030
7 |
8 |
class Filter:
    """
    Callable checking that a value is within optional bounds.

    Values which are not instances of valid_types are always accepted.
    For the others, min and max (both inclusive) are checked when they are
    not None.
    """

    def __init__(self, valid_types, min=None, max=None):
        self.types = valid_types
        self.min = min
        self.max = max

    def __call__(self, value):
        """Return False if value is out of bounds, True otherwise."""
        # Bounds only apply to the supported types
        if not isinstance(value, self.types):
            return True
        if self.min is not None and value < self.min:
            return False
        return not (self.max is not None and self.max < value)
24 |
25 |
class NumberFilter(Filter):
    """Filter whose bounds apply to int and float values."""

    def __init__(self, min=None, max=None):
        Filter.__init__(self, valid_types=(int, float), min=min, max=max)
30 |
31 |
class DatetimeFilter(Filter):
    """
    Filter accepting date and datetime values whose year is in
    MIN_YEAR..MAX_YEAR.

    NOTE(review): the min and max constructor arguments are not used, the
    bounds always come from MIN_YEAR and MAX_YEAR.
    """

    def __init__(self, min=None, max=None):
        first_day = (MIN_YEAR, 1, 1)
        last_day = (MAX_YEAR, 12, 31)
        # Bounds for naive datetime objects (stored as self.min / self.max)
        Filter.__init__(self, (date, datetime),
                        datetime(*first_day),
                        datetime(*last_day))
        # Bounds for date objects
        self.min_date = date(*first_day)
        self.max_date = date(*last_day)
        # Bounds for datetime objects having a timezone
        self.min_tz = datetime(*first_day, tzinfo=UTC)
        self.max_tz = datetime(*last_day, tzinfo=UTC)

    def __call__(self, value):
        """
        Use different min/max values depending on value type
        (datetime with timezone, datetime or date).
        """
        if not isinstance(value, self.types):
            return True
        if getattr(value, "tzinfo", None):
            lower, upper = self.min_tz, self.max_tz
        elif isinstance(value, datetime):
            lower, upper = self.min, self.max
        else:
            lower, upper = self.min_date, self.max_date
        return (lower <= value <= upper)
56 |
57 |
58 | DATETIME_FILTER = DatetimeFilter()
59 |
--------------------------------------------------------------------------------
/hachoir/wx/field_view/field_menu_setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .field_menu_imp import field_menu_imp_t
4 | from .field_menu_fwd import field_menu_fwd_t
5 | from .field_menu import field_menu_t
6 |
7 | from .core_type_menu import core_type_menu_t
8 | from .core_type_menu_fwd import core_type_menu_fwd_t
9 | from .core_type_menu_imp import core_type_menu_imp_t
10 |
11 | from .field_split_menu import field_split_menu_t
12 | from .field_split_menu_fwd import field_split_menu_fwd_t
13 | from .field_split_menu_imp import field_split_menu_imp_t
14 |
15 | import wx
16 |
17 | from hachoir.wx.resource import get_menu
18 |
19 |
def setup_field_menu(parent, dispatcher):
    """
    Build the field menu and register its handlers on dispatcher.

    The 'field_menu' resource is loaded and wrapped in a field_menu_t, the
    implementation and its forwarder are registered, the "core type" and
    "split" sub-menus are set up, and 'field_menu_ready' is finally
    triggered with the new field menu, which is returned.
    """
    menu_resource = get_menu('field_menu')
    view = field_menu_t(parent, menu_resource)

    handler = field_menu_imp_t()
    dispatcher.add(handler)

    forwarder = field_menu_fwd_t(handler)
    dispatcher.add_receiver(forwarder)

    # Sub-menus are set up before announcing the menu itself
    setup_core_type_menu(menu_resource, dispatcher)
    setup_field_split_menu(parent, menu_resource, dispatcher)

    dispatcher.trigger('field_menu_ready', view)

    return view
36 |
37 |
def setup_core_type_menu(parent, dispatcher):
    """
    Set up the sub-menu of the 'field_menu_convert_to_core_type' item.

    parent is the menu containing this item. The implementation and its
    forwarder are registered on dispatcher, then 'core_type_menu_ready' is
    triggered with the core_type_menu_t view.
    """
    item_id = wx.xrc.XRCID('field_menu_convert_to_core_type')
    submenu = parent.FindItemById(item_id).GetSubMenu()
    view = core_type_menu_t(submenu)

    handler = core_type_menu_imp_t()
    dispatcher.add(handler)

    forwarder = core_type_menu_fwd_t(handler)
    dispatcher.add_receiver(forwarder)

    dispatcher.trigger('core_type_menu_ready', view)
49 |
50 |
def setup_field_split_menu(parent, parent_menu, dispatcher):
    """
    Set up the sub-menu of the 'field_menu_split' item of parent_menu.

    The implementation and its forwarder are registered on dispatcher, then
    'field_split_menu_ready' is triggered with the field_split_menu_t view
    (created with parent and the sub-menu).
    """
    item_id = wx.xrc.XRCID('field_menu_split')
    submenu = parent_menu.FindItemById(item_id).GetSubMenu()
    view = field_split_menu_t(parent, submenu)

    handler = field_split_menu_imp_t()
    dispatcher.add(handler)

    forwarder = field_split_menu_fwd_t(handler)
    dispatcher.add_receiver(forwarder)

    dispatcher.trigger('field_split_menu_ready', view)
62 |
--------------------------------------------------------------------------------
/hachoir/parser/archive/ar.py:
--------------------------------------------------------------------------------
1 | """
2 | GNU ar archive : archive file (.a) and Debian (.deb) archive.
3 | """
4 |
5 | from hachoir.parser import Parser
6 | from hachoir.field import (FieldSet, ParserError,
7 | String, RawBytes, UnixLine)
8 | from hachoir.core.endian import BIG_ENDIAN
9 |
10 |
class ArchiveFileEntry(FieldSet):
    """
    One entry of the archive: a header line, followed by the file content
    when its size is not null.
    """

    def createFields(self):
        yield UnixLine(self, "header", "Header")
        # The header is a list of words: the file size is the one before
        # the last
        words = self["header"].value.split()
        if len(words) < 3:
            raise ParserError("Invalid file entry header: %r" % words)
        content_size = int(words[-2])
        if content_size > 0:
            yield RawBytes(self, "content", content_size, "File data")

    def createDescription(self):
        # The first word of the header is displayed as the entry name
        first_word = self["header"].value.split()[0]
        return "File entry (%s)" % first_word
25 |
26 |
class ArchiveFile(Parser):
    """
    Parser of Unix archives (.a) and Debian packages (.deb).

    The file starts with an 8-byte identifier, followed by file entries
    (see ArchiveFileEntry) which may be separated by empty lines.
    """
    endian = BIG_ENDIAN
    # Global header of the ar format: 8 bytes, the size of the "id" field
    # and of the "file signature" counted in min_size. The "<arch>" part had
    # been lost, leaving a 2-byte magic which matched unrelated files.
    MAGIC = b'!<arch>\n'
    PARSER_TAGS = {
        "id": "unix_archive",
        "category": "archive",
        "file_ext": ("a", "deb"),
        "mime":
        ("application/x-debian-package",
         "application/x-archive",
         "application/x-dpkg"),
        "min_size": (8 + 13) * 8,  # file signature + smallest file as possible
        "magic": ((MAGIC, 0),),
        "description": "Unix archive"
    }

    def validate(self):
        """Return True if the file starts with MAGIC, else an error message."""
        if self.stream.readBytes(0, len(self.MAGIC)) != self.MAGIC:
            return "Invalid magic string"
        return True

    def createFields(self):
        yield String(self, "id", 8, "Unix archive identifier (\"!<arch>\")", charset="ASCII")
        while not self.eof:
            # Peek one byte: a lone newline is an empty line between
            # entries, anything else is the beginning of a file entry
            data = self.stream.readBytes(self.current_size, 1)
            if data == b"\n":
                yield RawBytes(self, "empty_line[]", 1, "Empty line")
            else:
                yield ArchiveFileEntry(self, "file[]", "File")
56 |
--------------------------------------------------------------------------------
/hachoir/field/bit_field.py:
--------------------------------------------------------------------------------
1 | """
2 | Bit sized classes:
3 | - Bit: Single bit, value is False or True ;
4 | - Bits: Integer with a size in bits ;
5 | - RawBits: unknown content with a size in bits.
6 | """
7 |
8 | from hachoir.field import Field
9 | from hachoir.core import config
10 |
11 |
class RawBits(Field):
    """
    Field of unknown content, with a size in bits.

    The value is read from the parent stream with readBits(), using the
    endian of the parent.
    """
    # Static size: second positional argument (presumably the arguments of
    # the constructor without the parent, ie. name then size)
    static_size = staticmethod(lambda *args, **kw: args[1])

    def __init__(self, parent, name, size, description=None):
        """
        Constructor: see L{Field.__init__} for parameter description
        """
        Field.__init__(self, parent, name, size, description)

    def hasValue(self):
        return True

    def createValue(self):
        parent = self._parent
        return parent.stream.readBits(
            self.absolute_address, self._size, parent.endian)

    def createDisplay(self):
        # Fields of config.max_bit_length bits or more are not read: only
        # the type and the size are displayed
        if self._size >= config.max_bit_length:
            return ("<%s size=%u>" %
                    (self.__class__.__name__, self._size))
        return str(self.value)
    createRawDisplay = createDisplay
38 |
39 |
class Bits(RawBits):
    """
    Positive integer with a size in bits. Nothing is redefined: the
    behaviour is the one of L{RawBits}.

    @see: L{Bit}
    @see: L{RawBits}
    """
48 |
49 |
class Bit(RawBits):
    """
    Single bit: the size is exactly one bit and the value is a boolean
    (False or True).

    @see: L{Bits}
    """
    static_size = 1

    def __init__(self, parent, name, description=None):
        """
        Constructor: see L{Field.__init__} for parameter description
        """
        RawBits.__init__(self, parent, name, 1, description=description)

    def createValue(self):
        parent = self._parent
        raw = parent.stream.readBits(self.absolute_address, 1, parent.endian)
        return 1 == raw

    def createRawDisplay(self):
        # Display the boolean as "0" or "1"
        as_integer = int(self.value)
        return str(as_integer)
70 |
--------------------------------------------------------------------------------
/hachoir/field/integer.py:
--------------------------------------------------------------------------------
1 | """
2 | Integer field classes:
3 | - UInt8, UInt16, UInt24, UInt32, UInt64: unsigned integer of 8, 16, 24, 32, 64 bits ;
4 | - Int8, Int16, Int24, Int32, Int64: signed integer of 8, 16, 24, 32, 64 bits.
5 | """
6 |
7 | from hachoir.field import Bits, FieldError
8 |
9 |
class GenericInteger(Bits):
    """
    Generic integer class used to generate other classes.

    The size (in bits) has to be in 8..16384, else FieldError is raised.
    The value is read with stream.readInteger().
    """

    def __init__(self, parent, name, signed, size, description=None):
        if size < 8 or 16384 < size:
            raise FieldError(
                "Invalid integer size (%s): have to be in 8..16384" % size)
        Bits.__init__(self, parent, name, size, description)
        self.signed = signed

    def createValue(self):
        parent = self._parent
        return parent.stream.readInteger(
            self.absolute_address, self.signed, self._size, parent.endian)
25 |
26 |
def integerFactory(name, is_signed, size, doc):
    """
    Create a GenericInteger subclass of fixed signedness and size.

    The new class is called name, its docstring is doc and its static_size
    is size. Its constructor only takes parent, name and an optional
    description.
    """
    class Integer(GenericInteger):
        __doc__ = doc
        static_size = size

        def __init__(self, parent, name, description=None):
            # is_signed and size come from the factory arguments
            GenericInteger.__init__(
                self, parent, name, is_signed, size, description)

    Integer.__name__ = name
    return Integer
38 |
39 |
40 | UInt8 = integerFactory("UInt8", False, 8, "Unsigned integer of 8 bits")
41 | UInt16 = integerFactory("UInt16", False, 16, "Unsigned integer of 16 bits")
42 | UInt24 = integerFactory("UInt24", False, 24, "Unsigned integer of 24 bits")
43 | UInt32 = integerFactory("UInt32", False, 32, "Unsigned integer of 32 bits")
44 | UInt64 = integerFactory("UInt64", False, 64, "Unsigned integer of 64 bits")
45 |
46 | Int8 = integerFactory("Int8", True, 8, "Signed integer of 8 bits")
47 | Int16 = integerFactory("Int16", True, 16, "Signed integer of 16 bits")
48 | Int24 = integerFactory("Int24", True, 24, "Signed integer of 24 bits")
49 | Int32 = integerFactory("Int32", True, 32, "Signed integer of 32 bits")
50 | Int64 = integerFactory("Int64", True, 64, "Signed integer of 64 bits")
51 |
--------------------------------------------------------------------------------
/hachoir/field/static_field_set.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import FieldSet, ParserError
2 |
3 |
class StaticFieldSet(FieldSet):
    """
    Static field set: format class attribute is a tuple of all fields
    in syntax like:
       format = (
          (TYPE1, ARG1, ARG2, ...),
          (TYPE2, ARG1, ARG2, ..., {KEY1=VALUE1, ...}),
          ...
       )

    Each item is a field class followed by the arguments of its constructor
    (without the parent); a trailing dictionary is used as keyword arguments.

    Types with dynamic size are forbidden, eg. CString, PascalString8, etc.
    """
    format = None  # You have to redefine this class variable
    _class = None  # Name of the class whose static_size has been computed

    def __new__(cls, *args, **kw):
        assert cls.format is not None, "Class attribute 'format' is not set"
        # First instance of this class (a subclass inherits the _class value
        # of its parent, which holds another name): replace static_size by
        # its computed value
        if cls._class is not cls.__name__:
            cls._class = cls.__name__
            cls.static_size = cls._computeStaticSize()
        return object.__new__(cls)

    @staticmethod
    def _computeItemSize(item):
        """
        Get the static size of one item of the format.

        Raise ParserError if the field class has no static size
        (static_size is None).
        """
        item_class = item[0]
        if item_class.static_size is None:
            raise ParserError("Unable to get static size of field type: %s"
                              % item_class.__name__)
        if callable(item_class.static_size):
            # The size depends on the arguments of the field: a trailing
            # dictionary is passed as keyword arguments
            if isinstance(item[-1], dict):
                return item_class.static_size(*item[1:-1], **item[-1])
            else:
                return item_class.static_size(*item[1:])
        else:
            assert isinstance(item_class.static_size, int)
            return item_class.static_size

    def createFields(self):
        # Instantiate each field of the format with this field set as parent
        for item in self.format:
            if isinstance(item[-1], dict):
                yield item[0](self, *item[1:-1], **item[-1])
            else:
                yield item[0](self, *item[1:])

    @classmethod
    def _computeStaticSize(cls, *args):
        """Sum of the static sizes of all items of the format."""
        return sum(cls._computeItemSize(item) for item in cls.format)

    # Initial value of static_size, it changes when first instance
    # is created (see __new__)
    static_size = _computeStaticSize
55 |
--------------------------------------------------------------------------------
/doc/urwid.rst:
--------------------------------------------------------------------------------
1 | .. _urwid:
2 |
3 | +++++++++++++++++++++
4 | hachoir-urwid program
5 | +++++++++++++++++++++
6 |
7 | hachoir-urwid is a binary file explorer based on the Hachoir library to parse the
8 | files. Using this tool you can know exactly the meaning of each bit/byte of
9 | your files. With direction keys, you can navigate in the field tree. The key
10 | 'h' will disable 'human display' and switch to 'raw display'. It's sometimes
11 | useful when you would like to compare hexadecimal data and Hachoir
12 | representation.
13 |
14 | **hachoir-urwid** is the most user friendly interface based on Hachoir to
15 | explore a binary file.
16 |
17 | Before use, make sure to install the required dependencies with ``pip install
18 | hachoir[urwid]``.
19 |
20 | .. image:: images/urwid.png
21 | :alt: hachoir-urwid screenshot (MP3 audio file)
22 |
23 | Command line options
24 | ====================
25 |
26 | * ``--preload=10``: Load 10 fields when loading a new field set
27 | * ``--path="/header/bpp"``: Open the specified path and focus on the field
28 | * ``--parser=PARSERID``: Force a parser (and skip parser validation)
29 |
30 |
31 | Useful keys
32 | ============
33 |
34 | Move:
35 |
36 | * up/down: move up/down
37 | * home: go to parent
38 | * end: go to the last field of a field set
39 | * left/right: horizontal scrolling
40 |
41 | Setup display:
42 |
43 | * h: most important option :-) switch between human display (default) and raw value
44 | * v / d / s: show or hide field value / description / size
45 | * a: switch between relative (default) and absolute address
46 | * b: switch between address in decimal (default) and hexadecimal
47 |
48 | Interaction:
49 |
50 | * enter: on a field set, expand/collapse the children
51 | * space: parse a file/stream contained in the current field
52 |
53 | Application:
54 |
55 | * q: quit
56 | * < / >: previous / next tab
57 | * + / -: move separator vertically
58 | * esc or CTRL+W: close current tab
59 | * F1: display help
60 |
61 |
62 | Help
63 | ====
64 |
65 | Command line options: use --help option.
66 |
67 | In hachoir-urwid, use F1 key to get help (keyboard keys).
68 |
--------------------------------------------------------------------------------
/hachoir/metadata/cr2.py:
--------------------------------------------------------------------------------
1 | """
2 | Canon CR2 raw image data, version 2.0 image metadata extractor.
3 |
4 | Authors: Fernando Crespo
5 | Creation date: 21 february 2017
6 | """
7 |
8 | from hachoir.metadata.metadata import (registerExtractor, RootMetadata)
9 | from hachoir.parser.image import CR2File
10 | from hachoir.metadata.safe import fault_tolerant
11 |
12 |
class CR2Metadata(RootMetadata):
    """
    Metadata extractor for CR2 files.

    Image attributes are read from the entries of "ifd[0]", camera
    information from fixed values of "ifd[0]" and dates from fixed values
    of "exif[0]".
    """
    # IFD tag name => metadata attribute name
    key_to_attr = {
        "ImageWidth": "width",
        "ImageLength": "height",
        "ImageDescription": "comment",
        "DocumentName": "title",
        "XResolution": "width_dpi",
        "YResolution": "height_dpi",
    }

    def extract(self, tiff):
        if "ifd[0]" in tiff:
            ifd = tiff["ifd[0]"]
            self.useIFD(ifd)
            self.camera_manufacturer = ifd["value[4]"].value
            self.camera_model = ifd["value[5]"].value

        if "exif[0]" in tiff:
            exif = tiff["exif[0]"]
            self.date_time_original = exif["value[7]"].value
            self.date_time_digitized = exif["value[8]"].value

    def useIFD(self, ifd):
        """Process all entries of ifd, then compute the bits per pixel."""
        collected = {}
        for entry in ifd.array("entry"):
            self.processIfdEntry(ifd, entry, collected)
        # Both values are needed to compute the number of bits per pixel
        if {'BitsPerSample', 'SamplesPerPixel'} <= collected.keys():
            bits = collected['BitsPerSample']
            samples = collected['SamplesPerPixel']
            self.bits_per_pixel = bits * samples

    @fault_tolerant
    def processIfdEntry(self, ifd, entry, attr):
        """
        Process one IFD entry.

        BitsPerSample and SamplesPerPixel are stored in the attr dictionary;
        tags listed in key_to_attr are stored as metadata attributes
        (resolutions are rounded); other tags are ignored.
        """
        tag = entry["tag"].display
        if tag in {"BitsPerSample", "SamplesPerPixel"}:
            attr[tag] = ifd.getEntryValues(entry)[0].value
            return

        attrname = self.key_to_attr.get(tag)
        if attrname is None:
            return
        value = ifd.getEntryValues(entry)[0].value
        if tag in {"XResolution", "YResolution"}:
            value = round(value)
        setattr(self, attrname, value)
57 |
58 |
59 | registerExtractor(CR2File, CR2Metadata)
60 |
--------------------------------------------------------------------------------
/hachoir/core/text_handler.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities used to convert a field to a classic human representation of data.
3 | """
4 |
5 | from hachoir.core.tools import (
6 | humanDuration, humanFilesize, alignValue,
7 | durationWin64 as doDurationWin64,
8 | deprecated)
9 | from types import FunctionType, MethodType
10 | from hachoir.field import Field
11 |
12 |
def textHandler(field, handler):
    """
    Replace field.createDisplay by a call to handler(field).

    handler has to be a function or a method, and field a Field instance.
    Return the field.
    """
    assert isinstance(handler, (FunctionType, MethodType))
    assert issubclass(field.__class__, Field)

    def createDisplay():
        return handler(field)

    field.createDisplay = createDisplay
    return field
18 |
19 |
def displayHandler(field, handler):
    """
    Replace field.createDisplay by a call to handler(field.value).

    handler has to be a function or a method, and field a Field instance.
    The value is read when the display is created, not when the handler is
    installed. Return the field.
    """
    assert isinstance(handler, (FunctionType, MethodType))
    assert issubclass(field.__class__, Field)

    def createDisplay():
        return handler(field.value)

    field.createDisplay = createDisplay
    return field
25 |
26 |
@deprecated("Use TimedeltaWin64 field type")
def durationWin64(field):
    """
    Format a Windows 64-bit duration (a 64-bit number of 100ns units) as a
    human readable string. See also timestampWin64().

    field needs a "value" attribute and a "size" attribute equal to 64.

    >>> durationWin64(type("", (), dict(value=2146280000, size=64)))
    '3 min 34 sec 628 ms'
    >>> durationWin64(type("", (), dict(value=(1 << 64)-1, size=64)))
    '58494 years 88 days 5 hours'
    """
    assert hasattr(field, "value") and hasattr(field, "size")
    assert field.size == 64
    return humanDuration(doDurationWin64(field.value))
42 |
43 |
def filesizeHandler(field):
    """
    Install humanFilesize() as the display handler of field (see
    displayHandler()) and return the field.
    """
    handler = humanFilesize
    return displayHandler(field, handler)
49 |
50 |
def hexadecimal(field):
    """
    Format the value of field as lower case hexadecimal, prefixed by "0x"
    and zero padded to the number of digits given by the field size.

    >>> hexadecimal(type("", (), dict(value=412, size=16)))
    '0x019c'
    >>> hexadecimal(type("", (), dict(value=0, size=32)))
    '0x00000000'
    """
    assert hasattr(field, "value") and hasattr(field, "size")
    # One hexadecimal digit per group of 4 bits (size aligned to 4 first)
    digits = alignValue(field.size, 4) // 4
    return ("0x%%0%ux" % digits) % field.value
65 |
--------------------------------------------------------------------------------
/hachoir/parser/template.py:
--------------------------------------------------------------------------------
1 | """
2 | ====================== 8< ============================
3 | This file is an Hachoir parser template. Make a copy
4 | of it, and adapt it to your needs.
5 |
6 | You have to replace all "TODO" with your code.
7 | ====================== 8< ============================
8 |
9 | TODO parser.
10 |
11 | Author: TODO TODO
12 | Creation date: YYYY-mm-DD
13 | """
14 |
15 | # TODO: Just keep what you need
16 | from hachoir.parser import Parser
17 | # from hachoir.field import (ParserError,
18 | # UInt8, UInt16, UInt32, String, RawBytes)
19 | # from hachoir.core.endian import LITTLE_ENDIAN, BIG_ENDIAN
20 |
21 |
class TODOFile(Parser):
    # Description of the parser: replace every "TODO" value below.
    PARSER_TAGS = {
        "id": "TODO",
        "category": "TODO",    # "archive", "audio", "container", ...
        # TODO: Example ("bmp",) to parse the file "image.bmp"
        "file_ext": ("TODO",),
        "mime": ("TODO",),     # TODO: Example: "image/png"
        # TODO: Minimum file size in bits (multiply a size in bytes by 8)
        "min_size": 0,
        "description": "TODO",  # TODO: Example: "A bitmap picture"
    }

    # TODO: Choose between little or big endian
    # endian = LITTLE_ENDIAN
    # endian = BIG_ENDIAN

    def validate(self):
        # TODO: Check that the file looks like your format.
        # Example: compare the first two bytes with a signature
        #    return (self.stream.readBytes(0, 2) == b'BM')
        return False

    def createFields(self):
        # TODO: Write your parser using this model:
        # yield UInt8(self, "name1", "description1")
        # yield UInt16(self, "name2", "description2")
        # yield UInt32(self, "name3", "description3")
        # yield String(self, "name4", 1, "description4")  # TODO: add a charset
        # yield String(self, "name5", 1, "description5", charset="ASCII")
        # yield String(self, "name6", 1, "description6", charset="ISO-8859-1")

        # Read rest of the file (if any)
        # TODO: You may remove this code
        end = self._size
        if self.current_size < end:
            yield self.seekBit(end, "end")
57 |
--------------------------------------------------------------------------------
/hachoir/editor/field.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import joinPath, MissingField
2 |
3 |
class EditorError(Exception):
    """
    Exception type of the editor module; adds nothing to Exception.
    """
6 |
7 |
class FakeField(object):
    """
    Object with an API similar to the Field API, but which doesn't store
    any value: everything is _computed_ on demand through the parent.

    Example: FakeField(editor, "abc").size reads editor.input["abc"].size.
    """
    is_field_set = False

    def __init__(self, parent, name):
        self._name = name
        self._parent = parent

    def _getPath(self):
        parent_path = self._parent.path
        return joinPath(parent_path, self._name)
    path = property(_getPath)

    def _getName(self):
        return self._name
    name = property(_getName)

    def _getAddress(self):
        # The address is computed by the parent
        parent = self._parent
        return parent._getFieldAddress(self._name)
    address = property(_getAddress)

    def _getSize(self):
        original = self._parent.input[self._name]
        return original.size
    size = property(_getSize)

    def _getValue(self):
        original = self._parent.input[self._name]
        return original.value
    value = property(_getValue)

    def createDisplay(self):
        # TODO: Returns new value if field is altered
        original = self._parent.input[self._name]
        return original.display
    display = property(createDisplay)

    def _getParent(self):
        return self._parent
    parent = property(_getParent)

    def hasValue(self):
        original = self._parent.input[self._name]
        return original.hasValue()

    def __getitem__(self, key):
        # TODO: Implement this function!
        raise MissingField(self, key)

    def _isAltered(self):
        # A fake field always reports itself as unmodified
        return False
    is_altered = property(_isAltered)

    def writeInto(self, output):
        """
        Copy the field content from the input stream of the parent into
        output: byte copy when both the size and the input address are
        multiples of 8 bits, bit copy otherwise.
        """
        nbits = self.size
        start = self._parent._getFieldInputAddress(self._name)
        source = self._parent.input
        stream = source.stream
        if nbits % 8 == 0 and start % 8 == 0:
            output.copyBytesFrom(stream, start, nbits // 8)
        else:
            output.copyBitsFrom(stream, start, nbits, source.endian)
70 |
--------------------------------------------------------------------------------
/hachoir/wx/hex_view/file_cache.py:
--------------------------------------------------------------------------------
1 | from functools import lru_cache
2 |
3 |
class FileCache(object):
    """
    Read-through cache of fixed-size chunks of a seekable binary file.

    Chunks are read at offsets which are multiples of CHUNKSIZE and the
    most recently used ones are kept in a per-instance LRU cache. The
    position of the underlying file is restored after every access.
    """
    # Size in bytes of one cached chunk
    CHUNKSIZE = 4096
    # Maximum number of chunks kept in memory by one instance
    MAX_CHUNKS = 100

    def __init__(self, file):
        self.file = file
        # The cache is created per instance: decorating the method with
        # lru_cache would share a single cache between all instances and
        # keep every FileCache (and its file) alive as part of the keys.
        self._cached_read = lru_cache(maxsize=self.MAX_CHUNKS)(
            self._read_chunk)

        self.update_file_size()

    def update_file_size(self):
        """Store the current size of the file in self.filesize."""
        pos = self.file.tell()
        self.file.seek(0, 2)
        self.filesize = self.file.tell()
        self.file.seek(pos)

    def _read_chunk(self, cstart):
        """Read up to CHUNKSIZE bytes at offset cstart (uncached)."""
        pos = self.file.tell()
        self.file.seek(cstart)
        chunk = self.file.read(self.CHUNKSIZE)
        self.file.seek(pos)
        return chunk

    def get_chunk(self, cstart):
        """
        Return the chunk starting at offset cstart, reading it from the
        file only if it is not in the cache of this instance.
        """
        return self._cached_read(cstart)

    def hint(self, s, e):
        '''Hint that the range [s, e) may be needed soon'''

        sc = s // self.CHUNKSIZE
        ec = (e + self.CHUNKSIZE - 1) // self.CHUNKSIZE
        for c in range(sc, ec):
            self.get_chunk(c * self.CHUNKSIZE)

    def get(self, s, e):
        '''Obtain the file contents in the range [s, e)'''
        soff = s % self.CHUNKSIZE
        eoff = e % self.CHUNKSIZE
        sc = s // self.CHUNKSIZE
        ec = (e + self.CHUNKSIZE - 1) // self.CHUNKSIZE

        out = [self.get_chunk(c * self.CHUNKSIZE) for c in range(sc, ec)]

        # Trim the end before the start: when the range fits in a single
        # chunk, both slices apply to the same item.
        if eoff:
            out[-1] = out[-1][:eoff]
        if soff:
            out[0] = out[0][soff:]
        return b''.join(out)
50 |
51 |
def test():
    """
    Self-test: read random ranges through FileCache.get() using two chunk
    sizes and compare each result with a plain slice of the same data.
    """
    import random
    from io import BytesIO

    for blocksize in [8, 1024]:
        instr = bytes(range(256))
        fc = FileCache(BytesIO(instr))
        fc.CHUNKSIZE = blocksize

        # Same pseudo-random ranges for each chunk size
        random.seed(1)
        for _ in range(256):
            s = random.randrange(0, fc.filesize + 10)
            e = random.randrange(s, fc.filesize + 10)
            print("testing", s, e)
            expected = instr[s:e]
            got = fc.get(s, e)
            assert got == expected, "Failed to get %d, %d: got %r, expected %r" % (s, e, got, expected)
70 |
71 |
72 | if __name__ == '__main__':
73 | test()
74 |
--------------------------------------------------------------------------------
/hachoir/field/helper.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import (FieldError,
2 | RawBits, RawBytes,
3 | PaddingBits, PaddingBytes,
4 | NullBits, NullBytes,
5 | GenericString, GenericInteger)
6 | from hachoir.stream import FileOutputStream
7 |
8 |
def createRawField(parent, size, name="raw[]", description=None):
    """
    Create a raw field of size bits: RawBytes when the size is a whole
    number of bytes, RawBits otherwise.

    Raise FieldError if size is not strictly positive.
    """
    if size <= 0:
        raise FieldError("Unable to create raw field of %s bits" % size)
    if size % 8:
        return RawBits(parent, name, size, description)
    return RawBytes(parent, name, size // 8, description)
16 |
17 |
def createPaddingField(parent, nbits, name="padding[]", description=None):
    """
    Create a padding field of nbits bits: PaddingBytes when nbits is a
    multiple of 8, PaddingBits otherwise.

    Raise FieldError if nbits is not strictly positive.
    """
    if nbits <= 0:
        raise FieldError("Unable to create padding of %s bits" % nbits)
    if nbits % 8:
        return PaddingBits(parent, name, nbits, description)
    return PaddingBytes(parent, name, nbits // 8, description)
25 |
26 |
def createNullField(parent, nbits, name="padding[]", description=None):
    """
    Create a null padding field of nbits bits: NullBytes when nbits is a
    multiple of 8, NullBits otherwise.

    Raise FieldError if nbits is not strictly positive.
    """
    if nbits <= 0:
        raise FieldError("Unable to create null padding of %s bits" % nbits)
    if nbits % 8:
        return NullBits(parent, name, nbits, description)
    return NullBytes(parent, name, nbits // 8, description)
34 |
35 |
def isString(field):
    """Tell whether the class of the field derives from GenericString."""
    field_class = field.__class__
    return issubclass(field_class, GenericString)
38 |
39 |
def isInteger(field):
    """Tell whether the class of the field derives from GenericInteger."""
    field_class = field.__class__
    return issubclass(field_class, GenericInteger)
42 |
43 |
def writeIntoFile(fieldset, filename):
    """
    Write the field set into the file filename, through a
    FileOutputStream used as a context manager.
    """
    stream = FileOutputStream(filename)
    with stream:
        fieldset.writeInto(stream)
48 |
49 |
def createOrphanField(fieldset, address, field_cls, *args, **kw):
    """
    Build field_cls(fieldset, *args, **kw) as if the field set was
    currently positioned at address.

    The new field uses the properties of the field set, but it isn't
    added to it: the current size of the field set is restored before
    returning, even if the constructor raises an exception.
    """
    previous_size = fieldset._current_size
    try:
        # Temporarily move the field set cursor to the wanted address
        fieldset._current_size = address
        return field_cls(fieldset, *args, **kw)
    finally:
        fieldset._current_size = previous_size
65 |
--------------------------------------------------------------------------------
/tests/test_grep.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from hachoir.grep import Grep
3 | from hachoir.parser import createParser
4 | from hachoir.test import setup_tests
5 | import os.path
6 | import subprocess
7 | import sys
8 | import unittest
9 |
10 | DATADIR = os.path.join(os.path.dirname(__file__), "files")
11 | GEORGIA_CAB = os.path.join(DATADIR, 'georgia.cab')
12 | PROGRAM = os.path.join(os.path.dirname(__file__), "..", "hachoir-grep")
13 |
14 |
class TestGrepClass(unittest.TestCase):
    """Test the Grep class on the georgia.cab sample file."""

    def test_grep(self):
        matches = []

        class TestGrep(Grep):
            # Collect the matching fields instead of displaying them

            def onMatch(self, field):
                matches.append(field)

        parser = createParser(GEORGIA_CAB)
        with parser:
            TestGrep().grep(parser)
            found = [(field.absolute_address, field.path, field.value)
                     for field in matches]
        expected = [
            (0, '/magic', 'MSCF'),
            (480, '/file[0]/filename', 'fontinst.inf'),
            (712, '/file[1]/filename', 'Georgiaz.TTF'),
            (944, '/file[2]/filename', 'Georgiab.TTF'),
            (1176, '/file[3]/filename', 'Georgiai.TTF'),
            (1408, '/file[4]/filename', 'Georgia.TTF'),
            (1632, '/file[5]/filename', 'fontinst.exe'),
        ]
        self.assertEqual(found, expected)
39 |
40 |
class TestGrepCommandLine(unittest.TestCase):
    """Test the output of the hachoir-grep program on georgia.cab."""

    def test_grep(self):
        command = [sys.executable, PROGRAM, "--all", "--path", GEORGIA_CAB]
        # stderr is merged into stdout, so errors show up in the comparison
        proc = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        output = proc.communicate()[0].decode('ascii', 'replace')
        expected = """
0:/magic:MSCF
60:/file[0]/filename:fontinst.inf
89:/file[1]/filename:Georgiaz.TTF
118:/file[2]/filename:Georgiab.TTF
147:/file[3]/filename:Georgiai.TTF
176:/file[4]/filename:Georgia.TTF
204:/file[5]/filename:fontinst.exe
""".lstrip()
        self.assertEqual(output, expected)
59 |
60 |
61 | if __name__ == "__main__":
62 | setup_tests()
63 | unittest.main()
64 |
--------------------------------------------------------------------------------
/tools/entropy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | from math import log
3 |
4 |
class Entropy:
    """
    Count the occurrences of each byte value (0..255) and compute the
    entropy of the bytes read so far, in bits per byte.
    """

    def __init__(self):
        # Number of occurrences of each possible byte value
        self.frequence = dict.fromkeys(range(256), 0)
        # Total number of bytes read
        self.count = 0

    def readBytes(self, bytes):
        """Update the counters with bytes; return self to allow chaining."""
        frequence = self.frequence
        for byte in bytes:
            frequence[byte] += 1
        self.count += len(bytes)
        return self

    def compute(self):
        """Return minus the sum of p * log2(p) over the byte values seen."""
        h = 0
        total = self.count
        for occurrences in self.frequence.values():
            if occurrences:
                p_i = float(occurrences) / total
                h -= p_i * log(p_i, 2)
        return h
25 |
26 | from time import time
27 | from sys import stderr
28 |
29 |
class EntropyFile(Entropy):
    """
    Entropy computed from a stream or a file, read by blocks, with a
    progress message written to stderr at regular intervals.
    """

    def __init__(self):
        Entropy.__init__(self)
        # Minimum delay in seconds between two progress messages
        self.progress_time = 1.0
        # Number of bytes requested by each read()
        self.buffer_size = 4096

    def displayProgress(self, percent):
        """Write the progress (in percent) to stderr."""
        print("Progress: %.1f%%" % percent, file=stderr)

    def readStream(self, stream, streamsize=None):
        """
        Read the whole stream from its beginning and update the counters.

        If streamsize is None, the size is found by seeking to the end of
        the stream. The size is only used to compute the progress.
        Raise ValueError if the size is not strictly positive.
        Return self.
        """
        # Read stream size
        if streamsize is None:
            stream.seek(0, 2)
            streamsize = stream.tell()
            if streamsize <= 0:
                raise ValueError("Empty stream")

        # Read stream content
        stream.seek(0, 0)
        next_msg = time() + self.progress_time
        while True:
            if next_msg <= time():
                self.displayProgress(stream.tell() * 100.0 / streamsize)
                next_msg = time() + self.progress_time
            raw = stream.read(self.buffer_size)
            if not raw:
                break
            self.readBytes(raw)
        return self

    def readFile(self, filename):
        """
        Read the file filename in binary mode and update the counters.
        Return self.
        """
        # Context manager: the file is closed even if readStream() fails
        with open(filename, 'rb') as stream:
            self.readStream(stream)
        return self
65 |
66 |
def main():
    """
    Command line entry point: print the entropy of the file given as the
    only argument, then exit with code 0 (code 1 on usage error).
    """
    from sys import argv, exit
    if len(argv) != 2:
        print("usage: %s filename" % argv[0], file=stderr)
        exit(1)
    filename = argv[1]
    entropy = EntropyFile()
    entropy.readFile(filename)
    result = entropy.compute()
    print("Entropy: %.4f bit/byte" % result)
    exit(0)
77 | if __name__ == "__main__":
78 | main()
79 |
--------------------------------------------------------------------------------
/doc/authors.rst:
--------------------------------------------------------------------------------
1 | +++++++++++++++
2 | Hachoir Authors
3 | +++++++++++++++
4 |
5 | Team:
6 |
7 | * Julien Muchembled
8 | * Victor Stinner
9 | * Robert Xiao aka nneonneo - improve SeekableFieldSet
10 |
11 | Packagers:
12 |
13 | * Arnaud Pithon aka bildoon - ArchLinux package (v0.5.2 and svn)
14 | * Emmanuel Garette aka GnunuX - ArchLinux package (v0.5.2)
15 | * Michael Scherer aka misc - Mandriva package (v0.5.2)
16 | * Michel Casabona aka plumbear - Debian package (v0.5.2)
17 | * Richard Demongeot - Debian package (v0.5.2)
18 | * Thomas de Grenier de Latour TGL - Gentoo ebuild
19 |
20 | Contributors:
21 |
22 | * Alexandre Boeglin - PIFV parser
23 | * Aurélien Jacobs - AVI parser big contributor
24 | * Christophe Fergeau - Improve iTunesDB parser
25 | * Christophe Gisquet - Write RAR parser
26 | * Kirill Zorin - Author of hachoir-wx, 3DO and game parsers
27 | * Elie Roudninski aka adema - Started Gtk GUI
28 | * Feth Arezki - Fix hachoir-metadata-qt to save the current directory
29 | * Frédéric Weisbecker - Author of ReiserFS parser
30 | * Gottfried Ganßauge - Fix Win32 curses issues
31 | * Jason Gorski - Author of zsnes parser
32 | * Jean-Marc Libs - KDE plugin
33 | * Mickaël Kenikssi - Write CDDA parser
34 | * Mike Melanson - Write RealMedia and SpiderMan video parser
35 | * Olivier Schwab - Write 7-zip parser
36 | * Pierre Thierry - KDE plugin
37 | * Robert Xiao aka nneonneo - improve LNK parser
38 | * Romain Hérault - Author of iTunesDB parser
39 | * Sebastien Ponce - ActionScript (for SWF) parser
40 | * Thomas Pabst - Gnome plugin
41 | * Thomas de Grenier de Latour aka TGL - Java class parser
42 |
--------------------------------------------------------------------------------
/tests/regex_regression.rst:
--------------------------------------------------------------------------------
1 | Regex regression (repeat)
2 | =========================
3 |
4 | Factorisation of (a{n,p}){x,y}:
5 | -------------------------------
6 |
7 | >>> from hachoir.regex import parse
8 | >>> parse("(a{2,3}){4,5}")
9 |
10 | >>> parse("(a{2,}){3,4}")
11 |
12 | >>> parse("(a{2,3})+")
13 |
14 | >>> parse("(a*){2,3}")
15 |
16 | >>> parse("(a+){2,3}")
17 |
18 |
19 | Factorisation of (a|b)*:
20 | ------------------------
21 |
22 | >>> parse("(a*|b)*")
23 |
24 | >>> parse("(a+|b)*")
25 |
26 | >>> parse("(a{2,}|b)*")
27 |
28 |
29 | Factorisation of (a|b)+:
30 | ------------------------
31 |
32 | >>> parse("(a*|b)+")
33 |
34 | >>> parse("(a+|b|)+")
35 |
36 | >>> parse("(a+|b)+")
37 |
38 | >>> parse("(a{5,}|b)+")
39 |
40 |
41 | Factorisation of (a|b){x,}:
42 | ---------------------------
43 |
44 | >>> parse("(a+|b){3,}")
45 |
46 | >>> parse("(a{2,}|b){3,}")
47 |
48 |
49 | Factorisation of (a|b){x,y}:
50 | ----------------------------
51 |
52 | >>> parse("(a*|b|){4,5}")
53 |
54 | >>> parse("(a+|b|){4,5}")
55 |
56 | >>> parse("(a*|b){4,5}")
57 |
58 |
59 | Do not optimize:
60 | ----------------
61 |
62 | >>> parse('(a*|b){3,}')
63 |
64 | >>> parse("(a{2,3}|b){3,}")
65 |
66 | >>> parse("(a{2,3}|b)*")
67 |
68 | >>> parse("(a{2,3}|b)+")
69 |
70 | >>> parse("(a+|b){4,5}")
71 |
72 | >>> parse("(a{2,}|b){4,5}")
73 |
74 | >>> parse("(a{2,3}|b){4,5}")
75 |
76 |
77 |
78 | Regex regression (b)
79 | ====================
80 |
81 | >>> from hachoir.regex import parse
82 | >>> parse("(M(SCF|Thd)|B(MP4|Zh))")
83 |
84 | >>> parse("(FWS1|CWS1|FWS2|CWS2)")
85 |
86 | >>> parse("(abcdeZ|abZ)")
87 |
88 | >>> parse("(00t003|10t003|00[12]0[1-9].abc\0|1CD001)")
89 |
90 |
91 |
--------------------------------------------------------------------------------
/hachoir/field/byte_field.py:
--------------------------------------------------------------------------------
1 | """
2 | Very basic field: raw content with a size in bytes. Use this class for
3 | unknown content.
4 | """
5 |
6 | from hachoir.field import Field, FieldError
7 | from hachoir.core.tools import makePrintable
8 | from hachoir.core import config
9 |
10 | MAX_LENGTH = (2**64)
11 |
12 |
class RawBytes(Field):
    """
    Byte vector of unknown content

    The length is given in bytes and must be in the range 1..MAX_LENGTH.
    The value is read from the stream of the parent; the display is an
    ASCII rendering of at most config.max_byte_length bytes.

    @see: L{Bytes}
    """
    # Size in bits computed from the arguments alone: args[1] * 8.
    # NOTE(review): presumably args mirror the constructor arguments after
    # parent, making args[1] the length in bytes -- confirm with the caller.
    static_size = staticmethod(lambda *args, **kw: args[1] * 8)

    def __init__(self, parent, name, length, description="Raw data"):
        """
        Create the field; length is a number of bytes.

        Raise FieldError if length is not in the range 1..MAX_LENGTH.
        """
        assert issubclass(parent.__class__, Field)
        if not (0 < length <= MAX_LENGTH):
            raise FieldError("Invalid RawBytes length (%s)!" % length)
        # Field.__init__() gets the length multiplied by 8 as the size
        Field.__init__(self, parent, name, length * 8, description)
        # Bytes read only for display purposes (see _createDisplay)
        self._display = None

    def _createDisplay(self, human):
        """
        Build the display string from the first config.max_byte_length
        bytes of the content; "(...)" is added when the field is longer
        than what is displayed. The result is quoted in both modes.
        """
        max_bytes = config.max_byte_length
        try:
            display = makePrintable(self.value[:max_bytes], "ASCII")
        except Exception:
            # The value could not be used: read only the displayed prefix
            # from the stream and keep it in self._display for next calls
            if self._display is None:
                address = self.absolute_address
                length = min(self._size // 8, max_bytes)
                self._display = self._parent.stream.readBytes(address, length)
            display = makePrintable(self._display, "ASCII")
        # len(display) is compared (in bits) to the field size
        truncated = (8 * len(display) < self._size)
        if human:
            if truncated:
                display += "(...)"
            return makePrintable(display, "latin-1", quote='"')
        else:
            if truncated:
                return '"%s(...)"' % display
            else:
                return '"%s"' % display

    def createDisplay(self):
        """Display built with human=True."""
        return self._createDisplay(True)

    def createRawDisplay(self):
        """Display built with human=False."""
        return self._createDisplay(False)

    def hasValue(self):
        """Always True."""
        return True

    def createValue(self):
        """Read the whole content (self._size // 8 bytes) from the stream."""
        assert (self._size % 8) == 0
        # Drop the bytes kept for display by _createDisplay(), if any
        if self._display:
            self._display = None
        return self._parent.stream.readBytes(
            self.absolute_address, self._size // 8)
65 |
class Bytes(RawBytes):
    """
    Byte vector, for example a magic number or a GUID/UUID. The class
    doesn't override anything of RawBytes.

    @see: L{RawBytes}
    """
73 |
--------------------------------------------------------------------------------
/hachoir/core/timeout.py:
--------------------------------------------------------------------------------
1 | """
2 | limitedTime(): set a timeout in seconds when calling a function,
3 | and raise a Timeout error if the time limit is exceeded.
4 | """
5 | from math import ceil
6 |
7 | IMPLEMENTATION = None
8 |
9 |
class Timeout(RuntimeError):
    """
    Error raised when a timeout is exceeded; subclass of RuntimeError.
    """
15 |
16 |
def signalHandler(signum, frame):
    """
    Handler of the timeout signal: always raise a Timeout exception.
    Both arguments (signal number and stack frame) are ignored.
    """
    message = "Timeout exceed!"
    raise Timeout(message)
22 |
23 |
def limitedTime(second, func, *args, **kw):
    """
    Call func(*args, **kw) and return its result.

    This default implementation ignores second: no timeout is applied.
    """
    result = func(*args, **kw)
    return result
29 |
30 |
def fixTimeout(second):
    """
    Normalize a timeout: a float is rounded up to an integer, and the
    result is at least 1 second. Any other non-integer type is rejected
    by an assertion.
    """
    seconds = int(ceil(second)) if isinstance(second, float) else second
    assert isinstance(seconds, int)
    return max(seconds, 1)
39 |
40 |
# First choice: signal.alarm(). If the import fails, IMPLEMENTATION is
# left unchanged and limitedTime() keeps its previous definition.
if not IMPLEMENTATION:
    try:
        from signal import signal, alarm, SIGALRM

        # signal.alarm() implementation
        def limitedTime(second, func, *args, **kw):  # noqa
            """
            Call func(*args, **kw) with a timeout of second seconds;
            signalHandler() raises Timeout when SIGALRM is received.
            """
            second = fixTimeout(second)
            # signal() returns the previous handler: keep it to restore it
            old_alarm = signal(SIGALRM, signalHandler)
            try:
                alarm(second)
                return func(*args, **kw)
            finally:
                # Cancel the pending alarm before restoring the handler
                alarm(0)
                signal(SIGALRM, old_alarm)

        IMPLEMENTATION = "signal.alarm()"
    except ImportError:
        pass
59 |
# Used only if no implementation was selected before: limit the CPU time
# with resource.setrlimit(). If an import fails, nothing is changed.
if not IMPLEMENTATION:
    try:
        from signal import signal, SIGXCPU  # noqa
        from resource import getrlimit, setrlimit, RLIMIT_CPU

        # resource.setrlimit(RLIMIT_CPU) implementation
        # "Bug": the timeout is 'CPU' time, so time spent in sleep() is
        # not counted
        def limitedTime(second, func, *args, **kw):  # noqa
            """
            Call func(*args, **kw) with a limit of second seconds of CPU
            time; signalHandler() raises Timeout when SIGXCPU is received.
            """
            second = fixTimeout(second)
            # signal() returns the previous handler: keep it to restore it
            old_alarm = signal(SIGXCPU, signalHandler)
            # Current limits, restored in the finally block
            current = getrlimit(RLIMIT_CPU)
            try:
                # Only the first item of the limit is replaced; the second
                # one (current[1]) is kept
                setrlimit(RLIMIT_CPU, (second, current[1]))
                return func(*args, **kw)
            finally:
                setrlimit(RLIMIT_CPU, current)
                signal(SIGXCPU, old_alarm)

        IMPLEMENTATION = "resource.setrlimit(RLIMIT_CPU)"
    except ImportError:
        pass
81 |
--------------------------------------------------------------------------------
/hachoir/parser/archive/mar.py:
--------------------------------------------------------------------------------
1 | """
2 | Microsoft Archive parser
3 |
4 | Author: Victor Stinner
5 | Creation date: 2007-03-04
6 | """
7 |
8 | from hachoir.parser import Parser
9 | from hachoir.field import FieldSet, String, UInt32, SubFile
10 | from hachoir.core.endian import LITTLE_ENDIAN
11 | from hachoir.core.text_handler import textHandler, filesizeHandler, hexadecimal
12 |
13 | MAX_NB_FILE = 100000
14 |
15 |
class FileIndex(FieldSet):
    """Index entry of one file: name, size, CRC32 and data offset."""
    # 56 bytes of filename + three 32-bit integers = 68 bytes
    static_size = 68 * 8

    def createFields(self):
        yield String(self, "filename", 56, truncate="\0", charset="ASCII")
        yield filesizeHandler(UInt32(self, "filesize"))
        yield textHandler(UInt32(self, "crc32"), hexadecimal)
        yield UInt32(self, "offset")

    def createDescription(self):
        filename = self["filename"].value
        filesize = self["filesize"].display
        offset = self["offset"].value
        return "File %s (%s) at %s" % (filename, filesize, offset)
28 |
29 |
class MarFile(Parser):
    """
    Microsoft Archive: header (magic, version, number of files), then
    the file index entries, then the data of the non-empty files.
    """
    MAGIC = b"MARC"
    PARSER_TAGS = {
        "id": "mar",
        "category": "archive",
        "file_ext": ("mar",),
        "min_size": 80 * 8,  # At least one file index
        "magic": ((MAGIC, 0),),
        "description": "Microsoft Archive",
    }
    endian = LITTLE_ENDIAN

    def validate(self):
        """Return True, or a message explaining why the file is rejected."""
        magic = self.stream.readBytes(0, 4)
        if magic != self.MAGIC:
            return "Invalid magic"
        if self["version"].value != 3:
            return "Invalid version"
        nb_file = self["nb_file"].value
        if nb_file < 1 or MAX_NB_FILE < nb_file:
            return "Invalid number of file"
        return True

    def createFields(self):
        yield String(self, "magic", 4, "File signature (MARC)", charset="ASCII")
        yield UInt32(self, "version")
        yield UInt32(self, "nb_file")

        # Index entries: remember the ones having data (non-zero size)
        non_empty = []
        for _ in range(self["nb_file"].value):
            entry = FileIndex(self, "file[]")
            yield entry
            if entry["filesize"].value:
                non_empty.append(entry)

        # File data, in the order of their offset
        for entry in sorted(non_empty, key=lambda item: item["offset"].value):
            padding = self.seekByte(entry["offset"].value)
            if padding:
                yield padding
            size = entry["filesize"].value
            filename = entry["filename"].value
            yield SubFile(self, "data[]", size, "File %s" % filename,
                          filename=filename)
69 |
--------------------------------------------------------------------------------
/hachoir/parser/game/spider_man_video.py:
--------------------------------------------------------------------------------
1 | """
2 | Parser for an obscure FMV file format: bin files from the game
3 | "The Amazing Spider-Man vs. The Kingpin" (Sega CD)
4 |
5 | Author: Mike Melanson
6 | Creation date: 2006-09-30
7 | File samples: http://samples.mplayerhq.hu/game-formats/spiderman-segacd-bin/
8 | """
9 |
10 | from hachoir.parser import Parser
11 | from hachoir.field import FieldSet, UInt32, String, RawBytes
12 | from hachoir.core.endian import BIG_ENDIAN
13 | from hachoir.core.text_handler import textHandler, hexadecimal
14 |
15 |
class Chunk(FieldSet):
    """
    Chunk made of a FourCC, a 32-bit length and optional data. The length
    field gives the size of the whole chunk in bytes, header included.
    """
    # FourCC => (field name, content parser, description)
    tag_info = {
        "CONF": ("conf[]", None, "Configuration header"),
        "AUDI": ("audio[]", None, "Audio chunk"),
        "SYNC": ("sync[]", None, "Start of video frame data"),
        "IVRA": ("ivra[]", None, "Vector codebook (?)"),
        "VRAM": ("video[]", None, "Video RAM tile pattern"),
        "CRAM": ("color[]", None, "Color RAM (palette)"),
        "CEND": ("video_end[]", None, "End of video data"),
        "MEND": ("end_file", None, "End of file"),
    }

    def __init__(self, *args):
        FieldSet.__init__(self, *args)
        self._size = self["length"].value * 8
        fourcc = self["fourcc"].value
        info = self.tag_info.get(fourcc)
        if info is not None:
            # Known chunk: use its name, parser and description
            self._name, self._parser, self._description = info
        else:
            self._parser = None
            self._description = "Unknown chunk: fourcc %s" % self[
                "fourcc"].display

    def createFields(self):
        yield String(self, "fourcc", 4, "FourCC", charset="ASCII")
        yield textHandler(UInt32(self, "length", "length"), hexadecimal)
        # Size of the content: chunk length minus the 8 bytes of header
        size = self["length"].value - 8
        if size > 0:
            if self._parser:
                yield from self._parser(self, size)
            else:
                yield RawBytes(self, "data", size)
48 |
49 |
class SpiderManVideoFile(Parser):
    """Parser of the FMV files: a sequence of chunks until end of file."""
    PARSER_TAGS = {
        "id": "spiderman_video",
        "category": "game",
        "file_ext": ("bin",),
        "min_size": 8 * 8,
        "description": "The Amazing Spider-Man vs. The Kingpin (Sega CD) FMV video"
    }

    endian = BIG_ENDIAN

    def validate(self):
        # The file has to start with a "CONF" chunk
        magic = self.stream.readBytes(0, 4)
        return magic == b'CONF'

    def createFields(self):
        while not self.eof:
            chunk = Chunk(self, "chunk[]")
            yield chunk
67 |
--------------------------------------------------------------------------------
/tools/fuzzer/mangle.py:
--------------------------------------------------------------------------------
1 | from random import randint, choice as random_choice
2 | from array import array
3 |
4 | MAX_MIX = 20
5 | MIN_MIX = -MAX_MIX
6 | MIN_COUNT = 15
7 | MAX_COUNT = 2500
8 | MAX_INC = 32
9 | MIN_INC = -MAX_INC
10 |
# Special byte patterns of 1, 2 and 4 bytes, each written in a single
# byte order. They have to be bytes and not str: mangle_special_value()
# passes them to array("B", ...), which rejects str in Python 3.
SPECIAL_VALUES_NOENDIAN = (
    b"\x00",
    b"\x00\x00",
    b"\x7f",
    b"\x7f\xff",
    b"\x7f\xff\xff\xff",
    b"\x80",
    b"\x80\x00",
    b"\x80\x00\x00\x00",
    b"\xfe",
    b"\xfe\xff",
    b"\xfe\xff\xff\xff",
    b"\xff",
    b"\xff\xff",
    b"\xff\xff\xff\xff",
)

# Each pattern, followed by its byte-reversed form when it is different
SPECIAL_VALUES = []
for item in SPECIAL_VALUES_NOENDIAN:
    SPECIAL_VALUES.append(item)
    itemb = item[::-1]
    if item != itemb:
        SPECIAL_VALUES.append(itemb)
34 |
35 |
def mangle_replace(data, offset):
    """Replace the item at offset by a random value in range 0..255."""
    new_value = randint(0, 255)
    data[offset] = new_value
38 |
39 |
def mangle_increment(data, offset):
    """
    Add a random delta in range MIN_INC..MAX_INC to the item at offset;
    the result is clamped to the range 0..255.
    """
    delta = randint(MIN_INC, MAX_INC)
    value = data[offset] + delta
    data[offset] = max(min(value, 255), 0)
43 |
44 |
def mangle_bit(data, offset):
    """
    Pick a random bit (0..7) of the item at offset, then randomly either
    set it or clear it.
    """
    mask = 1 << randint(0, 7)
    if randint(0, 1) == 1:
        data[offset] |= mask
    else:
        data[offset] &= ~mask & 0xFF
52 |
53 |
def mangle_special_value(data, offset):
    """
    Overwrite data, starting at offset, with a random entry of
    SPECIAL_VALUES, truncated so that it doesn't go past the end.
    """
    remaining = len(data) - offset
    special = random_choice(SPECIAL_VALUES)[:remaining]
    end = offset + len(special)
    data[offset:end] = array("B", special)
58 |
59 |
def mangle_mix(data, ofs1):
    """
    Swap the item at ofs1 with an item at a random distance in range
    MIN_MIX..MAX_MIX; the second offset is clamped to the valid indexes.
    """
    last = len(data) - 1
    ofs2 = ofs1 + randint(MIN_MIX, MAX_MIX)
    ofs2 = max(min(ofs2, last), 0)
    first, second = data[ofs1], data[ofs2]
    data[ofs1], data[ofs2] = second, first
64 |
65 |
66 | MANGLE_OPERATIONS = (
67 | mangle_replace,
68 | mangle_increment,
69 | mangle_bit,
70 | mangle_special_value,
71 | mangle_mix,
72 | )
73 |
74 |
def mangle(data, percent, min_count=MIN_COUNT, max_count=MAX_COUNT):
    """
    Mangle data in place: apply a random number of random operations
    (see MANGLE_OPERATIONS) at random offsets of the input byte array.

    percent is clamped to the range 0.0001..1.0 and applied to the length
    of data to get the upper bound of the number of operations, which is
    then clamped to the range min_count..max_count.

    Return the number of operations applied.

    This function is based on an idea of Ilja van Sprundel (file mangle.c).
    """
    last_offset = len(data) - 1
    ratio = max(min(percent, 1.0), 0.0001)
    upper = int(float(len(data)) * ratio)
    upper = min(max(upper, min_count), max_count)
    count = randint(1, upper)
    for _ in range(count):
        operation = random_choice(MANGLE_OPERATIONS)
        offset = randint(0, last_offset)
        operation(data, offset)
    return count
92 |
--------------------------------------------------------------------------------
/tests/test_doc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import doctest
3 | import hachoir.core.i18n # noqa: import it because it does change the locale
4 | from hachoir.test import setup_tests
5 | import os
6 | import unittest
7 |
8 | ROOT = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
9 |
10 |
def importModule(name):
    """
    Import the module called name (dotted names are supported) and return
    the module itself: __import__() returns the top-level package, so the
    remaining components are resolved with getattr().
    """
    module = __import__(name)
    for attribute in name.split('.')[1:]:
        module = getattr(module, attribute)
    return module
17 |
18 |
class TestDoc(unittest.TestCase):
    """Run the doctests of the documentation files and of some modules."""
    verbose = False

    def check_doc(self, filename, subdir=None, name=None):
        """
        Run the doctests of the file filename, searched in subdir or by
        default in ../doc; fail the test if a doctest fails.
        """
        if self.verbose:
            print("--- %s: Run tests" % filename)
        directory = subdir if subdir else os.path.join('..', 'doc')
        fullpath = os.path.join(directory, filename)
        results = doctest.testfile(
            fullpath, optionflags=doctest.ELLIPSIS, name=name)
        # results is the pair (number of failures, number of tests)
        if results[0]:
            self.fail("error")
        if self.verbose:
            print("--- %s: End of tests" % filename)

    def check_module(self, name):
        """Run the doctests of the module name; fail if a doctest fails."""
        if self.verbose:
            print("--- Test module %s" % name)
        results = doctest.testmod(importModule(name))
        if results[0]:
            self.fail("error")
        if self.verbose:
            print("--- End of test")

    def test_doc_directory(self):
        for filename in ('developer.rst', 'internals.rst', 'regex.rst'):
            self.check_doc(filename)

    def test_tests_directory(self):
        self.check_doc('regex_regression.rst', subdir='.')

    def test_hachoir_core(self):
        for module in ("hachoir.core.bits",
                       "hachoir.core.dict",
                       "hachoir.core.i18n",
                       "hachoir.core.text_handler",
                       "hachoir.core.tools"):
            self.check_module(module)

    def test_hachoir_metadata(self):
        for module in ("hachoir.metadata.metadata",
                       "hachoir.metadata.setter"):
            self.check_module(module)

    def test_hachoir_regex(self):
        for module in ("hachoir.regex.parser",
                       "hachoir.regex.regex",
                       "hachoir.regex.pattern"):
            self.check_module(module)
70 |
71 | if __name__ == "__main__":
72 | setup_tests()
73 | unittest.main()
74 |
--------------------------------------------------------------------------------
/hachoir/field/fake_array.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | from hachoir.field import MissingField
3 |
4 |
class FakeArray:
    """
    Simulate an array for GenericFieldSet.array():
    fieldset.array("item")[0] is equivalent to fieldset["item[0]"].

    It's possible to iterate over the items using::

        for element in fieldset.array("item"):
            ...

    And to get array size using len(fieldset.array("item")).

    The items are expected to be numbered from 0 without any hole: the
    existence of item[n] is taken as a proof that item[0]..item[n-1] exist.
    """

    def __init__(self, fieldset, name):
        """
        name is the name of the items without the "[index]" suffix. It may
        be a path ("a/b/item"): the array then belongs to fieldset["a/b"].
        """
        pos = name.rfind("/")
        if pos != -1:
            self.fieldset = fieldset[name[:pos]]
            self.name = name[pos + 1:]
        else:
            self.fieldset = fieldset
            self.name = name
        # Format of the field names: "name[%u]"
        self._format = "%s[%%u]" % self.name
        # index => field, for the fields already fetched
        self._cache = {}
        # True once the first missing index has been found
        self._known_size = False
        # Largest index known to exist (-1: none yet)
        self._max_index = -1

    def __bool__(self):
        "Is the array empty or not?"
        if self._cache:
            return True
        else:
            return (0 in self)

    def __len__(self):
        "Number of fields in the array"
        total = self._max_index + 1
        if not self._known_size:
            # Probe the next indexes until the first missing field
            for index in itertools.count(total):
                try:
                    self[index]
                    total += 1
                except MissingField:
                    break
        return total

    def __contains__(self, index):
        try:
            self[index]
            return True
        except MissingField:
            return False

    def __getitem__(self, index):
        """
        Get a field of the array. Returns a field, or raise MissingField
        exception if the field doesn't exist.
        """
        try:
            value = self._cache[index]
        except KeyError:
            try:
                value = self.fieldset[self._format % index]
            except MissingField:
                # Only a miss just after the last known item tells where
                # the array ends. A miss at a larger index (for example
                # array[100] when only array[0] has been fetched) proves
                # nothing about the items in between: marking the size as
                # known there would make len() return a too small value.
                if index == self._max_index + 1:
                    self._known_size = True
                raise
            self._cache[index] = value
            self._max_index = max(index, self._max_index)
        return value

    def __iter__(self):
        """
        Iterate in the fields in their index order: field[0], field[1], ...
        """
        for index in itertools.count(0):
            try:
                yield self[index]
            except MissingField:
                break
83 |
--------------------------------------------------------------------------------
/hachoir/parser/archive/mozilla_ar.py:
--------------------------------------------------------------------------------
1 | """MAR (Mozilla ARchive) parser
2 |
3 | Author: Robert Xiao
4 | Creation date: July 10, 2007
5 |
6 | """
7 |
8 | from hachoir.core.endian import BIG_ENDIAN
9 | from hachoir.field import (RootSeekableFieldSet, FieldSet,
10 | String, CString, UInt32, RawBytes)
11 | from hachoir.core.text_handler import displayHandler, filesizeHandler
12 | from hachoir.core.tools import humanUnixAttributes
13 | from hachoir.parser import HachoirParser
14 |
15 |
class IndexEntry(FieldSet):
    """
    One entry of the archive index: offset, length, flags and file name.
    """

    def createFields(self):
        yield UInt32(self, "offset", "Offset in bytes relative to start of archive")
        length_field = UInt32(self, "length", "Length in bytes")
        yield filesizeHandler(length_field)
        flags_field = UInt32(self, "flags")
        yield displayHandler(flags_field, humanUnixAttributes)
        yield CString(self, "name", "Filename (byte array)")

    def createDescription(self):
        # Same order as the placeholders: name, length, flags.
        shown = tuple(self[key].display for key in ("name", "length", "flags"))
        return 'File %s, Size %s, Mode %s' % shown
27 |
28 |
class MozillaArchive(HachoirParser, RootSeekableFieldSet):
    """
    MAR (Mozilla ARchive) parser: signature, offset of the index, then the
    index entries, each one followed in the field list by the raw bytes of
    the file it points to.
    """
    MAGIC = b"MAR1"
    PARSER_TAGS = {
        "id": "mozilla_ar",
        "category": "archive",
        "file_ext": ("mar",),
        # Header, Index Header, 1 Index Entry
        "min_size": (8 + 4 + 13) * 8,
        "magic": ((MAGIC, 0),),
        "description": "Mozilla Archive",
    }
    endian = BIG_ENDIAN

    def __init__(self, stream, **args):
        total_size = stream.askSize(self)
        RootSeekableFieldSet.__init__(
            self, None, "root", stream, None, total_size)
        HachoirParser.__init__(self, stream, **args)

    def validate(self):
        """Return True if the stream starts with MAGIC, else an error text."""
        if self.stream.readBytes(0, 4) == self.MAGIC:
            return True
        return "Invalid magic"

    def createFields(self):
        yield String(self, "magic", 4, "File signature (MAR1)", charset="ASCII")
        yield UInt32(self, "index_offset", "Offset to index relative to file start")
        self.seekByte(self["index_offset"].value, False)
        yield UInt32(self, "index_size", "size of index in bytes")

        index_bytes_read = 0
        while index_bytes_read < self["index_size"].value:
            # The entries start right after the 4-byte index_size field.
            entry_address = (self["index_offset"].value
                             + index_bytes_read + 4)
            self.seekByte(entry_address, False)
            entry = IndexEntry(self, "index_entry[]")
            yield entry
            index_bytes_read += entry.size // 8

            # Jump to the data described by the entry just parsed.
            self.seekByte(entry["offset"].value, False)
            yield RawBytes(self, "file[]", entry["length"].value)
66 |
--------------------------------------------------------------------------------
/hachoir/parser/image/psd.py:
--------------------------------------------------------------------------------
1 | """
2 | Photoshop parser (.psd file).
3 |
4 | Creation date: 8 january 2006
5 | Author: Victor Stinner
6 | """
7 |
8 | from hachoir.parser import Parser
9 | from hachoir.field import (FieldSet,
10 | UInt16, UInt32, String, NullBytes, Enum, RawBytes)
11 | from hachoir.core.endian import BIG_ENDIAN
12 | from hachoir.parser.image.photoshop_metadata import Photoshop8BIM
13 |
14 |
class Config(FieldSet):
    """
    Block made of a 32-bit "size" field followed by Photoshop8BIM items
    until the end of the block.
    """

    def __init__(self, *args):
        FieldSet.__init__(self, *args)
        # Total size in bits: the payload plus the 4-byte "size" field.
        payload_bytes = self["size"].value
        self._size = 8 * (payload_bytes + 4)

    def createFields(self):
        yield UInt32(self, "size")
        while True:
            if self.eof:
                break
            yield Photoshop8BIM(self, "item[]")
25 |
26 |
class PsdFile(Parser):
    """
    Photoshop (PSD) picture parser: fixed header, optional mode data,
    resources block, optional reserved data, compression field, then the
    rest of the file as raw bytes.
    """
    endian = BIG_ENDIAN
    PARSER_TAGS = {
        "id": "psd",
        "category": "image",
        "file_ext": ("psd",),
        "mime": ("image/psd", "image/photoshop", "image/x-photoshop"),
        "min_size": 4 * 8,
        "magic": ((b"8BPS\0\1", 0),),
        "description": "Photoshop (PSD) picture",
    }
    COLOR_MODE = {
        0: "Bitmap",
        1: "Grayscale",
        2: "Indexed",
        3: "RGB color",
        4: "CMYK color",
        7: "Multichannel",
        8: "Duotone",
        9: "Lab Color",
    }
    COMPRESSION_NAME = {
        0: "Raw data",
        1: "RLE",
    }

    def validate(self):
        """Return True if the file starts with "8BPS", else an error text."""
        signature = self.stream.readBytes(0, 4)
        if signature == b"8BPS":
            return True
        return "Invalid signature"

    def createFields(self):
        # Fixed-size header
        yield String(self, "signature", 4, "PSD signature (8BPS)", charset="ASCII")
        yield UInt16(self, "version")
        yield NullBytes(self, "reserved[]", 6)
        yield UInt16(self, "nb_channels")
        yield UInt32(self, "width")
        yield UInt32(self, "height")
        yield UInt16(self, "depth")
        yield Enum(UInt16(self, "color_mode"), self.COLOR_MODE)

        # Mode data: emitted only when its size is not zero
        yield UInt32(self, "mode_data_size")
        mode_data_len = self["mode_data_size"].value
        if mode_data_len:
            yield RawBytes(self, "mode_data", mode_data_len)

        # Resources
        yield Config(self, "config")

        # Reserved data: emitted only when its size is not zero
        yield UInt32(self, "reserved_data_size")
        reserved_len = self["reserved_data_size"].value
        if reserved_len:
            yield RawBytes(self, "reserved_data", reserved_len)

        yield Enum(UInt16(self, "compression"), self.COMPRESSION_NAME)

        # Whatever is left, in whole bytes
        remaining = (self.size - self.current_size) // 8
        if remaining:
            yield RawBytes(self, "end", remaining)
88 |
--------------------------------------------------------------------------------
/hachoir/core/memory.py:
--------------------------------------------------------------------------------
1 | import gc
2 |
# ---- Default implementation when resource is missing ----------------------
PAGE_SIZE = 4096


def getMemoryLimit():
    """
    Return the current memory limit in bytes, or None on error.

    This fallback has no way to read a limit and always returns None.
    """
    return None
14 |
15 |
def setMemoryLimit(max_mem):
    """
    Set the memory limit in bytes; None disables the limit.

    Return True if the limit is set, False on error. This fallback cannot
    set any limit and always returns False.
    """
    return False
24 |
25 |
def getMemorySize():
    """
    Read current process memory size: size of available virtual memory.
    This value is NOT the real memory usage.

    This function only works on Linux (use /proc/self/statm file).

    Return the first value of /proc/self/statm multiplied by PAGE_SIZE,
    or None if the file cannot be read.
    """
    try:
        # The context manager closes the file even if readline() fails.
        with open('/proc/self/statm') as statm_file:
            statm = statm_file.readline().split()
    except IOError:
        return None
    return int(statm[0]) * PAGE_SIZE
38 |
39 |
def clearCaches():
    """
    Try to free memory held by caches: run the Python garbage collector.
    """
    # Purging the "re" module cache (re.purge()) is deliberately left out.
    gc.collect()
46 |
47 |
try:
    # ---- 'resource' implementation -----------------------------------
    from resource import getpagesize, getrlimit, setrlimit, RLIMIT_AS

    PAGE_SIZE = getpagesize()

    def getMemoryLimit():   # noqa
        """
        Return the soft RLIMIT_AS limit in bytes, or None on error.

        The value is returned exactly as getrlimit() gives it: RLIMIT_AS
        is already expressed in bytes, so it must not be scaled by the
        page size. Returning it unchanged also means that
        setMemoryLimit(getMemoryLimit()) restores the same limit.
        """
        try:
            return getrlimit(RLIMIT_AS)[0]
        except ValueError:
            return None

    def setMemoryLimit(max_mem):   # noqa
        """
        Set the soft RLIMIT_AS limit to max_mem bytes (None: no limit).

        Return True if the limit is set, False if setrlimit() rejects it.
        """
        if max_mem is None:
            max_mem = -1
        try:
            setrlimit(RLIMIT_AS, (max_mem, -1))
        except ValueError:
            return False
        return True
except ImportError:
    pass
73 |
74 |
def limitedMemory(limit, func, *args, **kw):
    """
    Call func(*args, **kw) while allowing the process memory to grow by at
    most 'limit' bytes, and return its result.

    Use try/except MemoryError to catch the error.
    """
    # Free what can be freed before measuring the process size
    clearCaches()

    limited = False
    current_size = getMemorySize()
    if current_size is not None:
        # Remember the previous limit, then install the temporary one
        old_limit = getMemoryLimit()
        limit = current_size + limit
        limited = setMemoryLimit(limit)

    try:
        return func(*args, **kw)
    finally:
        # Put the previous limit back if ours was installed
        if limited:
            setMemoryLimit(old_limit)

        # and drop whatever the call left in the caches
        clearCaches()
105 |
--------------------------------------------------------------------------------
/hachoir/field/sub_file.py:
--------------------------------------------------------------------------------
1 | from hachoir.field import Bytes
2 | from hachoir.core.tools import makePrintable, humanFilesize
3 | from hachoir.stream import InputIOStream
4 |
5 |
class SubFile(Bytes):
    """
    File stored in another file
    """

    def __init__(self, parent, name, length, description=None,
                 parser=None, filename=None, mime_type=None, parser_class=None):
        if filename:
            if not isinstance(filename, str):
                filename = makePrintable(filename, "ISO-8859-1")
            if not description:
                human_size = humanFilesize(length)
                description = 'File "%s" (%s)' % (filename, human_size)
        Bytes.__init__(self, parent, name, length, description)

        def createInputStream(cis, **args):
            # Add to the "tags" argument every hint given to the
            # constructor, then build the stream.
            hints = args.setdefault("tags", [])
            if parser_class:
                hints.append(("class", parser_class))
            if parser is not None:
                parser_id = parser.PARSER_TAGS["id"]
                hints.append(("id", parser_id))
            if mime_type:
                hints.append(("mime", mime_type))
            if filename:
                hints.append(("filename", filename))
            return cis(**args)

        self.setSubIStream(createInputStream)
33 |
34 |
class CompressedStream:
    """
    File-like reader: read(size) pulls data from 'stream', passes it through
    the decompressor and returns up to 'size' bytes. Decompressed data
    beyond the request is kept for the next call.
    """
    # Read position in the source stream; advanced by 8 per byte read
    # (presumably a position in bits -- confirm against the stream API).
    offset = 0

    def __init__(self, stream, decompressor):
        self.stream = stream
        self.decompressor = decompressor(stream)
        self._buffer = b''

    def read(self, size):
        chunk = self._buffer
        pieces = [chunk[:size]]
        size -= len(chunk)
        if size > 0:
            # First ask the decompressor for data without new input
            chunk = self.decompressor(size)
            pieces.append(chunk[:size])
            size -= len(chunk)
            while size > 0:
                step = 4096
                if self.stream.size:
                    step = min(self.stream.size - self.offset, step)
                    if not step:
                        # Nothing left in the source stream
                        break
                chunk = self.stream.read(self.offset, step)[1]
                self.offset += 8 * len(chunk)
                chunk = self.decompressor(size, chunk)
                pieces.append(chunk[:size])
                size -= len(chunk)
        # size + len(chunk) is how much of the last chunk was handed out;
        # the rest is saved for the next read().
        self._buffer = chunk[size + len(chunk):]
        return b''.join(pieces)
64 |
65 |
def CompressedField(field, decompressor):
    """
    Give 'field' a sub input stream which reads its content through
    'decompressor', and return the same field.
    """
    def createInputStream(cis, source=None, **args):
        if not field._parent:
            stream = field.stream
        else:
            stream = cis(source=source)
            args.setdefault("tags", []).extend(stream.tags)
        compressed = CompressedStream(stream, decompressor)
        if source is None:
            source = "Compressed source: '%s' (offset=%s)" % (
                stream.source, field.absolute_address)
        return InputIOStream(compressed, source=source, **args)

    field.setSubIStream(createInputStream)
    return field
80 |
--------------------------------------------------------------------------------
/hachoir/parser/image/pcx.py:
--------------------------------------------------------------------------------
1 | """
2 | PCX picture filter.
3 | """
4 |
5 | from hachoir.parser import Parser
6 | from hachoir.field import (
7 | UInt8, UInt16,
8 | PaddingBytes, RawBytes,
9 | Enum)
10 | from hachoir.parser.image.common import PaletteRGB
11 | from hachoir.core.endian import LITTLE_ENDIAN
12 |
13 |
class PcxFile(Parser):
    """
    PC Paintbrush (PCX) picture parser: 128-byte header, image data and,
    for 8 bits/pixel pictures, a 256-color palette at the end of the file.
    """
    endian = LITTLE_ENDIAN
    PARSER_TAGS = {
        "id": "pcx",
        "category": "image",
        "file_ext": ("pcx",),
        "mime": ("image/x-pcx",),
        "min_size": 128 * 8,
        "description": "PC Paintbrush (PCX) picture"
    }
    compression_name = {1: "Run-length encoding (RLE)"}
    version_name = {
        0: "Version 2.5 of PC Paintbrush",
        2: "Version 2.8 with palette information",
        3: "Version 2.8 without palette information",
        4: "PC Paintbrush for Windows",
        5: "Version 3.0 (or greater) of PC Paintbrush"
    }

    def validate(self):
        """Return True if the header looks valid, else an error text."""
        if self["id"].value != 10:
            return "Wrong signature"
        version = self["version"].value
        if version not in self.version_name:
            return "Unknown format version"
        bits_per_pixel = self["bpp"].value
        if bits_per_pixel not in (1, 2, 4, 8, 24, 32):
            return "Unknown bits/pixel"
        if self["reserved[0]"].value != b"\0":
            return "Invalid reserved value"
        return True

    def createFields(self):
        # Header
        yield UInt8(self, "id", "PCX identifier (10)")
        yield Enum(UInt8(self, "version", "PCX version"), self.version_name)
        yield Enum(UInt8(self, "compression", "Compression method"), self.compression_name)
        yield UInt8(self, "bpp", "Bits / pixel")
        yield UInt16(self, "xmin", "Minimum X")
        yield UInt16(self, "ymin", "Minimum Y")
        yield UInt16(self, "xmax", "Width minus one")  # value + 1
        yield UInt16(self, "ymax", "Height minus one")  # value + 1
        yield UInt16(self, "horiz_dpi", "Horizontal DPI")
        yield UInt16(self, "vert_dpi", "Vertical DPI")
        yield PaletteRGB(self, "palette_4bits", 16, "Palette (4 bits)")
        yield PaddingBytes(self, "reserved[]", 1)
        yield UInt8(self, "nb_color_plan", "Number of color plans")
        yield UInt16(self, "bytes_per_line", "Bytes per line")
        yield UInt16(self, "color_mode", "Color mode")
        yield PaddingBytes(self, "reserved[]", 58)

        # The size of the image data is computed from the total size
        if self._size is None:  # TODO: is it possible to handle piped input?
            raise NotImplementedError

        palette_entries = 256
        data_len = (self._size - self.current_size) // 8
        with_palette = (self["bpp"].value == 8)
        if with_palette:
            # 3 bytes per palette entry are kept for the trailing palette
            data_len -= palette_entries * 3
        yield RawBytes(self, "image_data", data_len, "Image data")

        if with_palette:
            yield PaletteRGB(self, "palette_8bits", palette_entries, "Palette (8 bit)")
74 |
--------------------------------------------------------------------------------
/hachoir/parser/image/tiff.py:
--------------------------------------------------------------------------------
1 | """
2 | TIFF image parser.
3 |
4 | Authors: Victor Stinner, Sebastien Ponce, Robert Xiao
5 | Creation date: 30 september 2006
6 | """
7 |
8 | from hachoir.parser import Parser
9 | from hachoir.field import SeekableFieldSet, RootSeekableFieldSet, Bytes
10 | from hachoir.core.endian import LITTLE_ENDIAN, BIG_ENDIAN
11 | from hachoir.parser.image.exif import TIFF, IFD
12 |
13 |
def getStrips(ifd):
    """
    Generate (offset, byte count) pairs for the image data of an IFD.

    The entries of ifd.array('entry') are indexed by the display text of
    their tag. Pairs are produced first from StripOffsets/StripByteCounts,
    then from TileOffsets/TileByteCounts; a pair of tags is used only if
    both tags are present. Nothing is generated if neither pair is.
    """
    entries = {entry['tag'].display: entry for entry in ifd.array('entry')}
    tag_pairs = (
        ("StripOffsets", "StripByteCounts"),
        ("TileOffsets", "TileByteCounts"),
    )
    for offsets_tag, counts_tag in tag_pairs:
        if offsets_tag in entries and counts_tag in entries:
            offsets = ifd.getEntryValues(entries[offsets_tag])
            counts = ifd.getEntryValues(entries[counts_tag])
            for offset, count in zip(offsets, counts):
                yield offset.value, count.value
31 |
32 |
class ImageFile(SeekableFieldSet):
    """
    Image data of one IFD: a "strip[]" Bytes field for each (offset, byte
    count) pair given by getStrips().
    """

    def __init__(self, parent, name, description, ifd):
        SeekableFieldSet.__init__(self, parent, name, description, None)
        self._ifd = ifd

    def createFields(self):
        for strip_offset, strip_length in getStrips(self._ifd):
            self.seekByte(strip_offset, relative=False)
            yield Bytes(self, "strip[]", strip_length)
44 |
45 |
class TiffFile(RootSeekableFieldSet, Parser):
    """
    TIFF picture parser: the fields generated by TIFF(), then one
    "image[]" field set for each IFD that references image data.
    """
    PARSER_TAGS = {
        "id": "tiff",
        "category": "image",
        "file_ext": ("tif", "tiff"),
        "mime": ("image/tiff",),
        "min_size": 8 * 8,
        "magic": ((b"II\x2A\0", 0), (b"MM\0\x2A", 0)),
        "description": "TIFF picture"
    }

    # Correct endian is set in constructor
    endian = LITTLE_ENDIAN

    def __init__(self, stream, **args):
        RootSeekableFieldSet.__init__(
            self, None, "root", stream, None, stream.askSize(self))
        # A file starting with "MM" is big endian
        if self.stream.readBytes(0, 2) == b"MM":
            self.endian = BIG_ENDIAN
        Parser.__init__(self, stream, **args)

    def validate(self):
        """Return True if the header looks valid, else an error text."""
        endian = self.stream.readBytes(0, 2)
        if endian not in (b"MM", b"II"):
            return "Invalid endian (%r)" % endian
        if self["version"].value != 42:
            return "Unknown TIFF version"
        return True

    def createFields(self):
        yield from TIFF(self)

        for ifd in self:
            if not isinstance(ifd, IFD):
                continue
            offsets = [offset for offset, _ in getStrips(ifd)]
            if not offsets:
                # This IFD has no strip or tile offsets: there is no image
                # data to seek to, and min() of an empty sequence would
                # raise ValueError.
                continue
            self.seekByte(min(offsets), relative=False)
            yield ImageFile(self, "image[]", "Image File", ifd)
85 |
--------------------------------------------------------------------------------
/hachoir/metadata/csv.py:
--------------------------------------------------------------------------------
1 | from hachoir.parser import createParser
2 | from hachoir.core.tools import makePrintable
3 | from hachoir.metadata import extractMetadata
4 | from hachoir.core.i18n import initLocale
5 | from sys import argv, stderr, exit
6 | from os import walk
7 | from os.path import join as path_join
8 | from fnmatch import fnmatch
9 | import codecs
10 |
OUTPUT_FILENAME = "metadata.csv"


class Extractor:
    """
    Extract metadata fields from every '*.doc' file found under a directory
    and write one line per file into OUTPUT_FILENAME.
    """

    def __init__(self, directory, fields):
        """
        directory: root of the tree to scan.
        fields: names of the metadata fields written on each line.
        """
        self.directory = directory
        self.fields = fields
        self.charset = "UTF-8"
        self.total = 0
        self.invalid = 0

    def main(self):
        """Process all files, write the output file, then print a summary."""
        # The context manager closes the output file even if the
        # processing of a file raises an exception.
        with codecs.open(OUTPUT_FILENAME, "w", self.charset) as output:
            for filename in self.findFiles(self.directory, '*.doc'):
                self.total += 1
                line = self.processFile(filename)
                if line:
                    print(line, file=output)
                else:
                    self.invalid += 1
        self.summary()

    def summary(self):
        """Print the file counters and the output file name to stderr."""
        print(file=stderr)
        print("Valid files: %s" % (self.total - self.invalid), file=stderr)
        print("Invalid files: %s" % self.invalid, file=stderr)
        print("Total files: %s" % self.total, file=stderr)
        print(file=stderr)
        print("Result written into %s" % OUTPUT_FILENAME, file=stderr)

    def findFiles(self, directory, pattern):
        """
        Generate the path of every file under directory whose lowercased
        name matches the fnmatch pattern.
        """
        for dirpath, _dirnames, filenames in walk(directory):
            for filename in filenames:
                if fnmatch(filename.lower(), pattern):
                    yield path_join(dirpath, filename)

    def processFile(self, filename):
        """
        Return the output line for one file: its name followed by the text
        of each requested field, joined with '; '. Return None if the file
        cannot be parsed or has no metadata.
        """
        print("[%s] Process file %s..." % (self.total, filename))
        parser = createParser(filename)
        if not parser:
            print("Unable to parse file", file=stderr)
            return None
        try:
            metadata = extractMetadata(parser)
        except Exception as err:
            # Best effort: report the error and count the file as invalid
            print("Metadata extraction error: %s" % str(err), file=stderr)
            return None
        if not metadata:
            print("Unable to extract metadata", file=stderr)
            return None

        line = [makePrintable(filename, self.charset)]
        line.extend(
            makePrintable(metadata.getText(field, ''), self.charset)
            for field in self.fields)
        return '; '.join(line)
72 |
73 |
def main():
    """
    Command line entry point: expects a directory and a comma-separated
    list of field names, then runs the Extractor.
    """
    initLocale()
    if len(argv) != 3:
        program = argv[0]
        print("usage: %s directory fields" % program, file=stderr)
        print(file=stderr)
        print("eg. %s . title,creation_date" % program, file=stderr)
        exit(1)
    directory, field_list = argv[1], argv[2]
    fields = [name.strip() for name in field_list.split(",")]
    Extractor(directory, fields).main()
84 |
--------------------------------------------------------------------------------
/hachoir/parser/program/prc.py:
--------------------------------------------------------------------------------
1 | """
2 | PRC (Palm resource) parser.
3 |
4 | Author: Sebastien Ponce
5 | Creation date: 29 october 2008
6 | """
7 |
8 | from hachoir.parser import Parser
9 | from hachoir.field import (FieldSet,
10 | UInt16, UInt32, TimestampMac32,
11 | String, RawBytes)
12 | from hachoir.core.endian import BIG_ENDIAN
13 |
14 |
class PRCHeader(FieldSet):
    """File header: a 32-byte name followed by 13 fixed-size fields."""
    static_size = 78 * 8

    def createFields(self):
        yield String(self, "name", 32, "Name")
        # (field class, name, description) in file order
        layout = (
            (UInt16, "flags", "Flags"),
            (UInt16, "version", "Version"),
            (TimestampMac32, "create_time", "Creation time"),
            (TimestampMac32, "mod_time", "Modification time"),
            (TimestampMac32, "backup_time", "Backup time"),
            (UInt32, "mod_num", "mod num"),
            (UInt32, "app_info", "app info"),
            (UInt32, "sort_info", "sort info"),
            (UInt32, "type", "type"),
            (UInt32, "id", "id"),
            (UInt32, "unique_id_seed", "unique_id_seed"),
            (UInt32, "next_record_list", "next_record_list"),
            (UInt16, "num_records", "num_records"),
        )
        for field_class, name, description in layout:
            yield field_class(self, name, description)
33 |
34 |
class ResourceHeader(FieldSet):
    """Header of one resource: 4-byte name, 16-bit flags, 32-bit offset."""
    static_size = 10 * 8

    def createFields(self):
        yield String(self, "name", 4, "Name of the resource")
        yield UInt16(self, "flags", "ID number of the resource")
        yield UInt32(self, "offset", "Pointer to the resource data")

    def createDescription(self):
        # NOTE(review): the "name" field object itself is formatted here,
        # not its .value or .display -- confirm this is intended.
        name_field = self["name"]
        return "Resource Header (%s)" % name_field
45 |
46 |
class PRCFile(Parser):
    """
    Palm resource (PRC) parser: file header, one resource header per
    record, two place holder bytes, then the data of each resource.
    """
    PARSER_TAGS = {
        "id": "prc",
        "category": "program",
        "file_ext": ("prc", ""),
        "min_size": ResourceHeader.static_size,  # At least one program header
        "mime": (
            "application/x-pilot-prc",
            "application/x-palmpilot"),
        "description": "Palm Resource File"
    }
    endian = BIG_ENDIAN

    def validate(self):
        # FIXME: Implement the validation function!
        return False

    def createFields(self):
        # Parse header and program headers
        yield PRCHeader(self, "header", "Header")
        offsets = []
        for _ in range(self["header/num_records"].value):
            res_header = ResourceHeader(self, "res_header[]")
            offsets.append(res_header["offset"].value)
            yield res_header

        # A resource runs up to the offset of the next one; the last one
        # runs up to the end of the file. Without any record there is no
        # resource at all (and no "res_header[0]" to describe one).
        lengths = [end - start for start, end in zip(offsets, offsets[1:])]
        if offsets:
            lengths.append(self.size // 8 - offsets[-1])

        yield UInt16(self, "placeholder", "Place holder bytes")
        for index, length in enumerate(lengths):
            res_name = self["res_header[" + str(index) + "]/name"].value
            yield RawBytes(self, "res[]", length, '"' + res_name + "\" Resource")

    def createDescription(self):
        return "Palm Resource file"
85 |
--------------------------------------------------------------------------------
/hachoir/parser/misc/hlp.py:
--------------------------------------------------------------------------------
1 | """
2 | Microsoft Windows Help (HLP) parser for Hachoir project.
3 |
4 | Documents:
5 | - Windows Help File Format / Annotation File Format / SHG and MRB File Format
6 | written by M. Winterhoff (100326.2776@compuserve.com)
7 | found on http://www.wotsit.org/
8 |
9 | Author: Victor Stinner
10 | Creation date: 2007-09-03
11 | """
12 |
13 | from hachoir.parser import Parser
14 | from hachoir.field import (FieldSet,
15 | Bits, Int32, UInt16, UInt32,
16 | NullBytes, RawBytes, PaddingBytes, String)
17 | from hachoir.core.endian import LITTLE_ENDIAN
18 | from hachoir.core.text_handler import (textHandler, hexadecimal,
19 | displayHandler, humanFilesize)
20 |
21 |
class FileEntry(FieldSet):
    """
    Entry whose total size is given by its first field ("res_space", in
    bytes): space information, B+ tree parameters, then padding up to the
    reserved size.
    """

    def __init__(self, *args, **kw):
        FieldSet.__init__(self, *args, **kw)
        reserved_bytes = self["res_space"].value
        self._size = reserved_bytes * 8

    def createFields(self):
        yield displayHandler(UInt32(self, "res_space", "Reserved space"), humanFilesize)
        yield displayHandler(UInt32(self, "used_space", "Used space"), humanFilesize)
        yield Bits(self, "file_flags", 8, "(=4)")

        # B+ tree description
        yield textHandler(UInt16(self, "magic"), hexadecimal)
        yield Bits(self, "flags", 16)
        yield displayHandler(UInt16(self, "page_size", "Page size in bytes"), humanFilesize)
        yield String(self, "structure", 16, strip="\0", charset="ASCII")
        yield NullBytes(self, "zero", 2)
        yield UInt16(self, "nb_page_splits", "Number of page splits B+ tree has suffered")
        yield UInt16(self, "root_page", "Page number of B+ tree root page")
        yield PaddingBytes(self, "one", 2, pattern="\xFF")
        yield UInt16(self, "nb_page", "Number of B+ tree pages")
        yield UInt16(self, "nb_level", "Number of levels of B+ tree")
        yield UInt16(self, "nb_entry", "Number of entries in B+ tree")

        # Fill the rest of the reserved space
        unused = (self.size - self.current_size) // 8
        if unused:
            yield PaddingBytes(self, "reserved_space", unused)
48 |
49 |
class HlpFile(Parser):
    """
    Microsoft Windows Help (HLP) parser: 16-byte header, then the file
    entry found at "dir_start", then the rest of the file as raw bytes.
    """
    PARSER_TAGS = {
        "id": "hlp",
        "category": "misc",
        "file_ext": ("hlp",),
        "min_size": 32,
        "description": "Microsoft Windows Help (HLP)",
    }
    endian = LITTLE_ENDIAN

    def validate(self):
        """Return True if the header looks valid, else an error text."""
        if self["magic"].value != 0x00035F3F:
            return "Invalid magic"
        if self["filesize"].value != self.stream.size // 8:
            # This check is about the size stored in the header, not the
            # magic number: report it as such.
            return "Invalid file size"
        return True

    def createFields(self):
        yield textHandler(UInt32(self, "magic"), hexadecimal)
        yield UInt32(self, "dir_start", "Directory start")
        yield Int32(self, "first_free_block", "First free block")
        yield UInt32(self, "filesize", "File size in bytes")

        # The result of seekByte() is yielded as a field
        yield self.seekByte(self["dir_start"].value)
        yield FileEntry(self, "file[]")

        size = (self.size - self.current_size) // 8
        if size:
            yield RawBytes(self, "end", size)
79 |
--------------------------------------------------------------------------------
/hachoir/field/__init__.py:
--------------------------------------------------------------------------------
1 | # Field classes
2 | from hachoir.field.field import Field, FieldError, MissingField, joinPath # noqa
3 | from hachoir.field.bit_field import Bit, Bits, RawBits # noqa
4 | from hachoir.field.byte_field import Bytes, RawBytes # noqa
5 | from hachoir.field.sub_file import SubFile, CompressedField # noqa
6 | from hachoir.field.character import Character # noqa
7 | from hachoir.field.integer import (Int8, Int16, Int24, Int32, Int64, # noqa
8 | UInt8, UInt16, UInt24, UInt32, UInt64,
9 | GenericInteger)
10 | from hachoir.field.enum import Enum # noqa
11 | from hachoir.field.string_field import (GenericString, # noqa
12 | String, CString, UnixLine,
13 | PascalString8, PascalString16,
14 | PascalString32)
15 | from hachoir.field.padding import (PaddingBits, PaddingBytes, # noqa
16 | NullBits, NullBytes)
17 |
18 | # Functions
19 | from hachoir.field.helper import (isString, isInteger, # noqa
20 | createPaddingField, createNullField,
21 | createRawField, writeIntoFile,
22 | createOrphanField)
23 |
24 | # FieldSet classes
25 | from hachoir.field.fake_array import FakeArray # noqa
26 | from hachoir.field.basic_field_set import (BasicFieldSet, # noqa
27 | ParserError, MatchError)
28 | from hachoir.field.generic_field_set import GenericFieldSet # noqa
29 | from hachoir.field.seekable_field_set import SeekableFieldSet, RootSeekableFieldSet # noqa
30 | from hachoir.field.field_set import FieldSet # noqa
31 | from hachoir.field.static_field_set import StaticFieldSet # noqa
32 | from hachoir.field.parser import Parser # noqa
33 | from hachoir.field.vector import GenericVector, UserVector # noqa
34 |
35 | # Complex types
36 | from hachoir.field.float import Float32, Float64, Float80 # noqa
37 | from hachoir.field.timestamp import ( # noqa
38 | GenericTimestamp,
39 | TimestampUnix32, TimestampUnix64, TimestampMac32, TimestampUUID60,
40 | TimestampWin64, TimedeltaMillisWin64,
41 | DateTimeMSDOS32, TimeDateMSDOS32, TimedeltaWin64)
42 |
43 | # Special Field classes
44 | from hachoir.field.link import Link, Fragment # noqa
45 | from hachoir.field.fragment import FragmentGroup, CustomFragment # noqa
46 |
# Field classes listed as available; GenericInteger and GenericString are
# deliberately left out.
available_types = (
    Bit, Bits, RawBits,
    Bytes, RawBytes,
    SubFile,
    Character,
    Int8, Int16, Int24, Int32, Int64,
    UInt8, UInt16, UInt24, UInt32, UInt64,
    String, CString, UnixLine,
    PascalString8, PascalString16, PascalString32,
    Float32, Float64,
    PaddingBits, PaddingBytes,
    NullBits, NullBytes,
    TimestampUnix32, TimestampMac32, TimestampWin64,
    TimedeltaMillisWin64,
    DateTimeMSDOS32, TimeDateMSDOS32,
    # GenericInteger, GenericString,
)
63 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Prepare a release:
4 | #
5 | # - check version: hachoir/__init__.py and doc/conf.py
6 | # - set the release date: edit doc/changelog.rst
7 | # - run: git commit -a
8 | # - Remove untracked files/dirs: git clean -fdx
9 | # - run tests: tox --parallel auto
10 | # - run: git push
11 | # - check GitHub Actions status:
12 | # https://github.com/vstinner/hachoir/actions
13 | #
14 | # Release a new version:
15 | #
16 | # - git tag x.y.z
17 | # - git clean -fdx # Remove untracked files/dirs
18 | # - python3 setup.py sdist bdist_wheel
19 | # - git push --tags
20 | # - twine upload dist/*
21 | #
22 | # After the release:
23 | #
24 | # - set version to N+1: hachoir/__init__.py and doc/conf.py
25 |
# Programs installed with the package, written as "command = module:function"
ENTRY_POINTS = dict(
    console_scripts=[
        "hachoir-grep = hachoir.grep:main",
        "hachoir-metadata = hachoir.metadata.main:main",
        "hachoir-strip = hachoir.strip:main",
        "hachoir-urwid = hachoir.urwid:main",
    ],
    gui_scripts=[
        "hachoir-wx = hachoir.wx.main:main",
    ],
)
# FIXME: hachoir-subfile is currently broken
#    "hachoir-subfile",
# Classifiers passed to setup() as the "classifiers" option
CLASSIFIERS = [
    "Development Status :: 5 - Production/Stable",
    "Environment :: Console :: Curses",
    "Environment :: Plugins",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "License :: OSI Approved :: GNU General Public License (GPL)",
    "Natural Language :: English",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Topic :: Multimedia",
    "Topic :: Scientific/Engineering :: Information Analysis",
    "Topic :: Software Development :: Disassemblers",
    "Topic :: Software Development :: Interpreters",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: System :: Filesystems",
    "Topic :: Text Processing",
    "Topic :: Utilities",
]
59 |
60 |
def main():
    """
    Call setuptools.setup() with the options of the hachoir package.

    The version comes from hachoir.__version__ and the long description
    from README.rst, so this must be run from the directory holding both.
    """
    from setuptools import setup
    from setuptools import find_packages

    import hachoir

    # Read with an explicit encoding so the result does not depend on the
    # locale; the context manager closes the file even if read() fails.
    with open('README.rst', encoding='utf-8') as readme:
        long_description = readme.read()

    install_options = {
        "name": "hachoir",
        "version": hachoir.__version__,
        "url": 'http://hachoir.readthedocs.io/',
        "project_urls": {
            "Source": "https://github.com/vstinner/hachoir",
        },
        "author": "Hachoir team (see AUTHORS file)",
        "description": "Package of Hachoir parsers used to open binary files",
        "long_description": long_description,
        "classifiers": CLASSIFIERS,
        "license": 'GNU GPL v2',
        "packages": find_packages(),
        "package_data": {"hachoir.wx.resource": ['hachoir_wx.xrc']},
        "entry_points": ENTRY_POINTS,
        "extras_require": {
            "urwid": [
                "urwid"
            ],
            "wx": [
                "wxPython==4.*"
            ]
        },
        "zip_safe": True,
    }
    setup(**install_options)


if __name__ == "__main__":
    main()
101 |
--------------------------------------------------------------------------------