├── pyFileFixity ├── tests │ ├── __init__.py │ ├── files │ │ ├── testaa.txt │ │ ├── Sub2 │ │ │ └── testsub2.txt │ │ ├── tux.jpg │ │ ├── alice.pdf │ │ ├── sub │ │ │ ├── Snark.zip │ │ │ └── testsub.txt │ │ └── tuxsmall.jpg │ ├── results │ │ ├── test_repair_ecc_check.db │ │ ├── test_header_ecc_test_algo.db │ │ ├── test_header_ecc_test_dir.db │ │ ├── test_repair_ecc_sa_check.db │ │ ├── test_header_ecc_test_one_file.db │ │ ├── test_header_ecc_test_one_file_tamper.db │ │ ├── test_rfigc_test_error_file.log │ │ ├── test_structural_adaptive_ecc_test_algo.db │ │ ├── test_structural_adaptive_ecc_test_dir.db │ │ ├── test_structural_adaptive_ecc_test_one_file.db │ │ ├── test_structural_adaptive_ecc_test_one_file_tamper.db │ │ ├── test_rfigc_test_one_file.csv │ │ ├── test_rfigc_test_update_remove.csv │ │ ├── test_rfigc_test_dir.csv │ │ ├── test_rfigc_test_update_append.csv │ │ ├── resiliency_tester_config_hard.cfg │ │ └── resiliency_tester_config_easy.cfg │ ├── test_hasher.py │ ├── test_tee.py │ ├── test_aux_funcs.py │ ├── aux_tests.py │ ├── test_eccman.py │ ├── test_rfigc.py │ ├── test_header_ecc.py │ ├── test_repair_ecc.py │ └── test_resiliency_tester.py ├── lib │ ├── __init__.py │ ├── profilers │ │ ├── __init__.py │ │ ├── pprofile │ │ │ └── __init__.py │ │ ├── visual │ │ │ ├── __init__.py │ │ │ ├── pympler │ │ │ │ ├── util │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── compat.py │ │ │ │ │ └── stringutils.py │ │ │ │ ├── __init__.py │ │ │ │ ├── metadata.py │ │ │ │ ├── charts.py │ │ │ │ ├── garbagegraph.py │ │ │ │ └── mprofile.py │ │ │ ├── runsnakerun │ │ │ │ ├── __init__.py │ │ │ │ ├── squaremap │ │ │ │ │ └── __init__.py │ │ │ │ ├── macshim.py │ │ │ │ ├── homedirectory.py │ │ │ │ ├── pstatsadapter.py │ │ │ │ ├── _meliaejson.py │ │ │ │ ├── coldshotadapter.py │ │ │ │ └── meliaeadapter.py │ │ │ ├── kthread.py │ │ │ ├── profilebrowser.py │ │ │ ├── debug.py │ │ │ └── functionprofiler.py │ │ ├── memory_profiler │ │ │ ├── __init__.py │ │ │ └── README_DEV.rst │ │ └── pyinstrument │ │ │ ├── 
__init__.py │ │ │ ├── resources │ │ │ ├── triangle_hide.png │ │ │ ├── triangle_show.png │ │ │ ├── profile.js │ │ │ └── style.css │ │ │ ├── LICENSE │ │ │ ├── middleware.py │ │ │ ├── __main__.py │ │ │ └── README.md │ ├── docs │ │ ├── python - Optimizing a reed-solomon encoder (polynomial division) - Stack Overflow.pdf │ │ ├── Richard E. Blahut - Algebraic Codes for Data Transmission 2003 - Excerpt on errata decoder.pdf │ │ ├── python - Errata (erasures+errors) Berlekamp-Massey for Reed-Solomon decoding - Stack Overflow.pdf │ │ └── Initialization of errata evaluator polynomial for simultaneous computation in Berlekamp-Massey for Reed-Solomon.pdf │ ├── _compat.py │ ├── tee.py │ └── hasher.py ├── __init__.py ├── ecc_specification.txt ├── resiliency_tester_config.txt ├── easy_profiler.py └── pff.py ├── tux-example.jpg ├── requirements.txt ├── .gitattributes ├── setup.py ├── setup.py.bak ├── .coveragerc ├── .gitignore ├── codecov.yml ├── LICENSE ├── MANIFEST.in ├── appveyor.yml ├── tox.ini ├── pycleaner.py ├── .github └── workflows │ ├── codeql.yml │ ├── ci-build-downstream.yml │ ├── ci-build.yml │ └── ci-cd.yml ├── Makefile └── setup.cfg.bak /pyFileFixity/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/lib/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/tests/files/testaa.txt: -------------------------------------------------------------------------------- 1 | stcstcst -------------------------------------------------------------------------------- 
/pyFileFixity/lib/profilers/pprofile/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/tests/files/Sub2/testsub2.txt: -------------------------------------------------------------------------------- 1 | abcdef -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/memory_profiler/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/__init__.py: -------------------------------------------------------------------------------- 1 | from profiler import Profiler 2 | -------------------------------------------------------------------------------- /tux-example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/tux-example.jpg -------------------------------------------------------------------------------- /pyFileFixity/tests/files/tux.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/tux.jpg -------------------------------------------------------------------------------- /pyFileFixity/tests/files/alice.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/alice.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pathlib2 2 | argparse 3 | sortedcontainers 4 | unireedsolomon 5 | reedsolo>=2.0.0a 6 | tqdm 7 | distance 8 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/__init__.py: -------------------------------------------------------------------------------- 1 | """The RunSnakeRun GUI Profiler utility""" 2 | __version__ = '2.0.4' 3 | -------------------------------------------------------------------------------- /pyFileFixity/tests/files/sub/Snark.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/sub/Snark.zip -------------------------------------------------------------------------------- /pyFileFixity/tests/files/tuxsmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/tuxsmall.jpg -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/squaremap/__init__.py: -------------------------------------------------------------------------------- 1 | """Hierarchic data-viewing widget for wxPython""" 2 | __version__ = '1.0.1' 3 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_repair_ecc_check.db: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_repair_ecc_check.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_algo.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_algo.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_dir.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_dir.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_repair_ecc_sa_check.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_repair_ecc_sa_check.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_one_file.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_one_file.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_one_file_tamper.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_one_file_tamper.db -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/triangle_hide.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/profilers/pyinstrument/resources/triangle_hide.png -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/triangle_show.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/profilers/pyinstrument/resources/triangle_show.png -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_error_file.log: -------------------------------------------------------------------------------- 1 | tuxsmall.jpg|both md5 and sha1 hash failed, modification date has changed (before: 2015-11-13 19:21:34 - now: 2015-11-13 19:24:34) 2 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_algo.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_algo.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_dir.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_dir.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file.db: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file_tamper.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file_tamper.db -------------------------------------------------------------------------------- /pyFileFixity/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "Stephen Karl Larroque", 4 | __email__ = "LRQ3000@gmail.com", 5 | __version__ = "3.1.4" 6 | __all__ = ['__author__', '__email__', '__version__'] 7 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_one_file.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | tuxsmall.jpg|1c5704dd227e1de7d96b355c6111c764|f8a1f7675ea360bff97d02443c174c102fbcdefa| -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_update_remove.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | added_file.txt|fad0092ae8c6218c1fb78d281238168d|0a21ef1d2ccc47ffedf45192d0b8c26afd5d552f| 3 | -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/python - Optimizing a reed-solomon encoder (polynomial division) - Stack Overflow.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/python - Optimizing a reed-solomon encoder (polynomial division) - Stack Overflow.pdf -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/Richard E. Blahut - Algebraic Codes for Data Transmission 2003 - Excerpt on errata decoder.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/Richard E. Blahut - Algebraic Codes for Data Transmission 2003 - Excerpt on errata decoder.pdf -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/python - Errata (erasures+errors) Berlekamp-Massey for Reed-Solomon decoding - Stack Overflow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/python - Errata (erasures+errors) Berlekamp-Massey for Reed-Solomon decoding - Stack Overflow.pdf -------------------------------------------------------------------------------- /pyFileFixity/tests/files/sub/testsub.txt: -------------------------------------------------------------------------------- 1 | oOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo 2 | oOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 3 | OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO( -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/macshim.py: -------------------------------------------------------------------------------- 1 | def macshim(): 2 | """Shim to run 32-bit on 64-bit mac as a sub-process""" 3 | import subprocess, sys 4 | subprocess.call([ 5 | sys.argv[0] + '32' 6 | ]+sys.argv[1:], 7 | 
env={"VERSIONER_PYTHON_PREFER_32_BIT":"yes"} 8 | ) 9 | -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/Initialization of errata evaluator polynomial for simultaneous computation in Berlekamp-Massey for Reed-Solomon.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/Initialization of errata evaluator polynomial for simultaneous computation in Berlekamp-Massey for Reed-Solomon.pdf -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/memory_profiler/README_DEV.rst: -------------------------------------------------------------------------------- 1 | Some information on the internals of this package. 2 | 3 | Tests 4 | ----- 5 | `make test` is the closest thing to tests on this package. It executes some 6 | example code and prints the information. If you don't see any exceptions nor 7 | any strange output then the tests suite "has succeeded". 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Simple .gitattributes that disables any text end-of-line normalization/conversion 2 | # `-text` still allows diffs for text types detected by git heuristics, contrary to `binary` 3 | # This avoids issues with binary files eof being converted when using `pip install git+...` 4 | # More templates can be found at: https://github.com/alexkaratarakis/gitattributes 5 | 6 | * -text 7 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | DATA_PATH = '' 4 | 5 | # DATA_PATH will be initialized from distutils when installing. 
If Pympler is 6 | # installed via setuptools/easy_install, the data will be installed alongside 7 | # the source files instead. 8 | if not os.path.exists(DATA_PATH): 9 | DATA_PATH = os.path.realpath(os.path.join(__file__, '..', '..')) 10 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/profile.js: -------------------------------------------------------------------------------- 1 | /* jshint globalstrict: true */ 2 | /* global $ */ 3 | 4 | 'use strict'; 5 | $('.frame').click(function (event) { 6 | $(this).toggleClass('collapse'); 7 | event.stopPropagation(); 8 | }); 9 | $('.frame, body').mousemove(function (event) { 10 | $('.frame.last-hover').removeClass('last-hover'); 11 | $(this).addClass('last-hover'); 12 | event.stopPropagation(); 13 | }); 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # An empty setup.py is required for retrocompatibility with older versions of pip that do not support pyproject.toml-only projects, especially to install in editable mode, see: https://github.com/pypa/setuptools/issues/2816 2 | 3 | from setuptools import setup 4 | setup() # necessary to have at least a setup() call, otherwise setuptools will complaint with an exception: `AssertionError: Multiple .egg-info directories found\nerror: subprocess-exited-with-error` 5 | -------------------------------------------------------------------------------- /setup.py.bak: -------------------------------------------------------------------------------- 1 | # For Py2: Necessary for setup.py to exist even if empty, so that setuptools finds setup.cfg 2 | # per setuptools documentation: "If compatibility with legacy builds (i.e. 
those not using the PEP 517 build API) is desired, a setup.py file containing a setup() function call is still required even if your configuration resides in setup.cfg." https://setuptools.pypa.io/en/latest/userguide/declarative_config.html 3 | # DEPRECATED: Python 2.7 support dropped, hence this file is unnecessary. 4 | 5 | from setuptools import setup 6 | 7 | setup() 8 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | relative_files = True 3 | branch = True 4 | omit = 5 | pyFileFixity/tests/* 6 | pyFileFixity/__init__.py 7 | pyFileFixity/easy_profiler.py 8 | pyFileFixity/ecc_speedtest.py 9 | pyFileFixity/filetamper.py 10 | pycleaner.py 11 | setup.py 12 | include = 13 | pyFileFixity/lib/aux_funcs.py 14 | pyFileFixity/lib/eccman.py 15 | pyFileFixity/lib/hasher.py 16 | pyFileFixity/lib/tee.py 17 | pyFileFixity/_infos.py 18 | pyFileFixity/header_ecc.py 19 | pyFileFixity/repair_ecc.py 20 | pyFileFixity/replication_repair.py 21 | pyFileFixity/resiliency_tester.py 22 | pyFileFixity/rfigc.py 23 | pyFileFixity/structural_adaptive_ecc.py 24 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_dir.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | alice.pdf|298aeefe8c00f2d92d660987bee67260|106e7ad4d3927c5906cd366cc0d5bd887bdc3300| 3 | testaa.txt|c0d8a5f3a813d488cbfb83f1b147b14b|6ca36c14f68e4eefa47ec23ccc333378b8d0fe73| 4 | tux.jpg|81e19bbf2efaeb1d6d6473c21c48e4b7|6e38ea91680ef0f960db0fd6a973cf50ef765369| 5 | tuxsmall.jpg|1c5704dd227e1de7d96b355c6111c764|f8a1f7675ea360bff97d02443c174c102fbcdefa| 6 | Sub2/testsub2.txt|e80b5017098950fc58aad83c8c14978e|1f8ac10f23c5b5bc1167bda84b833e5c057a77d2| 7 | 
sub/Snark.zip|f8435b883eaf03bf84cae75a706a9b8c|e68efd832dd3517d4c80db6a84b98591eeabe864| 8 | sub/testsub.txt|8ef3d6be5baa449c127aa00083ebbe34|bfb7ef83b23e0791199e4ebe9ae34489a4ef7004| 9 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_update_append.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | alice.pdf|298aeefe8c00f2d92d660987bee67260|106e7ad4d3927c5906cd366cc0d5bd887bdc3300| 3 | testaa.txt|c0d8a5f3a813d488cbfb83f1b147b14b|6ca36c14f68e4eefa47ec23ccc333378b8d0fe73| 4 | tux.jpg|81e19bbf2efaeb1d6d6473c21c48e4b7|6e38ea91680ef0f960db0fd6a973cf50ef765369| 5 | tuxsmall.jpg|1c5704dd227e1de7d96b355c6111c764|f8a1f7675ea360bff97d02443c174c102fbcdefa| 6 | sub/Snark.zip|f8435b883eaf03bf84cae75a706a9b8c|e68efd832dd3517d4c80db6a84b98591eeabe864| 7 | sub/testsub.txt|8ef3d6be5baa449c127aa00083ebbe34|bfb7ef83b23e0791199e4ebe9ae34489a4ef7004| 8 | added_file.txt|fad0092ae8c6218c1fb78d281238168d|0a21ef1d2ccc47ffedf45192d0b8c26afd5d552f| 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Testing dos scripts 25 | *.bat 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | # PyBuilder 53 | target/ 54 | 55 | # Temporary test files 56 | /tests/out/ 57 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Note about syntax: avoid using tabs! 2 | # If yaml is malformatted, it can be checked locally against codecov specific yaml reader: https://docs.codecov.com/docs/codecov-yaml#validate-your-repository-yaml 3 | coverage: 4 | precision: 2 # 0 decimals of precision 5 | round: down # Round to floor 6 | range: # red -> yellow -> green 7 | - 60.0 8 | - 80.0 9 | 10 | status: 11 | project: 12 | default: 13 | # basic 14 | target: auto 15 | removed_code_behavior: fully_covered_patch # see: https://about.codecov.io/blog/new-codecov-setting-removed-code-behavior/ 16 | threshold: 1% # allow 1% coverage variance, because depending on the run, different OSes results are uploaded for each run and some code is OS-specific, so this is it's within expected OS-dependent variability 17 | 18 | patch: 19 | default: 20 | threshold: 1% # allow 1% coverage variance 21 | -------------------------------------------------------------------------------- /pyFileFixity/ecc_specification.txt: -------------------------------------------------------------------------------- 1 | **This is an example ECC file with clear specification of each field. Lines beginning with ** and finishing with a line return \n are considered comments. In fact, all lines before the first entrymarker are considered comments and are just skipped. 
Note that after the headers comments, the file is binary, and thus there's no line returns. 2 | **SOFTWAREIDENTv111...000** 3 | ** Parameters: 4 | ** Parameters: 5 | ** Parameters: 6 | ** Generated under . 7 | [relative-file1-path.file-extension][file1-size][relative-file1-path-ecc][file1-size-ecc][block0-hash][block0-ecc][block1-hash][block1-ecc][block2-hash][block2-ecc]...[relative-file2-path.file-extension][file2-size][relative-file2-path-ecc][file2-size-ecc][block0-hash][block0-ecc][block1-hash][block1-ecc][block2-hash][block2-ecc]... -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2023 Stephen Larroque 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Cannot yet fully replace MANIFEST.in by pyproject.toml if we use setuptools, see: https://github.com/pypa/setuptools/issues/3341 2 | # Misc 3 | include .coveragerc # for compatibility with Py2, otherwise the coverage is configured in pyproject.toml 4 | #include LICENSE 5 | #include Makefile 6 | #include README.md 7 | #include README.rst 8 | #include TODO.md 9 | 10 | # Non-python files 11 | include pyFileFixity/ecc_specification.txt # done in pyproject.toml, but for Py2 we need to put it here 12 | include pyFileFixity/resiliency_tester_config.txt # idem 13 | 14 | # Libraries 15 | recursive-include pyFileFixity/lib *.py 16 | recursive-include pyFileFixity/lib *.pyx # Cython files, all were moved to their own modules in distinct repositories, but we may optimize some routines in the future 17 | recursive-exclude pyFileFixity/lib/profilers * # no need for profilers 18 | 19 | # Test suite 20 | recursive-include pyFileFixity/tests *.py # unit test scripts 21 | recursive-include pyFileFixity/tests/files * # attach necessary files to run tests 22 | recursive-include pyFileFixity/tests/results * # attach necessary py-make config and resulting database files to run and compare tests results 23 | include tox.ini 24 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{branch}-{build}' 2 | build: off 3 | cache: 4 | - '%LOCALAPPDATA%\pip\Cache' 5 | environment: 6 | global: 7 | WITH_COMPILER: 'cmd /E:ON /V:ON /C .\ci\appveyor-with-compiler.cmd' 8 | matrix: 9 | - TOXENV: py27 10 | TOXPYTHON: C:\Python27\python.exe 11 | PYTHON_HOME: C:\Python27 12 | PYTHON_VERSION: '2.7' 13 | PYTHON_ARCH: '27' 14 | 15 | - TOXENV: pypy 16 | TOXPYTHON: pypy.exe 17 | 
PYTHON_VERSION: '2.7' 18 | PYTHON_ARCH: '27' 19 | 20 | init: 21 | - ps: echo $env:TOXENV 22 | - ps: ls C:\Python* 23 | install: 24 | - python -u ci\appveyor-bootstrap.py 25 | - '%PYTHON_HOME%\Scripts\virtualenv --version' 26 | - '%PYTHON_HOME%\Scripts\easy_install --version' 27 | - '%PYTHON_HOME%\Scripts\pip --version' 28 | - '%PYTHON_HOME%\Scripts\tox --version' 29 | - (New-Object Net.WebClient).DownloadFile('https://bitbucket.org/pypy/pypy/downloads/pypy-4.0.0-win32.zip', "$env:appveyor_build_folder\pypy-4.0.0-win32.zip") 30 | - 7z x pypy-4.0.0-win32.zip | Out-Null 31 | - $env:path = "$env:appveyor_build_folder\pypy-4.0.0-win32.zip;$env:path" 32 | test_script: 33 | - '%WITH_COMPILER% %PYTHON_HOME%\Scripts\tox' 34 | 35 | on_failure: 36 | - ps: dir "env:" 37 | - ps: get-content .tox\*\log\* 38 | artifacts: 39 | - path: dist\* 40 | ### To enable remote debugging uncomment this: 41 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 42 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Joe Rickerby 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. 
Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/metadata.py: -------------------------------------------------------------------------------- 1 | """Project metadata. 2 | 3 | This information is used in setup.py as well as in doc/source/conf.py. 4 | 5 | """ 6 | 7 | project_name = 'Pympler' 8 | version = '0.2.1' 9 | url = 'http://packages.python.org/Pympler/' 10 | license = 'Apache License, Version 2.0' #PYCHOK valid 11 | author = 'Jean Brouwers, Ludwig Haehne, Robert Schuppenies' 12 | author_email = 'pympler-dev@googlegroups.com' 13 | copyright = '2008-2011, ' + author #PYCHOK valid 14 | description = ('A development tool to measure, monitor and analyze ' 15 | 'the memory behavior of Python objects.') 16 | long_description = ''' 17 | Pympler is a development tool to measure, monitor and analyze the 18 | memory behavior of Python objects in a running Python application. 
19 | 20 | By pympling a Python application, detailed insight in the size and 21 | the lifetime of Python objects can be obtained. Undesirable or 22 | unexpected runtime behavior like memory bloat and other "pymples" 23 | can easily be identified. 24 | 25 | Pympler integrates three previously separate projects into a single, 26 | comprehensive profiling tool. Asizeof provides basic size information 27 | for one or several Python objects, muppy is used for on-line 28 | monitoring of a Python application and the class tracker provides 29 | off-line analysis of the lifetime of selected Python objects. A 30 | web profiling frontend exposes process statistics, garbage 31 | visualisation and class tracker statistics. 32 | 33 | Pympler is written entirely in Python, with no dependencies to 34 | external libraries. It has been tested with Python 2.4, 2.5, 2.6, 2.7, 35 | 3.1, 3.2 on Linux, Windows and MacOS X. 36 | ''' 37 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/resiliency_tester_config_hard.cfg: -------------------------------------------------------------------------------- 1 | # IMPORTANT: to be compatible with `python setup.py make alias`, you must make 2 | # sure that you only put one command per line, and ALWAYS put a line return 3 | # after an alias and before a command, eg: 4 | # 5 | #``` 6 | #all: 7 | # test 8 | # install 9 | #test: 10 | # nosetest 11 | #install: 12 | # python setup.py install 13 | # ``` 14 | # 15 | # resiliency_tester.py supports a templating system: you can use the following special tags, they will be interpolated at runtime: 16 | # - {inputdir}: input directory. Depending on the stage, this is either the untampered files (a copy of the original files), the tampered folder, or even previous repair folders during the repair stage. 17 | # - {dbdir}: database directory, where the generated databases will be placed. 
18 | # - {outputdir}: output directory, where the files generated after executing the current command will be placed in. 19 | 20 | before_tamper: # this will be executed before files tampering. Generate your ecc/database files here. 21 | #python header_ecc.py -i "{inputdir}" -d "{dbdir}/hecc.txt" --size 4096 --ecc_algo 3 -g -f --silent 22 | 23 | tamper: # parameters to tamper the files and even the database files. 24 | python filetamper.py -i "{inputdir}" -m "n" -p 0.05 -b "3|6" --silent 25 | python filetamper.py -i "{dbdir}" -m "n" -p 0.001 -b "4|9" --silent 26 | 27 | after_tamper: # execute commands after tampering. Can be used to recover 28 | #python repair_ecc.py -i "{dbdir}/hecc.txt" --index "{dbdir}/hecc.txt.idx" -o "{dbdir}/heccrep.txt" -t 0.4 -f --silent 29 | 30 | repair: 31 | #python header_ecc.py -i "{inputdir}" -d "{dbdir}/heccrep.txt" -o "{outputdir}" -c --size 4096 --no_fast_check --ecc_algo 3 --silent 32 | 33 | none: 34 | # used for unit testing 35 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/style.css: -------------------------------------------------------------------------------- 1 | .frame { 2 | margin-left: 0; 3 | font-size: 10pt; 4 | border-left: 1px solid #eee; 5 | padding-left: 15px; 6 | background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAALCAMAAAB8rpxtAAAARVBMVEUAAAD9/f5xe4R1fYZ1fIf////O0NWzs7OysrJ2fYh2fYh2fIf///+FjJaxtbv////n6Or///+VmqP///92fYh5gYyCiJLu0pBqAAAAFHRSTlMAghs7SSu2CgTirXMj/M9EnhHqZ/jLwbwAAABGSURBVAjXY0ABHOxQwAHkMIqJQAAnkMPKBuWwgNQJC4HZXGBNggKiIA4TxAh+PiCbG2YeI68IJwfMdB42EWaEXawsDJgAACnNA3xj5yn5AAAAAElFTkSuQmCC); 7 | background-repeat: no-repeat; 8 | cursor: default; 9 | pointer-events: fill; 10 | background-color: rgba(255,255,255,0.2); 11 | } 12 | .frame.collapse { 13 | background-image: 
url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAsAAAAMCAMAAACDd7esAAAARVBMVEUAAAD9/f51fYZxe4R2fYjO0NX///+zs7OysrJ1fId2fYh2fIf///+xtbuFjJb////n6Or///+VmqP///92fYh5gYyCiJLiCCvVAAAAFHRSTlMAgjsb4rYrCgRJrXMjz/xEnhHqZ/fev68AAAA+SURBVAjXY8AJOFg44Wx2EREuZjgbCLg5EGwRFiYIGwI44Ww+VjYoW5SXkQeqRkiAH6pXjFVYEGYXIxuUBQDVJAN8ddpNUwAAAABJRU5ErkJggg==); 14 | } 15 | .frame.collapse > .frame-children { 16 | display: none; 17 | } 18 | .frame.no_children { 19 | background-image: none !important; 20 | } 21 | 22 | .frame.application > .frame-info > .function { 23 | background-color: rgba(40, 255, 66, 0.14); 24 | } 25 | 26 | .frame:hover .frame { 27 | color: #888; 28 | } 29 | .frame:hover { 30 | color: black !important; 31 | background-color: rgba(188,213,255,0.02); 32 | } 33 | .frame.last-hover { 34 | border-left-color: #a66 !important; 35 | color: black !important; 36 | } 37 | .frame.last-hover .frame { 38 | color: black !important; 39 | } 40 | .function { 41 | font-weight: bold; 42 | font-family: 'Consolas', 'Lucida Console', 'Monaco', monospace; 43 | } 44 | .code-position { 45 | opacity: 0.5; 46 | } -------------------------------------------------------------------------------- /pyFileFixity/tests/test_hasher.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import unittest 4 | import sys 5 | import os 6 | import shutil 7 | 8 | from .aux_tests import path_sample_files, create_dir_if_not_exist 9 | 10 | from ..lib.hasher import Hasher 11 | 12 | class TestHasher(unittest.TestCase): 13 | def setup_module(self): 14 | """ Initialize the tests by emptying the out directory """ 15 | outfolder = path_sample_files('output') 16 | shutil.rmtree(outfolder, ignore_errors=True) 17 | create_dir_if_not_exist(outfolder) 18 | 19 | def test_hasher(self): 20 | """ hasher: test hashes """ 21 | instring = "Lorem ipsum and some more stuff\nThe answer to the question of life, universe and everything is... 42." 
22 | # Put all hashing algo results here (format: "algo_name": [length, result_for_instring]) 23 | algo_params = {"md5": [32, b'173efbe0280ce506ddbfbfc9aeb44a1a'], 24 | "shortmd5": [8, b'MTczZWZi'], 25 | "shortsha256": [8, b'NjgzMjRk'], 26 | "minimd5": [4, b'MTcz'], 27 | "minisha256": [4, b'Njgz'], 28 | "none": [0, ''], 29 | } 30 | # For each hashing algo, produce a hash and check the length and hash 31 | for algo in Hasher.known_algo: 32 | h = Hasher(algo) 33 | shash = h.hash(instring) 34 | #print(algo+": "+shash) # debug 35 | assert len(shash) == algo_params[algo][0] 36 | assert shash == algo_params[algo][1] 37 | # Check that unknown algorithms raise an exception 38 | self.assertRaises(NameError, Hasher, "unknown_algo") 39 | # Second check of unknown algo raising exception 40 | h = Hasher() 41 | h.algo = "unknown_algo" 42 | self.assertRaises(NameError, h.hash, "abcdef") 43 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Isolated package builds test from a temporary directory via tox 2 | # Thanks to Paul Ganssle for the minimal example, see: https://blog.ganssle.io/articles/2019/08/test-as-installed.html and https://github.com/pganssle/tox-examples/blob/master/changedir/tox.ini 3 | # Use `tox -e py` 4 | # 5 | # Tox (http://tox.testrun.org/) is a tool for running tests 6 | # in multiple virtualenvs. This configuration file will run the 7 | # test suite on all supported python versions. 
To use it, "pip install tox" 8 | 9 | [tox] 10 | minversion=3.13.0 11 | isolated_build=True 12 | envlist = py27, py32, py34, pypy, pypy3, setup.py 13 | 14 | [testenv] 15 | description = Run the tests under {basepython} 16 | deps = pytest 17 | changedir = {envtmpdir} # use a temporary directory to ensure we test the built package, not the repository version: https://blog.ganssle.io/articles/2019/08/test-as-installed.html and https://github.com/pganssle/tox-examples/blob/master/changedir/tox.ini 18 | commands = python -m pytest {posargs} {toxinidir} 19 | 20 | [testenvpy2] 21 | deps = 22 | #jpeg pillow # to support rfigc.py --structure_check 23 | nose 24 | nose-timer 25 | coverage<4 26 | coveralls 27 | commands = 28 | nosetests pyFileFixity/tests/ --with-coverage --cover-package=pyFileFixity -d -v --with-timer 29 | coveralls 30 | 31 | [testenv:pypy2] 32 | #basepython=C:\Program Files (x86)\pypy-4.0.0-win32\pypy.exe 33 | # No coverage for PyPy, too slow... 34 | deps = 35 | #pypy-tk # necessary for pypy to install pillow 36 | #jpeg pillow # to support rfigc.py --structure_check 37 | nose 38 | nose-timer 39 | commands = 40 | pypy --version 41 | nosetests pyFileFixity/tests/ -d -v --with-timer 42 | 43 | [testenv:pypy3] 44 | # No coverage for PyPy, too slow... 
45 | deps = 46 | nose 47 | nose-timer 48 | commands = 49 | pypy --version 50 | nosetests pyFileFixity/tests/ -d -v --with-timer 51 | -------------------------------------------------------------------------------- /pycleaner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import fnmatch 5 | 6 | class GlobDirectoryWalker: 7 | 8 | # a forward iterator that traverses a directory tree 9 | def __init__(self, directory, pattern="*"): 10 | self.stack = [directory] 11 | self.pattern = pattern 12 | self.files = [] 13 | self.index = 0 14 | 15 | def __getitem__(self, index): 16 | while 1: 17 | try: 18 | file = self.files[self.index] 19 | self.index = self.index + 1 20 | except IndexError: 21 | # pop next directory from stack 22 | self.directory = self.stack.pop() 23 | self.files = os.listdir(self.directory) 24 | self.index = 0 25 | else: 26 | # got a filename 27 | fullname = os.path.join(self.directory, file) 28 | if os.path.isdir(fullname) and not os.path.islink(fullname): 29 | self.stack.append(fullname) 30 | if fnmatch.fnmatch(file, self.pattern): 31 | return fullname 32 | 33 | def pycCleanup(directory,path,filext='pyc'): 34 | for filename in directory: 35 | if filename[-3:] == filext: 36 | print '- ' + filename 37 | os.remove(path+os.sep+filename) 38 | elif os.path.isdir(path+os.sep+filename): 39 | pycCleanup(os.listdir(path+os.sep+filename),path+os.sep+filename) 40 | 41 | def cleanup1(filext='pyc'): 42 | directory = os.listdir('.') 43 | print('Deleting .%s files recursively in %s.' % (filext, str(directory))) 44 | pycCleanup(directory,'.',filext) 45 | 46 | def cleanup2(filext='pyc'): 47 | for file in GlobDirectoryWalker(".", "*."+filext): 48 | print file 49 | os.remove(file) 50 | 51 | print "After..." 
52 | for file in GlobDirectoryWalker(".", "*."+filext): 53 | print file 54 | 55 | if __name__ == '__main__': 56 | cleanup1() -------------------------------------------------------------------------------- /pyFileFixity/lib/_compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | 6 | try: # compatibility with Python 3+ 7 | _range = xrange 8 | except NameError: 9 | _range = range 10 | 11 | try: 12 | from cStringIO import StringIO 13 | _StringIO = StringIO 14 | except (ImportError, NameError): #python3.x 15 | from io import StringIO 16 | _StringIO = StringIO 17 | 18 | try: 19 | from itertools import izip 20 | _izip = izip 21 | except ImportError: #python3.x 22 | _izip = zip 23 | 24 | try: 25 | _str = basestring 26 | except NameError: 27 | _str = str 28 | 29 | if sys.version_info < (3,): 30 | def b(x): 31 | return x 32 | else: 33 | import codecs 34 | def b(x): 35 | if isinstance(x, _str): 36 | return codecs.latin_1_encode(x)[0] 37 | else: 38 | return x 39 | 40 | if sys.version_info < (3,): 41 | import io 42 | def _open_csv(x, mode='r'): 43 | return io.open(x, mode+'b') # on Py3, io.open() is the same as open(), see: https://stackoverflow.com/questions/5250744/difference-between-open-and-codecs-open-in-python 44 | else: 45 | def _open_csv(x, mode='r'): 46 | return open(x, mode+'t', newline='', encoding='utf-8') # for csv module, open() mode needed to be binary for Python 2, but on Py3 it needs to be text mode, no binary! 
https://stackoverflow.com/a/34283957/1121352 47 | 48 | if sys.version_info < (3,): 49 | def _ord(x): 50 | return ord(x) 51 | else: 52 | def _ord(x): 53 | if isinstance(x, int): 54 | return x 55 | else: 56 | return ord(x) 57 | 58 | if sys.version_info < (3,): 59 | def _bytes(x): 60 | return bytes(x) 61 | else: 62 | def _bytes(x): 63 | if isinstance(x, (bytes, bytearray)): 64 | return x 65 | else: 66 | return bytes(x, 'latin-1') 67 | 68 | try: 69 | from itertools import izip 70 | _izip = izip 71 | except ImportError: 72 | _izip = zip 73 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/util/compat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compatibility layer to allow Pympler being used from Python 2.x and Python 3.x. 3 | """ 4 | 5 | import sys 6 | 7 | # Version dependent imports 8 | 9 | try: 10 | from StringIO import StringIO 11 | BytesIO = StringIO 12 | except ImportError: 13 | from io import StringIO, BytesIO 14 | 15 | try: 16 | import cPickle as pickle 17 | except ImportError: 18 | import pickle #PYCHOK Python 3.0 module 19 | 20 | try: 21 | from new import instancemethod 22 | except ImportError: # Python 3.0 23 | def instancemethod(*args): 24 | return args[0] 25 | 26 | try: 27 | from HTMLParser import HTMLParser 28 | except ImportError: # Python 3.0 29 | from html.parser import HTMLParser 30 | 31 | try: 32 | from httplib import HTTPConnection 33 | except ImportError: # Python 3.0 34 | from http.client import HTTPConnection 35 | 36 | try: 37 | from urllib2 import Request, urlopen, URLError 38 | except ImportError: # Python 3.0 39 | from urllib.request import Request, urlopen 40 | from urllib.error import URLError 41 | 42 | try: 43 | import pympler.util.bottle2 as bottle 44 | except (SyntaxError, ImportError): 45 | try: 46 | import pympler.util.bottle3 as bottle 47 | except (SyntaxError, ImportError): # Python 2.4 48 | bottle = None 49 | 
50 | # Helper functions 51 | 52 | # Python 2.x expects strings when calling communicate and passing data via a 53 | # pipe while Python 3.x expects binary (encoded) data. The following works with 54 | # both: 55 | # 56 | # p = Popen(..., stdin=PIPE) 57 | # p.communicate(encode4pipe("spam")) 58 | # 59 | encode4pipe = lambda s: s 60 | if sys.hexversion >= 0x3000000: 61 | encode4pipe = lambda s: s.encode() 62 | 63 | def object_in_list(obj, l): 64 | """Returns True if object o is in list. 65 | 66 | Required compatibility function to handle WeakSet objects. 67 | """ 68 | for o in l: 69 | if o is obj: 70 | return True 71 | return False 72 | -------------------------------------------------------------------------------- /pyFileFixity/resiliency_tester_config.txt: -------------------------------------------------------------------------------- 1 | # IMPORTANT: to be compatible with `python setup.py make alias`, you must make 2 | # sure that you only put one command per line, and ALWAYS put a line return 3 | # after an alias and before a command, eg: 4 | # 5 | #``` 6 | #all: 7 | # test 8 | # install 9 | #test: 10 | # nosetest 11 | #install: 12 | # python setup.py install 13 | # ``` 14 | # 15 | # resiliency_tester.py supports a templating system: you can use the following special tags, they will be interpolated at runtime: 16 | # - {inputdir}: input directory. Depending on the stage, this is either the untampered files (a copy of the original files), the tampered folder, or even previous repair folders during the repair stage. 17 | # - {dbdir}: database directory, where the generated databases will be placed. 18 | # - {outputdir}: output directory, where the files generated after executing the current command will be placed in. 19 | 20 | before_tamper: # this will be executed before files tampering. Generate your ecc/database files here. 
21 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/hecc.txt" --size 4096 --ecc_algo 3 -g -f 22 | python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/ecc.txt" -r1 0.3 -r2 0.2 -r3 0.1 -g -f --ecc_algo 3 23 | 24 | tamper: # parameters to tamper the files and even the database files. 25 | python filetamper.py -i "{inputdir}" -m "n" -p 0.005 -b "3|6" 26 | python filetamper.py -i "{dbdir}" -m "n" -p 0.001 -b "4|9" 27 | 28 | after_tamper: # execute commands after tampering. Can be used to recover 29 | python repair_ecc.py -i "{dbdir}/hecc.txt" --index "{dbdir}/hecc.txt.idx" -o "{dbdir}/heccrep.txt" -t 0.4 -f 30 | python repair_ecc.py -i "{dbdir}/ecc.txt" --index "{dbdir}/ecc.txt.idx" -o "{dbdir}/eccrep.txt" -t 0.4 -f 31 | 32 | repair: 33 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/heccrep.txt" -o "{outputdir}" -c --size 4096 --no_fast_check --ecc_algo 3 34 | python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/eccrep.txt" -o "{outputdir}" -c -r1 0.3 -r2 0.2 -r3 0.1 -f --ecc_algo 3 35 | 36 | none: 37 | # used for unit testing 38 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/util/stringutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | String utility functions. 3 | """ 4 | 5 | def safe_repr(obj, clip=None): 6 | """ 7 | Convert object to string representation, yielding the same result a `repr` 8 | but catches all exceptions and returns 'N/A' instead of raising the 9 | exception. Strings may be truncated by providing `clip`. 
10 | 11 | >>> safe_repr(42) 12 | '42' 13 | >>> safe_repr('Clipped text', clip=8) 14 | 'Clip..xt' 15 | >>> safe_repr([1,2,3,4], clip=8) 16 | '[1,2..4]' 17 | """ 18 | try: 19 | s = repr(obj) 20 | if not clip or len(s) <= clip: 21 | return s 22 | else: 23 | return s[:clip-4]+'..'+s[-2:] 24 | except: 25 | return 'N/A' 26 | 27 | 28 | def trunc(obj, max, left=0): 29 | """ 30 | Convert `obj` to string, eliminate newlines and truncate the string to `max` 31 | characters. If there are more characters in the string add ``...`` to the 32 | string. With `left=True`, the string can be truncated at the beginning. 33 | 34 | @note: Does not catch exceptions when converting `obj` to string with `str`. 35 | 36 | >>> trunc('This is a long text.', 8) 37 | This ... 38 | >>> trunc('This is a long text.', 8, left) 39 | ...text. 40 | """ 41 | s = str(obj) 42 | s = s.replace('\n', '|') 43 | if len(s) > max: 44 | if left: 45 | return '...'+s[len(s)-max+3:] 46 | else: 47 | return s[:(max-3)]+'...' 48 | else: 49 | return s 50 | 51 | def pp(i, base=1024): 52 | """ 53 | Pretty-print the integer `i` as a human-readable size representation. 54 | """ 55 | degree = 0 56 | pattern = "%4d %s" 57 | while i > base: 58 | pattern = "%7.2f %s" 59 | i = i / float(base) 60 | degree += 1 61 | scales = ['B', 'KB', 'MB', 'GB', 'TB', 'EB'] 62 | return pattern % (i, scales[degree]) 63 | 64 | def pp_timestamp(t): 65 | """ 66 | Get a friendly timestamp represented as a string. 
67 | """ 68 | if t is None: 69 | return '' 70 | h, m, s = int(t / 3600), int(t / 60 % 60), t % 60 71 | return "%02d:%02d:%05.2f" % (h, m, s) 72 | 73 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/resiliency_tester_config_easy.cfg: -------------------------------------------------------------------------------- 1 | # IMPORTANT: to be compatible with `python setup.py make alias`, you must make 2 | # sure that you only put one command per line, and ALWAYS put a line return 3 | # after an alias and before a command, eg: 4 | # 5 | #``` 6 | #all: 7 | # test 8 | # install 9 | #test: 10 | # nosetest 11 | #install: 12 | # python setup.py install 13 | # ``` 14 | # 15 | # resiliency_tester.py supports a templating system: you can use the following special tags, they will be interpolated at runtime: 16 | # - {inputdir}: input directory. Depending on the stage, this is either the untampered files (a copy of the original files), the tampered folder, or even previous repair folders during the repair stage. 17 | # - {dbdir}: database directory, where the generated databases will be placed. 18 | # - {outputdir}: output directory, where the files generated after executing the current command will be placed in. 19 | 20 | before_tamper: # this will be executed before files tampering. Generate your ecc/database files here. 21 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/hecc.txt" --size 4096 --ecc_algo 3 -g -f --silent 22 | #python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/ecc.txt" -r1 0.3 -r2 0.2 -r3 0.1 -g -f --ecc_algo 3 --silent 23 | 24 | tamper: # parameters to tamper the files and even the database files. 25 | python filetamper.py -i "{inputdir}" -m "n" -p 0.001 -b "3|6" --header 4096 --silent 26 | python filetamper.py -i "{dbdir}" -m "n" -p 0.0001 -b "4|9" --header 4096 --silent 27 | 28 | after_tamper: # execute commands after tampering. 
Can be used to recover 29 | python repair_ecc.py -i "{dbdir}/hecc.txt" --index "{dbdir}/hecc.txt.idx" -o "{dbdir}/heccrep.txt" -t 0.4 -f --silent 30 | #python repair_ecc.py -i "{dbdir}/ecc.txt" --index "{dbdir}/ecc.txt.idx" -o "{dbdir}/eccrep.txt" -t 0.4 -f --silent 31 | 32 | repair: 33 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/heccrep.txt" -o "{outputdir}" -c --size 4096 --no_fast_check --ecc_algo 3 --silent 34 | #python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/eccrep.txt" -o "{outputdir}" -c -r1 0.3 -r2 0.2 -r3 0.1 -f --ecc_algo 3 --silent 35 | 36 | none: 37 | # used for unit testing 38 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/charts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate charts from gathered data. 3 | 4 | Requires **matplotlib**. 5 | """ 6 | 7 | try: 8 | import matplotlib 9 | matplotlib.use('Agg') 10 | import matplotlib.pyplot as plt 11 | 12 | def tracker_timespace(filename, stats): 13 | """ 14 | Create a time-space chart from a ``Stats`` instance. 
15 | """ 16 | classlist = list(stats.index.keys()) 17 | classlist.sort() 18 | 19 | for snapshot in stats.snapshots: 20 | stats.annotate_snapshot(snapshot) 21 | 22 | timestamps = [fp.timestamp for fp in stats.snapshots] 23 | offsets = [0] * len(stats.snapshots) 24 | poly_labels = [] 25 | polys = [] 26 | for clsname in classlist: 27 | pct = [fp.classes[clsname]['pct'] for fp in stats.snapshots] 28 | if max(pct) > 3.0: 29 | sizes = [fp.classes[clsname]['sum'] for fp in stats.snapshots] 30 | sizes = [float(x)/(1024*1024) for x in sizes] 31 | sizes = [offset+size for offset, size in zip(offsets, sizes)] 32 | poly = matplotlib.mlab.poly_between(timestamps, offsets, sizes) 33 | polys.append( (poly, {'label': clsname}) ) 34 | poly_labels.append(clsname) 35 | offsets = sizes 36 | 37 | fig = plt.figure(figsize=(10, 4)) 38 | axis = fig.add_subplot(111) 39 | 40 | axis.set_title("Snapshot Memory") 41 | axis.set_xlabel("Execution Time [s]") 42 | axis.set_ylabel("Virtual Memory [MiB]") 43 | 44 | totals = [x.asizeof_total for x in stats.snapshots] 45 | totals = [float(x)/(1024*1024) for x in totals] 46 | axis.plot(timestamps, totals, 'r--', label='Total') 47 | tracked = [x.tracked_total for x in stats.snapshots] 48 | tracked = [float(x)/(1024*1024) for x in tracked] 49 | axis.plot(timestamps, tracked, 'b--', label='Tracked total') 50 | 51 | for (args, kwds) in polys: 52 | axis.fill(*args, **kwds) 53 | axis.legend(loc=2) # TODO fill legend 54 | fig.savefig(filename) 55 | 56 | except ImportError: 57 | def tracker_timespace(*_args): 58 | pass 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /pyFileFixity/tests/test_tee.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import shutil 6 | 7 | from .aux_tests import get_marker, dummy_ecc_file_gen, check_eq_files, check_eq_dir, path_sample_files, tamper_file, find_next_entry, 
create_dir_if_not_exist, remove_if_exist 8 | 9 | from ..lib.tee import Tee 10 | 11 | from ..lib._compat import _StringIO 12 | 13 | def setup_module(): 14 | """ Initialize the tests by emptying the out directory """ 15 | outfolder = path_sample_files('output') 16 | shutil.rmtree(outfolder, ignore_errors=True) 17 | create_dir_if_not_exist(outfolder) 18 | 19 | def test_tee_file(): 20 | """ tee: test tee file output """ 21 | instring1 = b"First line\nSecond line\n" 22 | instring2 = b"Third line\n" 23 | filelog = path_sample_files('output', 'tee1.log') 24 | remove_if_exist(filelog) 25 | # Write first string 26 | t = Tee(filelog, 'wb', nostdout=True) 27 | t.write(instring1, end='') 28 | del t # deleting Tee should close the file 29 | with open(filelog, 'rb') as fl: 30 | res1 = fl.read() 31 | assert res1 == instring1 32 | # Write second string while appending 33 | t2 = Tee(filelog, 'ab', nostdout=True) 34 | t2.write(instring2, end='') 35 | del t2 # deleting Tee should close the file 36 | with open(filelog, 'rb') as fl: 37 | res2 = fl.read() 38 | assert res2 == instring1+instring2 39 | 40 | def test_tee_stdout(): 41 | """ tee: test tee stdout """ 42 | instring1 = "First line\nSecond line\n" 43 | instring2 = "Third line\n" 44 | filelog = path_sample_files('output', 'tee2.log') 45 | remove_if_exist(filelog) 46 | # Access stdout and memorize the cursor position just before the test 47 | sysout = sys.stdout 48 | startpos = sysout.tell() 49 | # Write first string 50 | t = Tee() 51 | t.write(instring1, end='') 52 | del t # deleting Tee should close the file 53 | # Read stdout and check Tee wrote into stdout 54 | sysout.seek(startpos) 55 | assert sysout.read() == instring1 56 | # Write second string 57 | t2 = Tee() 58 | t2.write(instring2, end='', flush=False) 59 | t2.flush() # try to manually flush by the way 60 | del t2 # deleting Tee should close the file 61 | # Read stdout and check Tee appended the second string into stdout 62 | sysout.seek(startpos) 63 | assert 
sysout.read().startswith(instring1+instring2) # sys.stdout appends a newline return at the second writing, don't know why... 64 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/kthread.py: -------------------------------------------------------------------------------- 1 | ''' 2 | kthread.py: A killable thread implementation. 3 | 4 | Copyright (C) 2004 Connelly Barnes (connellybarnes@yahoo.com) 5 | 6 | This module allows you to kill threads. The class KThread is a drop-in 7 | replacement for threading.Thread. It adds the kill() method, which should stop 8 | most threads in their tracks. 9 | 10 | This library is free software; you can redistribute it and/or modify it under 11 | the terms of the GNU Lesser General Public License as published by the Free 12 | Software Foundation; either version 2.1 of the License, or (at your option) 13 | any later version. 14 | 15 | This library is distributed in the hope that it will be useful, but WITHOUT 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 18 | details. 
19 | 20 | You should have received a copy of the GNU Lesser General Public License along 21 | with this library; if not, write to the Free Software Foundation, Inc., 59 22 | Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 | ''' 24 | 25 | __first__ = '2004.9.9' 26 | __last__ = '2004.10.29' 27 | 28 | import sys 29 | import trace 30 | import threading 31 | import time 32 | 33 | class KThreadError(Exception): 34 | '''Encapsulates KThread exceptions.''' 35 | pass 36 | 37 | class KThread(threading.Thread): 38 | """A subclass of threading.Thread, with a kill() method.""" 39 | def __init__(self, *args, **keywords): 40 | threading.Thread.__init__(self, *args, **keywords) 41 | self.killed = False 42 | 43 | def start(self): 44 | """Start the thread.""" 45 | self.__run_backup = self.run 46 | self.run = self.__run # Force the Thread to install our trace. 47 | threading.Thread.start(self) 48 | 49 | def __run(self): 50 | """Hacked run function, which installs the trace.""" 51 | sys.settrace(self.globaltrace) 52 | self.__run_backup() 53 | self.run = self.__run_backup 54 | 55 | def globaltrace(self, frame, why, arg): 56 | if why == 'call': 57 | return self.localtrace 58 | else: 59 | return None 60 | 61 | def localtrace(self, frame, why, arg): 62 | if self.killed: 63 | if why == 'line': 64 | raise SystemExit() 65 | return self.localtrace 66 | 67 | def kill(self): 68 | self.killed = True 69 | 70 | if __name__ == '__main__': 71 | 72 | def func(): 73 | print('Function started') 74 | for i in xrange(1000000): 75 | pass 76 | print('Function finished') 77 | 78 | A = KThread(target=func) 79 | A.start() 80 | for i in xrange(1000): 81 | pass 82 | A.kill() 83 | 84 | print('End of main program') 85 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/middleware.py: -------------------------------------------------------------------------------- 1 | from django.http import HttpResponse 2 | from django.conf import 
settings 3 | from pyinstrument import Profiler 4 | from pyinstrument.profiler import NotMainThreadError 5 | import time 6 | import os 7 | 8 | not_main_thread_message = ( 9 | "pyinstrument can only be used on the main thread in signal mode. Run your server process in " 10 | "single-threaded mode. \n\n" 11 | "With the built-in server, you can do this with\n" 12 | "./manage.py runserver --nothreading --noreload\n\n" 13 | "Alternatively, you can set 'PYINSTRUMENT_USE_SIGNAL = False' in your settings.py to run in" 14 | "'setprofile' mode. For more information, see\n" 15 | "https://github.com/joerick/pyinstrument#signal-or-setprofile-mode") 16 | 17 | 18 | class ProfilerMiddleware(object): 19 | def process_request(self, request): 20 | profile_dir = getattr(settings, 'PYINSTRUMENT_PROFILE_DIR', None) 21 | use_signal = getattr(settings, 'PYINSTRUMENT_USE_SIGNAL', True) 22 | 23 | if getattr(settings, 'PYINSTRUMENT_URL_ARGUMENT', 'profile') in request.GET or profile_dir: 24 | profiler = Profiler(use_signal=use_signal) 25 | try: 26 | profiler.start() 27 | request.profiler = profiler 28 | except NotMainThreadError: 29 | raise NotMainThreadError(not_main_thread_message) 30 | 31 | 32 | def process_response(self, request, response): 33 | if hasattr(request, 'profiler'): 34 | try: 35 | request.profiler.stop() 36 | 37 | output_html = request.profiler.output_html() 38 | 39 | profile_dir = getattr(settings, 'PYINSTRUMENT_PROFILE_DIR', None) 40 | 41 | if profile_dir: 42 | filename = '{total_time:.3f}s {path} {timestamp:.0f}.html'.format( 43 | total_time=request.profiler.root_frame().time(), 44 | path=request.get_full_path().replace('/', '_'), 45 | timestamp=time.time() 46 | ) 47 | 48 | file_path = os.path.join(profile_dir, filename) 49 | 50 | if not os.path.exists(profile_dir): 51 | os.mkdir(profile_dir) 52 | 53 | with open(file_path, 'w') as f: 54 | f.write(output_html) 55 | 56 | if getattr(settings, 'PYINSTRUMENT_URL_ARGUMENT', 'profile') in request.GET: 57 | return 
from pympler.refgraph import ReferenceGraph
from pympler.util.stringutils import trunc, pp

import sys
import gc

# Public API of this module.
__all__ = ['GarbageGraph', 'start_debug_garbage', 'end_debug_garbage']


class GarbageGraph(ReferenceGraph):
    """
    The ``GarbageGraph`` is a ``ReferenceGraph`` that illustrates the objects building
    reference cycles. The garbage collector is switched to debug mode (all
    identified garbage is stored in `gc.garbage`) and the garbage collector is
    invoked. The collected objects are then illustrated in a directed graph.

    Large graphs can be reduced to the actual cycles by passing ``reduce=True`` to
    the constructor.

    It is recommended to disable the garbage collector when using the
    ``GarbageGraph``.

    >>> from pympler.garbagegraph import GarbageGraph, start_debug_garbage
    >>> start_debug_garbage()
    >>> l = []
    >>> l.append(l)
    >>> del l
    >>> gb = GarbageGraph()
    >>> gb.render('garbage.eps')
    True
    """
    def __init__(self, reduce=False, collectable=True):
        """
        Initialize the GarbageGraph with the objects identified by the garbage
        collector. If `collectable` is true, every reference cycle is recorded.
        Otherwise only uncollectable objects are reported.
        """
        # DEBUG_SAVEALL makes the collector keep *every* unreachable object in
        # gc.garbage instead of freeing it, so collectable cycles can be
        # inspected; with debug flags cleared, only uncollectable objects land
        # in gc.garbage.
        if collectable:
            gc.set_debug(gc.DEBUG_SAVEALL)
        else:
            gc.set_debug(0)
        # Force a collection now so gc.garbage is populated before building
        # the graph from it.
        gc.collect()

        ReferenceGraph.__init__(self, gc.garbage, reduce)

    def print_stats(self, stream=None):
        """
        Log annotated garbage objects to console or file.

        :param stream: open file, uses sys.stdout if not given
        """
        if not stream: # pragma: no cover
            stream = sys.stdout
        # Largest objects first (sort key negates size for descending order).
        self.metadata.sort(key=lambda x: -x.size)
        stream.write('%-10s %8s %-12s %-46s\n' % ('id', 'size', 'type', 'representation'))
        for g in self.metadata:
            # trunc() keeps the type and repr columns within fixed widths.
            stream.write('0x%08x %8d %-12s %-46s\n' % (g.id, g.size, trunc(g.type, 12),
                trunc(g.str, 46)))
        # pp() pretty-prints the byte count (e.g. "1.2 KB").
        stream.write('Garbage: %8d collected objects (%s in cycles): %12s\n' % \
            (self.count, self.num_in_cycles, pp(self.total_size)))


def start_debug_garbage():
    """
    Turn off garbage collector to analyze *collectable* reference cycles.
    """
    # Flush anything already unreachable first, then disable automatic
    # collection so later garbage is only gathered explicitly (e.g. by
    # constructing a GarbageGraph).
    gc.collect()
    gc.disable()


def end_debug_garbage():
    """
    Turn garbage collection on and disable debug output.
    """
    gc.set_debug(0)
    gc.enable()
"""Attempt to determine the current user's "system" directories"""
try:
    ## raise ImportError
    from win32com.shell import shell, shellcon
except ImportError:
    # Not on Windows, or win32all extensions unavailable.
    shell = None
try:
    import _winreg
except ImportError:
    # NOTE(review): on Python 3 this module was renamed to ``winreg``; a
    # fallback ``import winreg as _winreg`` could restore registry support
    # there -- confirm desired behavior before enabling it.
    _winreg = None
import os, sys


## The registry keys where the SHGetFolderPath values appear to be stored
r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
r"HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"


def _winreg_getShellFolder( name ):
    """Get a shell folder by string name from the registry"""
    k = _winreg.OpenKey(
        _winreg.HKEY_CURRENT_USER,
        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
    )
    try:
        # should check that it's valid? How?
        return _winreg.QueryValueEx( k, name )[0]
    finally:
        # Always release the registry handle, even if the query raises.
        _winreg.CloseKey( k )


def shell_getShellFolder( type ):
    """Get a shell folder by shell-constant from COM interface"""
    return shell.SHGetFolderPath(
        0,# null hwnd
        type, # the (roaming) appdata path
        0,# null access token (no impersonation)
        0 # want current value, shellcon.SHGFP_TYPE_CURRENT isn't available, this seems to work
    )


def appdatadirectory( ):
    """Attempt to retrieve the current user's app-data directory

    This is the location where application-specific
    files should be stored. On *nix systems, this will
    be the ${HOME}/.config directory. On Win32 systems, it will be
    the "Application Data" directory. Note that for
    Win32 systems it is normal to create a sub-directory
    for storing data in the Application Data directory.
    """
    if shell:
        # on Win32 and have Win32all extensions, best-case
        return shell_getShellFolder(shellcon.CSIDL_APPDATA)
    if _winreg:
        # on Win32, but no Win32 shell com available, this uses
        # a direct registry access, likely to fail on Win98/Me
        return _winreg_getShellFolder( 'AppData' )
    # okay, what if for some reason _winreg is missing? would we want to allow ctypes?
    ## default case, look for name in environ...
    for name in ['APPDATA', 'HOME']:
        if name in os.environ:
            return os.path.join( os.environ[name], '.config' )
    # well, someone's being naughty, see if we can get ~ to expand to a directory...
    possible = os.path.abspath(os.path.expanduser( '~/.config' ))
    if os.path.exists( possible ):
        return possible
    raise OSError( """Unable to determine user's application-data directory, no ${HOME} or ${APPDATA} in environment""" )


if __name__ == "__main__":
    # BUGFIX: this was the Python 2 statement ``print 'AppData', ...`` which
    # is a SyntaxError on Python 3 and made the whole module unimportable.
    print('AppData', appdatadirectory())
class PStatsAdapter(squaremap.DefaultAdapter):
    """Squaremap adapter exposing pstats profile nodes (time-per-call view)."""

    # When True, label() renders times as a percentage of ``total``.
    percentageView = False
    total = 0

    # Which of the loader's trees this adapter walks (calls vs. files).
    TREE = pstatsloader.TREE_CALLS

    def value(self, node, parent=None):
        """Return the node's weight: fraction of the parent group's cumulative
        time, the absolute cumulative time at the root, or the per-parent
        cumulative time otherwise."""
        if isinstance(parent, pstatsloader.PStatGroup):
            if parent.cumulative:
                return node.cumulative / parent.cumulative
            else:
                # Avoid division by zero for empty groups.
                return 0
        elif parent is None:
            return node.cumulative
        return parent.child_cumulative_time(node)

    def label(self, node):
        """Human-readable label: "file / dir" for groups, otherwise
        "name@file:line [time]" with time as seconds or percentage."""
        if isinstance(node, pstatsloader.PStatGroup):
            return '%s / %s' % (node.filename, node.directory)
        if self.percentageView and self.total:
            time = '%0.2f%%' % round(node.cumulative * 100.0 / self.total, 2)
        else:
            time = '%0.3fs' % round(node.cumulative, 3)
        return '%s@%s:%s [%s]' % (node.name, node.filename, node.lineno, time)

    def empty(self, node):
        """Fraction of the node's time spent locally (not in callees)."""
        if node.cumulative:
            return node.local / float(node.cumulative)
        return 0.0

    def parents(self, node):
        """Determine all parents of node in our tree"""
        # Only keep parents that belong to the same tree as this adapter.
        return [
            parent for parent in
            getattr( node, 'parents', [] )
            if getattr(parent, 'tree', self.TREE) == self.TREE
        ]

    # Lazily-created cache mapping node.key -> wx.Colour.
    # NOTE(review): defined as a class attribute, so once created the dict is
    # shared by all instances of this adapter class -- confirm intended.
    color_mapping = None

    def background_color(self, node, depth):
        """Create a (unique-ish) background color for each node"""
        if self.color_mapping is None:
            self.color_mapping = {}
        color = self.color_mapping.get(node.key)
        if color is None:
            # The ``depth`` parameter is deliberately overwritten here: the
            # color is derived from the cache size so each new node gets a
            # distinct hue, regardless of its tree depth.
            depth = len(self.color_mapping)
            red = (depth * 10) % 255
            green = 200 - ((depth * 5) % 200)
            blue = (depth * 25) % 200
            self.color_mapping[node.key] = color = wx.Colour(red, green, blue)
        return color

    def SetPercentage(self, percent, total):
        """Set whether to display percentage values (and total for doing so)"""
        self.percentageView = percent
        self.total = total

    def filename( self, node ):
        """Extension to squaremap api to provide "what file is this" information"""
        if not node.directory:
            # TODO: any cases other than built-ins?
            return None
        if node.filename == '~':
            # TODO: look up C/Cython/whatever source???
            return None
        return os.path.join(node.directory, node.filename)


class DirectoryViewAdapter(PStatsAdapter):
    """Provides a directory-view-only adapter for PStats objects"""
    TREE = pstatsloader.TREE_FILES
    def children(self, node):
        # Only groups (directories/files) have children in this view;
        # individual stat rows are leaves.
        if isinstance(node, pstatsloader.PStatGroup):
            return node.children
        return []
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '27 18 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Use only 'java' to analyze code written in Java, Kotlin or both 38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 40 | 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v3 44 | 45 | # Initializes the CodeQL tools for scanning. 46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@v2 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 53 | 54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 55 | # queries: security-extended,security-and-quality 56 | 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild 61 | uses: github/codeql-action/autobuild@v2 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 
class Tee(object):
    """ Redirect print output to the terminal as well as in a log file """

    def __init__(self, name=None, mode=None, nostdout=False, silent=False):
        """Install the tee.

        :param name: path of the log file (only opened if ``mode`` is also given).
        :param mode: open() mode for the log file (e.g. 'a', 'wb').
        :param nostdout: if True, do not hijack sys.stdout (file-only tee).
        :param silent: if True, swallow all output entirely.
        """
        self.file = None
        self.nostdout = nostdout
        self.silent = silent
        if not nostdout:
            # Keep a handle on the real stdout so close() can restore it.
            self.stdout = sys.stdout
            sys.stdout = self
        if name is not None and mode is not None:
            self.filename = name
            self.filemode = mode
            self.file = open(name, mode)

    def close(self):
        """ Restore stdout and close file when Tee is closed """
        try:
            self.flush() # commit all latest changes before exiting
        # BUGFIX: was a bare ``except:`` which would also swallow SystemExit
        # and KeyboardInterrupt; only runtime errors (e.g. flushing an
        # already-closed file) should be ignored here.
        except Exception:
            pass # sometimes it's already closed, just skip
        if not self.nostdout and hasattr(self, 'stdout'):
            sys.stdout = self.stdout
            self.stdout = None
        if self.file: self.file.close()

    def __del__(self):
        # Best-effort cleanup on garbage collection; close() tolerates being
        # called more than once.
        self.close()

    def write(self, data, end="\n", flush=True):
        """ Output data to stdout and/or file """
        if not self.silent:
            if not self.nostdout:
                self.stdout.write(data)
                self.stdout.write(end)
            if self.file is not None:
                # Binary mode: need to convert to byte objects if Python 3
                if 'b' in self.filemode:
                    data = b(data)
                    end = b(end)
                self.file.write(data)
                self.file.write(end)
            if flush:
                self.flush()

    def flush(self):
        """ Force commit changes to the file and stdout """
        if not self.silent:
            if not self.nostdout:
                self.stdout.flush()
            if self.file is not None:
                self.file.flush()
def loads( source ):
    """Load json structure from meliae from source

    Supports only the required structures to support loading meliae memory dumps
    """
    source = source.strip()
    # Meliae emits exactly one flat JSON object per line; anything else is
    # outside this parser's contract.
    assert source.startswith( '{' )
    assert source.endswith( '}' )
    # Strip the surrounding braces and scan the key/value pairs directly.
    source = source[1:-1]
    result = {}
    for match in attr.finditer( source ):
        key = match.group('key')
        if match.group( 'list' ) is not None:
            # List of integers, e.g. "[1, 2,3]": normalize separators and split.
            value = [
                int(x)
                for x in match.group( 'list' ).strip().replace(',',' ').split()
            ]
        elif match.group( 'int' ) is not None:
            value = int( match.group( 'int' ))
        elif match.group( 'string' ) is not None:
            # Note: this inner ``match`` parameter shadows the loop variable on
            # purpose -- it receives each \\uXXXX escape found by escape.sub().
            def deescape( match ):
                # NOTE(review): ``unichr`` and ``str.decode`` below are
                # Python 2 only; this branch will raise NameError/AttributeError
                # on Python 3 -- confirm whether this vendored module is still
                # expected to run there.
                return unichr( int( match.group(0)[2:], 16 ))
            value = match.group('string').decode( 'utf-8' )
            # Expand \uXXXX escapes into real characters...
            value = escape.sub(
                deescape,
                value,
            )
            # ...then collapse simple escapes like \" and \\ to the bare char.
            value = simple_escape.sub(
                lambda x: x.group(1),
                value,
            )
        else:
            raise RuntimeError( "Matched something we don't know how to process:", match.groupdict() )
        result[key] = value
    return result
if __name__ == "__main__":
    # Smoke test: parse a meliae dump file with the reference json module
    # (comparison against loads() is presumably done manually/elsewhere).
    import sys, pprint
    for line in open( sys.argv[1] ):
        official = json.loads( line )
3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 4 | # and https://www.youtube.com/watch?v=l6fV09z5XHk 5 | 6 | name: ci-build-dowstream 7 | 8 | on: 9 | push: 10 | branches: 11 | - master # $default-branch only works in Workflows templates, not in Workflows, see https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable 12 | pull_request: 13 | branches: 14 | - master 15 | 16 | jobs: 17 | testdownstream: 18 | name: Unit test downstream package depending on our package 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: ["*", "pypy-3.9"] # check the list of versions: https://github.com/actions/python-versions/releases and https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md -- note that "*" represents the latest stable version of Python 24 | os: [ ubuntu-latest, macos-latest, windows-latest ] 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v3 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | cache: 'pip' 32 | # You can test your matrix by printing the current Python version 33 | - name: Display Python version 34 | run: | 35 | python -c "import sys; print(sys.version)" 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install --upgrade pip 39 | # The rest is managed by the pyproject.toml 40 | - name: Echo current Python version 41 | run: echo "${{ matrix.python-version }}" 42 | # - name: Compile the Cython extension 43 | # if: ${{ matrix.python-version != 'pypy-3.9' }} # ${{}} GitHub expression syntax, need to place the target python-version in single quotes (not double quotes!) 
so that it does not stop parsing the literal at dots, otherwise dots will truncate the string https://docs.github.com/en/actions/learn-github-actions/expressions 44 | # run: | 45 | # pip install --upgrade --config-setting="--install-option=--no-cython-compile" cython>=3.0.0b2 46 | - name: Test installing and unit testing pyFileFixity from git 47 | # Make sure to have a .gitattributes file with `* -text` without quotes inside, to prevent automatic crlf line endings conversions/normalization by git. 48 | # FIXME: Need to use the @ form once issue https://github.com/pypa/pip/issues/11951 is fixed, as supplying extras to an egg fragment is deprecated and will be removed in pip v25. 49 | run: | 50 | pip install --upgrade --editable git+https://github.com/lrq3000/pyFileFixity.git#egg=pyFileFixity[test] --verbose 51 | pytest src/pyfilefixity 52 | - name: Get files contents (if failure) 53 | # See: https://stackoverflow.com/questions/60679609/github-action-to-simply-cat-a-file-to-an-output 54 | # https://www.howtohaven.com/system/view-binary-file-on-windows.shtml 55 | id: vars 56 | if: failure() # || success() 57 | run: | 58 | format-hex src/pyfilefixity/pyFileFixity/tests/out/d_dir.csv 59 | format-hex src/pyfilefixity/pyFileFixity/tests/results/test_rfigc_test_dir.csv 60 | shell: pwsh 61 | -------------------------------------------------------------------------------- /pyFileFixity/lib/hasher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Hash manager facade api 4 | # Allows to easily use different kinds of hashing algorithms, size and libraries under one single class. 
class Hasher(object):
    '''Class to provide a hasher object with various hashing algorithms. What's important is to provide the __len__ so that we can easily compute the block size of ecc entries. Must only use fixed size hashers for the rest of the script to work properly.'''

    # All supported algorithm identifiers (the constructor lowercases its
    # input, so matching is case-insensitive).
    known_algo = ["md5", "shortmd5", "shortsha256", "minimd5", "minisha256", "none"]
    # Fixed attribute set: saves per-instance memory and guards against typos.
    __slots__ = ['algo', 'length']

    def __init__(self, algo="md5"):
        """Select the hashing algorithm and precompute the digest length.

        :param algo: one of ``known_algo`` (case-insensitive).
        :raises NameError: if the algorithm is unknown.
        """
        # Store the selected hashing algo
        self.algo = algo.lower()
        # Precompute length so that it's very fast to access it later
        if self.algo == "md5":
            self.length = 32
        elif self.algo in ("shortmd5", "shortsha256"):
            self.length = 8
        elif self.algo in ("minimd5", "minisha256"):
            self.length = 4
        elif self.algo == "none":
            self.length = 0
        else:
            raise NameError('Hashing algorithm %s is unknown!' % algo)

    def hash(self, mes):
        """Return the fixed-size digest of ``mes`` as a bytes object.

        :param mes: message to hash (converted to bytes via ``b()``).
        :raises NameError: if the selected algorithm is unknown.
        """
        # use hashlib.algorithms_guaranteed to list algorithms
        mes = b(mes)
        if self.algo == "md5":
            return b(hashlib.md5(mes).hexdigest())
        elif self.algo == "shortmd5": # from: http://www.peterbe.com/plog/best-hashing-function-in-python
            return b64encode(b(hashlib.md5(mes).hexdigest()))[:8]
        elif self.algo == "shortsha256":
            return b64encode(b(hashlib.sha256(mes).hexdigest()))[:8]
        elif self.algo == "minimd5":
            return b64encode(b(hashlib.md5(mes).hexdigest()))[:4]
        elif self.algo == "minisha256":
            return b64encode(b(hashlib.sha256(mes).hexdigest()))[:4]
        elif self.algo == "none":
            # BUGFIX: was ``return ''`` (a str), inconsistent with every other
            # branch which returns bytes; a str breaks concatenation with
            # digests and binary-mode writes on Python 3. NOTE(review): if any
            # caller compared the result to '' literally, verify it.
            return b('')
        else:
            raise NameError('Hashing algorithm %s is unknown!' % self.algo)

    def __len__(self):
        """Length in characters of the digests produced by ``hash()``."""
        return self.length
19 | 20 | help: 21 | @+make -p 22 | 23 | alltests: 24 | @+make testcoverage 25 | @+make testsetup 26 | 27 | all: 28 | @make alltests 29 | @make build 30 | 31 | prebuildclean: 32 | @+python -c "import shutil; shutil.rmtree('build', True)" 33 | @+python -c "import shutil; shutil.rmtree('dist', True)" 34 | @+python -c "import shutil; shutil.rmtree('pyFileFixity.egg-info', True)" # very important to delete egg-info before any new build or pip install, otherwise may cause an error that multiple egg-info folders are present, or it may build using old definitions 35 | 36 | coverclean: 37 | @+python -c "import os; os.remove('.coverage') if os.path.exists('.coverage') else None" 38 | @+python -c "import shutil; shutil.rmtree('__pycache__', True)" 39 | @+python -c "import shutil; shutil.rmtree('tests/__pycache__', True)" 40 | 41 | test: 42 | #tox --skip-missing-interpreters 43 | pytest 44 | 45 | testnose: 46 | # Only for Py2 47 | nosetests pyFileFixity/tests/ -d -v 48 | 49 | testpyproject: 50 | validate-pyproject pyproject.toml -v 51 | 52 | testsetuppost: 53 | twine check "dist/*" 54 | 55 | testrst: 56 | rstcheck README.rst 57 | 58 | testcoverage: 59 | @+make coverclean 60 | #nosetests pyFileFixity/tests/ --with-coverage --cover-package=pyFileFixity -d -v # Py2 only 61 | coverage run --branch -m pytest pyFileFixity -v 62 | coverage report -m 63 | 64 | testmalloc: 65 | @+python -X dev -X tracemalloc=5 -m pytest 66 | 67 | installdev: 68 | @+make prebuildclean 69 | # Should work for both Py2 and Py3, --editable option and isolation builds work with both pyproject.toml and setup.cfg 70 | @+python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test,testmeta] --verbose --use-pep517 71 | 72 | installdevpy2: 73 | @+make prebuildclean 74 | @+python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test] --verbose --use-pep517 75 | 76 | 
install: 77 | @+make prebuildclean 78 | @+python -m pip install --upgrade . --verbose --use-pep517 79 | 80 | build: 81 | # requires `pip install build` 82 | @+make testrst 83 | @+make prebuildclean 84 | @+make testpyproject 85 | @+python -sBm build # do NOT use the -w flag, otherwise only the wheel will be built, but we need sdist for source distros such as Debian and Gentoo! 86 | @+make testsetuppost 87 | 88 | buildpy2: 89 | # Py2 only 90 | # requires `pip install build` 91 | @+make testrst 92 | @+make prebuildclean 93 | @+python -sBm build # do NOT use the -w flag, otherwise only the wheel will be built, but we need sdist for source distros such as Debian and Gentoo! 94 | @+make testsetuppost 95 | 96 | buildwheelhouse: 97 | cibuildwheel --platform auto 98 | 99 | upload: 100 | twine upload dist/* 101 | 102 | buildupload: 103 | @+make build 104 | @+make upload 105 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/mprofile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Memory usage profiler for Python. 3 | 4 | """ 5 | import inspect 6 | import sys 7 | 8 | from pympler import muppy 9 | 10 | class MProfiler(object): 11 | """A memory usage profiler class. 12 | 13 | Memory data for each function is stored as a 3-element list in the 14 | dictionary self.memories. The index is always a codepoint (see below). 15 | The following are the definitions of the members: 16 | 17 | [0] = The number of times this function was called 18 | [1] = Minimum memory consumption when this function was measured. 19 | [2] = Maximum memory consumption when this function was measured. 20 | 21 | A codepoint is a list of 3-tuple of the type 22 | (filename, functionname, linenumber). You can omit either element, which 23 | will cause the profiling to be triggered if any of the other criteria 24 | match. E.g. 
class MProfiler(object):
    """A memory usage profiler class.

    Memory data for each function is stored as a 3-element list in the
    dictionary self.memories. The index is always a codepoint (see below).
    The following are the definitions of the members:

    [0] = The number of times this function was called
    [1] = Minimum memory consumption when this function was measured.
    [2] = Maximum memory consumption when this function was measured.

    A codepoint is a 3-tuple of the type
    (filename, functionname, linenumber). You can omit either element, which
    will cause the profiling to be triggered if any of the other criteria
    match. E.g.
    - (None, foo, None), will profile any foo function,
    - (bar, foo, None) will profile only the foo function from the bar file,
    - (bar, foo, 17) will profile only line 17 of the foo function defined
      in the file bar.

    Additionally, you can define on what events you want the profiling be
    triggered. Possible events are defined in
    http://docs.python.org/lib/debugger-hooks.html.

    If you do not define either codepoints or events, the profiler will
    record the memory usage in at every codepoint and event.

    """

    def __init__(self, codepoints=None, events=None):
        """
        keyword arguments:
        codepoints -- a list of points in code to monitor (defaults to all codepoints)
        events -- a list of events to monitor (defaults to all events)
        """
        self.memories = {}
        self.codepoints = codepoints
        self.events = events

    def codepoint_included(self, codepoint):
        """Check if codepoint matches any of the defined codepoints."""
        # Idiom fix: was ``self.codepoints == None``; identity test is the
        # correct comparison against the None singleton.
        if self.codepoints is None:
            return True
        for cp in self.codepoints:
            # A None element in a configured codepoint is a wildcard.
            mismatch = False
            for i in range(len(cp)):
                if (cp[i] is not None) and (cp[i] != codepoint[i]):
                    mismatch = True
                    break
            if not mismatch:
                return True
        return False

    def profile(self, frame, event, arg): #PYCHOK arg requ. to match signature
        """Profiling method used to profile matching codepoints and events."""
        if (self.events is None) or (event in self.events):
            frame_info = inspect.getframeinfo(frame)
            # getframeinfo returns (filename, lineno, function, ...); reorder
            # to the (filename, functionname, linenumber) codepoint layout.
            cp = (frame_info[0], frame_info[2], frame_info[1])
            if self.codepoint_included(cp):
                objects = muppy.get_objects()
                size = muppy.get_size(objects)
                if cp not in self.memories:
                    # BUGFIX: was initialized as a 4-element [0,0,0,0] and then
                    # patched slot by slot, although only three slots are
                    # documented/used: [count, min, max].
                    self.memories[cp] = [1, size, size]
                else:
                    self.memories[cp][0] += 1
                    if self.memories[cp][1] > size:
                        self.memories[cp][1] = size
                    if self.memories[cp][2] < size:
                        self.memories[cp][2] = size

    def run(self, cmd):
        """Profile the execution of ``cmd`` (a source string) and return self.

        NOTE: ``cmd`` is exec()'d -- only pass trusted code.
        """
        sys.setprofile(self.profile)
        try:
            exec(cmd)
        finally:
            # Always uninstall the hook, even if cmd raised.
            sys.setprofile(None)
        return self

if __name__ == "__main__":
    p = MProfiler()
    # BUGFIX: was the Python 2 statement ``p.run("print 'hello'")``, whose
    # exec() raises SyntaxError on Python 3.
    p.run("print('hello')")
    print(p.memories)
4 | [metadata] 5 | name = pyFileFixity 6 | version = attr: pyFileFixity.__version__ 7 | author = Stephen Karl Larroque 8 | author_email = lrq3000@gmail.com 9 | description = Helping file fixity (long term storage of data) via redundant error correcting codes and hash auditing. 10 | long_description = file: README.rst, LICENSE 11 | long_description_content_type = text/x-rst 12 | keywords = file, repair, monitor, change, reed-solomon, error, correction, error correction, parity, parity files, parity bytes, data protection, data recovery, file protection, qr codes, qr code 13 | license = MIT License 14 | classifiers = 15 | Development Status :: 5 - Production/Stable 16 | License :: OSI Approved :: MIT License 17 | Environment :: Console 18 | Operating System :: Microsoft :: Windows 19 | Operating System :: MacOS :: MacOS X 20 | Operating System :: POSIX :: Linux 21 | Programming Language :: Python 22 | Programming Language :: Python :: 2.7 23 | Programming Language :: Python :: 3 24 | Programming Language :: Python :: 3.7 25 | Programming Language :: Python :: 3.8 26 | Programming Language :: Python :: 3.9 27 | Programming Language :: Python :: 3.10 28 | Programming Language :: Python :: 3.11 29 | Programming Language :: Python :: 3.12 30 | Programming Language :: Python :: Implementation :: PyPy 31 | Topic :: Software Development :: Libraries 32 | Topic :: Software Development :: Libraries :: Python Modules 33 | Topic :: System :: Archiving 34 | Topic :: System :: Archiving :: Backup 35 | Topic :: System :: Monitoring 36 | Topic :: System :: Recovery Tools 37 | Topic :: Utilities 38 | Intended Audience :: Developers 39 | Intended Audience :: End Users/Desktop 40 | Intended Audience :: Information Technology 41 | Intended Audience :: System Administrators 42 | 43 | [options] 44 | zip_safe = False 45 | include_package_data = True 46 | #packages = find_namespace: # not supported in python < 3.3 47 | packages = find: 48 | include = pyFileFixity 49 | python_requires = 
>=2.7 50 | install_requires = 51 | importlib-metadata; python_version<"3.8" 52 | pathlib2 53 | argparse 54 | sortedcontainers 55 | tqdm 56 | distance 57 | reedsolo==1.7.0; python_version<"3" 58 | unireedsolomon==1.0.5; python_version<"3" 59 | reedsolo>=2.0.0b1; python_version>="3.7" 60 | unireedsolomon>=1.0.6b1; python_version>="3.7" 61 | 62 | [options.package_data] 63 | * = *.rst, LICENSE*, README*, *.pyx, *.c 64 | #pyFileFixity = ecc_specification.txt, resiliency_tester_config.txt # does not work... 65 | #pyFileFixity.tests.files = * 66 | #pyFileFixity.tests.results = * 67 | 68 | #[options.entry_points] 69 | #console_scripts = 70 | # executable-name = pyFileFixity.module:function 71 | 72 | [options.extras_require] 73 | test = pytest; pytest-cov; py-make # minimum test dependencies. To support coveralls in Py2, use coveralls<4 74 | testmeta = build; twine; validate-pyproject; rstcheck # dependencies to test meta-data. Note that some of these dependencies make cibuildwheel choke on cryptography 75 | 76 | #[options.packages.find] 77 | #exclude = 78 | # examples* 79 | # tools* 80 | # docs* 81 | # pyFileFixity.tests* 82 | -------------------------------------------------------------------------------- /.github/workflows/ci-build.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies and run tests with a variety of Python versions 2 | # It uses the Python Package GitHub Actions workflow. 
3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 4 | # and https://www.youtube.com/watch?v=l6fV09z5XHk 5 | 6 | name: ci-build 7 | 8 | on: 9 | push: 10 | branches: 11 | - master # $default-branch only works in Workflows templates, not in Workflows, see https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable 12 | pull_request: 13 | branches: 14 | - master 15 | 16 | jobs: 17 | build: 18 | runs-on: ${{ matrix.os }} 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | python-version: ["3.8", "3.10", "3.11", "*", pypy-3.9] # check the list of versions: https://github.com/actions/python-versions/releases and https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md -- note that "*" represents the latest stable version of Python 23 | os: [ ubuntu-latest, windows-latest, macos-latest ] # jobs that run on Windows and macOS runners that GitHub hosts consume minutes at 2 and 10 times the rate that jobs on Linux runners consume respectively. But it's free for public OSS repositories. 
24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | cache: 'pip' 31 | # You can test your matrix by printing the current Python version 32 | - name: Display Python version 33 | run: | 34 | python -c "import sys; print(sys.version)" 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | #python -m pip install pytest pytest-cov # done in setup.cfg for Py2 or pyproject.toml for Py3 39 | #if [ ${{ matrix.python-version }} <= 3.7 ]; then python -m pip install 'coverage<4'; else python -m pip install coverage; fi 40 | #if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 41 | - name: Install this module without testmeta 42 | if: ${{ matrix.python-version != '*' }} 43 | #if: ${{ matrix.python-version >= 3 }} # does not work on dynamic versions, see: https://github.com/actions/setup-python/issues/644 44 | # Do not import testmeta, they make the build fails somehow, because some dependencies are unavailable on Py2 45 | # We use test.pypi.org to test against cutting-edge builds of reedsolo 46 | run: | 47 | python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test] --verbose --use-pep517 48 | - name: Install this module with testmeta packages 49 | if: ${{ matrix.python-version == '*' }} 50 | run: | 51 | python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test,testmeta] --verbose --use-pep517 52 | - name: Test with pytest 53 | run: | 54 | coverage run --branch -m pytest . 
-v 55 | coverage report -m 56 | - name: Upload coverage to Codecov 57 | uses: codecov/codecov-action@v4 58 | with: 59 | token: ${{ secrets.CODECOV_TOKEN }} # now required even for public repos, and also advised to avoid rate-limiting API by GitHub which makes the upload fails randomly: https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954/9 and https://github.com/codecov/codecov-action/issues/598 60 | #directory: ./coverage/reports/ 61 | env_vars: OS,PYTHON 62 | fail_ci_if_error: true 63 | #files: ./coverage1.xml,./coverage2.xml 64 | flags: unittests 65 | name: codecov-umbrella 66 | verbose: true 67 | - name: Build sdist (necessary for the other tests below) 68 | if: ${{ matrix.python-version == '*' }} 69 | run: python -sBm build 70 | - name: Twine check 71 | if: ${{ matrix.python-version == '*' }} 72 | run: | 73 | twine check "dist/*" 74 | rstcheck README.rst 75 | - name: pyproject.toml validity 76 | if: ${{ matrix.python-version == '*' }} 77 | run: validate-pyproject pyproject.toml -v 78 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/profilebrowser.py: -------------------------------------------------------------------------------- 1 | # Excerpt from pstats.py rev 1.0 4/1/94 (from python v2.6) which define this class only when it's the entry point (main), so it is here copied to avoid compatibility issues with the next python's releases. 
2 | 3 | from pstats import * 4 | 5 | import cmd 6 | try: 7 | import readline 8 | except ImportError: 9 | pass 10 | 11 | class ProfileBrowser(cmd.Cmd): 12 | def __init__(self, profile=None): 13 | cmd.Cmd.__init__(self) 14 | self.prompt = "% " 15 | if profile is not None: 16 | self.stats = Stats(profile) 17 | self.stream = self.stats.stream 18 | else: 19 | self.stats = None 20 | self.stream = sys.stdout 21 | 22 | def generic(self, fn, line): 23 | args = line.split() 24 | processed = [] 25 | for term in args: 26 | try: 27 | processed.append(int(term)) 28 | continue 29 | except ValueError: 30 | pass 31 | try: 32 | frac = float(term) 33 | if frac > 1 or frac < 0: 34 | print >> self.stream, "Fraction argument must be in [0, 1]" 35 | continue 36 | processed.append(frac) 37 | continue 38 | except ValueError: 39 | pass 40 | processed.append(term) 41 | if self.stats: 42 | getattr(self.stats, fn)(*processed) 43 | else: 44 | print >> self.stream, "No statistics object is loaded." 45 | return 0 46 | def generic_help(self): 47 | print >> self.stream, "Arguments may be:" 48 | print >> self.stream, "* An integer maximum number of entries to print." 49 | print >> self.stream, "* A decimal fractional number between 0 and 1, controlling" 50 | print >> self.stream, " what fraction of selected entries to print." 51 | print >> self.stream, "* A regular expression; only entries with function names" 52 | print >> self.stream, " that match it are printed." 53 | 54 | def do_add(self, line): 55 | self.stats.add(line) 56 | return 0 57 | def help_add(self): 58 | print >> self.stream, "Add profile info from given file to current statistics object." 59 | 60 | def do_callees(self, line): 61 | return self.generic('print_callees', line) 62 | def help_callees(self): 63 | print >> self.stream, "Print callees statistics from the current stat object." 
64 | self.generic_help() 65 | 66 | def do_callers(self, line): 67 | return self.generic('print_callers', line) 68 | def help_callers(self): 69 | print >> self.stream, "Print callers statistics from the current stat object." 70 | self.generic_help() 71 | 72 | def do_EOF(self, line): 73 | print >> self.stream, "" 74 | return 1 75 | def help_EOF(self): 76 | print >> self.stream, "Leave the profile brower." 77 | 78 | def do_quit(self, line): 79 | return 1 80 | def help_quit(self): 81 | print >> self.stream, "Leave the profile brower." 82 | 83 | def do_read(self, line): 84 | if line: 85 | try: 86 | self.stats = Stats(line) 87 | except IOError, args: 88 | print >> self.stream, args[1] 89 | return 90 | self.prompt = line + "% " 91 | elif len(self.prompt) > 2: 92 | line = self.prompt[-2:] 93 | else: 94 | print >> self.stream, "No statistics object is current -- cannot reload." 95 | return 0 96 | def help_read(self): 97 | print >> self.stream, "Read in profile data from a specified file." 98 | 99 | def do_reverse(self, line): 100 | self.stats.reverse_order() 101 | return 0 102 | def help_reverse(self): 103 | print >> self.stream, "Reverse the sort order of the profiling report." 104 | 105 | def do_sort(self, line): 106 | abbrevs = self.stats.get_sort_arg_defs() 107 | if line and not filter(lambda x,a=abbrevs: x not in a,line.split()): 108 | self.stats.sort_stats(*line.split()) 109 | else: 110 | print >> self.stream, "Valid sort keys (unique prefixes are accepted):" 111 | for (key, value) in Stats.sort_arg_dict_default.iteritems(): 112 | print >> self.stream, "%s -- %s" % (key, value[1]) 113 | return 0 114 | def help_sort(self): 115 | print >> self.stream, "Sort profile data according to specified keys." 
116 | print >> self.stream, "(Typing `sort' without arguments lists valid keys.)" 117 | def complete_sort(self, text, *args): 118 | return [a for a in Stats.sort_arg_dict_default if a.startswith(text)] 119 | 120 | def do_stats(self, line): 121 | return self.generic('print_stats', line) 122 | def help_stats(self): 123 | print >> self.stream, "Print statistics from the current stat object." 124 | self.generic_help() 125 | 126 | def do_strip(self, line): 127 | self.stats.strip_dirs() 128 | return 0 129 | def help_strip(self): 130 | print >> self.stream, "Strip leading path information from filenames in the report." 131 | 132 | def postcmd(self, stop, line): 133 | if stop: 134 | return stop 135 | return None -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/__main__.py: -------------------------------------------------------------------------------- 1 | from optparse import OptionParser 2 | import sys 3 | import os 4 | import codecs 5 | from pyinstrument import Profiler 6 | from pyinstrument.profiler import SignalUnavailableError 7 | 8 | # Python 3 compatibility. 
Mostly borrowed from SymPy 9 | PY3 = sys.version_info[0] > 2 10 | 11 | if PY3: 12 | import builtins 13 | exec_ = getattr(builtins, "exec") 14 | else: 15 | def exec_(_code_, _globs_=None, _locs_=None): 16 | """Execute code in a namespace.""" 17 | if _globs_ is None: 18 | frame = sys._getframe(1) 19 | _globs_ = frame.f_globals 20 | if _locs_ is None: 21 | _locs_ = frame.f_locals 22 | del frame 23 | elif _locs_ is None: 24 | _locs_ = _globs_ 25 | exec("exec _code_ in _globs_, _locs_") 26 | 27 | def main(): 28 | usage = ("usage: pyinstrument [options] scriptfile [arg] ...") 29 | parser = OptionParser(usage=usage) 30 | parser.allow_interspersed_args = False 31 | 32 | parser.add_option('', '--setprofile', 33 | dest='setprofile', action='store_true', 34 | help='run in setprofile mode, instead of signal mode', default=False) 35 | 36 | parser.add_option('', '--html', 37 | dest="output_html", action='store_true', 38 | help="output HTML instead of text", default=False) 39 | parser.add_option('-o', '--outfile', 40 | dest="outfile", action='store', 41 | help="save report to ", default=None) 42 | 43 | parser.add_option('', '--unicode', 44 | dest='unicode', action='store_true', 45 | help='force unicode text output') 46 | parser.add_option('', '--no-unicode', 47 | dest='unicode', action='store_false', 48 | help='force ascii text output') 49 | 50 | parser.add_option('', '--color', 51 | dest='color', action='store_true', 52 | help='force ansi color text output') 53 | parser.add_option('', '--no-color', 54 | dest='color', action='store_false', 55 | help='force no color text output') 56 | 57 | if not sys.argv[1:]: 58 | parser.print_help() 59 | sys.exit(2) 60 | 61 | (options, args) = parser.parse_args() 62 | sys.argv[:] = args 63 | 64 | if len(args) > 0: 65 | progname = args[0] 66 | sys.path.insert(0, os.path.dirname(progname)) 67 | 68 | with open(progname, 'rb') as fp: 69 | code = compile(fp.read(), progname, 'exec') 70 | globs = { 71 | '__file__': progname, 72 | '__name__': 
'__main__', 73 | '__package__': None, 74 | } 75 | 76 | try: 77 | profiler = Profiler(use_signal=not options.setprofile) 78 | except SignalUnavailableError: 79 | profiler = Profiler(use_signal=False) 80 | 81 | profiler.start() 82 | 83 | try: 84 | exec_(code, globs, None) 85 | except IOError as e: 86 | import errno 87 | 88 | if e.errno == errno.EINTR: 89 | print( 90 | 'Failed to run program due to interrupted system system call.\n' 91 | 'This happens because pyinstrument is sending OS signals to the running\n' 92 | 'process to interrupt it. If your program has long-running syscalls this\n' 93 | 'can cause a problem.\n' 94 | '\n' 95 | 'You can avoid this error by running in \'setprofile\' mode. Do this by\n' 96 | 'passing \'--setprofile\' when calling pyinstrument at the command-line.\n' 97 | '\n' 98 | 'For more information, see\n' 99 | 'https://github.com/joerick/pyinstrument/issues/16\n' 100 | ) 101 | 102 | raise 103 | except (SystemExit, KeyboardInterrupt): 104 | pass 105 | 106 | profiler.stop() 107 | 108 | if options.outfile: 109 | f = codecs.open(options.outfile, 'w', 'utf-8') 110 | else: 111 | f = sys.stdout 112 | 113 | unicode_override = options.unicode != None 114 | color_override = options.color != None 115 | 116 | unicode = options.unicode if unicode_override else file_supports_unicode(f) 117 | color = options.color if color_override else file_supports_color(f) 118 | 119 | if options.output_html: 120 | f.write(profiler.output_html()) 121 | else: 122 | f.write(profiler.output_text(unicode=unicode, color=color)) 123 | 124 | f.close() 125 | else: 126 | parser.print_usage() 127 | return parser 128 | 129 | def file_supports_color(file_obj): 130 | """ 131 | Returns True if the running system's terminal supports color, and False 132 | otherwise. 
def file_supports_unicode(file_obj):
    """Return True when *file_obj* declares an encoding whose canonical
    codec name contains 'utf', i.e. the stream can carry Unicode output.

    Objects without an ``encoding`` attribute (or with an empty one) are
    treated as ASCII-only and yield False.
    """
    declared = getattr(file_obj, 'encoding', None)
    if not declared:
        # No (or empty) encoding attribute: assume the stream cannot
        # safely receive non-ASCII characters.
        return False
    # codecs.lookup normalises aliases such as 'UTF8' to 'utf-8'.
    return 'utf' in codecs.lookup(declared).name
3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 4 | # and https://www.youtube.com/watch?v=l6fV09z5XHk 5 | # and https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi 6 | 7 | name: ci-cd 8 | 9 | # Build only on creation of new releases 10 | on: 11 | # push: # build on every commit push 12 | # pull_request: # build on every pull request 13 | release: # build on every releases 14 | types: 15 | - published # use published, not released and prereleased, because prereleased is not triggered if created from a draft: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#release 16 | 17 | jobs: 18 | testbuild: 19 | name: Unit test and building 20 | runs-on: ${{ matrix.os }} 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | python-version: ["*"] # check the list of versions: https://github.com/actions/python-versions/releases and https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md -- note that "*" represents the latest stable version of Python 25 | os: [ ubuntu-latest, windows-latest, macos-latest ] # jobs that run on Windows and macOS runners that GitHub hosts consume minutes at 2 and 10 times the rate that jobs on Linux runners consume respectively. But it's free for public OSS repositories. 
26 | steps: 27 | - uses: actions/checkout@v3 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v3 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | cache: 'pip' 33 | # You can test your matrix by printing the current Python version 34 | - name: Display Python version 35 | run: | 36 | python -c "import sys; print(sys.version)" 37 | - name: Install dependencies 38 | run: | 39 | python -m pip install --upgrade pip 40 | #python -m pip install pytest pytest-cov # done in setup.cfg for Py2 or pyproject.toml for Py3 41 | #if [ ${{ matrix.python-version }} <= 3.7 ]; then python -m pip install 'coverage<4'; else python -m pip install coverage; fi 42 | #if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 43 | - name: Install this module 44 | #if: ${{ matrix.python-version >= 3 }} # does not work on dynamic versions, see: https://github.com/actions/setup-python/issues/644 45 | # Do not import testmeta, they make the build fails somehow, because some dependencies are unavailable on Py2 46 | run: | 47 | #python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test] --verbose --use-pep517 48 | # Here we do NOT build against test.pypi.org but only the real pypi because we want to test before shipping whether users with a normal pypi version can install our package! 49 | python -m pip install --upgrade --editable .[test] --verbose --use-pep517 50 | - name: Test with pytest 51 | run: | 52 | coverage run --branch -m pytest . 
-v 53 | coverage report -m 54 | - name: Build source distribution and wheel 55 | run: | 56 | python -m pip install --upgrade build 57 | python -sBm build 58 | - name: Save dist/ content for reuse in other GitHub Workflow blocks 59 | uses: actions/upload-artifact@v3 60 | with: 61 | path: dist/* 62 | 63 | upload_test_pypi: # Upload to TestPyPi first to ensure that the release is OK (we will try to download it and install it afterwards), as recommended in https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi 64 | name: Upload to TestPyPi 65 | needs: [testbuild] 66 | runs-on: ubuntu-latest 67 | steps: 68 | - name: Unpack default artifact into dist/ 69 | uses: actions/download-artifact@v4.1.7 70 | with: 71 | # unpacks default artifact into dist/ 72 | # if `name: artifact` is omitted, the action will create extra parent dir 73 | name: artifact 74 | path: dist 75 | 76 | - name: Upload to TestPyPi 77 | uses: pypa/gh-action-pypi-publish@v1.5.0 78 | with: 79 | user: __token__ 80 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 81 | repository_url: https://test.pypi.org/legacy/ 82 | # To test: repository_url: https://test.pypi.org/legacy/ # and also change token: ${{ secrets.PYPI_API_TOKEN }} to secrets.TEST_PYPI_API_TOKEN # for more infos on registering and using TestPyPi, read: https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi -- remove the repository_url to upload to the real PyPi 83 | 84 | - name: Test install from TestPyPI 85 | run: | 86 | python -m pip install --upgrade pip 87 | pip install \ 88 | --index-url https://test.pypi.org/simple/ \ 89 | --extra-index-url https://pypi.org/simple \ 90 | pyFileFixity 91 | 92 | upload_pypi: # Upload to the real PyPi if everything else worked before, as suggested in: https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi 93 | name: Upload to the real PyPi 94 | needs: [testbuild, upload_test_pypi] 95 | runs-on: ubuntu-latest 96 | steps: 97 | - uses: actions/download-artifact@v4.1.7 98 | with: 99 | # unpacks default 
artifact into dist/ 100 | # if `name: artifact` is omitted, the action will create extra parent dir 101 | name: artifact 102 | path: dist 103 | 104 | - uses: pypa/gh-action-pypi-publish@v1.5.0 105 | with: 106 | user: __token__ 107 | password: ${{ secrets.PYPI_API_TOKEN }} 108 | 109 | - name: Test install from PyPI 110 | run: | 111 | python -m pip install --upgrade pip 112 | pip uninstall pyFileFixity -y 113 | pip install --upgrade pyFileFixity 114 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/debug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os, sys, time 4 | 5 | # add current script path libs 6 | #pathname = os.path.dirname(sys.argv[0]) 7 | #sys.path.insert(0, os.path.join(pathname, 'lib', 'debug')) 8 | 9 | pathname = os.path.dirname(os.path.realpath(__file__)) 10 | #sys.path.append(os.path.join(pathname, 'lib', 'debug')) 11 | sys.path.append(pathname) 12 | 13 | # import functools, used to preserve the correct func.__name__ 14 | import functools 15 | 16 | # import some functions profiler functions and GUI 17 | import functionprofiler 18 | # Note: as an alternative, you can also use pyprof2calltree and kcachegrind to get a lot more informations and interactive call graph 19 | 20 | # import profilehooks lib 21 | from profilehooks import profile 22 | 23 | # import memory profiler line by line 24 | from memory_profiler import profile as memoryprofile_linebyline 25 | 26 | 27 | #### NON DECORATOR FUNCTIONS #### 28 | ################################# 29 | 30 | def startmemorytracker(): 31 | from pympler import tracker 32 | tr = tracker.SummaryTracker() 33 | return tr 34 | 35 | def runprofilerandshow(funcname, profilepath, argv='', *args, **kwargs): 36 | ''' 37 | Run a functions profiler and show it in a GUI visualisation using RunSnakeRun 38 | Note: can also use calibration for more exact results 39 | ''' 40 | 
functionprofiler.runprofile(funcname+'(\''+argv+'\')', profilepath, *args, **kwargs) 41 | print 'Showing profile (windows should open in the background)'; sys.stdout.flush(); 42 | functionprofiler.browseprofilegui(profilepath) 43 | 44 | 45 | 46 | 47 | #### DECORATOR FUNCTIONS #### 48 | ############################# 49 | 50 | # @profile: use profilehooks to profile functions 51 | # @profileit: profile using python's profile (works with threads) 52 | # @showprofile: show the functions profile in a nice GUI using RunSnakeRun (alternative: using the generated profile log files you can use pyprof2calltree and kcachegrind to get a lot more informations and interactive call graph) 53 | # @memorytrack: use Pympler to track and show memory usage (only console, no GUI) 54 | #@callgraph: save the call graph in text format and image (if GraphViz is available, more specifically the dot program) 55 | #@profile_linebyline: profile a function with line by line CPU consumption (using line_profiler, need to install it because it is compiled in C) 56 | #@memoryprofile_linebyline: memory profile a function with line by line memory consumption (using memory_profiler, needs psutils on Windows) 57 | 58 | # eg: 59 | # @showprofile 60 | # @profileit 61 | # def func(): ... 
def memorytrack(func):
    """Decorator: report the memory delta incurred by one call to *func*.

    Uses Pympler's ``SummaryTracker`` to snapshot memory before the call
    and print the summary diff to stdout afterwards.

    Fix: the wrapper now propagates the wrapped function's return value —
    previously the result was silently discarded and the wrapper always
    returned None, which broke any decorated function whose callers used
    its result.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Imported lazily so merely importing this module does not require
        # pympler to be installed (matches the original's behaviour).
        from pympler import tracker
        tr = tracker.SummaryTracker()
        result = func(*args, **kwargs)
        tr.print_diff()
        return result
    return wrapper
109 | ''' 110 | import pycallgraph 111 | @functools.wraps(func) 112 | def wrapper(*args, **kwargs): 113 | pycallgraph.start_trace() 114 | func(*args, **kwargs) 115 | pycallgraph.save_dot('callgraph.log') 116 | pycallgraph.make_dot_graph('callgraph.png') 117 | #pycallgraph.make_dot_graph('callgraph.jpg', format='jpg', tool='neato') 118 | return wrapper 119 | 120 | def profile_linebyline(func): 121 | import line_profiler 122 | @functools.wraps(func) 123 | def wrapper(*args, **kwargs): 124 | prof = line_profiler.LineProfiler() 125 | val = prof(func)(*args, **kwargs) 126 | prof.print_stats() 127 | return val 128 | return wrapper 129 | 130 | 131 | # Some debug testing here 132 | if __name__ == '__main__': 133 | 134 | @showprofile 135 | @profileit 136 | #@memorytrack 137 | #@callgraph 138 | #@profile 139 | #@memoryprofile_linebyline 140 | #@profile_linebyline 141 | def testcaptcha(): 142 | import captchagenerator 143 | 144 | captcha = captchagenerator.CaptchaGenerator(True, True, debugPng=True, debug=False, nbElem=10, modelsPath='bammodels', windowWidth='320', windowHeight='240') 145 | 146 | #captcha.renderCaptcha('solmasks', 'solmasks') 147 | captcha.renderCaptchaMulti(4, 'solmasks', 'solmasks') 148 | 149 | #time.sleep(20) 150 | 151 | #@memoryprofile_linebyline 152 | #@profile_linebyline 153 | def test_1(): 154 | a = [1] * (10 ** 6) 155 | b = [2] * (2 * 10 ** 7) 156 | del b 157 | 158 | for i in range(2): 159 | a = [1] * (10 ** 6) 160 | b = [2] * (2 * 10 ** 7) 161 | del b 162 | return a 163 | 164 | # Test 1 165 | #runprofilerandshow('testcaptcha', 'profile.log') 166 | 167 | # Test 2 168 | testcaptcha() 169 | 170 | # Test 3 171 | #test_1() 172 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/coldshotadapter.py: -------------------------------------------------------------------------------- 1 | """Adapter for RunSnakeRun to load coldshot profiles""" 2 | import wx, sys, os, logging 3 | log = 
logging.getLogger( __name__ ) 4 | from squaremap import squaremap 5 | from coldshot import stack,loader 6 | 7 | class BaseColdshotAdapter( squaremap.DefaultAdapter): 8 | """Base class for the various adapters""" 9 | percentageView = False 10 | total = 0 11 | def filename( self, node ): 12 | return getattr(node,'path',None) 13 | color_mapping = None 14 | 15 | def background_color(self, node, depth): 16 | """Create a (unique-ish) background color for each node""" 17 | if self.color_mapping is None: 18 | self.color_mapping = {} 19 | color = self.color_mapping.get(node.key) 20 | if color is None: 21 | depth = len(self.color_mapping) 22 | red = (depth * 10) % 255 23 | green = 200 - ((depth * 5) % 200) 24 | blue = (depth * 25) % 200 25 | self.color_mapping[node.key] = color = wx.Colour(red, green, blue) 26 | return color 27 | 28 | def SetPercentage(self, percent, total): 29 | """Set whether to display percentage values (and total for doing so)""" 30 | self.percentageView = percent 31 | self.total = total 32 | 33 | def parents(self, node): 34 | return getattr(node, 'parents', []) 35 | def label(self, node): 36 | if self.percentageView and self.total: 37 | time = '%0.2f%%' % round(node.cumulative * 100.0 / self.total, 2) 38 | else: 39 | time = '%0.3fs' % round(node.cumulative, 3) 40 | if hasattr( node, 'line' ): 41 | return '%s@%s:%s [%s]' % (node.name, node.filename, node.line, time) 42 | else: 43 | return '%s [%s]'%( node.name, time ) 44 | 45 | class ColdshotAdapter(BaseColdshotAdapter): 46 | """Adapts a coldshot.loader.Loader into a Squaremap-compatible structure""" 47 | 48 | def value(self, node, parent=None): 49 | if parent: 50 | return parent.child_cumulative_time(node) 51 | else: 52 | return node.cumulative 53 | 54 | def empty(self, node): 55 | """Calculate percentage of "empty" time""" 56 | return node.empty 57 | 58 | # 59 | #class ColdshotCallsAdapter( BaseColdshotAdapter ): 60 | # def value(self, node, parent=None): 61 | # return node.cumulative / 
parent.cumulative 62 | # 63 | # def empty(self, node): 64 | # """Calculate percentage of "empty" time""" 65 | # return node.empty 66 | 67 | class FunctionLineWrapper( object ): 68 | def __init__( self, function_info, line_info ): 69 | self.function_info = function_info 70 | self.line_info = line_info 71 | @property 72 | def children( self ): 73 | return [] 74 | @property 75 | def parents( self ): 76 | return [ self.function_info ] 77 | @property 78 | def cumulative( self ): 79 | return self.line_info.time * self.function_info.loader.timer_unit 80 | @property 81 | def empty( self ): 82 | return 0.0 83 | @property 84 | def local( self ): 85 | return self.line_info.time * self.function_info.loader.timer_unit 86 | @property 87 | def key( self ): 88 | return self.function_info.key 89 | @property 90 | def name( self ): 91 | return '%s:%s'%( self.line_info.line, self.function_info.filename, ) 92 | @property 93 | def calls( self ): 94 | return self.line_info.calls 95 | 96 | class ModuleAdapter( ColdshotAdapter ): 97 | """Currently doesn't do anything different""" 98 | def label(self, node): 99 | if isinstance( node, stack.FunctionInfo ): 100 | return super( ModuleAdapter, self ).label( node ) 101 | if self.percentageView and self.total: 102 | time = '%0.2f%%' % round(node.cumulative * 100.0 / self.total, 2) 103 | else: 104 | time = '%0.3fs' % round(node.cumulative, 3) 105 | return '%s [%s]'%(node.key or 'PYTHONPATH', time) 106 | def parents( self, node ): 107 | if isinstance( node, stack.FunctionInfo ): 108 | parent = node.loader.modules.get( node.module ) 109 | if parent: 110 | return [parent] 111 | return [] 112 | elif isinstance( node, stack.FunctionLineInfo ): 113 | return [node.function] 114 | else: 115 | return getattr( node, 'parents', [] ) 116 | def children( self, node ): 117 | if isinstance( node, stack.FunctionInfo ): 118 | return [ 119 | FunctionLineWrapper( node, line ) 120 | for lineno,line in sorted( node.line_map.items()) 121 | ] 122 | return 
ColdshotAdapter.children( self, node ) 123 | def label(self, node): 124 | if isinstance( node, FunctionLineWrapper ): 125 | return node.name 126 | return ColdshotAdapter.label( self, node ) 127 | 128 | 129 | class Loader( loader.Loader ): 130 | """Coldshot loader subclass with knowledge of squaremap adapters""" 131 | def functions_rows( self ): 132 | """Get cProfile-like function metadata rows 133 | 134 | returns an ID: function mapping 135 | """ 136 | return self.info.functions 137 | def location_rows( self ): 138 | """Get our location records (finalized) 139 | 140 | returns an module-name: Grouping mapping 141 | """ 142 | self.info.finalize_modules() 143 | return self.info.modules 144 | 145 | ROOTS = ['functions','location' ]# ,'thread','calls'] 146 | 147 | def get_root( self, key ): 148 | """Retrieve the given root by type-key""" 149 | return self.info.roots[key] 150 | def get_rows( self, key ): 151 | """Get the set of rows for the type-key""" 152 | return getattr( self, '%s_rows'%(key,) )( ) 153 | def get_adapter( self, key ): 154 | """Get an adapter for our given key""" 155 | if key == 'functions': 156 | return ColdshotAdapter() 157 | elif key == 'location': 158 | return ModuleAdapter() 159 | else: 160 | raise KeyError( """Unknown root type %s"""%( key, )) 161 | 162 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/meliaeadapter.py: -------------------------------------------------------------------------------- 1 | #! 
# Size thresholds (largest first) paired with the format string used once a
# value reaches at least twice that threshold.
RANKS = [
    (1024 * 1024 * 1024, '%0.1fGB'),
    (1024 * 1024, '%0.1fMB'),
    (1024, '%0.1fKB'),
    (0, '%iB'),
]

def mb( value ):
    """Format *value* (a byte count) as a short human-readable size string."""
    for threshold, template in RANKS:
        if abs(value) >= threshold * 2:
            # Guard against dividing by the final 0 threshold.
            divisor = float(threshold or 1)
            return template % (value / divisor)
    raise ValueError( "Number where abs(x) is not >= 0?: %s"%(value,))
node.get('value') is not None: 68 | result.append( unicode(node['value'])[:32]) 69 | if 'module' in node and not node['module'] in result: 70 | result.append( ' in %s'%( node['module'] )) 71 | if node.get( 'size' ): 72 | result.append( '%s'%( mb( node['size'] ))) 73 | if node.get( 'totsize' ): 74 | result.append( '(%s)'%( mb( node['totsize'] ))) 75 | parent_count = len( node.get('parents',())) 76 | if parent_count > 1: 77 | result.append( '/%s refs'%( parent_count )) 78 | return " ".join(result) 79 | def overall( self, node ): 80 | return node.get('totsize',0) 81 | def empty( self, node ): 82 | if node.get('totsize'): 83 | return node['size']/float(node['totsize']) 84 | else: 85 | return 0 86 | def parents( self, node ): 87 | """Retrieve/calculate the set of parents for the given node""" 88 | if 'index' in node: 89 | index = node['index']() 90 | parents = list(meliaeloader.children( node, index, 'parents' )) 91 | return parents 92 | return [] 93 | def best_parent( self, node, tree_type=None ): 94 | """Choose the best parent for a given node""" 95 | parents = self.parents(node) 96 | selected_parent = None 97 | if node['type'] == 'type': 98 | module = ".".join( node['name'].split( '.' 
)[:-1] ) 99 | if module: 100 | for mod in parents: 101 | if mod['type'] == 'module' and mod['name'] == module: 102 | selected_parent = mod 103 | if parents and selected_parent is None: 104 | parents.sort( key = lambda x: self.value(node, x) ) 105 | return parents[-1] 106 | return selected_parent 107 | 108 | color_mapping = None 109 | def background_color(self, node, depth): 110 | """Create a (unique-ish) background color for each node""" 111 | if self.color_mapping is None: 112 | self.color_mapping = {} 113 | if node['type'] == 'type': 114 | key = node['name'] 115 | else: 116 | key = node['type'] 117 | color = self.color_mapping.get(key) 118 | if color is None: 119 | depth = len(self.color_mapping) 120 | red = (depth * 10) % 255 121 | green = 200 - ((depth * 5) % 200) 122 | blue = (depth * 25) % 200 123 | self.color_mapping[key] = color = wx.Colour(red, green, blue) 124 | return color 125 | def filename( self, node ): 126 | if 'module' in node and not 'filename' in node: 127 | try: 128 | fp, pathname, description = imp.find_module(node['module']) 129 | except (ImportError), err: 130 | node['filename'] = None 131 | else: 132 | if fp: 133 | fp.close() 134 | node['filename'] = pathname 135 | elif not 'filename' in node: 136 | return None 137 | return node['filename'] 138 | 139 | class TestApp(wx.App): 140 | """Basic application for holding the viewing Frame""" 141 | handler = wx.PNGHandler() 142 | def OnInit(self): 143 | """Initialise the application""" 144 | wx.Image.AddHandler(self.handler) 145 | self.frame = frame = wx.Frame( None, 146 | ) 147 | frame.CreateStatusBar() 148 | 149 | model = model = self.get_model( sys.argv[1]) 150 | self.sq = squaremap.SquareMap( 151 | frame, model=model, adapter = MeliaeAdapter(), padding=2, margin=1, 152 | square_style=True 153 | ) 154 | squaremap.EVT_SQUARE_HIGHLIGHTED( self.sq, self.OnSquareSelected ) 155 | frame.Show(True) 156 | self.SetTopWindow(frame) 157 | return True 158 | def get_model( self, path ): 159 | return 
usage = 'meliaeloader.py somefile'

def main():
    """Command-line entry point: view the meliae dump named in sys.argv[1].

    With no argument, print the usage string instead of starting the GUI.
    """
    if not sys.argv[1:]:
        # Parenthesized print works under both Python 2 and 3; the previous
        # bare "print usage" statement was Python-2-only syntax.
        print(usage)
    else:
        app = TestApp(0)
        app.MainLoop()
only_coord=False, blocksize=len(get_marker(1))+1) 36 | assert entry == entries[0] 37 | entry = auxf.get_next_entry(fp1, entrymarker=get_marker(1), only_coord=False, blocksize=len(get_marker(1))+1) 38 | assert entry == entries[1] 39 | fp2 = BytesIO(filecontent) 40 | entry = auxf.get_next_entry(fp2, entrymarker=get_marker(1), only_coord=True, blocksize=len(get_marker(1))+1) 41 | assert entry == entries_pos[0] 42 | entry = auxf.get_next_entry(fp2, entrymarker=get_marker(1), only_coord=True, blocksize=len(get_marker(1))+1) 43 | assert entry == entries_pos[1] 44 | 45 | def test_sizeof_fmt(self): 46 | """ aux: test SI formatting """ 47 | # Test without SI prefix 48 | assert auxf.sizeof_fmt(1023.0, suffix='B', mod=1024.0) == "1023.0B" 49 | # Test all possible SI prefixes 50 | pows = ['', 'K','M','G','T','P','E','Z', 'Y'] 51 | for p in range(1, len(pows)): 52 | assert auxf.sizeof_fmt(1024.0**p, suffix='B', mod=1024.0) == ("1.0%sB" % pows[p]) 53 | 54 | def test_path2unix(self): 55 | """ aux: test path2unix """ 56 | assert auxf.path2unix(r'test\some\folder\file.ext', fromwinpath=True) == r'test/some/folder/file.ext' 57 | assert auxf.path2unix(r'test\some\folder\file.ext', nojoin=True, fromwinpath=True) == ['test', 'some', 'folder', 'file.ext'] 58 | assert auxf.path2unix(r'test/some/folder/file.ext') == r'test/some/folder/file.ext' 59 | 60 | def test_is_file(self): 61 | """ aux: test is_file() """ 62 | indir = path_sample_files('input') 63 | infile = path_sample_files('input', 'tux.jpg') 64 | assert auxf.is_file(infile) 65 | self.assertRaises(ArgumentTypeError, auxf.is_file, indir) 66 | 67 | def test_is_dir(self): 68 | """ aux: test is_dir() """ 69 | indir = path_sample_files('input') 70 | infile = path_sample_files('input', 'tux.jpg') 71 | assert auxf.is_dir(indir) 72 | self.assertRaises(ArgumentTypeError, auxf.is_dir, infile) 73 | 74 | def test_is_dir_or_file(self): 75 | """ aux: test is_dir_or_file() """ 76 | indir = path_sample_files('input') 77 | infile = 
    def test_recwalk(self):
        """ aux: test recwalk()

        Checks both sorted (deterministic across platforms) and unsorted
        (filesystem-order, platform-dependent) recursive walks.
        """
        def list_paths_posix(recwalk_result):
            """ helper function to convert all paths to relative posix like paths (to ease comparison) """
            # pardir is a closure over the enclosing test's variable, assigned below before first call.
            return [auxf.path2unix(os.path.join(os.path.relpath(x, pardir),y)) for x,y in recwalk_result]
        indir = path_sample_files('input')
        pardir = os.path.dirname(indir)
        # Compare between sorted and non-sorted path walking (the result should be different! but sorted path should always be the same on all platforms!)
        res1 = list_paths_posix(auxf.recwalk(indir, sorting=True))
        res2 = list_paths_posix(auxf.recwalk(indir, sorting=False))
        # Absolute test: sorted walking should always return the same result on all platforms
        assert res1 == ['files/alice.pdf', 'files/testaa.txt', 'files/tux.jpg', 'files/tuxsmall.jpg', 'files/Sub2/testsub2.txt', 'files/sub/Snark.zip', 'files/sub/testsub.txt']
        # Relative test: compare with platform's results
        if os.name == 'nt':
            assert res2 != res1
            assert res2 == ['files/alice.pdf', 'files/testaa.txt', 'files/tux.jpg', 'files/tuxsmall.jpg', 'files/sub/Snark.zip', 'files/sub/testsub.txt', 'files/Sub2/testsub2.txt']
        elif os.name == 'posix':
            assert res2 != res1 # BEWARE, do NOT use sets here! On linux, order of generated files can change, although a set is unordered, they will be equal if elements in the sets are the same, contrary to lists, but that's what we are testing here, with ordered walk it should NOT be the same!
    def test_fullpath(self):
        """ aux: test fullpath() """
        def relpath(path, pardir):
            """ helper function to always return a relative posix-like path (ease comparisons) """
            return auxf.path2unix(os.path.relpath(path, pardir))
        # Can't really objectively test fullpath() but we can relatively compare the result
        indir = path_sample_files('input')
        infile = path_sample_files('input', 'tux.jpg')
        pardir = os.path.dirname(indir)
        # Directory test
        assert relpath(auxf.fullpath(indir), pardir) == 'files'
        # File test
        res1 = relpath(auxf.fullpath(infile), pardir)
        assert res1 == 'files/tux.jpg'
        # Opened file test: fullpath() should also accept an open file handle
        # and resolve it to the same path as the raw filename.
        with open(infile, 'rb') as fh:
            res2 = relpath(auxf.fullpath(fh), pardir)
        assert res1 == res2
def runprofile(mainfunction, output, timeout = 0, calibrate=False):
    '''
    Run the functions profiler and save the result
    If timeout is greater than 0, the profile will automatically stops after timeout seconds
    '''
    # Fix: 'time' was only reachable through wildcard imports (from kthread
    # import *); import it explicitly so the countdown/timeout code cannot
    # break if those modules stop leaking the name.
    import time
    if noprofiler:
        print('ERROR: profiler and/or pstats library missing ! Please install it (probably package named python-profile) before running a profiling !')
        return False
    # This is the main function for profiling
    def _profile():
        profile.run(mainfunction, output)
    print('=> RUNNING FUNCTIONS PROFILER\n\n')
    sys.stdout.flush()
    # Calibrate the profiler (only use this if the profiler produces some funny stuff, but calibration can also produce even more funny stuff with the latest cProfile of Python v2.7! So you should only enable calibration if necessary)
    if calibrate:
        print('Calibrating the profiler...')
        sys.stdout.flush()
        cval = calibrateprofile()
        print('Calibration found value : %s' % cval)
        sys.stdout.flush()
    print('Initializing the profiler...')
    sys.stdout.flush()
    # Run in timeout mode (if the function cannot ends by itself, this is the best mode: the function must ends for the profile to be saved)
    if timeout > 0:
        pthread = KThread(target=_profile) # we open the function with the profiler, in a special killable thread (see below why)
        print('Will now run the profiling and terminate it in %s seconds. Results will be saved in %s' % (str(timeout), str(output)))
        sys.stdout.flush()
        print('\nCountdown:')
        sys.stdout.flush()
        for i in range(0, 5):
            print(str(5-i))
            sys.stdout.flush()
            time.sleep(1)
        print('0\nStarting to profile...')
        sys.stdout.flush()
        pthread.start() # starting the thread
        time.sleep(float(timeout)) # after this amount of seconds, the thread gets killed and the profiler will end its job
        print('\n\nFinishing the profile and saving to the file %s' % str(output))
        sys.stdout.flush()
        pthread.kill() # we must end the main function in order for the profiler to output its results (if we didn't launch a thread and just closed the process, it would have done no result)
    # Run in full length mode (we run the function until it ends)
    else:
        print("Running the profiler, please wait until the process terminates by itself (if you forcequit before, the profile won't be saved)")
        _profile()
    print('=> Functions Profile done !')
    return True
profile.Profile.bias = final # Apply computed bias to all Profile instances created hereafter 102 | return final 103 | 104 | def parseprofile(profilelog, out): 105 | ''' 106 | Parse a profile log and print the result on screen 107 | ''' 108 | file = open(out, 'w') # opening the output file 109 | print('Opening the profile in %s...' % profilelog) 110 | p = pstats.Stats(profilelog, stream=file) # parsing the profile with pstats, and output everything to the file 111 | 112 | print('Generating the stats, please wait...') 113 | file.write("=== All stats:\n") 114 | p.strip_dirs().sort_stats(-1).print_stats() 115 | file.write("=== Cumulative time:\n") 116 | p.sort_stats('cumulative').print_stats(100) 117 | file.write("=== Time:\n") 118 | p.sort_stats('time').print_stats(100) 119 | file.write("=== Time + cumulative time:\n") 120 | p.sort_stats('time', 'cum').print_stats(.5, 'init') 121 | file.write("=== Callees:\n") 122 | p.print_callees() 123 | file.write("=== Callers:\n") 124 | p.print_callers() 125 | #p.print_callers(.5, 'init') 126 | #p.add('fooprof') 127 | file.close() 128 | print('Stats generated and saved to %s.' % out) 129 | print('Everything is done. Exiting') 130 | 131 | def browseprofile(profilelog): 132 | ''' 133 | Browse interactively a profile log in console 134 | ''' 135 | print('Starting the pstats profile browser...\n') 136 | try: 137 | browser = ProfileBrowser(profilelog) 138 | print >> browser.stream, "Welcome to the profile statistics browser. Type help to get started." 139 | browser.cmdloop() 140 | print >> browser.stream, "Goodbye." 141 | except KeyboardInterrupt: 142 | pass 143 | 144 | def browseprofilegui(profilelog): 145 | ''' 146 | Browse interactively a profile log in GUI using RunSnakeRun and SquareMap 147 | ''' 148 | from runsnakerun import runsnake # runsnakerun needs wxPython lib, if it's not available then we can pass if we don't want a GUI. 
def check_eq_files(path1, path2, blocksize=65535, startpos1=0, startpos2=0):
    """ Return True if both files are identical, False otherwise """
    # Stream both files chunk by chunk so arbitrarily large files can be
    # compared without loading them fully into memory.
    with open(path1, 'rb') as f1, open(path2, 'rb') as f2:
        f1.seek(startpos1)
        f2.seek(startpos2)
        while True:
            chunk1 = f1.read(blocksize)
            chunk2 = f2.read(blocksize)
            if chunk1 != chunk2:
                # Content mismatch, or one file ended before the other.
                return False
            if not chunk1:
                # Both chunks are empty here (they compared equal above):
                # we reached the end of both files without a difference.
                return True
    #return filecmp.cmp(path1, path2, shallow=False) # does not work on Travis
def path_sample_files(type=None, path=None, createdir=False):
    """ Helper function to return the full path to the test files

    type selects the tests subfolder: 'input' -> files/, 'results' ->
    results/, 'output' -> out/ (any other value, including None, returns '').
    path, if given, is appended inside that subfolder.  With createdir=True
    the resolved directory is created if missing.
    """
    subdir = ''
    if not type:
        return ''
    elif type == 'input':
        subdir = 'files'
    elif type == 'results':
        subdir = 'results'
    elif type == 'output':
        subdir = 'out'

    dirpath = ''
    # Resolve relative to this test module's directory, not the CWD.
    scriptpath = os.path.dirname(os.path.realpath(__file__))
    if path:
        dirpath = fullpath(os.path.join(scriptpath, subdir, path))
    else:
        dirpath = fullpath(os.path.join(scriptpath, subdir))

    if createdir:
        create_dir_if_not_exist(dirpath)

    return dirpath
def find_next_entry(path, marker=b"\xFF\xFF\xFF\xFF", initpos=0):
    '''Find the next position of a marker in a file

    Generator yielding the absolute offset of each occurrence of *marker*
    in the file at *path*, starting from *initpos*.

    Fixed: the default marker is now a bytes literal; the previous str
    default raised a TypeError under Python 3 when searched for inside the
    bytearray read buffer.
    NOTE(review): a marker straddling a blocksize boundary is not detected
    by this chunked scan -- pre-existing limitation, confirm acceptable.
    '''
    blocksize = 65535
    startcursor = None  # absolute position of the marker in the file
    buf = 1
    infile = open(path, 'rb')
    if initpos > 0:
        infile.seek(initpos)
    # Enumerate all markers in a generator
    while buf:
        # Read a long block at once, we will readjust the file cursor after
        buf = bytearray(infile.read(blocksize))
        # Find the start marker (relative position in the current buffer)
        start = buf.find(marker)
        if start >= 0:
            # Convert the relative hit into an absolute file offset
            startcursor = infile.tell() - len(buf) + start
            infile.close() # close the file before yielding result, to avoid locking the file
            yield startcursor
            infile = open(path, 'rb') # reopen the file just after yield before doing further processing
            # place reading cursor just after the current marker to avoid repeatedly detecting the same marker
            infile.seek(startcursor + len(marker))
    infile.close() # don't forget to close after the loop!
def remove_if_exist(path):
    """Delete a file or a directory recursively if it exists, else no exception is raised"""
    if not os.path.exists(path):
        # Nothing on disk at that location: report that nothing was removed.
        return False
    if os.path.isdir(path):
        shutil.rmtree(path)
        return True
    if os.path.isfile(path):
        os.remove(path)
        return True
    # Exists but is neither a regular file nor a directory (e.g. a socket):
    # leave it untouched, matching the original behaviour.
    return False
    def test_eccman_detect_rs_param(self):
        """ eccman: test reedsolomon param detection """
        message = b("hello world")
        # Pre-computed message+ecc codeword for the parameters in `params` below.
        mesecc_orig = [104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 187, 161, 157, 88, 92, 175, 116, 251, 116]
        # Same codeword with one symbol zeroed out (index 15), so detection
        # should report a Hamming distance of 1 instead of a perfect match.
        mesecc_orig_tampered = [104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 187, 161, 157, 88, 0, 175, 116, 251, 116]
        n = len(mesecc_orig)
        k = len(message)
        # params = [n, k, generator number, primitive polynomial, fcr]
        params = [n, k, 2, 0x187, 120]
        res = detect_reedsolomon_parameters(message, mesecc_orig)
        res2 = detect_reedsolomon_parameters(message, mesecc_orig_tampered)
        assert ("Hamming distance 0 (0=perfect match):\ngen_nb=%i prim=%i(%s) fcr=%i" % (params[2], params[3], hex(params[3]), params[4])) in res
        assert ("Hamming distance 1:\ngen_nb=%i prim=%i(%s) fcr=%i" % (params[2], params[3], hex(params[3]), params[4])) in res2
        # An impossible codeword with a restricted search space cannot be matched
        res3 = detect_reedsolomon_parameters(message, [-1]*len(mesecc_orig), [3])
        assert "Parameters could not be automatically detected" in res3
        # Symbols above the field size (255 for c_exp=8) must be rejected on either side
        self.assertRaises(ValueError, detect_reedsolomon_parameters, [257, 0, 0], [0, 0, 0], c_exp=8)
        self.assertRaises(ValueError, detect_reedsolomon_parameters, [0, 0, 0], [257, 0, 0], c_exp=8)
    def test_eccman_codecs(self):
        """ eccman: test ecc generation and decoding

        Runs every supported ECC algorithm (1..4) through encode, decode
        (with and without erasures) and check, against pre-computed ecc
        vectors.
        """
        # Expected ecc bytes per algorithm; algos 1-3 share the same output,
        # algo 4 uses different Reed-Solomon parameters.
        expected = [
            [206, 234, 144, 153, 141, 196, 170, 96, 62],
            [206, 234, 144, 153, 141, 196, 170, 96, 62],
            [206, 234, 144, 153, 141, 196, 170, 96, 62],
            [187, 161, 157, 88, 92, 175, 116, 251, 116]
        ]
        message = b("hello world")
        message_eras = b("h\x00ll\x00 world")  # two erased (zeroed) symbols
        message_noise = b("h\x00ll\x00 worla")  # erasures plus one error (currently unused, see commented call below)
        n = 20
        k = 11
        for i in range(1,5):
            eccman = ECCMan(n, k, algo=i)
            ecc = bytearray(b(eccman.encode(message)))
            assert list(ecc) == expected[i-1]
            # Decoding must recover the original message in all erasure modes
            assert b(eccman.decode(message_eras, ecc)[0]) == message
            assert b(eccman.decode(message_eras, ecc, enable_erasures=True)[0]) == message
            assert b(eccman.decode(message_eras, ecc, enable_erasures=True, only_erasures=True)[0]) == message
            #eccman.decode(message_noise, ecc, enable_erasures=True, only_erasures=True)[0]
            assert eccman.check(message, ecc)
            assert not eccman.check(message_eras, ecc)
            assert "Reed-Solomon with polynomials in Galois field of characteristic" in eccman.description()
        # Unknown algorithm test
        self.assertRaises(Exception, ECCMan, n, k, algo=-1)
        eccman = ECCMan(n, k, algo=1)
        eccman.algo = -1
        assert "No description for this ECC algorithm." in eccman.description()
    def test_eccman_rpad_decoding(self):
        """ eccman: test ecc decoding when right padding

        Unlike the lpad test, n and k here match the real codeword sizes;
        the right-padding path is exercised by truncating the ecc block.
        """
        message = b("hello world")  # reference plaintext the ecc was generated from
        ecc = b(''.join([chr(x) for x in [206, 234, 144, 153, 141, 196, 170, 96, 62]]))
        message_eras = b("h\x00ll\x00 world")  # two erased (zeroed) symbols
        # Exact parameters for this codeword: n = len(message) + len(ecc), k = len(message)
        n = 20
        k = 11
        eccman = ECCMan(n, k, algo=3)
        # Test decoding with erasure when the message needs to be padded
        assert eccman.decode(message_eras, ecc, enable_erasures=True)
        # A truncated ecc block must be right-padded internally and still decode
        assert eccman.decode(message_eras, ecc[:-2])
import rfigc 13 | from ..lib.aux_funcs import recwalk 14 | from .aux_tests import check_eq_files, check_eq_dir, path_sample_files, tamper_file, create_dir_if_not_exist 15 | 16 | def partial_eq(file, file_partial): 17 | """ Do a partial comparison, line by line, we compare only using "line2 in line1", where line2 is from file_partial """ 18 | flag = True 19 | with _open_csv(file, 'r') as outf, _open_csv(file_partial, 'r') as expectedf: 20 | out = outf.read().strip("\r").strip("\n") 21 | expected = expectedf.read().split("\n") 22 | for exp in expected: 23 | if not exp.strip("\n") in out: 24 | flag = False 25 | break 26 | return flag 27 | 28 | def setup_module(): 29 | """ Initialize the tests by emptying the out directory """ 30 | outfolder = path_sample_files('output') 31 | shutil.rmtree(outfolder, ignore_errors=True) 32 | create_dir_if_not_exist(outfolder) 33 | 34 | def test_one_file(): 35 | """ rfigc: test creation and verification of rfigc database for one file """ 36 | filein = path_sample_files('input', 'tuxsmall.jpg') 37 | filedb = path_sample_files('output', 'd_file.csv') 38 | fileres = path_sample_files('results', 'test_rfigc_test_one_file.csv') 39 | # Generate database file 40 | assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (filein, filedb)) == 0 41 | # Check files are ok 42 | assert rfigc.main('-i "%s" -d "%s" --silent' % (filein, filedb)) == 0 43 | # Check database file is the same as the pregenerated result 44 | with _open_csv(filedb, 'r') as outf, _open_csv(fileres, 'r') as expectedf: 45 | out = outf.read().strip("\r").strip("\n") 46 | # Because of differing timestamps between local and git repo, we must only do a partial comparison (we compare the beginning of the file up to the timestamp) 47 | # TODO: to do full comparisons including timestamps, use https://github.com/adamchainz/time-machine or freezegun 48 | expected = expectedf.read().split("\n") # workaround to remove windows carriage return character, it does not always get added but under 
some strange conditions (in GitHub Actions env, and not all the time, but only on Windows-2019) it can get added by csv writer, ignoring our settings. TODO: remove strip("\r") and try to find a REAL fix. 49 | for exp in expected: 50 | assert exp.strip("\r").strip("\n") in out 51 | 52 | def test_dir(): 53 | """ rfigc: test creation and verification of database for a full directory """ 54 | filein = path_sample_files('input', ) 55 | filedb = path_sample_files('output', 'd_dir.csv') 56 | fileres = path_sample_files('results', 'test_rfigc_test_dir.csv') 57 | # Generate database file 58 | assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (filein, filedb)) == 0 59 | # Check files are ok 60 | assert rfigc.main('-i "%s" -d "%s" --silent' % (filein, filedb)) == 0 61 | # Check database file is the same as the pregenerated result 62 | # We can't directly compare the two files because of timestamps! 63 | # So we manually process the expected results and compare each line to see if it's present in the output 64 | assert partial_eq(filedb, fileres) 65 | # TODO: add a regular expression to check that all fields are present 66 | 67 | def test_error_file(): 68 | """ rfigc: test tamper file and error file generation """ 69 | filein = path_sample_files('input', 'tuxsmall.jpg') 70 | filedb = path_sample_files('output', 'd.csv') 71 | fileout = path_sample_files('output', 'tuxsmall.jpg') 72 | fileout2 = path_sample_files('output', 'errors.log') 73 | fileres = path_sample_files('results', 'test_rfigc_test_error_file.log') 74 | assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (filein, filedb)) == 0 75 | shutil.copyfile(filein, fileout) 76 | tamper_file(fileout, 3) 77 | assert rfigc.main('-i "%s" -d "%s" -e "%s" --silent' % (fileout, filedb, fileout2)) == 1 78 | check_eq_files(fileout2, fileres) 79 | 80 | def test_filescrape(): 81 | """ rfigc: test --filescraping_recovery """ 82 | filein_dir = path_sample_files('input', ) 83 | filedb = path_sample_files('output', 
def test_update():
    """ rfigc: test --update """
    indir = path_sample_files('input')
    dbfile = path_sample_files('output', 'd_update.csv')
    extra_dir = path_sample_files('output', 'update')
    extra_file = path_sample_files('output', 'update/added_file.txt')
    expected_append = path_sample_files('results', 'test_rfigc_test_update_append.csv')
    expected_remove = path_sample_files('results', 'test_rfigc_test_update_remove.csv')
    # Generate a database from input files
    assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (indir, dbfile)) == 0
    # Create a new file in another folder
    create_dir_if_not_exist(extra_dir)
    with open(extra_file, 'wb') as fh:
        fh.write(b'abcdefABCDEF\n1234598765')
    # Append the new file's entry into the database
    assert rfigc.main('-i "%s" -d "%s" --update --append --silent' % (extra_dir, dbfile)) == 0
    assert partial_eq(dbfile, expected_append)
    # Remove all other files' entries from the database
    assert rfigc.main('-i "%s" -d "%s" --update --remove --silent' % (extra_dir, dbfile)) == 0
    assert partial_eq(dbfile, expected_remove)

def test_generate_hashes():
    """ rfigc: test internal: generate_hashes() """
    # A file generated on the spot: its hashes must always match
    tmpfile = path_sample_files('output', 'test_rfigc_generate_hashes.txt')
    with open(tmpfile, 'wb') as fh:
        fh.write(b"Lorem ipsum etc\n"*20)
    assert rfigc.generate_hashes(tmpfile) == ('c6e0c87cbb8eeaca8179f22186384e6b', '6f46949be7cda1437bc3fb61fb827a6552beaf8b')
    # Repository sample files: these expectations change if the samples change
    sample_jpg = path_sample_files('input', 'tux.jpg')
    sample_pdf = path_sample_files('input', 'alice.pdf')
    assert rfigc.generate_hashes(sample_jpg) == ('81e19bbf2efaeb1d6d6473c21c48e4b7', '6e38ea91680ef0f960db0fd6a973cf50ef765369')
    assert rfigc.generate_hashes(sample_pdf) == ('298aeefe8c00f2d92d660987bee67260', '106e7ad4d3927c5906cd366cc0d5bd887bdc3300')
#!/usr/bin/env python
#
# Easy Profiler
# Copyright (C) 2015 Larroque Stephen
# Licensed under the MIT License (MIT) -- full license text in the repository.
#
#------------------------------
#

# Import necessary libraries
import argparse
import os, sys


def main(argv=None):
    """Parse the commandline and run the requested CPU/memory profiler
    (optionally with a GUI) over the target script's main() function.

    Unrecognized arguments are passed through to the target script.
    Results are printed to stdout or dumped to --profile_log; returns None.
    """
    if argv is None:
        argv = sys.argv[1:]

    #==== COMMANDLINE PARSER ====

    #== Commandline description
    desc = '''Easy Profiler for Python scripts
Description: Provide an easy way to launch CPU/Memory profile (with GUI support) of python scripts. You can supply arguments of the target script by appending them at the end of the arguments for this script, without any special formatting (unrecognized arguments will be passed along to the target script).
'''
    ep = ''' '''

    #== Commandline arguments
    main_parser = argparse.ArgumentParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
    main_parser.add_argument('--script', metavar='script.py', type=str, nargs=1, required=True,
                        help='Path to the script to import and execute (the script must implement a main() function).')
    main_parser.add_argument('--profile_log', metavar='profile.log', type=str, nargs=1, required=False,
                        help='Path where to store the profile log.')
    main_parser.add_argument('--cpu', action='store_true', required=False, default=False,
                        help='CPU line-by-line profiler (pprofile.py).')
    main_parser.add_argument('--cpu_stack', action='store_true', required=False, default=False,
                        help='CPU stack (tree-like) profiler (pyinstrument.py).')
    main_parser.add_argument('--memory', action='store_true', required=False, default=False,
                        help='Memory line-by-line profiler (memory_profiler.py).')
    main_parser.add_argument('--gui', action='store_true', required=False, default=False,
                        help='GUI interface for the CPU line-by-line profiler (not ready for the memory profiler) using RunSnakeRun.')

    #== Parsing the arguments (unknown args are kept for the target script)
    args, args_rest = main_parser.parse_known_args(argv)

    #-- Set variables from arguments
    script = args.script[0]
    cpu = args.cpu
    memory = args.memory
    gui = args.gui
    cpu_stack = args.cpu_stack

    profile_log = None
    if args.profile_log:
        # Lazy import: fullpath is only needed when a log path was supplied,
        # and deferring it keeps this module importable in isolation.
        from lib.aux_funcs import fullpath
        profile_log = fullpath(args.profile_log[0])

    if '.' not in script:  # no extension given: assume a .py script
        script = script + '.py'

    if not os.path.isfile(script):
        print("File does not exist: %s" % script)
    else:
        print("==== LAUNCHING PROFILING ====")

        scriptname = os.path.splitext(script)[0]  # remove any extension to be able to import
        scriptmod = __import__(scriptname)  # dynamic import

        if cpu:
            # Line-by-line CPU runtime profiling (pure python using pprofile)
            from lib.profilers.pprofile import pprofile
            # Load the profiler
            pprof = pprofile.Profile()
            # Launch experiment under the profiler
            args_rest = ' '.join(args_rest)
            with pprof:
                scriptmod.main(args_rest)
            # Print the result
            print("==> Profiling done.")
            if profile_log:
                pprof.dump_stats(profile_log)
            else:
                pprof.print_stats()
        elif memory:
            # Line-by-line memory profiler (pure python using memory_profiler)
            from lib.profilers.memory_profiler import memory_profiler
            # Load the memory profiler
            mprof = memory_profiler.LineProfiler()
            # Launch experiment under the memory profiler
            args_rest = ' '.join(args_rest)
            mprof(scriptmod.main)(args_rest)
            # Print results
            print("==> Profiling done.")
            if not mprof.code_map:  # just to check that everything's alright
                # BUGFIX: this was a Python 2 bare print statement, a
                # SyntaxError under Python 3; converted to the print() function.
                print('Error: the memory_profiler did not work! Please check that your are correctly calling mprof(func)(arguments)')
            else:
                if profile_log:
                    with open(profile_log, 'w') as pf:
                        memory_profiler.show_results(mprof, stream=pf)
                else:
                    print(memory_profiler.show_results(mprof, stream=None))
        elif gui:
            # Visual profiler with GUI (runsnakerun)
            # NOTE: you need wxPython to launch it
            from lib.profilers.visual.debug import runprofilerandshow
            if not profile_log:
                # A profile log is necessary to use the GUI: the profile is
                # generated separately, then RunSnakeRun reads the file
                # (file-based communication is currently the only way to
                # communicate with RunSnakeRun).
                profile_log = 'profile.log'
            args_rest = ' '.join(args_rest)
            runprofilerandshow('import '+scriptname+"\n"+scriptname+'.main', profile_log, argv=args_rest, calibrate=True)
        elif cpu_stack:
            # Tree like cpu profiling (pyinstrument)
            from lib.profilers.pyinstrument import Profiler
            from lib.profilers.pyinstrument.profiler import SignalUnavailableError
            try:
                profiler = Profiler()
            except SignalUnavailableError:
                # signal is not available on this system: fall back to
                # the non-signal sampling mode
                profiler = Profiler(use_signal=False)
            profiler.start()
            # CONSISTENCY FIX: join args_rest into a single string before
            # calling the target main(), like every other branch does.
            scriptmod.main(' '.join(args_rest))
            profiler.stop()
            print("==> Profiling done.")
            if profile_log:
                import codecs
                with codecs.open(profile_log, 'wb', encoding='utf8') as pf:
                    pf.write( profiler.output_text(unicode=True, color=True) )
            else:
                print(profiler.output_text(unicode=True, color=True))


# Calling main function if the script is directly called (not imported as a library in another program)
if __name__ == "__main__":
    sys.exit(main())
# furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 25 | # 26 | #================================= 27 | # pyFileFixity Main Subcommands Facade API 28 | # by Stephen Larroque 29 | # License: MIT 30 | # Creation date: 2023-08-04 31 | #================================= 32 | # Inspired by Adam Johnson's template for a script with subcommands: https://adamj.eu/tech/2021/10/15/a-python-script-template-with-sub-commands-and-type-hints/ 33 | # 34 | 35 | from __future__ import annotations 36 | 37 | # Import tools for argument parsing and typing 38 | import argparse 39 | from collections.abc import Sequence 40 | import sys 41 | 42 | # Include the lib folder in the python import path to be able to do relative imports 43 | # DEPRECATED: unnecessary since PEP328, but need to use the "from .a import x" form, not "import .x" https://fortierq.github.io/python-import/ -- but note that editable mode is very fine and accepted nowadays, a subsequent PEP fixed the issue! 
44 | #import os, sys 45 | #thispathname = os.path.dirname(__file__) 46 | #sys.path.append(os.path.join(thispathname)) 47 | 48 | # Import all pyFileFixity subcommands tools 49 | from .rfigc import main as rfigc_main 50 | from .header_ecc import main as hecc_main 51 | from .structural_adaptive_ecc import main as saecc_main 52 | from .repair_ecc import main as recc_main 53 | from .replication_repair import main as replication_repair_main 54 | from .resiliency_tester import main as restest_main 55 | from .filetamper import main as filetamper_main 56 | from .ecc_speedtest import main as ecc_speedtest_main 57 | 58 | def main(argv: Sequence[str] | None = None) -> int: 59 | parser = argparse.ArgumentParser() 60 | subparsers = parser.add_subparsers(dest="subcommand", required=True) 61 | 62 | # Add sub-commands 63 | rfigc_parser = subparsers.add_parser("hash", aliases=["rfigc"], help="Check files integrity fast by hash, size, modification date or by data structure integrity.", add_help=False) # disable help, so that we can redefine it and propagate as an argument downstream to the called module 64 | rfigc_parser.add_argument('-h', '--help', action='store_true') # redefine help argument so that we can pass it downstream to submodules' argparse parsers 65 | 66 | hecc_parser = subparsers.add_parser("header", aliases=["header_ecc", "hecc"], help="Protect/repair files headers with error correction codes", add_help=False) 67 | hecc_parser.add_argument('-h', '--help', action='store_true') 68 | 69 | saecc_parser = subparsers.add_parser("whole", aliases=["structural_adaptive_ecc", "saecc", "protect", "repair"], help="Protect/repair whole files with error correction codes", add_help=False) 70 | saecc_parser.add_argument('-h', '--help', action='store_true') 71 | 72 | recc_parser = subparsers.add_parser("recover", aliases=["repair_ecc", "recc"], help="Utility to try to recover damaged ecc files using a failsafe mechanism, a sort of recovery mode (note: this does NOT recover your files, 
only the ecc files, which may then be used to recover your files!)", add_help=False) 73 | recc_parser.add_argument('-h', '--help', action='store_true') 74 | 75 | replication_repair_parser = subparsers.add_parser("dup", aliases=["replication_repair"], help="Repair files from multiple copies of various storage mediums using a majority vote", add_help=False) 76 | replication_repair_parser.add_argument('-h', '--help', action='store_true') 77 | 78 | restest_parser = subparsers.add_parser("restest", aliases=["resilience_tester"], help="Run tests to quantify robustness of a file protection scheme (can be used on any, not just pyFileFixity)", add_help=False) 79 | restest_parser.add_argument('-h', '--help', action='store_true') 80 | 81 | filetamper_parser = subparsers.add_parser("filetamper", help="Tamper files using various schemes", add_help=False) 82 | filetamper_parser.add_argument('-h', '--help', action='store_true') 83 | 84 | ecc_speedtest_parser = subparsers.add_parser("speedtest", aliases=["ecc_speedtest"], help="Run error correction encoding and decoding speedtests", add_help=False) 85 | ecc_speedtest_parser.add_argument('-h', '--help', action='store_true') 86 | 87 | # Parse known arguments, but we have almost none, this is done on purpose so that we can pass all arguments (except helps) downstream for submodules to handle with their own Argparse 88 | args, args_remainder = parser.parse_known_args(argv) # if argv is None, then parse_known_args() will fallback to sys.argv 89 | #print(type(args_remainder)) # DEBUGLINE 90 | #print(args) # DEBUGLINE 91 | 92 | if len(sys.argv) >= 2: 93 | # Prepare subarguments 94 | subargs = [] 95 | if args.help is True: 96 | # Manage custom case of manually propagating --help to downstream module, we prepend to the string of the remainder of arguments 97 | subargs.append("--help") 98 | # Add the rest of the arguments, so that the downstream module can handle them with their own Argparse parser 99 | subargs.extend(args_remainder) # 
args_remainder is a list, so we can extend subargs with it 100 | 101 | fullcommand = "pff.py " + args.subcommand 102 | 103 | if args.subcommand in ["hash", "rfigc"]: 104 | return rfigc_main(argv=subargs, command=fullcommand) 105 | elif args.subcommand in ["header", "header_ecc", "hecc"]: 106 | return hecc_main(argv=subargs, command=fullcommand) 107 | elif args.subcommand in ["whole", "structural_adaptive_ecc", "saecc", "protect", "repair"]: 108 | return saecc_main(argv=subargs, command=fullcommand) 109 | elif args.subcommand in ["recover", "repair_ecc", "recc"]: 110 | return recc_main(argv=subargs, command=fullcommand) 111 | elif args.subcommand in ["dup", "replication_repair"]: 112 | return replication_repair_main(argv=subargs, command=fullcommand) 113 | elif args.subcommand in ["restest", "resilience_tester"]: 114 | return restest_main(argv=subargs, command=fullcommand) 115 | elif args.subcommand in ["filetamper"]: 116 | return filetamper_main(argv=subargs, command=fullcommand) 117 | elif args.subcommand in ["speedtest", "ecc_speedtest"]: 118 | return ecc_speedtest_main(argv=subargs, command=fullcommand) 119 | else: 120 | # Unreachable 121 | raise NotImplementedError( 122 | f"Command {args.command} is not implemented (dev forgot!).", 123 | ) 124 | 125 | 126 | def subcommand1(string: str) -> int: 127 | # Implement behaviour 128 | 129 | return 0 130 | 131 | 132 | if __name__ == "__main__": 133 | raise SystemExit(main()) 134 | -------------------------------------------------------------------------------- /pyFileFixity/tests/test_header_ecc.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import itertools 6 | import hashlib 7 | 8 | import shutil 9 | 10 | from .. 
import header_ecc as hecc 11 | from ..lib.aux_funcs import get_next_entry 12 | from ..lib.eccman import compute_ecc_params, ECCMan 13 | from .aux_tests import check_eq_files, check_eq_dir, path_sample_files, tamper_file, find_next_entry, create_dir_if_not_exist, get_marker, dummy_ecc_file_gen 14 | 15 | from ..lib._compat import b 16 | 17 | from io import BytesIO 18 | 19 | def setup_module(): 20 | """ Initialize the tests by emptying the out directory """ 21 | outfolder = path_sample_files('output') 22 | shutil.rmtree(outfolder, ignore_errors=True) 23 | create_dir_if_not_exist(outfolder) 24 | 25 | def test_one_file(): 26 | """ hecc: test creation and verification of database for one file """ 27 | filein = path_sample_files('input', 'tuxsmall.jpg') 28 | filedb = path_sample_files('output', 'hecc_file.db') 29 | fileout = path_sample_files('output', 'tuxsmall.jpg') 30 | fileout_rec = path_sample_files('output', 'rectemp', True) # temporary folder where repaired files will be placed (we expect none so this should be temporary, empty folder) 31 | fileres = path_sample_files('results', 'test_header_ecc_test_one_file.db') 32 | # Generate an ecc file 33 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb)) == 0 34 | # Check that generated ecc file is correct 35 | startpos1 = next(find_next_entry(filedb, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins 36 | startpos2 = next(find_next_entry(fileres, get_marker(type=1))) 37 | assert check_eq_files(filedb, fileres, startpos1=startpos1, startpos2=startpos2) 38 | # Check that the ecc file correctly validates the correct files 39 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=3 -c --silent' % (filein, filedb, fileout_rec)) == 0 40 | 41 | def test_one_file_tamper(): 42 | """ hecc: test file repair """ 43 | filein = path_sample_files('input', 'tuxsmall.jpg') 44 | filedb = path_sample_files('output', 'hecc_tamper.db') 45 | fileout = 
path_sample_files('output', 'tuxsmall.jpg') 46 | fileout2 = path_sample_files('output', 'repaired/tuxsmall.jpg') 47 | fileout2_dir = path_sample_files('output', 'repaired') 48 | fileres = path_sample_files('results', 'test_header_ecc_test_one_file_tamper.db') 49 | create_dir_if_not_exist(fileout2_dir) 50 | # Generate an ecc file 51 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb)) == 0 52 | # Tamper the file 53 | shutil.copyfile(filein, fileout) # Copy it to avoid tampering the original 54 | tamper_file(fileout, 4, r'abcde') 55 | # Repair the file 56 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=3 -c --silent' % (fileout, filedb, fileout2_dir)) == 0 57 | # Check that the file was completely repaired 58 | assert check_eq_files(filein, fileout2) 59 | 60 | def test_dir(): 61 | """ hecc: test creation and verification of database for a full directory """ 62 | filein = path_sample_files('input', ) 63 | filedb = path_sample_files('output', 'hecc_dir.db') 64 | fileout = path_sample_files('output', ) 65 | fileout_rec = path_sample_files('output', 'rectemp', True) # temporary folder where repaired files will be placed (we expect none so this should be temporary, empty folder) 66 | fileres = path_sample_files('results', 'test_header_ecc_test_dir.db') 67 | # Generate an ecc file 68 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb)) == 0 69 | # Check that generated ecc file is correct 70 | startpos1 = next(find_next_entry(filedb, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins 71 | startpos2 = next(find_next_entry(fileres, get_marker(type=1))) 72 | assert check_eq_files(filedb, fileres, startpos1=startpos1, startpos2=startpos2) 73 | # Check that the ecc file correctly validates the correct files 74 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=3 -c --silent' % (filein, filedb, fileout_rec)) == 0 75 | 76 | def test_algo(): 77 | """ hecc: test 
algorithms equivalence """ 78 | filein = path_sample_files('input', 'tuxsmall.jpg') 79 | filedb = [path_sample_files('output', 'hecc_algo1.db'), 80 | path_sample_files('output', 'hecc_algo2.db'), 81 | path_sample_files('output', 'hecc_algo3.db'), 82 | ] 83 | fileres = path_sample_files('results', 'test_header_ecc_test_algo.db') 84 | fileout_rec = path_sample_files('output', 'rectemp', True) # temporary folder where repaired files will be placed (we expect none so this should be temporary, empty folder) 85 | # For each algorithm 86 | for i in range(len(filedb)): 87 | # Generate an ecc file 88 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=%i -g -f --silent' % (filein, filedb[i], i+1)) == 0 89 | # Check file with this ecc algo 90 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=%i -c --silent' % (filein, filedb[i], fileout_rec, i+1)) == 0 91 | for i in range(1, len(filedb)): 92 | # Check that generated ecc file is correct 93 | startpos1 = next(find_next_entry(filedb[0], get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins 94 | startpos2 = next(find_next_entry(filedb[i], get_marker(type=1))) 95 | assert check_eq_files(filedb[0], filedb[i], startpos1=startpos1, startpos2=startpos2) 96 | # Check against expected ecc file 97 | startpos1 = next(find_next_entry(filedb[0], get_marker(type=1))) 98 | startpos2 = next(find_next_entry(fileres, get_marker(type=1))) 99 | assert check_eq_files(filedb[0], fileres, startpos1=startpos1, startpos2=startpos2) 100 | 101 | def test_entry_fields(): 102 | """ hecc: test internal: entry_fields() """ 103 | ecc = dummy_ecc_file_gen(3) 104 | eccf = BytesIO(ecc) 105 | ecc_entry = get_next_entry(eccf, get_marker(1), only_coord=False) 106 | assert hecc.entry_fields(ecc_entry, field_delim=get_marker(2)) == {'ecc_field': b'hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_', 'filesize_ecc': b'filesize1_ecc', 'relfilepath_ecc': b'relfilepath1_ecc', 'relfilepath': b'file1.ext', 'filesize': b'filesize1'} 
107 | ecc_entry = get_next_entry(eccf, get_marker(1), only_coord=False) 108 | assert hecc.entry_fields(ecc_entry, field_delim=get_marker(2)) == {'ecc_field': b'hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_', 'filesize_ecc': b'filesize2_ecc', 'relfilepath_ecc': b'relfilepath2_ecc', 'relfilepath': b'file2.ext', 'filesize': b'filesize2'} 109 | 110 | def test_entry_assemble(): 111 | """ hecc: test internal: entry_assemble() """ 112 | class Hasher(object): 113 | """ Dummy Hasher """ 114 | def __len__(self): 115 | return 32 116 | tempfile = path_sample_files('output', 'hecc_entry_assemble.txt') 117 | with open(tempfile, 'wb') as tfile: 118 | tfile.write(b("Lorem ipsum\nAnd stuff and stuff and stuff\n"*20)) 119 | ecc = dummy_ecc_file_gen(3) 120 | eccf = BytesIO(ecc) 121 | ecc_entry = get_next_entry(eccf, get_marker(1), only_coord=False) 122 | entry_fields = hecc.entry_fields(ecc_entry, field_delim=get_marker(2)) 123 | ecc_params = compute_ecc_params(255, 0.5, Hasher()) 124 | out = hecc.entry_assemble(entry_fields, ecc_params, 10, tempfile, fileheader=None) 125 | assert out == [{'ecc': b'sh-ecc-entry_', 'message': b'Lorem ipsu', 'hash': b'hash-ecc-entry_hash-ecc-entry_ha'}] 126 | # TODO: check that several blocks can be assembled, currently we only check one block 127 | 128 | def test_compute_ecc_hash(): 129 | """ hecc: test internal: compute_ecc_hash() """ 130 | class Hasher(object): 131 | """ Dummy Hasher """ 132 | def hash(self, mes): 133 | return "dummyhsh" 134 | def __len__(self): 135 | return 8 136 | n = 20 137 | k = 11 138 | instring = "hello world!"*20 139 | header_size = 1024 140 | eccman = ECCMan(n, k, algo=3) 141 | out1 = hecc.compute_ecc_hash(eccman, Hasher(), instring[:header_size], 255, 0.5, message_size=None, as_string=False) 142 | assert out1 == [[b'dummyhsh', b'\x9b\x18\xeb\xc9z\x01c\xf2\x07'], [b'dummyhsh', b'\xa2Q\xc0Y\xae\xc3b\xd5\x81']] 143 | out2 = hecc.compute_ecc_hash(eccman, Hasher(), 
instring[:header_size], 255, 0.5, message_size=None, as_string=True) 144 | assert out2 == [b('dummyhsh\x9b\x18\xeb\xc9z\x01c\xf2\x07'), b('dummyhsh\xa2Q\xc0Y\xae\xc3b\xd5\x81')] 145 | -------------------------------------------------------------------------------- /pyFileFixity/tests/test_repair_ecc.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import itertools 6 | import hashlib 7 | 8 | import shutil 9 | 10 | from .. import repair_ecc as recc 11 | from .. import header_ecc as hecc 12 | from .. import structural_adaptive_ecc as saecc 13 | from .aux_tests import check_eq_files, check_eq_dir, path_sample_files, tamper_file, find_next_entry, create_dir_if_not_exist, get_marker 14 | 15 | def get_db(): 16 | return [path_sample_files('output', 'recc_file.db'), path_sample_files('output', 'recc_file.db_bak')] 17 | 18 | def get_db_idx(): 19 | return [path_sample_files('output', 'recc_file.db.idx'), path_sample_files('output', 'recc_file.db.idx_bak')] 20 | 21 | def get_db_sa(): 22 | return [path_sample_files('output', 'recc_file_sa.db'), path_sample_files('output', 'recc_file_sa.db_bak')] 23 | 24 | def get_db_sa_idx(): 25 | return [path_sample_files('output', 'recc_file_sa.db.idx'), path_sample_files('output', 'recc_file_sa.db.idx_bak')] 26 | 27 | def restore_files(type): 28 | """ Restore the backup files to clean before the test """ 29 | if type == 'hecc': 30 | filedb, filedb_bak = get_db() 31 | elif type == 'hecc_idx': 32 | filedb, filedb_bak = get_db_idx() 33 | elif type == 'saecc': 34 | filedb, filedb_bak = get_db_sa() 35 | elif type == 'saecc_idx': 36 | filedb, filedb_bak = get_db_sa_idx() 37 | os.remove(filedb) 38 | shutil.copyfile(filedb_bak, filedb) 39 | return 0 40 | 41 | def setup_module(): 42 | """ Initialize the tests by emptying the out directory """ 43 | outfolder = path_sample_files('output') 44 | shutil.rmtree(outfolder, ignore_errors=True) 
# (continuation of setup_module() from the previous packed line)
    create_dir_if_not_exist(outfolder)
    # Generate an header_ecc generated ecc file for the repair tests to use
    filein = path_sample_files('input')
    filedb, filedb_bak = get_db()
    filedb_idx, filedb_idx_bak = get_db_idx()
    hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb))
    shutil.copyfile(filedb, filedb_bak) # keep a backup, we will reuse it for each test
    shutil.copyfile(filedb_idx, filedb_idx_bak)
    # Do the same with structural_adaptive_ecc
    filedb_sa, filedb_sa_bak = get_db_sa()
    filedb_sa_idx, filedb_sa_idx_bak = get_db_sa_idx()
    saecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb_sa))
    shutil.copyfile(filedb_sa, filedb_sa_bak) # keep a backup, we will reuse it for each test
    shutil.copyfile(filedb_sa_idx, filedb_sa_idx_bak)

def test_check():
    """ recc: check db and index files are the same as expected """
    # this also helps to check that restore_files() is working correctly since they are critical for other tests
    filedb, filedb_bak = get_db()
    filedb_sa, filedb_sa_bak = get_db_sa()
    #filedb_idx, filedb_idx_bak = get_db_idx()
    fileres = path_sample_files('results', 'test_repair_ecc_check.db')
    fileres_sa = path_sample_files('results', 'test_repair_ecc_sa_check.db')
    #fileres_idx = path_sample_files('results', 'test_repair_ecc_check.db.idx')
    # Recopy the original untampered files
    restore_files('hecc')
    restore_files('saecc')
    #restore_files('hecc_idx')
    # Check that generated files are correct (header_ecc generated)
    startpos1 = next(find_next_entry(filedb, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins
    startpos2 = next(find_next_entry(fileres, get_marker(type=1)))
    assert check_eq_files(filedb, fileres, startpos1=startpos1, startpos2=startpos2)
    # assert check_eq_files(filedb_idx, fileres_idx) # cannot check the index file because of the possibly differing comments in the header (this will offset the position of every markers, and thus the index file will be different)
    # Check that generated files are correct (structural_adaptive_ecc generated)
    startpos1 = next(find_next_entry(filedb_sa, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins
    startpos2 = next(find_next_entry(fileres_sa, get_marker(type=1)))
    assert check_eq_files(filedb_sa, fileres_sa, startpos1=startpos1, startpos2=startpos2)

def test_repair_by_index():
    """ recc: tamper ecc file and repair by index file """
    filedb, filedb_bak = get_db()
    filedb_idx, filedb_idx_bak = get_db_idx()
    fileout = path_sample_files('output', 'recc_file_repaired_index.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('hecc')
    restore_files('hecc_idx')
    # Completely overwrite a few markers (hence they cannot be recovered by hamming)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*len(marker1))
    tamper_file(filedb, startpos2, "a"*len(marker1))
    tamper_file(filedb, startpos3, "a"*len(marker2))
    # Repair ecc file using index file
    assert recc.main('-i "%s" --index "%s" -o "%s" -t 0.0 -f --silent' % (filedb, filedb_idx, fileout)) == 0
    # Repaired file must be byte-identical to the pristine backup
    assert check_eq_files(filedb_bak, fileout)

def test_repair_by_hamming():
    """ recc: tamper ecc file and repair by hamming distance """
    filedb, filedb_bak = get_db()
    fileout = path_sample_files('output', 'recc_file_repaired.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('hecc')
    # Partially overwrite (30%) a few markers, so that they remain recoverable by hamming similarity (no index file is used here)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*int(len(marker1)*0.3))
    tamper_file(filedb, startpos2, "a"*int(len(marker1)*0.3))
    tamper_file(filedb, startpos3, "a"*int(len(marker2)*0.3))
    # Repair ecc file by hamming similarity
    assert recc.main('-i "%s" -o "%s" -t 0.3 -f --silent' % (filedb, fileout)) == 0
    assert check_eq_files(filedb_bak, fileout)

def test_tamper_index():
    """ recc: tamper index file and see if it can repair itself (hecc) """
    filedb, filedb_bak = get_db()
    filedb_idx, filedb_idx_bak = get_db_idx()
    fileout = path_sample_files('output', 'recc_file_repaired_index.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('hecc')
    restore_files('hecc_idx')
    # Completely overwrite a few markers (hence they cannot be recovered by hamming)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*len(marker1))
    tamper_file(filedb, startpos2, "a"*len(marker1))
    tamper_file(filedb, startpos3, "a"*len(marker2))
    # Tamper index file
    tamper_file(filedb_idx, 0, "abcd")
    tamper_file(filedb_idx, 9, "abcd")
    tamper_file(filedb_idx, 27, "abcd")
    assert recc.main('-i "%s" --index "%s" -o "%s" -t 0.0 -f --silent' % (filedb, filedb_idx, fileout)) == 0
    assert check_eq_files(filedb_bak, fileout)

def test_tamper_index_saecc():
    """ recc: tamper index file and see if it can repair itself (saecc) """
    filedb, filedb_bak = get_db_sa()
    filedb_idx, filedb_idx_bak = get_db_sa_idx()
    fileout = path_sample_files('output', 'recc_file_sa_repaired.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('saecc')
    restore_files('saecc_idx')
    # Completely overwrite a few markers (hence they cannot be recovered by hamming)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*len(marker1))
    tamper_file(filedb, startpos2, "a"*len(marker1))
    tamper_file(filedb, startpos3, "a"*len(marker2))
    # Tamper index file
    tamper_file(filedb_idx, 0, "abcd")
    tamper_file(filedb_idx, 9, "abcd")
    tamper_file(filedb_idx, 27, "abcd")
    assert recc.main('-i "%s" --index "%s" -o "%s" -t 0.0 -f --silent' % (filedb, filedb_idx, fileout)) == 0
    assert check_eq_files(filedb_bak, fileout)
--------------------------------------------------------------------------------
/pyFileFixity/lib/profilers/pyinstrument/README.md:
--------------------------------------------------------------------------------
pyinstrument
============

A Python profiler that records the call stack of the executing code, instead
of just the final function in it.

[![Screenshot](screenshot.jpg)](https://raw.githubusercontent.com/joerick/pyinstrument/master/screenshot.jpg)

It uses a **statistical profiler**, meaning the code samples the stack
periodically (every 1 ms). This is lower overhead than event-
based profiling (as done by `profile` and `cProfile`).

This module is still very young, so I'd love any feedback/bug reports/pull
requests!
15 | 16 | Documentation 17 | ------------- 18 | 19 | * [Installation](#installation) 20 | * [Usage](#usage) 21 | * [Command-line](#command-line) 22 | * [Django](#django) 23 | * [Python](#python) 24 | * [Signal or setprofile mode?](#signal-or-setprofile-mode) 25 | * [Known issues](#known-issues) 26 | * [Changelog](#changelog) 27 | * [What's new in v0.13](#whats-new-in-v013) 28 | * [What's new in v0.12](#whats-new-in-v012) 29 | * [Further information](#further-information) 30 | * [Call stack profiling?](#call-stack-profiling) 31 | 32 | Installation 33 | ------------ 34 | 35 | pip install -e git+https://github.com/joerick/pyinstrument.git#egg=pyinstrument 36 | 37 | pyinstrument supports Python 2.7 and 3.3+. 38 | 39 | Usage 40 | ----- 41 | 42 | #### Command-line #### 43 | 44 | You can call pyinstrument directly from the command line. 45 | 46 | python -m pyinstrument [options] myscript.py [args...] 47 | 48 | Options: 49 | -h, --help show this help message and exit 50 | --setprofile run in setprofile mode, instead of signal mode 51 | --html output HTML instead of text 52 | -o OUTFILE, --outfile=OUTFILE 53 | save report to 54 | --unicode force unicode text output 55 | --no-unicode force ascii text output 56 | --color force ansi color text output 57 | --no-color force no color text output 58 | 59 | 60 | This will run `myscript.py` to completion or until you interrupt it, and 61 | then output the call tree. 62 | 63 | #### Django #### 64 | 65 | Add `pyinstrument.middleware.ProfilerMiddleware` to `MIDDLEWARE_CLASSES`. 66 | If you want to profile your middleware as well as your view (you probably 67 | do) then put it at the start of the list. 68 | 69 | ##### Per-request profiling ##### 70 | 71 | Add `?profile` to the end of the request URL to activate the profiler. 72 | Instead of seeing the output of your view, pyinstrument renders an HTML 73 | call tree for the view (as in the screenshot above). 
74 | 75 | ##### Using `PYINSTRUMENT_PROFILE_DIR` ##### 76 | 77 | If you're writing an API, it's not easy to change the URL when you want 78 | to profile something. In this case, add 79 | `PYINSTRUMENT_PROFILE_DIR = 'profiles'` to your settings.py. 80 | pyinstrument will profile every request and save the HTML output to the 81 | folder `profiles` in your working directory. 82 | 83 | #### Python #### 84 | 85 | ```python 86 | from pyinstrument import Profiler 87 | 88 | profiler = Profiler() # or Profiler(use_signal=False), see below 89 | profiler.start() 90 | 91 | # code you want to profile 92 | 93 | profiler.stop() 94 | 95 | print(profiler.output_text(unicode=True, color=True)) 96 | ``` 97 | 98 | You can omit the `unicode` and `color` flags if your output/terminal does 99 | not support them. 100 | 101 | Signal or setprofile mode? 102 | -------------------------- 103 | 104 | On Mac/Linux/Unix, pyinstrument can run in 'signal' mode. This uses 105 | OS-provided signals to interrupt the process every 1ms and record the stack. 106 | It gives much lower overhead (and thus accurate) readings than the standard 107 | Python [`sys.setprofile`][setprofile] style profilers. However, this can 108 | only profile the main thread. 109 | 110 | On Windows and on multi-threaded applications, a `setprofile` mode is 111 | available by passing `use_signal=False` to the Profiler constructor. It works 112 | exactly the same as the signal mode, but has higher overhead. See the below 113 | table for an example of the amount of overhead. 114 | 115 | [setprofile]: https://docs.python.org/2/library/sys.html#sys.setprofile 116 | 117 | This overhead is important because code that makes a lot of Python function 118 | calls will appear to take longer than code that does not. 

| Django template render × 4000 | Overhead
---------------------------|------------------------------:|---------:
Base | 1.46s |
| |
pyinstrument (signal) | 1.84s | 26%
cProfile | 2.18s | 49%
pyinstrument (setprofile) | 5.33s | 365%
profile | 25.39s | 1739%

To run in setprofile mode:

* Use flag `--setprofile` if using the command-line interface
* Use setting `PYINSTRUMENT_USE_SIGNAL = False` in Django
* Use argument `use_signal=False` in the constructor for the Python API

Known issues
------------

- When profiling Django, I'd recommend disabling django-debug-toolbar,
  django-devserver etc., as their instrumentation distorts timings.

- In signal mode, any calls to [`time.sleep`][pysleep] will return
  immediately. This is because of an implementation detail of `time.sleep`,
  but matches the behaviour of the C function [`sleep`][csleep].

- Some system calls can fail with `IOError` when being profiled in signal
  mode. If this happens to you, your only option is to run in setprofile
  mode.

[pysleep]: https://docs.python.org/2/library/time.html#time.sleep
[csleep]: http://pubs.opengroup.org/onlinepubs/009695399/functions/sleep.html

Changelog
---------

### What's new in v0.13 ###

- `pyinstrument` command. You can now profile python scripts from the shell
  by running `$ pyinstrument script.py`. This is now equivalent to
  `python -m pyinstrument`. Thanks @asmeurer!

### What's new in v0.12 ###

- Application code is highlighted in HTML traces to make it easier to spot

- Added `PYINSTRUMENT_PROFILE_DIR` option to the Django interface, which
  will log profiles of all requests to a file in the specified folder. Useful
  for profiling API calls.
168 | 169 | - Added `PYINSTRUMENT_USE_SIGNAL` option to the Django interface, for use 170 | when signal mode presents problems. 171 | 172 | Further information 173 | =================== 174 | 175 | Call stack profiling? 176 | --------------------- 177 | 178 | The standard Python profilers [`profile`][1] and [`cProfile`][2] produce 179 | output where time is totalled according to the time spent in each function. 180 | This is great, but it falls down when you profile code where most time is 181 | spent in framework code that you're not familiar with. 182 | 183 | [1]: http://docs.python.org/2/library/profile.html#module-profile 184 | [2]: http://docs.python.org/2/library/profile.html#module-cProfile 185 | 186 | Here's an example of profile output when using Django. 187 | 188 | 151940 function calls (147672 primitive calls) in 1.696 seconds 189 | 190 | Ordered by: cumulative time 191 | 192 | ncalls tottime percall cumtime percall filename:lineno(function) 193 | 1 0.000 0.000 1.696 1.696 profile:0( at 0x1053d6a30, file "./manage.py", line 2>) 194 | 1 0.001 0.001 1.693 1.693 manage.py:2() 195 | 1 0.000 0.000 1.586 1.586 __init__.py:394(execute_from_command_line) 196 | 1 0.000 0.000 1.586 1.586 __init__.py:350(execute) 197 | 1 0.000 0.000 1.142 1.142 __init__.py:254(fetch_command) 198 | 43 0.013 0.000 1.124 0.026 __init__.py:1() 199 | 388 0.008 0.000 1.062 0.003 re.py:226(_compile) 200 | 158 0.005 0.000 1.048 0.007 sre_compile.py:496(compile) 201 | 1 0.001 0.001 1.042 1.042 __init__.py:78(get_commands) 202 | 153 0.001 0.000 1.036 0.007 re.py:188(compile) 203 | 106/102 0.001 0.000 1.030 0.010 __init__.py:52(__getattr__) 204 | 1 0.000 0.000 1.029 1.029 __init__.py:31(_setup) 205 | 1 0.000 0.000 1.021 1.021 __init__.py:57(_configure_logging) 206 | 2 0.002 0.001 1.011 0.505 log.py:1() 207 | 208 | 209 | When you're using big frameworks like Django, it's very hard to understand how 210 | your own code relates to these traces. 

Pyinstrument records the entire stack, so tracking expensive calls is much
easier.
--------------------------------------------------------------------------------
/pyFileFixity/tests/test_resiliency_tester.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import sys
import os

import shutil

from .. import resiliency_tester as restest
from .aux_tests import path_sample_files, tamper_file, create_dir_if_not_exist, remove_if_exist

from ..lib._compat import _StringIO

def setup_module():
    """ Initialize the tests by emptying the out directory """
    outfolder = path_sample_files('output')
    shutil.rmtree(outfolder, ignore_errors=True)
    create_dir_if_not_exist(outfolder)

def test_parse_configfile():
    """ restest: test internal: parse_configfile() """
    # NOTE(review): the exact indentation inside this config literal could not
    # be recovered from the packed dump; flush-left matches the parsed
    # expectation asserted below -- confirm against version control.
    config = '''
before_tamper:
cmd1 -i "arg1" -o "arg2"
cmd2

tamper:
cmd3
cmd4

after_tamper:
cmd5
cmd6
# a comment

repair:
cmd7
cmd8
'''
    fconfig = _StringIO(config)
    parsed = restest.parse_configfile(fconfig)
    # Comments and blank lines are dropped; commands are grouped per section
    assert parsed == {'tamper': ['cmd3', 'cmd4'], 'after_tamper': ['cmd5', 'cmd6'], 'before_tamper': ['cmd1 -i "arg1" -o "arg2"', 'cmd2'], 'repair': ['cmd7', 'cmd8']}

def test_get_filename_no_ext():
    """ restest: test internal: get_filename_no_ext() """
    filepath = '/test/path/to/filename_no_ext.ext'
    res = restest.get_filename_no_ext(filepath)
    assert res == 'filename_no_ext'

def test_interpolate_dict():
    """ restest: test internal: interpolate_dict() """
    # Extra keys in the dict (var4) are simply ignored
    s = 'Some {var1} with {var2} makes for {var3} parties!'
    d = {'var1': 'wine', 'var2': 'beer', 'var3': 'fun', 'var4': 'Hidden'}
    res = restest.interpolate_dict(s, interp_args=d)
    assert res == 'Some wine with beer makes for fun parties!'

def test_get_dbfile():
    """ restest: test internal: get_dbfile() """
    res = restest.get_dbfile('databases', 10)
    assert 'databases' in res
    assert 'db10' in res

def test_diff_bytes_files():
    """ restest: test internal: diff_bytes_files() """
    filein = path_sample_files('input', 'tuxsmall.jpg')
    fileout1 = path_sample_files('output', 'bytes_tuxsmall1.jpg')
    fileout2 = path_sample_files('output', 'bytes_tuxsmall2.jpg')
    shutil.copy2(filein, fileout1)
    shutil.copy2(filein, fileout2)
    # Identical copies: 0 differing bytes out of the full file size
    res = restest.diff_bytes_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert res[0] == 0
    assert res[1] == os.stat(fileout1).st_size
    # Tamper 3 bytes, spread over two different blocks
    tamper_file(fileout2, 0, "X")
    tamper_file(fileout2, 4, "X")
    tamper_file(fileout2, 2000, "X")
    res = restest.diff_bytes_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert res[0] == 3

def test_diff_count_files():
    """ restest: test internal: diff_count_files() """
    filein = path_sample_files('input', 'tuxsmall.jpg')
    fileout1 = path_sample_files('output', 'count_tuxsmall1.jpg')
    fileout2 = path_sample_files('output', 'count_tuxsmall2.jpg')
    shutil.copy2(filein, fileout1)
    shutil.copy2(filein, fileout2)
    # Identical copies compare as equal (truthy result)
    res = restest.diff_count_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert res
    # Any tampered byte makes the comparison fail (falsy result)
    tamper_file(fileout2, 0, "X")
    tamper_file(fileout2, 4, "X")
    tamper_file(fileout2, 2000, "X")
    res = restest.diff_count_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert not res

def test_diff_bytes_dir():
    """ restest: test internal: diff_bytes_dir() """
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/bytes')
    fileout = path_sample_files('output', 'restest/bytes/tuxsmall.jpg')
    fileout2 = path_sample_files('output', 'restest/bytes/testaa.txt')
    remove_if_exist(dirout)
    shutil.copytree(dirin, dirout)

    # First compare the two folders that are identical
    res = restest.diff_bytes_dir(dirin, dirout)
    assert res[0] == 0

    # Tamper a few bytes of two files
    tamper_file(fileout, 0, "X")
    tamper_file(fileout, 4, "X")
    tamper_file(fileout, 2000, "X")
    tamper_file(fileout2, 0, "X")
    res = restest.diff_bytes_dir(dirin, dirout)
    assert res[0] == 4

    # Now remove a file altogether, its size should be added to the amount of differing bytes
    filesize = os.stat(fileout).st_size
    remove_if_exist(fileout)
    res = restest.diff_bytes_dir(dirin, dirout)
    assert res[0] == (filesize+1)

def test_diff_count_dir():
    """ restest: test internal: diff_count_dir() """
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/count')
    fileout = path_sample_files('output', 'restest/count/tuxsmall.jpg')
    fileout2 = path_sample_files('output', 'restest/count/testaa.txt')
    remove_if_exist(dirout)
    shutil.copytree(dirin, dirout)

    # First compare the two folders that are identical
    res = restest.diff_count_dir(dirin, dirout)
    assert res[0] == 0

    # Tamper a few bytes of two files
    tamper_file(fileout, 0, "X")
    tamper_file(fileout, 4, "X")
    tamper_file(fileout, 2000, "X")
    tamper_file(fileout2, 0, "X")
    res = restest.diff_count_dir(dirin, dirout)
    assert res[0] == 2

    # Now remove a file altogether, its size should be added to the amount of differing bytes
    filesize = os.stat(fileout).st_size
    remove_if_exist(fileout)
    res = restest.diff_count_dir(dirin, dirout)
    assert res[0] == 2

def test_compute_repair_power():
    """ restest: test internal: compute_repair_power() """
    # Note: be careful if you add tests here, the displayed value by print() may be rounded up! Use print(repr(compute_repair_power()))
    assert restest.compute_repair_power(0.3, 0.5) == 40.0
    assert restest.compute_repair_power(0.2, 0.8) == 75.0
    assert restest.compute_repair_power(0.6, 0.3) == -100.0
    assert restest.compute_repair_power(0.6, 0.0) == 0.6

def test_compute_diff_stats():
    """ restest: test internal: compute_diff_stats() """
    # NOTE(review): reuses the 'restest/count' folder names also used by
    # test_diff_count_dir(), but recreates the tree itself, so there is no
    # hard ordering dependency.
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/count')
    fileout = path_sample_files('output', 'restest/count/tuxsmall.jpg')
    fileout2 = path_sample_files('output', 'restest/count/testaa.txt')
    remove_if_exist(dirout)
    shutil.copytree(dirin, dirout)

    # First compare the two folders that are identical
    res = restest.compute_diff_stats(dirin, dirin, dirout)
    assert dict(res) == {'diff_bytes': (0, 92955), 'diff_bytes_prev': (0, 92955), 'diff_count': (0, 7), 'diff_count_prev': (0, 7), 'repair_power': 0, 'error': 0.0}

    # Tamper a few bytes of two files
    tamper_file(fileout, 0, "X")
    tamper_file(fileout, 4, "X")
    tamper_file(fileout, 2000, "X")
    tamper_file(fileout2, 0, "X")
    res = restest.compute_diff_stats(dirin, dirin, dirout)
    assert dict(res) == {'diff_bytes': (4, 92955), 'diff_bytes_prev': (4, 92955), 'diff_count': (2, 7), 'diff_count_prev': (2, 7), 'repair_power': 0, 'error': 0.0043031574417729005}

def test_stats_running_average():
    """ restest: test internal: stats_running_average() """
    stats1 = {'diff_bytes': (0, 92955), 'diff_bytes_prev': (0, 92955), 'diff_count': (0, 7), 'diff_count_prev': (0, 7), 'repair_power': 0, 'error': 0.0}
    stats2 = {'diff_bytes': (4, 92955), 'diff_bytes_prev': (4, 92955), 'diff_count': (2, 7), 'diff_count_prev': (2, 7), 'repair_power': 0, 'error': 0.5}
    # Third argument is the weight of the accumulated (first) stats
    assert restest.stats_running_average({"tamper": stats1}, {"tamper": stats2}, 1) == {'tamper': {'diff_count_prev': [1.0, 7.0], 'diff_count': [1.0, 7.0], 'diff_bytes_prev': [2.0, 92955.0], 'error': 0.25, 'repair_power': 0.0, 'diff_bytes': [2.0, 92955.0]}}
    assert restest.stats_running_average({"tamper": stats1}, {"tamper": stats2}, 3) == {'tamper': {'diff_count_prev': [0.5, 7.0], 'diff_count': [0.5, 7.0], 'diff_bytes_prev': [1.0, 92955.0], 'error': 0.125, 'repair_power': 0.0, 'diff_bytes': [1.0, 92955.0]}}

def test_main():
    """ restest: test main() """
    # Change directory so that the config's commands can access pyFileFixity scripts
    thispathname = os.path.dirname(__file__)
    sys.path.append(os.path.join(thispathname, '..'))
    # Setup paths
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/fulltest')
    configfile = path_sample_files('results', 'resiliency_tester_config_easy.cfg')
    configfile_hard = path_sample_files('results', 'resiliency_tester_config_hard.cfg')
    # Should be no error with the easy scenario (repair should be successful)
    assert restest.main("-i \"%s\" -o \"%s\" -c \"%s\" -f --silent" % (dirin, dirout, configfile)) == 0
    # Should be error with the hard scenario
    assert restest.main("-i \"%s\" -o \"%s\" -c \"%s\" -m 2 -f --silent" % (dirin, dirout, configfile_hard)) == 1
    # TODO: catch sys.stdout and check for the end stats?
--------------------------------------------------------------------------------