├── pyFileFixity ├── tests │ ├── __init__.py │ ├── files │ │ ├── testaa.txt │ │ ├── Sub2 │ │ │ └── testsub2.txt │ │ ├── tux.jpg │ │ ├── alice.pdf │ │ ├── sub │ │ │ ├── Snark.zip │ │ │ └── testsub.txt │ │ └── tuxsmall.jpg │ ├── results │ │ ├── test_repair_ecc_check.db │ │ ├── test_header_ecc_test_algo.db │ │ ├── test_header_ecc_test_dir.db │ │ ├── test_repair_ecc_sa_check.db │ │ ├── test_header_ecc_test_one_file.db │ │ ├── test_header_ecc_test_one_file_tamper.db │ │ ├── test_rfigc_test_error_file.log │ │ ├── test_structural_adaptive_ecc_test_algo.db │ │ ├── test_structural_adaptive_ecc_test_dir.db │ │ ├── test_structural_adaptive_ecc_test_one_file.db │ │ ├── test_structural_adaptive_ecc_test_one_file_tamper.db │ │ ├── test_rfigc_test_one_file.csv │ │ ├── test_rfigc_test_update_remove.csv │ │ ├── test_rfigc_test_dir.csv │ │ ├── test_rfigc_test_update_append.csv │ │ ├── resiliency_tester_config_hard.cfg │ │ └── resiliency_tester_config_easy.cfg │ ├── test_hasher.py │ ├── test_tee.py │ ├── test_aux_funcs.py │ ├── aux_tests.py │ ├── test_eccman.py │ ├── test_rfigc.py │ ├── test_header_ecc.py │ ├── test_repair_ecc.py │ └── test_resiliency_tester.py ├── lib │ ├── __init__.py │ ├── profilers │ │ ├── __init__.py │ │ ├── pprofile │ │ │ └── __init__.py │ │ ├── visual │ │ │ ├── __init__.py │ │ │ ├── pympler │ │ │ │ ├── util │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── compat.py │ │ │ │ │ └── stringutils.py │ │ │ │ ├── __init__.py │ │ │ │ ├── metadata.py │ │ │ │ ├── charts.py │ │ │ │ ├── garbagegraph.py │ │ │ │ └── mprofile.py │ │ │ ├── runsnakerun │ │ │ │ ├── __init__.py │ │ │ │ ├── squaremap │ │ │ │ │ └── __init__.py │ │ │ │ ├── macshim.py │ │ │ │ ├── homedirectory.py │ │ │ │ ├── pstatsadapter.py │ │ │ │ ├── _meliaejson.py │ │ │ │ ├── coldshotadapter.py │ │ │ │ └── meliaeadapter.py │ │ │ ├── kthread.py │ │ │ ├── profilebrowser.py │ │ │ ├── debug.py │ │ │ └── functionprofiler.py │ │ ├── memory_profiler │ │ │ ├── __init__.py │ │ │ └── README_DEV.rst │ │ └── pyinstrument │ │ │ ├── 
__init__.py │ │ │ ├── resources │ │ │ ├── triangle_hide.png │ │ │ ├── triangle_show.png │ │ │ ├── profile.js │ │ │ └── style.css │ │ │ ├── LICENSE │ │ │ ├── middleware.py │ │ │ ├── __main__.py │ │ │ └── README.md │ ├── docs │ │ ├── python - Optimizing a reed-solomon encoder (polynomial division) - Stack Overflow.pdf │ │ ├── Richard E. Blahut - Algebraic Codes for Data Transmission 2003 - Excerpt on errata decoder.pdf │ │ ├── python - Errata (erasures+errors) Berlekamp-Massey for Reed-Solomon decoding - Stack Overflow.pdf │ │ └── Initialization of errata evaluator polynomial for simultaneous computation in Berlekamp-Massey for Reed-Solomon.pdf │ ├── _compat.py │ ├── tee.py │ └── hasher.py ├── __init__.py ├── ecc_specification.txt ├── resiliency_tester_config.txt ├── easy_profiler.py └── pff.py ├── tux-example.jpg ├── requirements.txt ├── .gitattributes ├── setup.py ├── setup.py.bak ├── .coveragerc ├── .gitignore ├── codecov.yml ├── LICENSE ├── MANIFEST.in ├── appveyor.yml ├── tox.ini ├── pycleaner.py ├── .github └── workflows │ ├── codeql.yml │ ├── ci-build-downstream.yml │ ├── ci-build.yml │ └── ci-cd.yml ├── Makefile └── setup.cfg.bak /pyFileFixity/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/lib/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/tests/files/testaa.txt: -------------------------------------------------------------------------------- 1 | stcstcst -------------------------------------------------------------------------------- 
/pyFileFixity/lib/profilers/pprofile/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/tests/files/Sub2/testsub2.txt: -------------------------------------------------------------------------------- 1 | abcdef -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/memory_profiler/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/__init__.py: -------------------------------------------------------------------------------- 1 | from profiler import Profiler 2 | -------------------------------------------------------------------------------- /tux-example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/tux-example.jpg -------------------------------------------------------------------------------- /pyFileFixity/tests/files/tux.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/tux.jpg -------------------------------------------------------------------------------- /pyFileFixity/tests/files/alice.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/alice.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pathlib2 2 | argparse 3 | sortedcontainers 4 | unireedsolomon 5 | reedsolo>=2.0.0a 6 | tqdm 7 | distance 8 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/__init__.py: -------------------------------------------------------------------------------- 1 | """The RunSnakeRun GUI Profiler utility""" 2 | __version__ = '2.0.4' 3 | -------------------------------------------------------------------------------- /pyFileFixity/tests/files/sub/Snark.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/sub/Snark.zip -------------------------------------------------------------------------------- /pyFileFixity/tests/files/tuxsmall.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/files/tuxsmall.jpg -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/squaremap/__init__.py: -------------------------------------------------------------------------------- 1 | """Hierarchic data-viewing widget for wxPython""" 2 | __version__ = '1.0.1' 3 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_repair_ecc_check.db: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_repair_ecc_check.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_algo.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_algo.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_dir.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_dir.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_repair_ecc_sa_check.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_repair_ecc_sa_check.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_one_file.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_one_file.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_header_ecc_test_one_file_tamper.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_header_ecc_test_one_file_tamper.db -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/triangle_hide.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/profilers/pyinstrument/resources/triangle_hide.png -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/triangle_show.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/profilers/pyinstrument/resources/triangle_show.png -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_error_file.log: -------------------------------------------------------------------------------- 1 | tuxsmall.jpg|both md5 and sha1 hash failed, modification date has changed (before: 2015-11-13 19:21:34 - now: 2015-11-13 19:24:34) 2 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_algo.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_algo.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_dir.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_dir.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file.db: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file.db -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file_tamper.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/tests/results/test_structural_adaptive_ecc_test_one_file_tamper.db -------------------------------------------------------------------------------- /pyFileFixity/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = "Stephen Karl Larroque", 4 | __email__ = "LRQ3000@gmail.com", 5 | __version__ = "3.1.4" 6 | __all__ = ['__author__', '__email__', '__version__'] 7 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_one_file.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | tuxsmall.jpg|1c5704dd227e1de7d96b355c6111c764|f8a1f7675ea360bff97d02443c174c102fbcdefa| -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_update_remove.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | added_file.txt|fad0092ae8c6218c1fb78d281238168d|0a21ef1d2ccc47ffedf45192d0b8c26afd5d552f| 3 | -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/python - Optimizing a reed-solomon encoder (polynomial division) - Stack Overflow.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/python - Optimizing a reed-solomon encoder (polynomial division) - Stack Overflow.pdf -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/Richard E. Blahut - Algebraic Codes for Data Transmission 2003 - Excerpt on errata decoder.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/Richard E. Blahut - Algebraic Codes for Data Transmission 2003 - Excerpt on errata decoder.pdf -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/python - Errata (erasures+errors) Berlekamp-Massey for Reed-Solomon decoding - Stack Overflow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/python - Errata (erasures+errors) Berlekamp-Massey for Reed-Solomon decoding - Stack Overflow.pdf -------------------------------------------------------------------------------- /pyFileFixity/tests/files/sub/testsub.txt: -------------------------------------------------------------------------------- 1 | oOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo 2 | oOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 3 | OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO( -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/macshim.py: -------------------------------------------------------------------------------- 1 | def macshim(): 2 | """Shim to run 32-bit on 64-bit mac as a sub-process""" 3 | import subprocess, sys 4 | subprocess.call([ 5 | sys.argv[0] + '32' 6 | ]+sys.argv[1:], 7 | 
env={"VERSIONER_PYTHON_PREFER_32_BIT":"yes"} 8 | ) 9 | -------------------------------------------------------------------------------- /pyFileFixity/lib/docs/Initialization of errata evaluator polynomial for simultaneous computation in Berlekamp-Massey for Reed-Solomon.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lrq3000/pyFileFixity/HEAD/pyFileFixity/lib/docs/Initialization of errata evaluator polynomial for simultaneous computation in Berlekamp-Massey for Reed-Solomon.pdf -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/memory_profiler/README_DEV.rst: -------------------------------------------------------------------------------- 1 | Some information on the internals of this package. 2 | 3 | Tests 4 | ----- 5 | `make test` is the closest thing to tests on this package. It executes some 6 | example code and prints the information. If you don't see any exceptions nor 7 | any strange output then the tests suite "has succeeded". 8 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Simple .gitattributes that disables any text end-of-line normalization/conversion 2 | # `-text` still allows diffs for text types detected by git heuristics, contrary to `binary` 3 | # This avoids issues with binary files eof being converted when using `pip install git+...` 4 | # More templates can be found at: https://github.com/alexkaratarakis/gitattributes 5 | 6 | * -text 7 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | DATA_PATH = '' 4 | 5 | # DATA_PATH will be initialized from distutils when installing. 
If Pympler is 6 | # installed via setuptools/easy_install, the data will be installed alongside 7 | # the source files instead. 8 | if not os.path.exists(DATA_PATH): 9 | DATA_PATH = os.path.realpath(os.path.join(__file__, '..', '..')) 10 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/profile.js: -------------------------------------------------------------------------------- 1 | /* jshint globalstrict: true */ 2 | /* global $ */ 3 | 4 | 'use strict'; 5 | $('.frame').click(function (event) { 6 | $(this).toggleClass('collapse'); 7 | event.stopPropagation(); 8 | }); 9 | $('.frame, body').mousemove(function (event) { 10 | $('.frame.last-hover').removeClass('last-hover'); 11 | $(this).addClass('last-hover'); 12 | event.stopPropagation(); 13 | }); 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # An empty setup.py is required for retrocompatibility with older versions of pip that do not support pyproject.toml-only projects, especially to install in editable mode, see: https://github.com/pypa/setuptools/issues/2816 2 | 3 | from setuptools import setup 4 | setup() # necessary to have at least a setup() call, otherwise setuptools will complaint with an exception: `AssertionError: Multiple .egg-info directories found\nerror: subprocess-exited-with-error` 5 | -------------------------------------------------------------------------------- /setup.py.bak: -------------------------------------------------------------------------------- 1 | # For Py2: Necessary for setup.py to exist even if empty, so that setuptools finds setup.cfg 2 | # per setuptools documentation: "If compatibility with legacy builds (i.e. 
those not using the PEP 517 build API) is desired, a setup.py file containing a setup() function call is still required even if your configuration resides in setup.cfg." https://setuptools.pypa.io/en/latest/userguide/declarative_config.html 3 | # DEPRECATED: Python 2.7 support dropped, hence this file is unnecessary. 4 | 5 | from setuptools import setup 6 | 7 | setup() 8 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | relative_files = True 3 | branch = True 4 | omit = 5 | pyFileFixity/tests/* 6 | pyFileFixity/__init__.py 7 | pyFileFixity/easy_profiler.py 8 | pyFileFixity/ecc_speedtest.py 9 | pyFileFixity/filetamper.py 10 | pycleaner.py 11 | setup.py 12 | include = 13 | pyFileFixity/lib/aux_funcs.py 14 | pyFileFixity/lib/eccman.py 15 | pyFileFixity/lib/hasher.py 16 | pyFileFixity/lib/tee.py 17 | pyFileFixity/_infos.py 18 | pyFileFixity/header_ecc.py 19 | pyFileFixity/repair_ecc.py 20 | pyFileFixity/replication_repair.py 21 | pyFileFixity/resiliency_tester.py 22 | pyFileFixity/rfigc.py 23 | pyFileFixity/structural_adaptive_ecc.py 24 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_dir.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | alice.pdf|298aeefe8c00f2d92d660987bee67260|106e7ad4d3927c5906cd366cc0d5bd887bdc3300| 3 | testaa.txt|c0d8a5f3a813d488cbfb83f1b147b14b|6ca36c14f68e4eefa47ec23ccc333378b8d0fe73| 4 | tux.jpg|81e19bbf2efaeb1d6d6473c21c48e4b7|6e38ea91680ef0f960db0fd6a973cf50ef765369| 5 | tuxsmall.jpg|1c5704dd227e1de7d96b355c6111c764|f8a1f7675ea360bff97d02443c174c102fbcdefa| 6 | Sub2/testsub2.txt|e80b5017098950fc58aad83c8c14978e|1f8ac10f23c5b5bc1167bda84b833e5c057a77d2| 7 | 
sub/Snark.zip|f8435b883eaf03bf84cae75a706a9b8c|e68efd832dd3517d4c80db6a84b98591eeabe864| 8 | sub/testsub.txt|8ef3d6be5baa449c127aa00083ebbe34|bfb7ef83b23e0791199e4ebe9ae34489a4ef7004| 9 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/test_rfigc_test_update_append.csv: -------------------------------------------------------------------------------- 1 | path|md5|sha1|last_modification_timestamp|last_modification_date|size|ext 2 | alice.pdf|298aeefe8c00f2d92d660987bee67260|106e7ad4d3927c5906cd366cc0d5bd887bdc3300| 3 | testaa.txt|c0d8a5f3a813d488cbfb83f1b147b14b|6ca36c14f68e4eefa47ec23ccc333378b8d0fe73| 4 | tux.jpg|81e19bbf2efaeb1d6d6473c21c48e4b7|6e38ea91680ef0f960db0fd6a973cf50ef765369| 5 | tuxsmall.jpg|1c5704dd227e1de7d96b355c6111c764|f8a1f7675ea360bff97d02443c174c102fbcdefa| 6 | sub/Snark.zip|f8435b883eaf03bf84cae75a706a9b8c|e68efd832dd3517d4c80db6a84b98591eeabe864| 7 | sub/testsub.txt|8ef3d6be5baa449c127aa00083ebbe34|bfb7ef83b23e0791199e4ebe9ae34489a4ef7004| 8 | added_file.txt|fad0092ae8c6218c1fb78d281238168d|0a21ef1d2ccc47ffedf45192d0b8c26afd5d552f| 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # Testing dos scripts 25 | *.bat 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | # PyBuilder 53 | target/ 54 | 55 | # Temporary test files 56 | /tests/out/ 57 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Note about syntax: avoid using tabs! 2 | # If yaml is malformatted, it can be checked locally against codecov specific yaml reader: https://docs.codecov.com/docs/codecov-yaml#validate-your-repository-yaml 3 | coverage: 4 | precision: 2 # 0 decimals of precision 5 | round: down # Round to floor 6 | range: # red -> yellow -> green 7 | - 60.0 8 | - 80.0 9 | 10 | status: 11 | project: 12 | default: 13 | # basic 14 | target: auto 15 | removed_code_behavior: fully_covered_patch # see: https://about.codecov.io/blog/new-codecov-setting-removed-code-behavior/ 16 | threshold: 1% # allow 1% coverage variance, because depending on the run, different OSes results are uploaded for each run and some code is OS-specific, so this is it's within expected OS-dependent variability 17 | 18 | patch: 19 | default: 20 | threshold: 1% # allow 1% coverage variance 21 | -------------------------------------------------------------------------------- /pyFileFixity/ecc_specification.txt: -------------------------------------------------------------------------------- 1 | **This is an example ECC file with clear specification of each field. Lines beginning with ** and finishing with a line return \n are considered comments. In fact, all lines before the first entrymarker are considered comments and are just skipped. 
Note that after the headers comments, the file is binary, and thus there's no line returns. 2 | **SOFTWAREIDENTv111...000** 3 | ** Parameters: 4 | ** Parameters: 5 | ** Parameters: 6 | ** Generated under . 7 | [relative-file1-path.file-extension][file1-size][relative-file1-path-ecc][file1-size-ecc][block0-hash][block0-ecc][block1-hash][block1-ecc][block2-hash][block2-ecc]...[relative-file2-path.file-extension][file2-size][relative-file2-path-ecc][file2-size-ecc][block0-hash][block0-ecc][block1-hash][block1-ecc][block2-hash][block2-ecc]... -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2023 Stephen Larroque 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Cannot yet fully replace MANIFEST.in by pyproject.toml if we use setuptools, see: https://github.com/pypa/setuptools/issues/3341 2 | # Misc 3 | include .coveragerc # for compatibility with Py2, otherwise the coverage is configured in pyproject.toml 4 | #include LICENSE 5 | #include Makefile 6 | #include README.md 7 | #include README.rst 8 | #include TODO.md 9 | 10 | # Non-python files 11 | include pyFileFixity/ecc_specification.txt # done in pyproject.toml, but for Py2 we need to put it here 12 | include pyFileFixity/resiliency_tester_config.txt # idem 13 | 14 | # Libraries 15 | recursive-include pyFileFixity/lib *.py 16 | recursive-include pyFileFixity/lib *.pyx # Cython files, all were moved to their own modules in distinct repositories, but we may optimize some routines in the future 17 | recursive-exclude pyFileFixity/lib/profilers * # no need for profilers 18 | 19 | # Test suite 20 | recursive-include pyFileFixity/tests *.py # unit test scripts 21 | recursive-include pyFileFixity/tests/files * # attach necessary files to run tests 22 | recursive-include pyFileFixity/tests/results * # attach necessary py-make config and resulting database files to run and compare tests results 23 | include tox.ini 24 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{branch}-{build}' 2 | build: off 3 | cache: 4 | - '%LOCALAPPDATA%\pip\Cache' 5 | environment: 6 | global: 7 | WITH_COMPILER: 'cmd /E:ON /V:ON /C .\ci\appveyor-with-compiler.cmd' 8 | matrix: 9 | - TOXENV: py27 10 | TOXPYTHON: C:\Python27\python.exe 11 | PYTHON_HOME: C:\Python27 12 | PYTHON_VERSION: '2.7' 13 | PYTHON_ARCH: '27' 14 | 15 | - TOXENV: pypy 16 | TOXPYTHON: pypy.exe 17 | 
PYTHON_VERSION: '2.7' 18 | PYTHON_ARCH: '27' 19 | 20 | init: 21 | - ps: echo $env:TOXENV 22 | - ps: ls C:\Python* 23 | install: 24 | - python -u ci\appveyor-bootstrap.py 25 | - '%PYTHON_HOME%\Scripts\virtualenv --version' 26 | - '%PYTHON_HOME%\Scripts\easy_install --version' 27 | - '%PYTHON_HOME%\Scripts\pip --version' 28 | - '%PYTHON_HOME%\Scripts\tox --version' 29 | - (New-Object Net.WebClient).DownloadFile('https://bitbucket.org/pypy/pypy/downloads/pypy-4.0.0-win32.zip', "$env:appveyor_build_folder\pypy-4.0.0-win32.zip") 30 | - 7z x pypy-4.0.0-win32.zip | Out-Null 31 | - $env:path = "$env:appveyor_build_folder\pypy-4.0.0-win32.zip;$env:path" 32 | test_script: 33 | - '%WITH_COMPILER% %PYTHON_HOME%\Scripts\tox' 34 | 35 | on_failure: 36 | - ps: dir "env:" 37 | - ps: get-content .tox\*\log\* 38 | artifacts: 39 | - path: dist\* 40 | ### To enable remote debugging uncomment this: 41 | # - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 42 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Joe Rickerby 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. 
Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/metadata.py: -------------------------------------------------------------------------------- 1 | """Project metadata. 2 | 3 | This information is used in setup.py as well as in doc/source/conf.py. 4 | 5 | """ 6 | 7 | project_name = 'Pympler' 8 | version = '0.2.1' 9 | url = 'http://packages.python.org/Pympler/' 10 | license = 'Apache License, Version 2.0' #PYCHOK valid 11 | author = 'Jean Brouwers, Ludwig Haehne, Robert Schuppenies' 12 | author_email = 'pympler-dev@googlegroups.com' 13 | copyright = '2008-2011, ' + author #PYCHOK valid 14 | description = ('A development tool to measure, monitor and analyze ' 15 | 'the memory behavior of Python objects.') 16 | long_description = ''' 17 | Pympler is a development tool to measure, monitor and analyze the 18 | memory behavior of Python objects in a running Python application. 
19 | 20 | By pympling a Python application, detailed insight in the size and 21 | the lifetime of Python objects can be obtained. Undesirable or 22 | unexpected runtime behavior like memory bloat and other "pymples" 23 | can easily be identified. 24 | 25 | Pympler integrates three previously separate projects into a single, 26 | comprehensive profiling tool. Asizeof provides basic size information 27 | for one or several Python objects, muppy is used for on-line 28 | monitoring of a Python application and the class tracker provides 29 | off-line analysis of the lifetime of selected Python objects. A 30 | web profiling frontend exposes process statistics, garbage 31 | visualisation and class tracker statistics. 32 | 33 | Pympler is written entirely in Python, with no dependencies to 34 | external libraries. It has been tested with Python 2.4, 2.5, 2.6, 2.7, 35 | 3.1, 3.2 on Linux, Windows and MacOS X. 36 | ''' 37 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/resiliency_tester_config_hard.cfg: -------------------------------------------------------------------------------- 1 | # IMPORTANT: to be compatible with `python setup.py make alias`, you must make 2 | # sure that you only put one command per line, and ALWAYS put a line return 3 | # after an alias and before a command, eg: 4 | # 5 | #``` 6 | #all: 7 | # test 8 | # install 9 | #test: 10 | # nosetest 11 | #install: 12 | # python setup.py install 13 | # ``` 14 | # 15 | # resiliency_tester.py supports a templating system: you can use the following special tags, they will be interpolated at runtime: 16 | # - {inputdir}: input directory. Depending on the stage, this is either the untampered files (a copy of the original files), the tampered folder, or even previous repair folders during the repair stage. 17 | # - {dbdir}: database directory, where the generated databases will be placed. 
18 | # - {outputdir}: output directory, where the files generated after executing the current command will be placed in. 19 | 20 | before_tamper: # this will be executed before files tampering. Generate your ecc/database files here. 21 | #python header_ecc.py -i "{inputdir}" -d "{dbdir}/hecc.txt" --size 4096 --ecc_algo 3 -g -f --silent 22 | 23 | tamper: # parameters to tamper the files and even the database files. 24 | python filetamper.py -i "{inputdir}" -m "n" -p 0.05 -b "3|6" --silent 25 | python filetamper.py -i "{dbdir}" -m "n" -p 0.001 -b "4|9" --silent 26 | 27 | after_tamper: # execute commands after tampering. Can be used to recover 28 | #python repair_ecc.py -i "{dbdir}/hecc.txt" --index "{dbdir}/hecc.txt.idx" -o "{dbdir}/heccrep.txt" -t 0.4 -f --silent 29 | 30 | repair: 31 | #python header_ecc.py -i "{inputdir}" -d "{dbdir}/heccrep.txt" -o "{outputdir}" -c --size 4096 --no_fast_check --ecc_algo 3 --silent 32 | 33 | none: 34 | # used for unit testing 35 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/resources/style.css: -------------------------------------------------------------------------------- 1 | .frame { 2 | margin-left: 0; 3 | font-size: 10pt; 4 | border-left: 1px solid #eee; 5 | padding-left: 15px; 6 | background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAALCAMAAAB8rpxtAAAARVBMVEUAAAD9/f5xe4R1fYZ1fIf////O0NWzs7OysrJ2fYh2fYh2fIf///+FjJaxtbv////n6Or///+VmqP///92fYh5gYyCiJLu0pBqAAAAFHRSTlMAghs7SSu2CgTirXMj/M9EnhHqZ/jLwbwAAABGSURBVAjXY0ABHOxQwAHkMIqJQAAnkMPKBuWwgNQJC4HZXGBNggKiIA4TxAh+PiCbG2YeI68IJwfMdB42EWaEXawsDJgAACnNA3xj5yn5AAAAAElFTkSuQmCC); 7 | background-repeat: no-repeat; 8 | cursor: default; 9 | pointer-events: fill; 10 | background-color: rgba(255,255,255,0.2); 11 | } 12 | .frame.collapse { 13 | background-image: 
url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAsAAAAMCAMAAACDd7esAAAARVBMVEUAAAD9/f51fYZxe4R2fYjO0NX///+zs7OysrJ1fId2fYh2fIf///+xtbuFjJb////n6Or///+VmqP///92fYh5gYyCiJLiCCvVAAAAFHRSTlMAgjsb4rYrCgRJrXMjz/xEnhHqZ/fev68AAAA+SURBVAjXY8AJOFg44Wx2EREuZjgbCLg5EGwRFiYIGwI44Ww+VjYoW5SXkQeqRkiAH6pXjFVYEGYXIxuUBQDVJAN8ddpNUwAAAABJRU5ErkJggg==); 14 | } 15 | .frame.collapse > .frame-children { 16 | display: none; 17 | } 18 | .frame.no_children { 19 | background-image: none !important; 20 | } 21 | 22 | .frame.application > .frame-info > .function { 23 | background-color: rgba(40, 255, 66, 0.14); 24 | } 25 | 26 | .frame:hover .frame { 27 | color: #888; 28 | } 29 | .frame:hover { 30 | color: black !important; 31 | background-color: rgba(188,213,255,0.02); 32 | } 33 | .frame.last-hover { 34 | border-left-color: #a66 !important; 35 | color: black !important; 36 | } 37 | .frame.last-hover .frame { 38 | color: black !important; 39 | } 40 | .function { 41 | font-weight: bold; 42 | font-family: 'Consolas', 'Lucida Console', 'Monaco', monospace; 43 | } 44 | .code-position { 45 | opacity: 0.5; 46 | } -------------------------------------------------------------------------------- /pyFileFixity/tests/test_hasher.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import unittest 4 | import sys 5 | import os 6 | import shutil 7 | 8 | from .aux_tests import path_sample_files, create_dir_if_not_exist 9 | 10 | from ..lib.hasher import Hasher 11 | 12 | class TestHasher(unittest.TestCase): 13 | def setup_module(self): 14 | """ Initialize the tests by emptying the out directory """ 15 | outfolder = path_sample_files('output') 16 | shutil.rmtree(outfolder, ignore_errors=True) 17 | create_dir_if_not_exist(outfolder) 18 | 19 | def test_hasher(self): 20 | """ hasher: test hashes """ 21 | instring = "Lorem ipsum and some more stuff\nThe answer to the question of life, universe and everything is... 42." 
22 | # Put all hashing algo results here (format: "algo_name": [length, result_for_instring]) 23 | algo_params = {"md5": [32, b'173efbe0280ce506ddbfbfc9aeb44a1a'], 24 | "shortmd5": [8, b'MTczZWZi'], 25 | "shortsha256": [8, b'NjgzMjRk'], 26 | "minimd5": [4, b'MTcz'], 27 | "minisha256": [4, b'Njgz'], 28 | "none": [0, ''], 29 | } 30 | # For each hashing algo, produce a hash and check the length and hash 31 | for algo in Hasher.known_algo: 32 | h = Hasher(algo) 33 | shash = h.hash(instring) 34 | #print(algo+": "+shash) # debug 35 | assert len(shash) == algo_params[algo][0] 36 | assert shash == algo_params[algo][1] 37 | # Check that unknown algorithms raise an exception 38 | self.assertRaises(NameError, Hasher, "unknown_algo") 39 | # Second check of unknown algo raising exception 40 | h = Hasher() 41 | h.algo = "unknown_algo" 42 | self.assertRaises(NameError, h.hash, "abcdef") 43 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # Isolated package builds test from a temporary directory via tox 2 | # Thanks to Paul Ganssle for the minimal example, see: https://blog.ganssle.io/articles/2019/08/test-as-installed.html and https://github.com/pganssle/tox-examples/blob/master/changedir/tox.ini 3 | # Use `tox -e py` 4 | # 5 | # Tox (http://tox.testrun.org/) is a tool for running tests 6 | # in multiple virtualenvs. This configuration file will run the 7 | # test suite on all supported python versions. 
To use it, "pip install tox" 8 | 9 | [tox] 10 | minversion=3.13.0 11 | isolated_build=True 12 | envlist = py27, py32, py34, pypy, pypy3, setup.py 13 | 14 | [testenv] 15 | description = Run the tests under {basepython} 16 | deps = pytest 17 | changedir = {envtmpdir} # use a temporary directory to ensure we test the built package, not the repository version: https://blog.ganssle.io/articles/2019/08/test-as-installed.html and https://github.com/pganssle/tox-examples/blob/master/changedir/tox.ini 18 | commands = python -m pytest {posargs} {toxinidir} 19 | 20 | [testenvpy2] 21 | deps = 22 | #jpeg pillow # to support rfigc.py --structure_check 23 | nose 24 | nose-timer 25 | coverage<4 26 | coveralls 27 | commands = 28 | nosetests pyFileFixity/tests/ --with-coverage --cover-package=pyFileFixity -d -v --with-timer 29 | coveralls 30 | 31 | [testenv:pypy2] 32 | #basepython=C:\Program Files (x86)\pypy-4.0.0-win32\pypy.exe 33 | # No coverage for PyPy, too slow... 34 | deps = 35 | #pypy-tk # necessary for pypy to install pillow 36 | #jpeg pillow # to support rfigc.py --structure_check 37 | nose 38 | nose-timer 39 | commands = 40 | pypy --version 41 | nosetests pyFileFixity/tests/ -d -v --with-timer 42 | 43 | [testenv:pypy3] 44 | # No coverage for PyPy, too slow... 
45 | deps = 46 | nose 47 | nose-timer 48 | commands = 49 | pypy --version 50 | nosetests pyFileFixity/tests/ -d -v --with-timer 51 | -------------------------------------------------------------------------------- /pycleaner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import fnmatch 5 | 6 | class GlobDirectoryWalker: 7 | 8 | # a forward iterator that traverses a directory tree 9 | def __init__(self, directory, pattern="*"): 10 | self.stack = [directory] 11 | self.pattern = pattern 12 | self.files = [] 13 | self.index = 0 14 | 15 | def __getitem__(self, index): 16 | while 1: 17 | try: 18 | file = self.files[self.index] 19 | self.index = self.index + 1 20 | except IndexError: 21 | # pop next directory from stack 22 | self.directory = self.stack.pop() 23 | self.files = os.listdir(self.directory) 24 | self.index = 0 25 | else: 26 | # got a filename 27 | fullname = os.path.join(self.directory, file) 28 | if os.path.isdir(fullname) and not os.path.islink(fullname): 29 | self.stack.append(fullname) 30 | if fnmatch.fnmatch(file, self.pattern): 31 | return fullname 32 | 33 | def pycCleanup(directory,path,filext='pyc'): 34 | for filename in directory: 35 | if filename[-3:] == filext: 36 | print '- ' + filename 37 | os.remove(path+os.sep+filename) 38 | elif os.path.isdir(path+os.sep+filename): 39 | pycCleanup(os.listdir(path+os.sep+filename),path+os.sep+filename) 40 | 41 | def cleanup1(filext='pyc'): 42 | directory = os.listdir('.') 43 | print('Deleting .%s files recursively in %s.' % (filext, str(directory))) 44 | pycCleanup(directory,'.',filext) 45 | 46 | def cleanup2(filext='pyc'): 47 | for file in GlobDirectoryWalker(".", "*."+filext): 48 | print file 49 | os.remove(file) 50 | 51 | print "After..." 
52 | for file in GlobDirectoryWalker(".", "*."+filext): 53 | print file 54 | 55 | if __name__ == '__main__': 56 | cleanup1() -------------------------------------------------------------------------------- /pyFileFixity/lib/_compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | 6 | try: # compatibility with Python 3+ 7 | _range = xrange 8 | except NameError: 9 | _range = range 10 | 11 | try: 12 | from cStringIO import StringIO 13 | _StringIO = StringIO 14 | except (ImportError, NameError): #python3.x 15 | from io import StringIO 16 | _StringIO = StringIO 17 | 18 | try: 19 | from itertools import izip 20 | _izip = izip 21 | except ImportError: #python3.x 22 | _izip = zip 23 | 24 | try: 25 | _str = basestring 26 | except NameError: 27 | _str = str 28 | 29 | if sys.version_info < (3,): 30 | def b(x): 31 | return x 32 | else: 33 | import codecs 34 | def b(x): 35 | if isinstance(x, _str): 36 | return codecs.latin_1_encode(x)[0] 37 | else: 38 | return x 39 | 40 | if sys.version_info < (3,): 41 | import io 42 | def _open_csv(x, mode='r'): 43 | return io.open(x, mode+'b') # on Py3, io.open() is the same as open(), see: https://stackoverflow.com/questions/5250744/difference-between-open-and-codecs-open-in-python 44 | else: 45 | def _open_csv(x, mode='r'): 46 | return open(x, mode+'t', newline='', encoding='utf-8') # for csv module, open() mode needed to be binary for Python 2, but on Py3 it needs to be text mode, no binary! 
https://stackoverflow.com/a/34283957/1121352 47 | 48 | if sys.version_info < (3,): 49 | def _ord(x): 50 | return ord(x) 51 | else: 52 | def _ord(x): 53 | if isinstance(x, int): 54 | return x 55 | else: 56 | return ord(x) 57 | 58 | if sys.version_info < (3,): 59 | def _bytes(x): 60 | return bytes(x) 61 | else: 62 | def _bytes(x): 63 | if isinstance(x, (bytes, bytearray)): 64 | return x 65 | else: 66 | return bytes(x, 'latin-1') 67 | 68 | try: 69 | from itertools import izip 70 | _izip = izip 71 | except ImportError: 72 | _izip = zip 73 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/util/compat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compatibility layer to allow Pympler being used from Python 2.x and Python 3.x. 3 | """ 4 | 5 | import sys 6 | 7 | # Version dependent imports 8 | 9 | try: 10 | from StringIO import StringIO 11 | BytesIO = StringIO 12 | except ImportError: 13 | from io import StringIO, BytesIO 14 | 15 | try: 16 | import cPickle as pickle 17 | except ImportError: 18 | import pickle #PYCHOK Python 3.0 module 19 | 20 | try: 21 | from new import instancemethod 22 | except ImportError: # Python 3.0 23 | def instancemethod(*args): 24 | return args[0] 25 | 26 | try: 27 | from HTMLParser import HTMLParser 28 | except ImportError: # Python 3.0 29 | from html.parser import HTMLParser 30 | 31 | try: 32 | from httplib import HTTPConnection 33 | except ImportError: # Python 3.0 34 | from http.client import HTTPConnection 35 | 36 | try: 37 | from urllib2 import Request, urlopen, URLError 38 | except ImportError: # Python 3.0 39 | from urllib.request import Request, urlopen 40 | from urllib.error import URLError 41 | 42 | try: 43 | import pympler.util.bottle2 as bottle 44 | except (SyntaxError, ImportError): 45 | try: 46 | import pympler.util.bottle3 as bottle 47 | except (SyntaxError, ImportError): # Python 2.4 48 | bottle = None 49 | 
50 | # Helper functions 51 | 52 | # Python 2.x expects strings when calling communicate and passing data via a 53 | # pipe while Python 3.x expects binary (encoded) data. The following works with 54 | # both: 55 | # 56 | # p = Popen(..., stdin=PIPE) 57 | # p.communicate(encode4pipe("spam")) 58 | # 59 | encode4pipe = lambda s: s 60 | if sys.hexversion >= 0x3000000: 61 | encode4pipe = lambda s: s.encode() 62 | 63 | def object_in_list(obj, l): 64 | """Returns True if object o is in list. 65 | 66 | Required compatibility function to handle WeakSet objects. 67 | """ 68 | for o in l: 69 | if o is obj: 70 | return True 71 | return False 72 | -------------------------------------------------------------------------------- /pyFileFixity/resiliency_tester_config.txt: -------------------------------------------------------------------------------- 1 | # IMPORTANT: to be compatible with `python setup.py make alias`, you must make 2 | # sure that you only put one command per line, and ALWAYS put a line return 3 | # after an alias and before a command, eg: 4 | # 5 | #``` 6 | #all: 7 | # test 8 | # install 9 | #test: 10 | # nosetest 11 | #install: 12 | # python setup.py install 13 | # ``` 14 | # 15 | # resiliency_tester.py supports a templating system: you can use the following special tags, they will be interpolated at runtime: 16 | # - {inputdir}: input directory. Depending on the stage, this is either the untampered files (a copy of the original files), the tampered folder, or even previous repair folders during the repair stage. 17 | # - {dbdir}: database directory, where the generated databases will be placed. 18 | # - {outputdir}: output directory, where the files generated after executing the current command will be placed in. 19 | 20 | before_tamper: # this will be executed before files tampering. Generate your ecc/database files here. 
21 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/hecc.txt" --size 4096 --ecc_algo 3 -g -f 22 | python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/ecc.txt" -r1 0.3 -r2 0.2 -r3 0.1 -g -f --ecc_algo 3 23 | 24 | tamper: # parameters to tamper the files and even the database files. 25 | python filetamper.py -i "{inputdir}" -m "n" -p 0.005 -b "3|6" 26 | python filetamper.py -i "{dbdir}" -m "n" -p 0.001 -b "4|9" 27 | 28 | after_tamper: # execute commands after tampering. Can be used to recover 29 | python repair_ecc.py -i "{dbdir}/hecc.txt" --index "{dbdir}/hecc.txt.idx" -o "{dbdir}/heccrep.txt" -t 0.4 -f 30 | python repair_ecc.py -i "{dbdir}/ecc.txt" --index "{dbdir}/ecc.txt.idx" -o "{dbdir}/eccrep.txt" -t 0.4 -f 31 | 32 | repair: 33 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/heccrep.txt" -o "{outputdir}" -c --size 4096 --no_fast_check --ecc_algo 3 34 | python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/eccrep.txt" -o "{outputdir}" -c -r1 0.3 -r2 0.2 -r3 0.1 -f --ecc_algo 3 35 | 36 | none: 37 | # used for unit testing 38 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/util/stringutils.py: -------------------------------------------------------------------------------- 1 | """ 2 | String utility functions. 3 | """ 4 | 5 | def safe_repr(obj, clip=None): 6 | """ 7 | Convert object to string representation, yielding the same result a `repr` 8 | but catches all exceptions and returns 'N/A' instead of raising the 9 | exception. Strings may be truncated by providing `clip`. 
10 | 11 | >>> safe_repr(42) 12 | '42' 13 | >>> safe_repr('Clipped text', clip=8) 14 | 'Clip..xt' 15 | >>> safe_repr([1,2,3,4], clip=8) 16 | '[1,2..4]' 17 | """ 18 | try: 19 | s = repr(obj) 20 | if not clip or len(s) <= clip: 21 | return s 22 | else: 23 | return s[:clip-4]+'..'+s[-2:] 24 | except: 25 | return 'N/A' 26 | 27 | 28 | def trunc(obj, max, left=0): 29 | """ 30 | Convert `obj` to string, eliminate newlines and truncate the string to `max` 31 | characters. If there are more characters in the string add ``...`` to the 32 | string. With `left=True`, the string can be truncated at the beginning. 33 | 34 | @note: Does not catch exceptions when converting `obj` to string with `str`. 35 | 36 | >>> trunc('This is a long text.', 8) 37 | This ... 38 | >>> trunc('This is a long text.', 8, left) 39 | ...text. 40 | """ 41 | s = str(obj) 42 | s = s.replace('\n', '|') 43 | if len(s) > max: 44 | if left: 45 | return '...'+s[len(s)-max+3:] 46 | else: 47 | return s[:(max-3)]+'...' 48 | else: 49 | return s 50 | 51 | def pp(i, base=1024): 52 | """ 53 | Pretty-print the integer `i` as a human-readable size representation. 54 | """ 55 | degree = 0 56 | pattern = "%4d %s" 57 | while i > base: 58 | pattern = "%7.2f %s" 59 | i = i / float(base) 60 | degree += 1 61 | scales = ['B', 'KB', 'MB', 'GB', 'TB', 'EB'] 62 | return pattern % (i, scales[degree]) 63 | 64 | def pp_timestamp(t): 65 | """ 66 | Get a friendly timestamp represented as a string. 
67 | """ 68 | if t is None: 69 | return '' 70 | h, m, s = int(t / 3600), int(t / 60 % 60), t % 60 71 | return "%02d:%02d:%05.2f" % (h, m, s) 72 | 73 | -------------------------------------------------------------------------------- /pyFileFixity/tests/results/resiliency_tester_config_easy.cfg: -------------------------------------------------------------------------------- 1 | # IMPORTANT: to be compatible with `python setup.py make alias`, you must make 2 | # sure that you only put one command per line, and ALWAYS put a line return 3 | # after an alias and before a command, eg: 4 | # 5 | #``` 6 | #all: 7 | # test 8 | # install 9 | #test: 10 | # nosetest 11 | #install: 12 | # python setup.py install 13 | # ``` 14 | # 15 | # resiliency_tester.py supports a templating system: you can use the following special tags, they will be interpolated at runtime: 16 | # - {inputdir}: input directory. Depending on the stage, this is either the untampered files (a copy of the original files), the tampered folder, or even previous repair folders during the repair stage. 17 | # - {dbdir}: database directory, where the generated databases will be placed. 18 | # - {outputdir}: output directory, where the files generated after executing the current command will be placed in. 19 | 20 | before_tamper: # this will be executed before files tampering. Generate your ecc/database files here. 21 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/hecc.txt" --size 4096 --ecc_algo 3 -g -f --silent 22 | #python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/ecc.txt" -r1 0.3 -r2 0.2 -r3 0.1 -g -f --ecc_algo 3 --silent 23 | 24 | tamper: # parameters to tamper the files and even the database files. 25 | python filetamper.py -i "{inputdir}" -m "n" -p 0.001 -b "3|6" --header 4096 --silent 26 | python filetamper.py -i "{dbdir}" -m "n" -p 0.0001 -b "4|9" --header 4096 --silent 27 | 28 | after_tamper: # execute commands after tampering. 
Can be used to recover 29 | python repair_ecc.py -i "{dbdir}/hecc.txt" --index "{dbdir}/hecc.txt.idx" -o "{dbdir}/heccrep.txt" -t 0.4 -f --silent 30 | #python repair_ecc.py -i "{dbdir}/ecc.txt" --index "{dbdir}/ecc.txt.idx" -o "{dbdir}/eccrep.txt" -t 0.4 -f --silent 31 | 32 | repair: 33 | python header_ecc.py -i "{inputdir}" -d "{dbdir}/heccrep.txt" -o "{outputdir}" -c --size 4096 --no_fast_check --ecc_algo 3 --silent 34 | #python structural_adaptive_ecc.py -i "{inputdir}" -d "{dbdir}/eccrep.txt" -o "{outputdir}" -c -r1 0.3 -r2 0.2 -r3 0.1 -f --ecc_algo 3 --silent 35 | 36 | none: 37 | # used for unit testing 38 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/charts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate charts from gathered data. 3 | 4 | Requires **matplotlib**. 5 | """ 6 | 7 | try: 8 | import matplotlib 9 | matplotlib.use('Agg') 10 | import matplotlib.pyplot as plt 11 | 12 | def tracker_timespace(filename, stats): 13 | """ 14 | Create a time-space chart from a ``Stats`` instance. 
15 | """ 16 | classlist = list(stats.index.keys()) 17 | classlist.sort() 18 | 19 | for snapshot in stats.snapshots: 20 | stats.annotate_snapshot(snapshot) 21 | 22 | timestamps = [fp.timestamp for fp in stats.snapshots] 23 | offsets = [0] * len(stats.snapshots) 24 | poly_labels = [] 25 | polys = [] 26 | for clsname in classlist: 27 | pct = [fp.classes[clsname]['pct'] for fp in stats.snapshots] 28 | if max(pct) > 3.0: 29 | sizes = [fp.classes[clsname]['sum'] for fp in stats.snapshots] 30 | sizes = [float(x)/(1024*1024) for x in sizes] 31 | sizes = [offset+size for offset, size in zip(offsets, sizes)] 32 | poly = matplotlib.mlab.poly_between(timestamps, offsets, sizes) 33 | polys.append( (poly, {'label': clsname}) ) 34 | poly_labels.append(clsname) 35 | offsets = sizes 36 | 37 | fig = plt.figure(figsize=(10, 4)) 38 | axis = fig.add_subplot(111) 39 | 40 | axis.set_title("Snapshot Memory") 41 | axis.set_xlabel("Execution Time [s]") 42 | axis.set_ylabel("Virtual Memory [MiB]") 43 | 44 | totals = [x.asizeof_total for x in stats.snapshots] 45 | totals = [float(x)/(1024*1024) for x in totals] 46 | axis.plot(timestamps, totals, 'r--', label='Total') 47 | tracked = [x.tracked_total for x in stats.snapshots] 48 | tracked = [float(x)/(1024*1024) for x in tracked] 49 | axis.plot(timestamps, tracked, 'b--', label='Tracked total') 50 | 51 | for (args, kwds) in polys: 52 | axis.fill(*args, **kwds) 53 | axis.legend(loc=2) # TODO fill legend 54 | fig.savefig(filename) 55 | 56 | except ImportError: 57 | def tracker_timespace(*_args): 58 | pass 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /pyFileFixity/tests/test_tee.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import shutil 6 | 7 | from .aux_tests import get_marker, dummy_ecc_file_gen, check_eq_files, check_eq_dir, path_sample_files, tamper_file, find_next_entry, 
create_dir_if_not_exist, remove_if_exist 8 | 9 | from ..lib.tee import Tee 10 | 11 | from ..lib._compat import _StringIO 12 | 13 | def setup_module(): 14 | """ Initialize the tests by emptying the out directory """ 15 | outfolder = path_sample_files('output') 16 | shutil.rmtree(outfolder, ignore_errors=True) 17 | create_dir_if_not_exist(outfolder) 18 | 19 | def test_tee_file(): 20 | """ tee: test tee file output """ 21 | instring1 = b"First line\nSecond line\n" 22 | instring2 = b"Third line\n" 23 | filelog = path_sample_files('output', 'tee1.log') 24 | remove_if_exist(filelog) 25 | # Write first string 26 | t = Tee(filelog, 'wb', nostdout=True) 27 | t.write(instring1, end='') 28 | del t # deleting Tee should close the file 29 | with open(filelog, 'rb') as fl: 30 | res1 = fl.read() 31 | assert res1 == instring1 32 | # Write second string while appending 33 | t2 = Tee(filelog, 'ab', nostdout=True) 34 | t2.write(instring2, end='') 35 | del t2 # deleting Tee should close the file 36 | with open(filelog, 'rb') as fl: 37 | res2 = fl.read() 38 | assert res2 == instring1+instring2 39 | 40 | def test_tee_stdout(): 41 | """ tee: test tee stdout """ 42 | instring1 = "First line\nSecond line\n" 43 | instring2 = "Third line\n" 44 | filelog = path_sample_files('output', 'tee2.log') 45 | remove_if_exist(filelog) 46 | # Access stdout and memorize the cursor position just before the test 47 | sysout = sys.stdout 48 | startpos = sysout.tell() 49 | # Write first string 50 | t = Tee() 51 | t.write(instring1, end='') 52 | del t # deleting Tee should close the file 53 | # Read stdout and check Tee wrote into stdout 54 | sysout.seek(startpos) 55 | assert sysout.read() == instring1 56 | # Write second string 57 | t2 = Tee() 58 | t2.write(instring2, end='', flush=False) 59 | t2.flush() # try to manually flush by the way 60 | del t2 # deleting Tee should close the file 61 | # Read stdout and check Tee appended the second string into stdout 62 | sysout.seek(startpos) 63 | assert 
sysout.read().startswith(instring1+instring2) # sys.stdout appends a newline return at the second writing, don't know why... 64 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/kthread.py: -------------------------------------------------------------------------------- 1 | ''' 2 | kthread.py: A killable thread implementation. 3 | 4 | Copyright (C) 2004 Connelly Barnes (connellybarnes@yahoo.com) 5 | 6 | This module allows you to kill threads. The class KThread is a drop-in 7 | replacement for threading.Thread. It adds the kill() method, which should stop 8 | most threads in their tracks. 9 | 10 | This library is free software; you can redistribute it and/or modify it under 11 | the terms of the GNU Lesser General Public License as published by the Free 12 | Software Foundation; either version 2.1 of the License, or (at your option) 13 | any later version. 14 | 15 | This library is distributed in the hope that it will be useful, but WITHOUT 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more 18 | details. 
19 | 20 | You should have received a copy of the GNU Lesser General Public License along 21 | with this library; if not, write to the Free Software Foundation, Inc., 59 22 | Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 | ''' 24 | 25 | __first__ = '2004.9.9' 26 | __last__ = '2004.10.29' 27 | 28 | import sys 29 | import trace 30 | import threading 31 | import time 32 | 33 | class KThreadError(Exception): 34 | '''Encapsulates KThread exceptions.''' 35 | pass 36 | 37 | class KThread(threading.Thread): 38 | """A subclass of threading.Thread, with a kill() method.""" 39 | def __init__(self, *args, **keywords): 40 | threading.Thread.__init__(self, *args, **keywords) 41 | self.killed = False 42 | 43 | def start(self): 44 | """Start the thread.""" 45 | self.__run_backup = self.run 46 | self.run = self.__run # Force the Thread to install our trace. 47 | threading.Thread.start(self) 48 | 49 | def __run(self): 50 | """Hacked run function, which installs the trace.""" 51 | sys.settrace(self.globaltrace) 52 | self.__run_backup() 53 | self.run = self.__run_backup 54 | 55 | def globaltrace(self, frame, why, arg): 56 | if why == 'call': 57 | return self.localtrace 58 | else: 59 | return None 60 | 61 | def localtrace(self, frame, why, arg): 62 | if self.killed: 63 | if why == 'line': 64 | raise SystemExit() 65 | return self.localtrace 66 | 67 | def kill(self): 68 | self.killed = True 69 | 70 | if __name__ == '__main__': 71 | 72 | def func(): 73 | print('Function started') 74 | for i in xrange(1000000): 75 | pass 76 | print('Function finished') 77 | 78 | A = KThread(target=func) 79 | A.start() 80 | for i in xrange(1000): 81 | pass 82 | A.kill() 83 | 84 | print('End of main program') 85 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/middleware.py: -------------------------------------------------------------------------------- 1 | from django.http import HttpResponse 2 | from django.conf import 
settings 3 | from pyinstrument import Profiler 4 | from pyinstrument.profiler import NotMainThreadError 5 | import time 6 | import os 7 | 8 | not_main_thread_message = ( 9 | "pyinstrument can only be used on the main thread in signal mode. Run your server process in " 10 | "single-threaded mode. \n\n" 11 | "With the built-in server, you can do this with\n" 12 | "./manage.py runserver --nothreading --noreload\n\n" 13 | "Alternatively, you can set 'PYINSTRUMENT_USE_SIGNAL = False' in your settings.py to run in" 14 | "'setprofile' mode. For more information, see\n" 15 | "https://github.com/joerick/pyinstrument#signal-or-setprofile-mode") 16 | 17 | 18 | class ProfilerMiddleware(object): 19 | def process_request(self, request): 20 | profile_dir = getattr(settings, 'PYINSTRUMENT_PROFILE_DIR', None) 21 | use_signal = getattr(settings, 'PYINSTRUMENT_USE_SIGNAL', True) 22 | 23 | if getattr(settings, 'PYINSTRUMENT_URL_ARGUMENT', 'profile') in request.GET or profile_dir: 24 | profiler = Profiler(use_signal=use_signal) 25 | try: 26 | profiler.start() 27 | request.profiler = profiler 28 | except NotMainThreadError: 29 | raise NotMainThreadError(not_main_thread_message) 30 | 31 | 32 | def process_response(self, request, response): 33 | if hasattr(request, 'profiler'): 34 | try: 35 | request.profiler.stop() 36 | 37 | output_html = request.profiler.output_html() 38 | 39 | profile_dir = getattr(settings, 'PYINSTRUMENT_PROFILE_DIR', None) 40 | 41 | if profile_dir: 42 | filename = '{total_time:.3f}s {path} {timestamp:.0f}.html'.format( 43 | total_time=request.profiler.root_frame().time(), 44 | path=request.get_full_path().replace('/', '_'), 45 | timestamp=time.time() 46 | ) 47 | 48 | file_path = os.path.join(profile_dir, filename) 49 | 50 | if not os.path.exists(profile_dir): 51 | os.mkdir(profile_dir) 52 | 53 | with open(file_path, 'w') as f: 54 | f.write(output_html) 55 | 56 | if getattr(settings, 'PYINSTRUMENT_URL_ARGUMENT', 'profile') in request.GET: 57 | return 
from pympler.refgraph import ReferenceGraph
from pympler.util.stringutils import trunc, pp

import sys
import gc

# Public API of this module.
__all__ = ['GarbageGraph', 'start_debug_garbage', 'end_debug_garbage']


class GarbageGraph(ReferenceGraph):
    """
    The ``GarbageGraph`` is a ``ReferenceGraph`` that illustrates the objects building
    reference cycles. The garbage collector is switched to debug mode (all
    identified garbage is stored in `gc.garbage`) and the garbage collector is
    invoked. The collected objects are then illustrated in a directed graph.

    Large graphs can be reduced to the actual cycles by passing ``reduce=True`` to
    the constructor.

    It is recommended to disable the garbage collector when using the
    ``GarbageGraph``.

    >>> from pympler.garbagegraph import GarbageGraph, start_debug_garbage
    >>> start_debug_garbage()
    >>> l = []
    >>> l.append(l)
    >>> del l
    >>> gb = GarbageGraph()
    >>> gb.render('garbage.eps')
    True
    """
    def __init__(self, reduce=False, collectable=True):
        """
        Initialize the GarbageGraph with the objects identified by the garbage
        collector. If `collectable` is true, every reference cycle is recorded.
        Otherwise only uncollectable objects are reported.
        """
        # DEBUG_SAVEALL makes the collector keep *every* unreachable object in
        # gc.garbage instead of freeing it, so collectable cycles can be
        # inspected; with debug flags cleared, only uncollectable objects land
        # in gc.garbage.
        if collectable:
            gc.set_debug(gc.DEBUG_SAVEALL)
        else:
            gc.set_debug(0)
        # Force a collection now so gc.garbage is populated before building
        # the graph from it.
        gc.collect()

        ReferenceGraph.__init__(self, gc.garbage, reduce)

    def print_stats(self, stream=None):
        """
        Log annotated garbage objects to console or file.

        :param stream: open file, uses sys.stdout if not given
        """
        if not stream: # pragma: no cover
            stream = sys.stdout
        # Largest objects first (sort key negates size for descending order).
        self.metadata.sort(key=lambda x: -x.size)
        stream.write('%-10s %8s %-12s %-46s\n' % ('id', 'size', 'type', 'representation'))
        for g in self.metadata:
            # trunc() keeps the type and repr columns within fixed widths.
            stream.write('0x%08x %8d %-12s %-46s\n' % (g.id, g.size, trunc(g.type, 12),
                trunc(g.str, 46)))
        # pp() pretty-prints the byte count (e.g. "1.2 KB").
        stream.write('Garbage: %8d collected objects (%s in cycles): %12s\n' % \
            (self.count, self.num_in_cycles, pp(self.total_size)))


def start_debug_garbage():
    """
    Turn off garbage collector to analyze *collectable* reference cycles.
    """
    # Flush anything already unreachable first, then disable automatic
    # collection so later garbage is only gathered explicitly (e.g. by
    # constructing a GarbageGraph).
    gc.collect()
    gc.disable()


def end_debug_garbage():
    """
    Turn garbage collection on and disable debug output.
    """
    gc.set_debug(0)
    gc.enable()
"""Attempt to determine the current user's "system" directories"""
try:
    ## raise ImportError
    from win32com.shell import shell, shellcon
except ImportError:
    # Not on Windows, or win32all extensions unavailable.
    shell = None
try:
    import _winreg
except ImportError:
    # NOTE(review): on Python 3 this module was renamed to ``winreg``; a
    # fallback ``import winreg as _winreg`` could restore registry support
    # there -- confirm desired behavior before enabling it.
    _winreg = None
import os, sys


## The registry keys where the SHGetFolderPath values appear to be stored
r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
r"HKEY_CURRENT_USER\Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"


def _winreg_getShellFolder( name ):
    """Get a shell folder by string name from the registry"""
    k = _winreg.OpenKey(
        _winreg.HKEY_CURRENT_USER,
        r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
    )
    try:
        # should check that it's valid? How?
        return _winreg.QueryValueEx( k, name )[0]
    finally:
        # Always release the registry handle, even if the query raises.
        _winreg.CloseKey( k )


def shell_getShellFolder( type ):
    """Get a shell folder by shell-constant from COM interface"""
    return shell.SHGetFolderPath(
        0,# null hwnd
        type, # the (roaming) appdata path
        0,# null access token (no impersonation)
        0 # want current value, shellcon.SHGFP_TYPE_CURRENT isn't available, this seems to work
    )


def appdatadirectory( ):
    """Attempt to retrieve the current user's app-data directory

    This is the location where application-specific
    files should be stored. On *nix systems, this will
    be the ${HOME}/.config directory. On Win32 systems, it will be
    the "Application Data" directory. Note that for
    Win32 systems it is normal to create a sub-directory
    for storing data in the Application Data directory.
    """
    if shell:
        # on Win32 and have Win32all extensions, best-case
        return shell_getShellFolder(shellcon.CSIDL_APPDATA)
    if _winreg:
        # on Win32, but no Win32 shell com available, this uses
        # a direct registry access, likely to fail on Win98/Me
        return _winreg_getShellFolder( 'AppData' )
    # okay, what if for some reason _winreg is missing? would we want to allow ctypes?
    ## default case, look for name in environ...
    for name in ['APPDATA', 'HOME']:
        if name in os.environ:
            return os.path.join( os.environ[name], '.config' )
    # well, someone's being naughty, see if we can get ~ to expand to a directory...
    possible = os.path.abspath(os.path.expanduser( '~/.config' ))
    if os.path.exists( possible ):
        return possible
    raise OSError( """Unable to determine user's application-data directory, no ${HOME} or ${APPDATA} in environment""" )


if __name__ == "__main__":
    # BUGFIX: this was the Python 2 statement ``print 'AppData', ...`` which
    # is a SyntaxError on Python 3 and made the whole module unimportable.
    print('AppData', appdatadirectory())
class PStatsAdapter(squaremap.DefaultAdapter):
    """Squaremap adapter exposing pstats profile nodes (time-per-call view)."""

    # When True, label() renders times as a percentage of ``total``.
    percentageView = False
    total = 0

    # Which of the loader's trees this adapter walks (calls vs. files).
    TREE = pstatsloader.TREE_CALLS

    def value(self, node, parent=None):
        """Return the node's weight: fraction of the parent group's cumulative
        time, the absolute cumulative time at the root, or the per-parent
        cumulative time otherwise."""
        if isinstance(parent, pstatsloader.PStatGroup):
            if parent.cumulative:
                return node.cumulative / parent.cumulative
            else:
                # Avoid division by zero for empty groups.
                return 0
        elif parent is None:
            return node.cumulative
        return parent.child_cumulative_time(node)

    def label(self, node):
        """Human-readable label: "file / dir" for groups, otherwise
        "name@file:line [time]" with time as seconds or percentage."""
        if isinstance(node, pstatsloader.PStatGroup):
            return '%s / %s' % (node.filename, node.directory)
        if self.percentageView and self.total:
            time = '%0.2f%%' % round(node.cumulative * 100.0 / self.total, 2)
        else:
            time = '%0.3fs' % round(node.cumulative, 3)
        return '%s@%s:%s [%s]' % (node.name, node.filename, node.lineno, time)

    def empty(self, node):
        """Fraction of the node's time spent locally (not in callees)."""
        if node.cumulative:
            return node.local / float(node.cumulative)
        return 0.0

    def parents(self, node):
        """Determine all parents of node in our tree"""
        # Only keep parents that belong to the same tree as this adapter.
        return [
            parent for parent in
            getattr( node, 'parents', [] )
            if getattr(parent, 'tree', self.TREE) == self.TREE
        ]

    # Lazily-created cache mapping node.key -> wx.Colour.
    # NOTE(review): defined as a class attribute, so once created the dict is
    # shared by all instances of this adapter class -- confirm intended.
    color_mapping = None

    def background_color(self, node, depth):
        """Create a (unique-ish) background color for each node"""
        if self.color_mapping is None:
            self.color_mapping = {}
        color = self.color_mapping.get(node.key)
        if color is None:
            # The ``depth`` parameter is deliberately overwritten here: the
            # color is derived from the cache size so each new node gets a
            # distinct hue, regardless of its tree depth.
            depth = len(self.color_mapping)
            red = (depth * 10) % 255
            green = 200 - ((depth * 5) % 200)
            blue = (depth * 25) % 200
            self.color_mapping[node.key] = color = wx.Colour(red, green, blue)
        return color

    def SetPercentage(self, percent, total):
        """Set whether to display percentage values (and total for doing so)"""
        self.percentageView = percent
        self.total = total

    def filename( self, node ):
        """Extension to squaremap api to provide "what file is this" information"""
        if not node.directory:
            # TODO: any cases other than built-ins?
            return None
        if node.filename == '~':
            # TODO: look up C/Cython/whatever source???
            return None
        return os.path.join(node.directory, node.filename)


class DirectoryViewAdapter(PStatsAdapter):
    """Provides a directory-view-only adapter for PStats objects"""
    TREE = pstatsloader.TREE_FILES
    def children(self, node):
        # Only groups (directories/files) have children in this view;
        # individual stat rows are leaves.
        if isinstance(node, pstatsloader.PStatGroup):
            return node.children
        return []
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '27 18 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Use only 'java' to analyze code written in Java, Kotlin or both 38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 40 | 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v3 44 | 45 | # Initializes the CodeQL tools for scanning. 46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@v2 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 53 | 54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 55 | # queries: security-extended,security-and-quality 56 | 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild 61 | uses: github/codeql-action/autobuild@v2 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 
class Tee(object):
    """ Redirect print output to the terminal as well as in a log file """

    def __init__(self, name=None, mode=None, nostdout=False, silent=False):
        """Install the tee.

        :param name: path of the log file (only opened if ``mode`` is also given).
        :param mode: open() mode for the log file (e.g. 'a', 'wb').
        :param nostdout: if True, do not hijack sys.stdout (file-only tee).
        :param silent: if True, swallow all output entirely.
        """
        self.file = None
        self.nostdout = nostdout
        self.silent = silent
        if not nostdout:
            # Keep a handle on the real stdout so close() can restore it.
            self.stdout = sys.stdout
            sys.stdout = self
        if name is not None and mode is not None:
            self.filename = name
            self.filemode = mode
            self.file = open(name, mode)

    def close(self):
        """ Restore stdout and close file when Tee is closed """
        try:
            self.flush() # commit all latest changes before exiting
        # BUGFIX: was a bare ``except:`` which would also swallow SystemExit
        # and KeyboardInterrupt; only runtime errors (e.g. flushing an
        # already-closed file) should be ignored here.
        except Exception:
            pass # sometimes it's already closed, just skip
        if not self.nostdout and hasattr(self, 'stdout'):
            sys.stdout = self.stdout
            self.stdout = None
        if self.file: self.file.close()

    def __del__(self):
        # Best-effort cleanup on garbage collection; close() tolerates being
        # called more than once.
        self.close()

    def write(self, data, end="\n", flush=True):
        """ Output data to stdout and/or file """
        if not self.silent:
            if not self.nostdout:
                self.stdout.write(data)
                self.stdout.write(end)
            if self.file is not None:
                # Binary mode: need to convert to byte objects if Python 3
                if 'b' in self.filemode:
                    data = b(data)
                    end = b(end)
                self.file.write(data)
                self.file.write(end)
            if flush:
                self.flush()

    def flush(self):
        """ Force commit changes to the file and stdout """
        if not self.silent:
            if not self.nostdout:
                self.stdout.flush()
            if self.file is not None:
                self.file.flush()
def loads( source ):
    """Load json structure from meliae from source

    Supports only the required structures to support loading meliae memory dumps
    """
    source = source.strip()
    # Meliae emits exactly one flat JSON object per line; anything else is
    # outside this parser's contract.
    assert source.startswith( '{' )
    assert source.endswith( '}' )
    # Strip the surrounding braces and scan the key/value pairs directly.
    source = source[1:-1]
    result = {}
    for match in attr.finditer( source ):
        key = match.group('key')
        if match.group( 'list' ) is not None:
            # List of integers, e.g. "[1, 2,3]": normalize separators and split.
            value = [
                int(x)
                for x in match.group( 'list' ).strip().replace(',',' ').split()
            ]
        elif match.group( 'int' ) is not None:
            value = int( match.group( 'int' ))
        elif match.group( 'string' ) is not None:
            # Note: this inner ``match`` parameter shadows the loop variable on
            # purpose -- it receives each \\uXXXX escape found by escape.sub().
            def deescape( match ):
                # NOTE(review): ``unichr`` and ``str.decode`` below are
                # Python 2 only; this branch will raise NameError/AttributeError
                # on Python 3 -- confirm whether this vendored module is still
                # expected to run there.
                return unichr( int( match.group(0)[2:], 16 ))
            value = match.group('string').decode( 'utf-8' )
            # Expand \uXXXX escapes into real characters...
            value = escape.sub(
                deescape,
                value,
            )
            # ...then collapse simple escapes like \" and \\ to the bare char.
            value = simple_escape.sub(
                lambda x: x.group(1),
                value,
            )
        else:
            raise RuntimeError( "Matched something we don't know how to process:", match.groupdict() )
        result[key] = value
    return result
if __name__ == "__main__":
    # Smoke test: parse a meliae dump file with the reference json module
    # (comparison against loads() is presumably done manually/elsewhere).
    import sys, pprint
    for line in open( sys.argv[1] ):
        official = json.loads( line )
3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 4 | # and https://www.youtube.com/watch?v=l6fV09z5XHk 5 | 6 | name: ci-build-dowstream 7 | 8 | on: 9 | push: 10 | branches: 11 | - master # $default-branch only works in Workflows templates, not in Workflows, see https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable 12 | pull_request: 13 | branches: 14 | - master 15 | 16 | jobs: 17 | testdownstream: 18 | name: Unit test downstream package depending on our package 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: ["*", "pypy-3.9"] # check the list of versions: https://github.com/actions/python-versions/releases and https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md -- note that "*" represents the latest stable version of Python 24 | os: [ ubuntu-latest, macos-latest, windows-latest ] 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v3 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | cache: 'pip' 32 | # You can test your matrix by printing the current Python version 33 | - name: Display Python version 34 | run: | 35 | python -c "import sys; print(sys.version)" 36 | - name: Install dependencies 37 | run: | 38 | python -m pip install --upgrade pip 39 | # The rest is managed by the pyproject.toml 40 | - name: Echo current Python version 41 | run: echo "${{ matrix.python-version }}" 42 | # - name: Compile the Cython extension 43 | # if: ${{ matrix.python-version != 'pypy-3.9' }} # ${{}} GitHub expression syntax, need to place the target python-version in single quotes (not double quotes!) 
so that it does not stop parsing the literal at dots, otherwise dots will truncate the string https://docs.github.com/en/actions/learn-github-actions/expressions 44 | # run: | 45 | # pip install --upgrade --config-setting="--install-option=--no-cython-compile" cython>=3.0.0b2 46 | - name: Test installing and unit testing pyFileFixity from git 47 | # Make sure to have a .gitattributes file with `* -text` without quotes inside, to prevent automatic crlf line endings conversions/normalization by git. 48 | # FIXME: Need to use the @ form once issue https://github.com/pypa/pip/issues/11951 is fixed, as supplying extras to an egg fragment is deprecated and will be removed in pip v25. 49 | run: | 50 | pip install --upgrade --editable git+https://github.com/lrq3000/pyFileFixity.git#egg=pyFileFixity[test] --verbose 51 | pytest src/pyfilefixity 52 | - name: Get files contents (if failure) 53 | # See: https://stackoverflow.com/questions/60679609/github-action-to-simply-cat-a-file-to-an-output 54 | # https://www.howtohaven.com/system/view-binary-file-on-windows.shtml 55 | id: vars 56 | if: failure() # || success() 57 | run: | 58 | format-hex src/pyfilefixity/pyFileFixity/tests/out/d_dir.csv 59 | format-hex src/pyfilefixity/pyFileFixity/tests/results/test_rfigc_test_dir.csv 60 | shell: pwsh 61 | -------------------------------------------------------------------------------- /pyFileFixity/lib/hasher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Hash manager facade api 4 | # Allows to easily use different kinds of hashing algorithms, size and libraries under one single class. 
class Hasher(object):
    '''Class to provide a hasher object with various hashing algorithms. What's important is to provide the __len__ so that we can easily compute the block size of ecc entries. Must only use fixed size hashers for the rest of the script to work properly.'''

    # All supported algorithm identifiers (the constructor lowercases its
    # input, so matching is case-insensitive).
    known_algo = ["md5", "shortmd5", "shortsha256", "minimd5", "minisha256", "none"]
    # Fixed attribute set: saves per-instance memory and guards against typos.
    __slots__ = ['algo', 'length']

    def __init__(self, algo="md5"):
        """Select the hashing algorithm and precompute the digest length.

        :param algo: one of ``known_algo`` (case-insensitive).
        :raises NameError: if the algorithm is unknown.
        """
        # Store the selected hashing algo
        self.algo = algo.lower()
        # Precompute length so that it's very fast to access it later
        if self.algo == "md5":
            self.length = 32
        elif self.algo in ("shortmd5", "shortsha256"):
            self.length = 8
        elif self.algo in ("minimd5", "minisha256"):
            self.length = 4
        elif self.algo == "none":
            self.length = 0
        else:
            raise NameError('Hashing algorithm %s is unknown!' % algo)

    def hash(self, mes):
        """Return the fixed-size digest of ``mes`` as a bytes object.

        :param mes: message to hash (converted to bytes via ``b()``).
        :raises NameError: if the selected algorithm is unknown.
        """
        # use hashlib.algorithms_guaranteed to list algorithms
        mes = b(mes)
        if self.algo == "md5":
            return b(hashlib.md5(mes).hexdigest())
        elif self.algo == "shortmd5": # from: http://www.peterbe.com/plog/best-hashing-function-in-python
            return b64encode(b(hashlib.md5(mes).hexdigest()))[:8]
        elif self.algo == "shortsha256":
            return b64encode(b(hashlib.sha256(mes).hexdigest()))[:8]
        elif self.algo == "minimd5":
            return b64encode(b(hashlib.md5(mes).hexdigest()))[:4]
        elif self.algo == "minisha256":
            return b64encode(b(hashlib.sha256(mes).hexdigest()))[:4]
        elif self.algo == "none":
            # BUGFIX: was ``return ''`` (a str), inconsistent with every other
            # branch which returns bytes; a str breaks concatenation with
            # digests and binary-mode writes on Python 3. NOTE(review): if any
            # caller compared the result to '' literally, verify it.
            return b('')
        else:
            raise NameError('Hashing algorithm %s is unknown!' % self.algo)

    def __len__(self):
        """Length in characters of the digests produced by ``hash()``."""
        return self.length
19 | 20 | help: 21 | @+make -p 22 | 23 | alltests: 24 | @+make testcoverage 25 | @+make testsetup 26 | 27 | all: 28 | @make alltests 29 | @make build 30 | 31 | prebuildclean: 32 | @+python -c "import shutil; shutil.rmtree('build', True)" 33 | @+python -c "import shutil; shutil.rmtree('dist', True)" 34 | @+python -c "import shutil; shutil.rmtree('pyFileFixity.egg-info', True)" # very important to delete egg-info before any new build or pip install, otherwise may cause an error that multiple egg-info folders are present, or it may build using old definitions 35 | 36 | coverclean: 37 | @+python -c "import os; os.remove('.coverage') if os.path.exists('.coverage') else None" 38 | @+python -c "import shutil; shutil.rmtree('__pycache__', True)" 39 | @+python -c "import shutil; shutil.rmtree('tests/__pycache__', True)" 40 | 41 | test: 42 | #tox --skip-missing-interpreters 43 | pytest 44 | 45 | testnose: 46 | # Only for Py2 47 | nosetests pyFileFixity/tests/ -d -v 48 | 49 | testpyproject: 50 | validate-pyproject pyproject.toml -v 51 | 52 | testsetuppost: 53 | twine check "dist/*" 54 | 55 | testrst: 56 | rstcheck README.rst 57 | 58 | testcoverage: 59 | @+make coverclean 60 | #nosetests pyFileFixity/tests/ --with-coverage --cover-package=pyFileFixity -d -v # Py2 only 61 | coverage run --branch -m pytest pyFileFixity -v 62 | coverage report -m 63 | 64 | testmalloc: 65 | @+python -X dev -X tracemalloc=5 -m pytest 66 | 67 | installdev: 68 | @+make prebuildclean 69 | # Should work for both Py2 and Py3, --editable option and isolation builds work with both pyproject.toml and setup.cfg 70 | @+python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test,testmeta] --verbose --use-pep517 71 | 72 | installdevpy2: 73 | @+make prebuildclean 74 | @+python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test] --verbose --use-pep517 75 | 76 | 
install: 77 | @+make prebuildclean 78 | @+python -m pip install --upgrade . --verbose --use-pep517 79 | 80 | build: 81 | # requires `pip install build` 82 | @+make testrst 83 | @+make prebuildclean 84 | @+make testpyproject 85 | @+python -sBm build # do NOT use the -w flag, otherwise only the wheel will be built, but we need sdist for source distros such as Debian and Gentoo! 86 | @+make testsetuppost 87 | 88 | buildpy2: 89 | # Py2 only 90 | # requires `pip install build` 91 | @+make testrst 92 | @+make prebuildclean 93 | @+python -sBm build # do NOT use the -w flag, otherwise only the wheel will be built, but we need sdist for source distros such as Debian and Gentoo! 94 | @+make testsetuppost 95 | 96 | buildwheelhouse: 97 | cibuildwheel --platform auto 98 | 99 | upload: 100 | twine upload dist/* 101 | 102 | buildupload: 103 | @+make build 104 | @+make upload 105 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/pympler/mprofile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Memory usage profiler for Python. 3 | 4 | """ 5 | import inspect 6 | import sys 7 | 8 | from pympler import muppy 9 | 10 | class MProfiler(object): 11 | """A memory usage profiler class. 12 | 13 | Memory data for each function is stored as a 3-element list in the 14 | dictionary self.memories. The index is always a codepoint (see below). 15 | The following are the definitions of the members: 16 | 17 | [0] = The number of times this function was called 18 | [1] = Minimum memory consumption when this function was measured. 19 | [2] = Maximum memory consumption when this function was measured. 20 | 21 | A codepoint is a list of 3-tuple of the type 22 | (filename, functionname, linenumber). You can omit either element, which 23 | will cause the profiling to be triggered if any of the other criteria 24 | match. E.g. 
class MProfiler(object):
    """A memory usage profiler class.

    Memory data for each function is stored as a 3-element list in the
    dictionary self.memories. The index is always a codepoint (see below).
    The following are the definitions of the members:

    [0] = The number of times this function was called
    [1] = Minimum memory consumption when this function was measured.
    [2] = Maximum memory consumption when this function was measured.

    A codepoint is a 3-tuple of the type
    (filename, functionname, linenumber). You can omit either element, which
    will cause the profiling to be triggered if any of the other criteria
    match. E.g.
    - (None, foo, None), will profile any foo function,
    - (bar, foo, None) will profile only the foo function from the bar file,
    - (bar, foo, 17) will profile only line 17 of the foo function defined
      in the file bar.

    Additionally, you can define on what events you want the profiling be
    triggered. Possible events are defined in
    http://docs.python.org/lib/debugger-hooks.html.

    If you do not define either codepoints or events, the profiler will
    record the memory usage in at every codepoint and event.

    """

    def __init__(self, codepoints=None, events=None):
        """
        keyword arguments:
        codepoints -- a list of points in code to monitor (defaults to all codepoints)
        events -- a list of events to monitor (defaults to all events)
        """
        self.memories = {}
        self.codepoints = codepoints
        self.events = events

    def codepoint_included(self, codepoint):
        """Check if codepoint matches any of the defined codepoints."""
        # Idiom fix: was ``self.codepoints == None``; identity test is the
        # correct comparison against the None singleton.
        if self.codepoints is None:
            return True
        for cp in self.codepoints:
            # A None element in a configured codepoint is a wildcard.
            mismatch = False
            for i in range(len(cp)):
                if (cp[i] is not None) and (cp[i] != codepoint[i]):
                    mismatch = True
                    break
            if not mismatch:
                return True
        return False

    def profile(self, frame, event, arg): #PYCHOK arg requ. to match signature
        """Profiling method used to profile matching codepoints and events."""
        if (self.events is None) or (event in self.events):
            frame_info = inspect.getframeinfo(frame)
            # getframeinfo returns (filename, lineno, function, ...); reorder
            # to the (filename, functionname, linenumber) codepoint layout.
            cp = (frame_info[0], frame_info[2], frame_info[1])
            if self.codepoint_included(cp):
                objects = muppy.get_objects()
                size = muppy.get_size(objects)
                if cp not in self.memories:
                    # BUGFIX: was initialized as a 4-element [0,0,0,0] and then
                    # patched slot by slot, although only three slots are
                    # documented/used: [count, min, max].
                    self.memories[cp] = [1, size, size]
                else:
                    self.memories[cp][0] += 1
                    if self.memories[cp][1] > size:
                        self.memories[cp][1] = size
                    if self.memories[cp][2] < size:
                        self.memories[cp][2] = size

    def run(self, cmd):
        """Profile the execution of ``cmd`` (a source string) and return self.

        NOTE: ``cmd`` is exec()'d -- only pass trusted code.
        """
        sys.setprofile(self.profile)
        try:
            exec(cmd)
        finally:
            # Always uninstall the hook, even if cmd raised.
            sys.setprofile(None)
        return self

if __name__ == "__main__":
    p = MProfiler()
    # BUGFIX: was the Python 2 statement ``p.run("print 'hello'")``, whose
    # exec() raises SyntaxError on Python 3.
    p.run("print('hello')")
    print(p.memories)
4 | [metadata] 5 | name = pyFileFixity 6 | version = attr: pyFileFixity.__version__ 7 | author = Stephen Karl Larroque 8 | author_email = lrq3000@gmail.com 9 | description = Helping file fixity (long term storage of data) via redundant error correcting codes and hash auditing. 10 | long_description = file: README.rst, LICENSE 11 | long_description_content_type = text/x-rst 12 | keywords = file, repair, monitor, change, reed-solomon, error, correction, error correction, parity, parity files, parity bytes, data protection, data recovery, file protection, qr codes, qr code 13 | license = MIT License 14 | classifiers = 15 | Development Status :: 5 - Production/Stable 16 | License :: OSI Approved :: MIT License 17 | Environment :: Console 18 | Operating System :: Microsoft :: Windows 19 | Operating System :: MacOS :: MacOS X 20 | Operating System :: POSIX :: Linux 21 | Programming Language :: Python 22 | Programming Language :: Python :: 2.7 23 | Programming Language :: Python :: 3 24 | Programming Language :: Python :: 3.7 25 | Programming Language :: Python :: 3.8 26 | Programming Language :: Python :: 3.9 27 | Programming Language :: Python :: 3.10 28 | Programming Language :: Python :: 3.11 29 | Programming Language :: Python :: 3.12 30 | Programming Language :: Python :: Implementation :: PyPy 31 | Topic :: Software Development :: Libraries 32 | Topic :: Software Development :: Libraries :: Python Modules 33 | Topic :: System :: Archiving 34 | Topic :: System :: Archiving :: Backup 35 | Topic :: System :: Monitoring 36 | Topic :: System :: Recovery Tools 37 | Topic :: Utilities 38 | Intended Audience :: Developers 39 | Intended Audience :: End Users/Desktop 40 | Intended Audience :: Information Technology 41 | Intended Audience :: System Administrators 42 | 43 | [options] 44 | zip_safe = False 45 | include_package_data = True 46 | #packages = find_namespace: # not supported in python < 3.3 47 | packages = find: 48 | include = pyFileFixity 49 | python_requires = 
>=2.7 50 | install_requires = 51 | importlib-metadata; python_version<"3.8" 52 | pathlib2 53 | argparse 54 | sortedcontainers 55 | tqdm 56 | distance 57 | reedsolo==1.7.0; python_version<"3" 58 | unireedsolomon==1.0.5; python_version<"3" 59 | reedsolo>=2.0.0b1; python_version>="3.7" 60 | unireedsolomon>=1.0.6b1; python_version>="3.7" 61 | 62 | [options.package_data] 63 | * = *.rst, LICENSE*, README*, *.pyx, *.c 64 | #pyFileFixity = ecc_specification.txt, resiliency_tester_config.txt # does not work... 65 | #pyFileFixity.tests.files = * 66 | #pyFileFixity.tests.results = * 67 | 68 | #[options.entry_points] 69 | #console_scripts = 70 | # executable-name = pyFileFixity.module:function 71 | 72 | [options.extras_require] 73 | test = pytest; pytest-cov; py-make # minimum test dependencies. To support coveralls in Py2, use coveralls<4 74 | testmeta = build; twine; validate-pyproject; rstcheck # dependencies to test meta-data. Note that some of these dependencies make cibuildwheel choke on cryptography 75 | 76 | #[options.packages.find] 77 | #exclude = 78 | # examples* 79 | # tools* 80 | # docs* 81 | # pyFileFixity.tests* 82 | -------------------------------------------------------------------------------- /.github/workflows/ci-build.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies and run tests with a variety of Python versions 2 | # It uses the Python Package GitHub Actions workflow. 
3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 4 | # and https://www.youtube.com/watch?v=l6fV09z5XHk 5 | 6 | name: ci-build 7 | 8 | on: 9 | push: 10 | branches: 11 | - master # $default-branch only works in Workflows templates, not in Workflows, see https://stackoverflow.com/questions/64781462/github-actions-default-branch-variable 12 | pull_request: 13 | branches: 14 | - master 15 | 16 | jobs: 17 | build: 18 | runs-on: ${{ matrix.os }} 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | python-version: ["3.8", "3.10", "3.11", "*", pypy-3.9] # check the list of versions: https://github.com/actions/python-versions/releases and https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md -- note that "*" represents the latest stable version of Python 23 | os: [ ubuntu-latest, windows-latest, macos-latest ] # jobs that run on Windows and macOS runners that GitHub hosts consume minutes at 2 and 10 times the rate that jobs on Linux runners consume respectively. But it's free for public OSS repositories. 
24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | cache: 'pip' 31 | # You can test your matrix by printing the current Python version 32 | - name: Display Python version 33 | run: | 34 | python -c "import sys; print(sys.version)" 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | #python -m pip install pytest pytest-cov # done in setup.cfg for Py2 or pyproject.toml for Py3 39 | #if [ ${{ matrix.python-version }} <= 3.7 ]; then python -m pip install 'coverage<4'; else python -m pip install coverage; fi 40 | #if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 41 | - name: Install this module without testmeta 42 | if: ${{ matrix.python-version != '*' }} 43 | #if: ${{ matrix.python-version >= 3 }} # does not work on dynamic versions, see: https://github.com/actions/setup-python/issues/644 44 | # Do not import testmeta, they make the build fails somehow, because some dependencies are unavailable on Py2 45 | # We use test.pypi.org to test against cutting-edge builds of reedsolo 46 | run: | 47 | python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test] --verbose --use-pep517 48 | - name: Install this module with testmeta packages 49 | if: ${{ matrix.python-version == '*' }} 50 | run: | 51 | python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test,testmeta] --verbose --use-pep517 52 | - name: Test with pytest 53 | run: | 54 | coverage run --branch -m pytest . 
-v 55 | coverage report -m 56 | - name: Upload coverage to Codecov 57 | uses: codecov/codecov-action@v4 58 | with: 59 | token: ${{ secrets.CODECOV_TOKEN }} # now required even for public repos, and also advised to avoid rate-limiting API by GitHub which makes the upload fails randomly: https://community.codecov.com/t/upload-issues-unable-to-locate-build-via-github-actions-api/3954/9 and https://github.com/codecov/codecov-action/issues/598 60 | #directory: ./coverage/reports/ 61 | env_vars: OS,PYTHON 62 | fail_ci_if_error: true 63 | #files: ./coverage1.xml,./coverage2.xml 64 | flags: unittests 65 | name: codecov-umbrella 66 | verbose: true 67 | - name: Build sdist (necessary for the other tests below) 68 | if: ${{ matrix.python-version == '*' }} 69 | run: python -sBm build 70 | - name: Twine check 71 | if: ${{ matrix.python-version == '*' }} 72 | run: | 73 | twine check "dist/*" 74 | rstcheck README.rst 75 | - name: pyproject.toml validity 76 | if: ${{ matrix.python-version == '*' }} 77 | run: validate-pyproject pyproject.toml -v 78 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/profilebrowser.py: -------------------------------------------------------------------------------- 1 | # Excerpt from pstats.py rev 1.0 4/1/94 (from python v2.6) which define this class only when it's the entry point (main), so it is here copied to avoid compatibility issues with the next python's releases. 
2 | 3 | from pstats import * 4 | 5 | import cmd 6 | try: 7 | import readline 8 | except ImportError: 9 | pass 10 | 11 | class ProfileBrowser(cmd.Cmd): 12 | def __init__(self, profile=None): 13 | cmd.Cmd.__init__(self) 14 | self.prompt = "% " 15 | if profile is not None: 16 | self.stats = Stats(profile) 17 | self.stream = self.stats.stream 18 | else: 19 | self.stats = None 20 | self.stream = sys.stdout 21 | 22 | def generic(self, fn, line): 23 | args = line.split() 24 | processed = [] 25 | for term in args: 26 | try: 27 | processed.append(int(term)) 28 | continue 29 | except ValueError: 30 | pass 31 | try: 32 | frac = float(term) 33 | if frac > 1 or frac < 0: 34 | print >> self.stream, "Fraction argument must be in [0, 1]" 35 | continue 36 | processed.append(frac) 37 | continue 38 | except ValueError: 39 | pass 40 | processed.append(term) 41 | if self.stats: 42 | getattr(self.stats, fn)(*processed) 43 | else: 44 | print >> self.stream, "No statistics object is loaded." 45 | return 0 46 | def generic_help(self): 47 | print >> self.stream, "Arguments may be:" 48 | print >> self.stream, "* An integer maximum number of entries to print." 49 | print >> self.stream, "* A decimal fractional number between 0 and 1, controlling" 50 | print >> self.stream, " what fraction of selected entries to print." 51 | print >> self.stream, "* A regular expression; only entries with function names" 52 | print >> self.stream, " that match it are printed." 53 | 54 | def do_add(self, line): 55 | self.stats.add(line) 56 | return 0 57 | def help_add(self): 58 | print >> self.stream, "Add profile info from given file to current statistics object." 59 | 60 | def do_callees(self, line): 61 | return self.generic('print_callees', line) 62 | def help_callees(self): 63 | print >> self.stream, "Print callees statistics from the current stat object." 
64 | self.generic_help() 65 | 66 | def do_callers(self, line): 67 | return self.generic('print_callers', line) 68 | def help_callers(self): 69 | print >> self.stream, "Print callers statistics from the current stat object." 70 | self.generic_help() 71 | 72 | def do_EOF(self, line): 73 | print >> self.stream, "" 74 | return 1 75 | def help_EOF(self): 76 | print >> self.stream, "Leave the profile brower." 77 | 78 | def do_quit(self, line): 79 | return 1 80 | def help_quit(self): 81 | print >> self.stream, "Leave the profile brower." 82 | 83 | def do_read(self, line): 84 | if line: 85 | try: 86 | self.stats = Stats(line) 87 | except IOError, args: 88 | print >> self.stream, args[1] 89 | return 90 | self.prompt = line + "% " 91 | elif len(self.prompt) > 2: 92 | line = self.prompt[-2:] 93 | else: 94 | print >> self.stream, "No statistics object is current -- cannot reload." 95 | return 0 96 | def help_read(self): 97 | print >> self.stream, "Read in profile data from a specified file." 98 | 99 | def do_reverse(self, line): 100 | self.stats.reverse_order() 101 | return 0 102 | def help_reverse(self): 103 | print >> self.stream, "Reverse the sort order of the profiling report." 104 | 105 | def do_sort(self, line): 106 | abbrevs = self.stats.get_sort_arg_defs() 107 | if line and not filter(lambda x,a=abbrevs: x not in a,line.split()): 108 | self.stats.sort_stats(*line.split()) 109 | else: 110 | print >> self.stream, "Valid sort keys (unique prefixes are accepted):" 111 | for (key, value) in Stats.sort_arg_dict_default.iteritems(): 112 | print >> self.stream, "%s -- %s" % (key, value[1]) 113 | return 0 114 | def help_sort(self): 115 | print >> self.stream, "Sort profile data according to specified keys." 
116 | print >> self.stream, "(Typing `sort' without arguments lists valid keys.)" 117 | def complete_sort(self, text, *args): 118 | return [a for a in Stats.sort_arg_dict_default if a.startswith(text)] 119 | 120 | def do_stats(self, line): 121 | return self.generic('print_stats', line) 122 | def help_stats(self): 123 | print >> self.stream, "Print statistics from the current stat object." 124 | self.generic_help() 125 | 126 | def do_strip(self, line): 127 | self.stats.strip_dirs() 128 | return 0 129 | def help_strip(self): 130 | print >> self.stream, "Strip leading path information from filenames in the report." 131 | 132 | def postcmd(self, stop, line): 133 | if stop: 134 | return stop 135 | return None -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/pyinstrument/__main__.py: -------------------------------------------------------------------------------- 1 | from optparse import OptionParser 2 | import sys 3 | import os 4 | import codecs 5 | from pyinstrument import Profiler 6 | from pyinstrument.profiler import SignalUnavailableError 7 | 8 | # Python 3 compatibility. 
Mostly borrowed from SymPy 9 | PY3 = sys.version_info[0] > 2 10 | 11 | if PY3: 12 | import builtins 13 | exec_ = getattr(builtins, "exec") 14 | else: 15 | def exec_(_code_, _globs_=None, _locs_=None): 16 | """Execute code in a namespace.""" 17 | if _globs_ is None: 18 | frame = sys._getframe(1) 19 | _globs_ = frame.f_globals 20 | if _locs_ is None: 21 | _locs_ = frame.f_locals 22 | del frame 23 | elif _locs_ is None: 24 | _locs_ = _globs_ 25 | exec("exec _code_ in _globs_, _locs_") 26 | 27 | def main(): 28 | usage = ("usage: pyinstrument [options] scriptfile [arg] ...") 29 | parser = OptionParser(usage=usage) 30 | parser.allow_interspersed_args = False 31 | 32 | parser.add_option('', '--setprofile', 33 | dest='setprofile', action='store_true', 34 | help='run in setprofile mode, instead of signal mode', default=False) 35 | 36 | parser.add_option('', '--html', 37 | dest="output_html", action='store_true', 38 | help="output HTML instead of text", default=False) 39 | parser.add_option('-o', '--outfile', 40 | dest="outfile", action='store', 41 | help="save report to ", default=None) 42 | 43 | parser.add_option('', '--unicode', 44 | dest='unicode', action='store_true', 45 | help='force unicode text output') 46 | parser.add_option('', '--no-unicode', 47 | dest='unicode', action='store_false', 48 | help='force ascii text output') 49 | 50 | parser.add_option('', '--color', 51 | dest='color', action='store_true', 52 | help='force ansi color text output') 53 | parser.add_option('', '--no-color', 54 | dest='color', action='store_false', 55 | help='force no color text output') 56 | 57 | if not sys.argv[1:]: 58 | parser.print_help() 59 | sys.exit(2) 60 | 61 | (options, args) = parser.parse_args() 62 | sys.argv[:] = args 63 | 64 | if len(args) > 0: 65 | progname = args[0] 66 | sys.path.insert(0, os.path.dirname(progname)) 67 | 68 | with open(progname, 'rb') as fp: 69 | code = compile(fp.read(), progname, 'exec') 70 | globs = { 71 | '__file__': progname, 72 | '__name__': 
'__main__', 73 | '__package__': None, 74 | } 75 | 76 | try: 77 | profiler = Profiler(use_signal=not options.setprofile) 78 | except SignalUnavailableError: 79 | profiler = Profiler(use_signal=False) 80 | 81 | profiler.start() 82 | 83 | try: 84 | exec_(code, globs, None) 85 | except IOError as e: 86 | import errno 87 | 88 | if e.errno == errno.EINTR: 89 | print( 90 | 'Failed to run program due to interrupted system system call.\n' 91 | 'This happens because pyinstrument is sending OS signals to the running\n' 92 | 'process to interrupt it. If your program has long-running syscalls this\n' 93 | 'can cause a problem.\n' 94 | '\n' 95 | 'You can avoid this error by running in \'setprofile\' mode. Do this by\n' 96 | 'passing \'--setprofile\' when calling pyinstrument at the command-line.\n' 97 | '\n' 98 | 'For more information, see\n' 99 | 'https://github.com/joerick/pyinstrument/issues/16\n' 100 | ) 101 | 102 | raise 103 | except (SystemExit, KeyboardInterrupt): 104 | pass 105 | 106 | profiler.stop() 107 | 108 | if options.outfile: 109 | f = codecs.open(options.outfile, 'w', 'utf-8') 110 | else: 111 | f = sys.stdout 112 | 113 | unicode_override = options.unicode != None 114 | color_override = options.color != None 115 | 116 | unicode = options.unicode if unicode_override else file_supports_unicode(f) 117 | color = options.color if color_override else file_supports_color(f) 118 | 119 | if options.output_html: 120 | f.write(profiler.output_html()) 121 | else: 122 | f.write(profiler.output_text(unicode=unicode, color=color)) 123 | 124 | f.close() 125 | else: 126 | parser.print_usage() 127 | return parser 128 | 129 | def file_supports_color(file_obj): 130 | """ 131 | Returns True if the running system's terminal supports color, and False 132 | otherwise. 
def file_supports_unicode(file_obj):
    """Return True when *file_obj* declares an encoding whose canonical
    codec name contains 'utf', i.e. the stream can carry Unicode output.

    Objects without an ``encoding`` attribute (or with an empty one) are
    treated as ASCII-only and yield False.
    """
    declared = getattr(file_obj, 'encoding', None)
    if not declared:
        # No (or empty) encoding attribute: assume the stream cannot
        # safely receive non-ASCII characters.
        return False
    # codecs.lookup normalises aliases such as 'UTF8' to 'utf-8'.
    return 'utf' in codecs.lookup(declared).name
3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 4 | # and https://www.youtube.com/watch?v=l6fV09z5XHk 5 | # and https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi 6 | 7 | name: ci-cd 8 | 9 | # Build only on creation of new releases 10 | on: 11 | # push: # build on every commit push 12 | # pull_request: # build on every pull request 13 | release: # build on every releases 14 | types: 15 | - published # use published, not released and prereleased, because prereleased is not triggered if created from a draft: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#release 16 | 17 | jobs: 18 | testbuild: 19 | name: Unit test and building 20 | runs-on: ${{ matrix.os }} 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | python-version: ["*"] # check the list of versions: https://github.com/actions/python-versions/releases and https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md -- note that "*" represents the latest stable version of Python 25 | os: [ ubuntu-latest, windows-latest, macos-latest ] # jobs that run on Windows and macOS runners that GitHub hosts consume minutes at 2 and 10 times the rate that jobs on Linux runners consume respectively. But it's free for public OSS repositories. 
26 | steps: 27 | - uses: actions/checkout@v3 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v3 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | cache: 'pip' 33 | # You can test your matrix by printing the current Python version 34 | - name: Display Python version 35 | run: | 36 | python -c "import sys; print(sys.version)" 37 | - name: Install dependencies 38 | run: | 39 | python -m pip install --upgrade pip 40 | #python -m pip install pytest pytest-cov # done in setup.cfg for Py2 or pyproject.toml for Py3 41 | #if [ ${{ matrix.python-version }} <= 3.7 ]; then python -m pip install 'coverage<4'; else python -m pip install coverage; fi 42 | #if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 43 | - name: Install this module 44 | #if: ${{ matrix.python-version >= 3 }} # does not work on dynamic versions, see: https://github.com/actions/setup-python/issues/644 45 | # Do not import testmeta, they make the build fails somehow, because some dependencies are unavailable on Py2 46 | run: | 47 | #python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple --upgrade --editable .[test] --verbose --use-pep517 48 | # Here we do NOT build against test.pypi.org but only the real pypi because we want to test before shipping whether users with a normal pypi version can install our package! 49 | python -m pip install --upgrade --editable .[test] --verbose --use-pep517 50 | - name: Test with pytest 51 | run: | 52 | coverage run --branch -m pytest . 
-v 53 | coverage report -m 54 | - name: Build source distribution and wheel 55 | run: | 56 | python -m pip install --upgrade build 57 | python -sBm build 58 | - name: Save dist/ content for reuse in other GitHub Workflow blocks 59 | uses: actions/upload-artifact@v3 60 | with: 61 | path: dist/* 62 | 63 | upload_test_pypi: # Upload to TestPyPi first to ensure that the release is OK (we will try to download it and install it afterwards), as recommended in https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi 64 | name: Upload to TestPyPi 65 | needs: [testbuild] 66 | runs-on: ubuntu-latest 67 | steps: 68 | - name: Unpack default artifact into dist/ 69 | uses: actions/download-artifact@v4.1.7 70 | with: 71 | # unpacks default artifact into dist/ 72 | # if `name: artifact` is omitted, the action will create extra parent dir 73 | name: artifact 74 | path: dist 75 | 76 | - name: Upload to TestPyPi 77 | uses: pypa/gh-action-pypi-publish@v1.5.0 78 | with: 79 | user: __token__ 80 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 81 | repository_url: https://test.pypi.org/legacy/ 82 | # To test: repository_url: https://test.pypi.org/legacy/ # and also change token: ${{ secrets.PYPI_API_TOKEN }} to secrets.TEST_PYPI_API_TOKEN # for more infos on registering and using TestPyPi, read: https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi -- remove the repository_url to upload to the real PyPi 83 | 84 | - name: Test install from TestPyPI 85 | run: | 86 | python -m pip install --upgrade pip 87 | pip install \ 88 | --index-url https://test.pypi.org/simple/ \ 89 | --extra-index-url https://pypi.org/simple \ 90 | pyFileFixity 91 | 92 | upload_pypi: # Upload to the real PyPi if everything else worked before, as suggested in: https://py-pkgs.org/08-ci-cd#uploading-to-testpypi-and-pypi 93 | name: Upload to the real PyPi 94 | needs: [testbuild, upload_test_pypi] 95 | runs-on: ubuntu-latest 96 | steps: 97 | - uses: actions/download-artifact@v4.1.7 98 | with: 99 | # unpacks default 
artifact into dist/ 100 | # if `name: artifact` is omitted, the action will create extra parent dir 101 | name: artifact 102 | path: dist 103 | 104 | - uses: pypa/gh-action-pypi-publish@v1.5.0 105 | with: 106 | user: __token__ 107 | password: ${{ secrets.PYPI_API_TOKEN }} 108 | 109 | - name: Test install from PyPI 110 | run: | 111 | python -m pip install --upgrade pip 112 | pip uninstall pyFileFixity -y 113 | pip install --upgrade pyFileFixity 114 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/debug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os, sys, time 4 | 5 | # add current script path libs 6 | #pathname = os.path.dirname(sys.argv[0]) 7 | #sys.path.insert(0, os.path.join(pathname, 'lib', 'debug')) 8 | 9 | pathname = os.path.dirname(os.path.realpath(__file__)) 10 | #sys.path.append(os.path.join(pathname, 'lib', 'debug')) 11 | sys.path.append(pathname) 12 | 13 | # import functools, used to preserve the correct func.__name__ 14 | import functools 15 | 16 | # import some functions profiler functions and GUI 17 | import functionprofiler 18 | # Note: as an alternative, you can also use pyprof2calltree and kcachegrind to get a lot more informations and interactive call graph 19 | 20 | # import profilehooks lib 21 | from profilehooks import profile 22 | 23 | # import memory profiler line by line 24 | from memory_profiler import profile as memoryprofile_linebyline 25 | 26 | 27 | #### NON DECORATOR FUNCTIONS #### 28 | ################################# 29 | 30 | def startmemorytracker(): 31 | from pympler import tracker 32 | tr = tracker.SummaryTracker() 33 | return tr 34 | 35 | def runprofilerandshow(funcname, profilepath, argv='', *args, **kwargs): 36 | ''' 37 | Run a functions profiler and show it in a GUI visualisation using RunSnakeRun 38 | Note: can also use calibration for more exact results 39 | ''' 40 | 
functionprofiler.runprofile(funcname+'(\''+argv+'\')', profilepath, *args, **kwargs) 41 | print 'Showing profile (windows should open in the background)'; sys.stdout.flush(); 42 | functionprofiler.browseprofilegui(profilepath) 43 | 44 | 45 | 46 | 47 | #### DECORATOR FUNCTIONS #### 48 | ############################# 49 | 50 | # @profile: use profilehooks to profile functions 51 | # @profileit: profile using python's profile (works with threads) 52 | # @showprofile: show the functions profile in a nice GUI using RunSnakeRun (alternative: using the generated profile log files you can use pyprof2calltree and kcachegrind to get a lot more informations and interactive call graph) 53 | # @memorytrack: use Pympler to track and show memory usage (only console, no GUI) 54 | #@callgraph: save the call graph in text format and image (if GraphViz is available, more specifically the dot program) 55 | #@profile_linebyline: profile a function with line by line CPU consumption (using line_profiler, need to install it because it is compiled in C) 56 | #@memoryprofile_linebyline: memory profile a function with line by line memory consumption (using memory_profiler, needs psutils on Windows) 57 | 58 | # eg: 59 | # @showprofile 60 | # @profileit 61 | # def func(): ... 
def memorytrack(func):
    """Decorator: report the memory delta incurred by one call to *func*.

    Uses Pympler's ``SummaryTracker`` to snapshot memory before the call
    and print the summary diff to stdout afterwards.

    Fix: the wrapper now propagates the wrapped function's return value —
    previously the result was silently discarded and the wrapper always
    returned None, which broke any decorated function whose callers used
    its result.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Imported lazily so merely importing this module does not require
        # pympler to be installed (matches the original's behaviour).
        from pympler import tracker
        tr = tracker.SummaryTracker()
        result = func(*args, **kwargs)
        tr.print_diff()
        return result
    return wrapper
109 | ''' 110 | import pycallgraph 111 | @functools.wraps(func) 112 | def wrapper(*args, **kwargs): 113 | pycallgraph.start_trace() 114 | func(*args, **kwargs) 115 | pycallgraph.save_dot('callgraph.log') 116 | pycallgraph.make_dot_graph('callgraph.png') 117 | #pycallgraph.make_dot_graph('callgraph.jpg', format='jpg', tool='neato') 118 | return wrapper 119 | 120 | def profile_linebyline(func): 121 | import line_profiler 122 | @functools.wraps(func) 123 | def wrapper(*args, **kwargs): 124 | prof = line_profiler.LineProfiler() 125 | val = prof(func)(*args, **kwargs) 126 | prof.print_stats() 127 | return val 128 | return wrapper 129 | 130 | 131 | # Some debug testing here 132 | if __name__ == '__main__': 133 | 134 | @showprofile 135 | @profileit 136 | #@memorytrack 137 | #@callgraph 138 | #@profile 139 | #@memoryprofile_linebyline 140 | #@profile_linebyline 141 | def testcaptcha(): 142 | import captchagenerator 143 | 144 | captcha = captchagenerator.CaptchaGenerator(True, True, debugPng=True, debug=False, nbElem=10, modelsPath='bammodels', windowWidth='320', windowHeight='240') 145 | 146 | #captcha.renderCaptcha('solmasks', 'solmasks') 147 | captcha.renderCaptchaMulti(4, 'solmasks', 'solmasks') 148 | 149 | #time.sleep(20) 150 | 151 | #@memoryprofile_linebyline 152 | #@profile_linebyline 153 | def test_1(): 154 | a = [1] * (10 ** 6) 155 | b = [2] * (2 * 10 ** 7) 156 | del b 157 | 158 | for i in range(2): 159 | a = [1] * (10 ** 6) 160 | b = [2] * (2 * 10 ** 7) 161 | del b 162 | return a 163 | 164 | # Test 1 165 | #runprofilerandshow('testcaptcha', 'profile.log') 166 | 167 | # Test 2 168 | testcaptcha() 169 | 170 | # Test 3 171 | #test_1() 172 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/coldshotadapter.py: -------------------------------------------------------------------------------- 1 | """Adapter for RunSnakeRun to load coldshot profiles""" 2 | import wx, sys, os, logging 3 | log = 
logging.getLogger( __name__ ) 4 | from squaremap import squaremap 5 | from coldshot import stack,loader 6 | 7 | class BaseColdshotAdapter( squaremap.DefaultAdapter): 8 | """Base class for the various adapters""" 9 | percentageView = False 10 | total = 0 11 | def filename( self, node ): 12 | return getattr(node,'path',None) 13 | color_mapping = None 14 | 15 | def background_color(self, node, depth): 16 | """Create a (unique-ish) background color for each node""" 17 | if self.color_mapping is None: 18 | self.color_mapping = {} 19 | color = self.color_mapping.get(node.key) 20 | if color is None: 21 | depth = len(self.color_mapping) 22 | red = (depth * 10) % 255 23 | green = 200 - ((depth * 5) % 200) 24 | blue = (depth * 25) % 200 25 | self.color_mapping[node.key] = color = wx.Colour(red, green, blue) 26 | return color 27 | 28 | def SetPercentage(self, percent, total): 29 | """Set whether to display percentage values (and total for doing so)""" 30 | self.percentageView = percent 31 | self.total = total 32 | 33 | def parents(self, node): 34 | return getattr(node, 'parents', []) 35 | def label(self, node): 36 | if self.percentageView and self.total: 37 | time = '%0.2f%%' % round(node.cumulative * 100.0 / self.total, 2) 38 | else: 39 | time = '%0.3fs' % round(node.cumulative, 3) 40 | if hasattr( node, 'line' ): 41 | return '%s@%s:%s [%s]' % (node.name, node.filename, node.line, time) 42 | else: 43 | return '%s [%s]'%( node.name, time ) 44 | 45 | class ColdshotAdapter(BaseColdshotAdapter): 46 | """Adapts a coldshot.loader.Loader into a Squaremap-compatible structure""" 47 | 48 | def value(self, node, parent=None): 49 | if parent: 50 | return parent.child_cumulative_time(node) 51 | else: 52 | return node.cumulative 53 | 54 | def empty(self, node): 55 | """Calculate percentage of "empty" time""" 56 | return node.empty 57 | 58 | # 59 | #class ColdshotCallsAdapter( BaseColdshotAdapter ): 60 | # def value(self, node, parent=None): 61 | # return node.cumulative / 
parent.cumulative 62 | # 63 | # def empty(self, node): 64 | # """Calculate percentage of "empty" time""" 65 | # return node.empty 66 | 67 | class FunctionLineWrapper( object ): 68 | def __init__( self, function_info, line_info ): 69 | self.function_info = function_info 70 | self.line_info = line_info 71 | @property 72 | def children( self ): 73 | return [] 74 | @property 75 | def parents( self ): 76 | return [ self.function_info ] 77 | @property 78 | def cumulative( self ): 79 | return self.line_info.time * self.function_info.loader.timer_unit 80 | @property 81 | def empty( self ): 82 | return 0.0 83 | @property 84 | def local( self ): 85 | return self.line_info.time * self.function_info.loader.timer_unit 86 | @property 87 | def key( self ): 88 | return self.function_info.key 89 | @property 90 | def name( self ): 91 | return '%s:%s'%( self.line_info.line, self.function_info.filename, ) 92 | @property 93 | def calls( self ): 94 | return self.line_info.calls 95 | 96 | class ModuleAdapter( ColdshotAdapter ): 97 | """Currently doesn't do anything different""" 98 | def label(self, node): 99 | if isinstance( node, stack.FunctionInfo ): 100 | return super( ModuleAdapter, self ).label( node ) 101 | if self.percentageView and self.total: 102 | time = '%0.2f%%' % round(node.cumulative * 100.0 / self.total, 2) 103 | else: 104 | time = '%0.3fs' % round(node.cumulative, 3) 105 | return '%s [%s]'%(node.key or 'PYTHONPATH', time) 106 | def parents( self, node ): 107 | if isinstance( node, stack.FunctionInfo ): 108 | parent = node.loader.modules.get( node.module ) 109 | if parent: 110 | return [parent] 111 | return [] 112 | elif isinstance( node, stack.FunctionLineInfo ): 113 | return [node.function] 114 | else: 115 | return getattr( node, 'parents', [] ) 116 | def children( self, node ): 117 | if isinstance( node, stack.FunctionInfo ): 118 | return [ 119 | FunctionLineWrapper( node, line ) 120 | for lineno,line in sorted( node.line_map.items()) 121 | ] 122 | return 
ColdshotAdapter.children( self, node ) 123 | def label(self, node): 124 | if isinstance( node, FunctionLineWrapper ): 125 | return node.name 126 | return ColdshotAdapter.label( self, node ) 127 | 128 | 129 | class Loader( loader.Loader ): 130 | """Coldshot loader subclass with knowledge of squaremap adapters""" 131 | def functions_rows( self ): 132 | """Get cProfile-like function metadata rows 133 | 134 | returns an ID: function mapping 135 | """ 136 | return self.info.functions 137 | def location_rows( self ): 138 | """Get our location records (finalized) 139 | 140 | returns an module-name: Grouping mapping 141 | """ 142 | self.info.finalize_modules() 143 | return self.info.modules 144 | 145 | ROOTS = ['functions','location' ]# ,'thread','calls'] 146 | 147 | def get_root( self, key ): 148 | """Retrieve the given root by type-key""" 149 | return self.info.roots[key] 150 | def get_rows( self, key ): 151 | """Get the set of rows for the type-key""" 152 | return getattr( self, '%s_rows'%(key,) )( ) 153 | def get_adapter( self, key ): 154 | """Get an adapter for our given key""" 155 | if key == 'functions': 156 | return ColdshotAdapter() 157 | elif key == 'location': 158 | return ModuleAdapter() 159 | else: 160 | raise KeyError( """Unknown root type %s"""%( key, )) 161 | 162 | -------------------------------------------------------------------------------- /pyFileFixity/lib/profilers/visual/runsnakerun/meliaeadapter.py: -------------------------------------------------------------------------------- 1 | #! 
# Size thresholds (largest first) paired with the format string used once a
# value reaches at least twice that threshold.
RANKS = [
    (1024 * 1024 * 1024, '%0.1fGB'),
    (1024 * 1024, '%0.1fMB'),
    (1024, '%0.1fKB'),
    (0, '%iB'),
]

def mb( value ):
    """Format *value* (a byte count) as a short human-readable size string."""
    for threshold, template in RANKS:
        if abs(value) >= threshold * 2:
            # Guard against dividing by the final 0 threshold.
            divisor = float(threshold or 1)
            return template % (value / divisor)
    raise ValueError( "Number where abs(x) is not >= 0?: %s"%(value,))
node.get('value') is not None: 68 | result.append( unicode(node['value'])[:32]) 69 | if 'module' in node and not node['module'] in result: 70 | result.append( ' in %s'%( node['module'] )) 71 | if node.get( 'size' ): 72 | result.append( '%s'%( mb( node['size'] ))) 73 | if node.get( 'totsize' ): 74 | result.append( '(%s)'%( mb( node['totsize'] ))) 75 | parent_count = len( node.get('parents',())) 76 | if parent_count > 1: 77 | result.append( '/%s refs'%( parent_count )) 78 | return " ".join(result) 79 | def overall( self, node ): 80 | return node.get('totsize',0) 81 | def empty( self, node ): 82 | if node.get('totsize'): 83 | return node['size']/float(node['totsize']) 84 | else: 85 | return 0 86 | def parents( self, node ): 87 | """Retrieve/calculate the set of parents for the given node""" 88 | if 'index' in node: 89 | index = node['index']() 90 | parents = list(meliaeloader.children( node, index, 'parents' )) 91 | return parents 92 | return [] 93 | def best_parent( self, node, tree_type=None ): 94 | """Choose the best parent for a given node""" 95 | parents = self.parents(node) 96 | selected_parent = None 97 | if node['type'] == 'type': 98 | module = ".".join( node['name'].split( '.' 
)[:-1] ) 99 | if module: 100 | for mod in parents: 101 | if mod['type'] == 'module' and mod['name'] == module: 102 | selected_parent = mod 103 | if parents and selected_parent is None: 104 | parents.sort( key = lambda x: self.value(node, x) ) 105 | return parents[-1] 106 | return selected_parent 107 | 108 | color_mapping = None 109 | def background_color(self, node, depth): 110 | """Create a (unique-ish) background color for each node""" 111 | if self.color_mapping is None: 112 | self.color_mapping = {} 113 | if node['type'] == 'type': 114 | key = node['name'] 115 | else: 116 | key = node['type'] 117 | color = self.color_mapping.get(key) 118 | if color is None: 119 | depth = len(self.color_mapping) 120 | red = (depth * 10) % 255 121 | green = 200 - ((depth * 5) % 200) 122 | blue = (depth * 25) % 200 123 | self.color_mapping[key] = color = wx.Colour(red, green, blue) 124 | return color 125 | def filename( self, node ): 126 | if 'module' in node and not 'filename' in node: 127 | try: 128 | fp, pathname, description = imp.find_module(node['module']) 129 | except (ImportError), err: 130 | node['filename'] = None 131 | else: 132 | if fp: 133 | fp.close() 134 | node['filename'] = pathname 135 | elif not 'filename' in node: 136 | return None 137 | return node['filename'] 138 | 139 | class TestApp(wx.App): 140 | """Basic application for holding the viewing Frame""" 141 | handler = wx.PNGHandler() 142 | def OnInit(self): 143 | """Initialise the application""" 144 | wx.Image.AddHandler(self.handler) 145 | self.frame = frame = wx.Frame( None, 146 | ) 147 | frame.CreateStatusBar() 148 | 149 | model = model = self.get_model( sys.argv[1]) 150 | self.sq = squaremap.SquareMap( 151 | frame, model=model, adapter = MeliaeAdapter(), padding=2, margin=1, 152 | square_style=True 153 | ) 154 | squaremap.EVT_SQUARE_HIGHLIGHTED( self.sq, self.OnSquareSelected ) 155 | frame.Show(True) 156 | self.SetTopWindow(frame) 157 | return True 158 | def get_model( self, path ): 159 | return 
usage = 'meliaeloader.py somefile'

def main():
    """Command-line entry point: view the meliae dump named in sys.argv[1].

    With no argument, print the usage string instead of starting the GUI.
    """
    if not sys.argv[1:]:
        # Parenthesized print works under both Python 2 and 3; the previous
        # bare "print usage" statement was Python-2-only syntax.
        print(usage)
    else:
        app = TestApp(0)
        app.MainLoop()
only_coord=False, blocksize=len(get_marker(1))+1) 36 | assert entry == entries[0] 37 | entry = auxf.get_next_entry(fp1, entrymarker=get_marker(1), only_coord=False, blocksize=len(get_marker(1))+1) 38 | assert entry == entries[1] 39 | fp2 = BytesIO(filecontent) 40 | entry = auxf.get_next_entry(fp2, entrymarker=get_marker(1), only_coord=True, blocksize=len(get_marker(1))+1) 41 | assert entry == entries_pos[0] 42 | entry = auxf.get_next_entry(fp2, entrymarker=get_marker(1), only_coord=True, blocksize=len(get_marker(1))+1) 43 | assert entry == entries_pos[1] 44 | 45 | def test_sizeof_fmt(self): 46 | """ aux: test SI formatting """ 47 | # Test without SI prefix 48 | assert auxf.sizeof_fmt(1023.0, suffix='B', mod=1024.0) == "1023.0B" 49 | # Test all possible SI prefixes 50 | pows = ['', 'K','M','G','T','P','E','Z', 'Y'] 51 | for p in range(1, len(pows)): 52 | assert auxf.sizeof_fmt(1024.0**p, suffix='B', mod=1024.0) == ("1.0%sB" % pows[p]) 53 | 54 | def test_path2unix(self): 55 | """ aux: test path2unix """ 56 | assert auxf.path2unix(r'test\some\folder\file.ext', fromwinpath=True) == r'test/some/folder/file.ext' 57 | assert auxf.path2unix(r'test\some\folder\file.ext', nojoin=True, fromwinpath=True) == ['test', 'some', 'folder', 'file.ext'] 58 | assert auxf.path2unix(r'test/some/folder/file.ext') == r'test/some/folder/file.ext' 59 | 60 | def test_is_file(self): 61 | """ aux: test is_file() """ 62 | indir = path_sample_files('input') 63 | infile = path_sample_files('input', 'tux.jpg') 64 | assert auxf.is_file(infile) 65 | self.assertRaises(ArgumentTypeError, auxf.is_file, indir) 66 | 67 | def test_is_dir(self): 68 | """ aux: test is_dir() """ 69 | indir = path_sample_files('input') 70 | infile = path_sample_files('input', 'tux.jpg') 71 | assert auxf.is_dir(indir) 72 | self.assertRaises(ArgumentTypeError, auxf.is_dir, infile) 73 | 74 | def test_is_dir_or_file(self): 75 | """ aux: test is_dir_or_file() """ 76 | indir = path_sample_files('input') 77 | infile = 
    def test_recwalk(self):
        """ aux: test recwalk()

        Checks both sorted (deterministic across platforms) and unsorted
        (filesystem-order, platform-dependent) recursive walks.
        """
        def list_paths_posix(recwalk_result):
            """ helper function to convert all paths to relative posix like paths (to ease comparison) """
            # pardir is a closure over the enclosing test's variable, assigned below before first call.
            return [auxf.path2unix(os.path.join(os.path.relpath(x, pardir),y)) for x,y in recwalk_result]
        indir = path_sample_files('input')
        pardir = os.path.dirname(indir)
        # Compare between sorted and non-sorted path walking (the result should be different! but sorted path should always be the same on all platforms!)
        res1 = list_paths_posix(auxf.recwalk(indir, sorting=True))
        res2 = list_paths_posix(auxf.recwalk(indir, sorting=False))
        # Absolute test: sorted walking should always return the same result on all platforms
        assert res1 == ['files/alice.pdf', 'files/testaa.txt', 'files/tux.jpg', 'files/tuxsmall.jpg', 'files/Sub2/testsub2.txt', 'files/sub/Snark.zip', 'files/sub/testsub.txt']
        # Relative test: compare with platform's results
        if os.name == 'nt':
            assert res2 != res1
            assert res2 == ['files/alice.pdf', 'files/testaa.txt', 'files/tux.jpg', 'files/tuxsmall.jpg', 'files/sub/Snark.zip', 'files/sub/testsub.txt', 'files/Sub2/testsub2.txt']
        elif os.name == 'posix':
            assert res2 != res1 # BEWARE, do NOT use sets here! On linux, order of generated files can change, although a set is unordered, they will be equal if elements in the sets are the same, contrary to lists, but that's what we are testing here, with ordered walk it should NOT be the same!
    def test_fullpath(self):
        """ aux: test fullpath() """
        def relpath(path, pardir):
            """ helper function to always return a relative posix-like path (ease comparisons) """
            return auxf.path2unix(os.path.relpath(path, pardir))
        # Can't really objectively test fullpath() but we can relatively compare the result
        indir = path_sample_files('input')
        infile = path_sample_files('input', 'tux.jpg')
        pardir = os.path.dirname(indir)
        # Directory test
        assert relpath(auxf.fullpath(indir), pardir) == 'files'
        # File test
        res1 = relpath(auxf.fullpath(infile), pardir)
        assert res1 == 'files/tux.jpg'
        # Opened file test: fullpath() should also accept an open file handle
        # and resolve it to the same path as the raw filename.
        with open(infile, 'rb') as fh:
            res2 = relpath(auxf.fullpath(fh), pardir)
        assert res1 == res2
def runprofile(mainfunction, output, timeout = 0, calibrate=False):
    '''
    Run the functions profiler and save the result
    If timeout is greater than 0, the profile will automatically stops after timeout seconds
    '''
    # Fix: 'time' was only reachable through wildcard imports (from kthread
    # import *); import it explicitly so the countdown/timeout code cannot
    # break if those modules stop leaking the name.
    import time
    if noprofiler:
        print('ERROR: profiler and/or pstats library missing ! Please install it (probably package named python-profile) before running a profiling !')
        return False
    # This is the main function for profiling
    def _profile():
        profile.run(mainfunction, output)
    print('=> RUNNING FUNCTIONS PROFILER\n\n')
    sys.stdout.flush()
    # Calibrate the profiler (only use this if the profiler produces some funny stuff, but calibration can also produce even more funny stuff with the latest cProfile of Python v2.7! So you should only enable calibration if necessary)
    if calibrate:
        print('Calibrating the profiler...')
        sys.stdout.flush()
        cval = calibrateprofile()
        print('Calibration found value : %s' % cval)
        sys.stdout.flush()
    print('Initializing the profiler...')
    sys.stdout.flush()
    # Run in timeout mode (if the function cannot ends by itself, this is the best mode: the function must ends for the profile to be saved)
    if timeout > 0:
        pthread = KThread(target=_profile) # we open the function with the profiler, in a special killable thread (see below why)
        print('Will now run the profiling and terminate it in %s seconds. Results will be saved in %s' % (str(timeout), str(output)))
        sys.stdout.flush()
        print('\nCountdown:')
        sys.stdout.flush()
        for i in range(0, 5):
            print(str(5-i))
            sys.stdout.flush()
            time.sleep(1)
        print('0\nStarting to profile...')
        sys.stdout.flush()
        pthread.start() # starting the thread
        time.sleep(float(timeout)) # after this amount of seconds, the thread gets killed and the profiler will end its job
        print('\n\nFinishing the profile and saving to the file %s' % str(output))
        sys.stdout.flush()
        pthread.kill() # we must end the main function in order for the profiler to output its results (if we didn't launch a thread and just closed the process, it would have done no result)
    # Run in full length mode (we run the function until it ends)
    else:
        print("Running the profiler, please wait until the process terminates by itself (if you forcequit before, the profile won't be saved)")
        _profile()
    print('=> Functions Profile done !')
    return True
profile.Profile.bias = final # Apply computed bias to all Profile instances created hereafter 102 | return final 103 | 104 | def parseprofile(profilelog, out): 105 | ''' 106 | Parse a profile log and print the result on screen 107 | ''' 108 | file = open(out, 'w') # opening the output file 109 | print('Opening the profile in %s...' % profilelog) 110 | p = pstats.Stats(profilelog, stream=file) # parsing the profile with pstats, and output everything to the file 111 | 112 | print('Generating the stats, please wait...') 113 | file.write("=== All stats:\n") 114 | p.strip_dirs().sort_stats(-1).print_stats() 115 | file.write("=== Cumulative time:\n") 116 | p.sort_stats('cumulative').print_stats(100) 117 | file.write("=== Time:\n") 118 | p.sort_stats('time').print_stats(100) 119 | file.write("=== Time + cumulative time:\n") 120 | p.sort_stats('time', 'cum').print_stats(.5, 'init') 121 | file.write("=== Callees:\n") 122 | p.print_callees() 123 | file.write("=== Callers:\n") 124 | p.print_callers() 125 | #p.print_callers(.5, 'init') 126 | #p.add('fooprof') 127 | file.close() 128 | print('Stats generated and saved to %s.' % out) 129 | print('Everything is done. Exiting') 130 | 131 | def browseprofile(profilelog): 132 | ''' 133 | Browse interactively a profile log in console 134 | ''' 135 | print('Starting the pstats profile browser...\n') 136 | try: 137 | browser = ProfileBrowser(profilelog) 138 | print >> browser.stream, "Welcome to the profile statistics browser. Type help to get started." 139 | browser.cmdloop() 140 | print >> browser.stream, "Goodbye." 141 | except KeyboardInterrupt: 142 | pass 143 | 144 | def browseprofilegui(profilelog): 145 | ''' 146 | Browse interactively a profile log in GUI using RunSnakeRun and SquareMap 147 | ''' 148 | from runsnakerun import runsnake # runsnakerun needs wxPython lib, if it's not available then we can pass if we don't want a GUI. 
def check_eq_files(path1, path2, blocksize=65535, startpos1=0, startpos2=0):
    """ Return True if both files are identical, False otherwise """
    # Stream both files chunk by chunk so arbitrarily large files can be
    # compared without loading them fully into memory.
    with open(path1, 'rb') as f1, open(path2, 'rb') as f2:
        f1.seek(startpos1)
        f2.seek(startpos2)
        while True:
            chunk1 = f1.read(blocksize)
            chunk2 = f2.read(blocksize)
            if chunk1 != chunk2:
                # Content mismatch, or one file ended before the other.
                return False
            if not chunk1:
                # Both chunks are empty here (they compared equal above):
                # we reached the end of both files without a difference.
                return True
    #return filecmp.cmp(path1, path2, shallow=False) # does not work on Travis
def path_sample_files(type=None, path=None, createdir=False):
    """ Helper function to return the full path to the test files

    type selects the tests subfolder: 'input' -> files/, 'results' ->
    results/, 'output' -> out/ (any other value, including None, returns '').
    path, if given, is appended inside that subfolder.  With createdir=True
    the resolved directory is created if missing.
    """
    subdir = ''
    if not type:
        return ''
    elif type == 'input':
        subdir = 'files'
    elif type == 'results':
        subdir = 'results'
    elif type == 'output':
        subdir = 'out'

    dirpath = ''
    # Resolve relative to this test module's directory, not the CWD.
    scriptpath = os.path.dirname(os.path.realpath(__file__))
    if path:
        dirpath = fullpath(os.path.join(scriptpath, subdir, path))
    else:
        dirpath = fullpath(os.path.join(scriptpath, subdir))

    if createdir:
        create_dir_if_not_exist(dirpath)

    return dirpath
def find_next_entry(path, marker=b"\xFF\xFF\xFF\xFF", initpos=0):
    '''Find the next position of a marker in a file

    Generator yielding the absolute offset of each occurrence of *marker*
    in the file at *path*, starting from *initpos*.

    Fixed: the default marker is now a bytes literal; the previous str
    default raised a TypeError under Python 3 when searched for inside the
    bytearray read buffer.
    NOTE(review): a marker straddling a blocksize boundary is not detected
    by this chunked scan -- pre-existing limitation, confirm acceptable.
    '''
    blocksize = 65535
    startcursor = None  # absolute position of the marker in the file
    buf = 1
    infile = open(path, 'rb')
    if initpos > 0:
        infile.seek(initpos)
    # Enumerate all markers in a generator
    while buf:
        # Read a long block at once, we will readjust the file cursor after
        buf = bytearray(infile.read(blocksize))
        # Find the start marker (relative position in the current buffer)
        start = buf.find(marker)
        if start >= 0:
            # Convert the relative hit into an absolute file offset
            startcursor = infile.tell() - len(buf) + start
            infile.close() # close the file before yielding result, to avoid locking the file
            yield startcursor
            infile = open(path, 'rb') # reopen the file just after yield before doing further processing
            # place reading cursor just after the current marker to avoid repeatedly detecting the same marker
            infile.seek(startcursor + len(marker))
    infile.close() # don't forget to close after the loop!
def remove_if_exist(path):
    """Delete a file or a directory recursively if it exists, else no exception is raised"""
    if not os.path.exists(path):
        # Nothing on disk at that location: report that nothing was removed.
        return False
    if os.path.isdir(path):
        shutil.rmtree(path)
        return True
    if os.path.isfile(path):
        os.remove(path)
        return True
    # Exists but is neither a regular file nor a directory (e.g. a socket):
    # leave it untouched, matching the original behaviour.
    return False
    def test_eccman_detect_rs_param(self):
        """ eccman: test reedsolomon param detection """
        message = b("hello world")
        # Pre-computed message+ecc codeword for the parameters in `params` below.
        mesecc_orig = [104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 187, 161, 157, 88, 92, 175, 116, 251, 116]
        # Same codeword with one symbol zeroed out (index 15), so detection
        # should report a Hamming distance of 1 instead of a perfect match.
        mesecc_orig_tampered = [104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 187, 161, 157, 88, 0, 175, 116, 251, 116]
        n = len(mesecc_orig)
        k = len(message)
        # params = [n, k, generator number, primitive polynomial, fcr]
        params = [n, k, 2, 0x187, 120]
        res = detect_reedsolomon_parameters(message, mesecc_orig)
        res2 = detect_reedsolomon_parameters(message, mesecc_orig_tampered)
        assert ("Hamming distance 0 (0=perfect match):\ngen_nb=%i prim=%i(%s) fcr=%i" % (params[2], params[3], hex(params[3]), params[4])) in res
        assert ("Hamming distance 1:\ngen_nb=%i prim=%i(%s) fcr=%i" % (params[2], params[3], hex(params[3]), params[4])) in res2
        # An impossible codeword with a restricted search space cannot be matched
        res3 = detect_reedsolomon_parameters(message, [-1]*len(mesecc_orig), [3])
        assert "Parameters could not be automatically detected" in res3
        # Symbols above the field size (255 for c_exp=8) must be rejected on either side
        self.assertRaises(ValueError, detect_reedsolomon_parameters, [257, 0, 0], [0, 0, 0], c_exp=8)
        self.assertRaises(ValueError, detect_reedsolomon_parameters, [0, 0, 0], [257, 0, 0], c_exp=8)
    def test_eccman_codecs(self):
        """ eccman: test ecc generation and decoding

        Runs every supported ECC algorithm (1..4) through encode, decode
        (with and without erasures) and check, against pre-computed ecc
        vectors.
        """
        # Expected ecc bytes per algorithm; algos 1-3 share the same output,
        # algo 4 uses different Reed-Solomon parameters.
        expected = [
            [206, 234, 144, 153, 141, 196, 170, 96, 62],
            [206, 234, 144, 153, 141, 196, 170, 96, 62],
            [206, 234, 144, 153, 141, 196, 170, 96, 62],
            [187, 161, 157, 88, 92, 175, 116, 251, 116]
        ]
        message = b("hello world")
        message_eras = b("h\x00ll\x00 world")  # two erased (zeroed) symbols
        message_noise = b("h\x00ll\x00 worla")  # erasures plus one error (currently unused, see commented call below)
        n = 20
        k = 11
        for i in range(1,5):
            eccman = ECCMan(n, k, algo=i)
            ecc = bytearray(b(eccman.encode(message)))
            assert list(ecc) == expected[i-1]
            # Decoding must recover the original message in all erasure modes
            assert b(eccman.decode(message_eras, ecc)[0]) == message
            assert b(eccman.decode(message_eras, ecc, enable_erasures=True)[0]) == message
            assert b(eccman.decode(message_eras, ecc, enable_erasures=True, only_erasures=True)[0]) == message
            #eccman.decode(message_noise, ecc, enable_erasures=True, only_erasures=True)[0]
            assert eccman.check(message, ecc)
            assert not eccman.check(message_eras, ecc)
            assert "Reed-Solomon with polynomials in Galois field of characteristic" in eccman.description()
        # Unknown algorithm test
        self.assertRaises(Exception, ECCMan, n, k, algo=-1)
        eccman = ECCMan(n, k, algo=1)
        eccman.algo = -1
        assert "No description for this ECC algorithm." in eccman.description()
    def test_eccman_rpad_decoding(self):
        """ eccman: test ecc decoding when right padding

        Unlike the lpad test, n and k here match the real codeword sizes;
        the right-padding path is exercised by truncating the ecc block.
        """
        message = b("hello world")  # reference plaintext the ecc was generated from
        ecc = b(''.join([chr(x) for x in [206, 234, 144, 153, 141, 196, 170, 96, 62]]))
        message_eras = b("h\x00ll\x00 world")  # two erased (zeroed) symbols
        # Exact parameters for this codeword: n = len(message) + len(ecc), k = len(message)
        n = 20
        k = 11
        eccman = ECCMan(n, k, algo=3)
        # Test decoding with erasure when the message needs to be padded
        assert eccman.decode(message_eras, ecc, enable_erasures=True)
        # A truncated ecc block must be right-padded internally and still decode
        assert eccman.decode(message_eras, ecc[:-2])
import rfigc 13 | from ..lib.aux_funcs import recwalk 14 | from .aux_tests import check_eq_files, check_eq_dir, path_sample_files, tamper_file, create_dir_if_not_exist 15 | 16 | def partial_eq(file, file_partial): 17 | """ Do a partial comparison, line by line, we compare only using "line2 in line1", where line2 is from file_partial """ 18 | flag = True 19 | with _open_csv(file, 'r') as outf, _open_csv(file_partial, 'r') as expectedf: 20 | out = outf.read().strip("\r").strip("\n") 21 | expected = expectedf.read().split("\n") 22 | for exp in expected: 23 | if not exp.strip("\n") in out: 24 | flag = False 25 | break 26 | return flag 27 | 28 | def setup_module(): 29 | """ Initialize the tests by emptying the out directory """ 30 | outfolder = path_sample_files('output') 31 | shutil.rmtree(outfolder, ignore_errors=True) 32 | create_dir_if_not_exist(outfolder) 33 | 34 | def test_one_file(): 35 | """ rfigc: test creation and verification of rfigc database for one file """ 36 | filein = path_sample_files('input', 'tuxsmall.jpg') 37 | filedb = path_sample_files('output', 'd_file.csv') 38 | fileres = path_sample_files('results', 'test_rfigc_test_one_file.csv') 39 | # Generate database file 40 | assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (filein, filedb)) == 0 41 | # Check files are ok 42 | assert rfigc.main('-i "%s" -d "%s" --silent' % (filein, filedb)) == 0 43 | # Check database file is the same as the pregenerated result 44 | with _open_csv(filedb, 'r') as outf, _open_csv(fileres, 'r') as expectedf: 45 | out = outf.read().strip("\r").strip("\n") 46 | # Because of differing timestamps between local and git repo, we must only do a partial comparison (we compare the beginning of the file up to the timestamp) 47 | # TODO: to do full comparisons including timestamps, use https://github.com/adamchainz/time-machine or freezegun 48 | expected = expectedf.read().split("\n") # workaround to remove windows carriage return character, it does not always get added but under 
some strange conditions (in GitHub Actions env, and not all the time, but only on Windows-2019) it can get added by csv writer, ignoring our settings. TODO: remove strip("\r") and try to find a REAL fix. 49 | for exp in expected: 50 | assert exp.strip("\r").strip("\n") in out 51 | 52 | def test_dir(): 53 | """ rfigc: test creation and verification of database for a full directory """ 54 | filein = path_sample_files('input', ) 55 | filedb = path_sample_files('output', 'd_dir.csv') 56 | fileres = path_sample_files('results', 'test_rfigc_test_dir.csv') 57 | # Generate database file 58 | assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (filein, filedb)) == 0 59 | # Check files are ok 60 | assert rfigc.main('-i "%s" -d "%s" --silent' % (filein, filedb)) == 0 61 | # Check database file is the same as the pregenerated result 62 | # We can't directly compare the two files because of timestamps! 63 | # So we manually process the expected results and compare each line to see if it's present in the output 64 | assert partial_eq(filedb, fileres) 65 | # TODO: add a regular expression to check that all fields are present 66 | 67 | def test_error_file(): 68 | """ rfigc: test tamper file and error file generation """ 69 | filein = path_sample_files('input', 'tuxsmall.jpg') 70 | filedb = path_sample_files('output', 'd.csv') 71 | fileout = path_sample_files('output', 'tuxsmall.jpg') 72 | fileout2 = path_sample_files('output', 'errors.log') 73 | fileres = path_sample_files('results', 'test_rfigc_test_error_file.log') 74 | assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (filein, filedb)) == 0 75 | shutil.copyfile(filein, fileout) 76 | tamper_file(fileout, 3) 77 | assert rfigc.main('-i "%s" -d "%s" -e "%s" --silent' % (fileout, filedb, fileout2)) == 1 78 | check_eq_files(fileout2, fileres) 79 | 80 | def test_filescrape(): 81 | """ rfigc: test --filescraping_recovery """ 82 | filein_dir = path_sample_files('input', ) 83 | filedb = path_sample_files('output', 
def test_update():
    """ rfigc: test --update """
    indir = path_sample_files('input')
    dbfile = path_sample_files('output', 'd_update.csv')
    extra_dir = path_sample_files('output', 'update')
    extra_file = path_sample_files('output', 'update/added_file.txt')
    expected_append = path_sample_files('results', 'test_rfigc_test_update_append.csv')
    expected_remove = path_sample_files('results', 'test_rfigc_test_update_remove.csv')
    # Generate a database from input files
    assert rfigc.main('-i "%s" -d "%s" -g -f --silent' % (indir, dbfile)) == 0
    # Create a new file in another folder
    create_dir_if_not_exist(extra_dir)
    with open(extra_file, 'wb') as fh:
        fh.write(b'abcdefABCDEF\n1234598765')
    # Append the new file's entry into the database
    assert rfigc.main('-i "%s" -d "%s" --update --append --silent' % (extra_dir, dbfile)) == 0
    assert partial_eq(dbfile, expected_append)
    # Remove all other files' entries from the database
    assert rfigc.main('-i "%s" -d "%s" --update --remove --silent' % (extra_dir, dbfile)) == 0
    assert partial_eq(dbfile, expected_remove)

def test_generate_hashes():
    """ rfigc: test internal: generate_hashes() """
    # A file generated on the spot: its hashes must always match
    tmpfile = path_sample_files('output', 'test_rfigc_generate_hashes.txt')
    with open(tmpfile, 'wb') as fh:
        fh.write(b"Lorem ipsum etc\n"*20)
    assert rfigc.generate_hashes(tmpfile) == ('c6e0c87cbb8eeaca8179f22186384e6b', '6f46949be7cda1437bc3fb61fb827a6552beaf8b')
    # Repository sample files: these expectations change if the samples change
    sample_jpg = path_sample_files('input', 'tux.jpg')
    sample_pdf = path_sample_files('input', 'alice.pdf')
    assert rfigc.generate_hashes(sample_jpg) == ('81e19bbf2efaeb1d6d6473c21c48e4b7', '6e38ea91680ef0f960db0fd6a973cf50ef765369')
    assert rfigc.generate_hashes(sample_pdf) == ('298aeefe8c00f2d92d660987bee67260', '106e7ad4d3927c5906cd366cc0d5bd887bdc3300')
#!/usr/bin/env python
#
# Easy Profiler
# Copyright (C) 2015 Larroque Stephen
# Licensed under the MIT License (MIT) -- full license text in the repository.
#
#------------------------------
#

# Import necessary libraries
import argparse
import os, sys


def main(argv=None):
    """Parse the commandline and run the requested CPU/memory profiler
    (optionally with a GUI) over the target script's main() function.

    Unrecognized arguments are passed through to the target script.
    Results are printed to stdout or dumped to --profile_log; returns None.
    """
    if argv is None:
        argv = sys.argv[1:]

    #==== COMMANDLINE PARSER ====

    #== Commandline description
    desc = '''Easy Profiler for Python scripts
Description: Provide an easy way to launch CPU/Memory profile (with GUI support) of python scripts. You can supply arguments of the target script by appending them at the end of the arguments for this script, without any special formatting (unrecognized arguments will be passed along to the target script).
'''
    ep = ''' '''

    #== Commandline arguments
    main_parser = argparse.ArgumentParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
    main_parser.add_argument('--script', metavar='script.py', type=str, nargs=1, required=True,
                        help='Path to the script to import and execute (the script must implement a main() function).')
    main_parser.add_argument('--profile_log', metavar='profile.log', type=str, nargs=1, required=False,
                        help='Path where to store the profile log.')
    main_parser.add_argument('--cpu', action='store_true', required=False, default=False,
                        help='CPU line-by-line profiler (pprofile.py).')
    main_parser.add_argument('--cpu_stack', action='store_true', required=False, default=False,
                        help='CPU stack (tree-like) profiler (pyinstrument.py).')
    main_parser.add_argument('--memory', action='store_true', required=False, default=False,
                        help='Memory line-by-line profiler (memory_profiler.py).')
    main_parser.add_argument('--gui', action='store_true', required=False, default=False,
                        help='GUI interface for the CPU line-by-line profiler (not ready for the memory profiler) using RunSnakeRun.')

    #== Parsing the arguments (unknown args are kept for the target script)
    args, args_rest = main_parser.parse_known_args(argv)

    #-- Set variables from arguments
    script = args.script[0]
    cpu = args.cpu
    memory = args.memory
    gui = args.gui
    cpu_stack = args.cpu_stack

    profile_log = None
    if args.profile_log:
        # Lazy import: fullpath is only needed when a log path was supplied,
        # and deferring it keeps this module importable in isolation.
        from lib.aux_funcs import fullpath
        profile_log = fullpath(args.profile_log[0])

    if '.' not in script:  # no extension given: assume a .py script
        script = script + '.py'

    if not os.path.isfile(script):
        print("File does not exist: %s" % script)
    else:
        print("==== LAUNCHING PROFILING ====")

        scriptname = os.path.splitext(script)[0]  # remove any extension to be able to import
        scriptmod = __import__(scriptname)  # dynamic import

        if cpu:
            # Line-by-line CPU runtime profiling (pure python using pprofile)
            from lib.profilers.pprofile import pprofile
            # Load the profiler
            pprof = pprofile.Profile()
            # Launch experiment under the profiler
            args_rest = ' '.join(args_rest)
            with pprof:
                scriptmod.main(args_rest)
            # Print the result
            print("==> Profiling done.")
            if profile_log:
                pprof.dump_stats(profile_log)
            else:
                pprof.print_stats()
        elif memory:
            # Line-by-line memory profiler (pure python using memory_profiler)
            from lib.profilers.memory_profiler import memory_profiler
            # Load the memory profiler
            mprof = memory_profiler.LineProfiler()
            # Launch experiment under the memory profiler
            args_rest = ' '.join(args_rest)
            mprof(scriptmod.main)(args_rest)
            # Print results
            print("==> Profiling done.")
            if not mprof.code_map:  # just to check that everything's alright
                # BUGFIX: this was a Python 2 bare print statement, a
                # SyntaxError under Python 3; converted to the print() function.
                print('Error: the memory_profiler did not work! Please check that your are correctly calling mprof(func)(arguments)')
            else:
                if profile_log:
                    with open(profile_log, 'w') as pf:
                        memory_profiler.show_results(mprof, stream=pf)
                else:
                    print(memory_profiler.show_results(mprof, stream=None))
        elif gui:
            # Visual profiler with GUI (runsnakerun)
            # NOTE: you need wxPython to launch it
            from lib.profilers.visual.debug import runprofilerandshow
            if not profile_log:
                # A profile log is necessary to use the GUI: the profile is
                # generated separately, then RunSnakeRun reads the file
                # (file-based communication is currently the only way to
                # communicate with RunSnakeRun).
                profile_log = 'profile.log'
            args_rest = ' '.join(args_rest)
            runprofilerandshow('import '+scriptname+"\n"+scriptname+'.main', profile_log, argv=args_rest, calibrate=True)
        elif cpu_stack:
            # Tree like cpu profiling (pyinstrument)
            from lib.profilers.pyinstrument import Profiler
            from lib.profilers.pyinstrument.profiler import SignalUnavailableError
            try:
                profiler = Profiler()
            except SignalUnavailableError:
                # signal is not available on this system: fall back to
                # the non-signal sampling mode
                profiler = Profiler(use_signal=False)
            profiler.start()
            # CONSISTENCY FIX: join args_rest into a single string before
            # calling the target main(), like every other branch does.
            scriptmod.main(' '.join(args_rest))
            profiler.stop()
            print("==> Profiling done.")
            if profile_log:
                import codecs
                with codecs.open(profile_log, 'wb', encoding='utf8') as pf:
                    pf.write( profiler.output_text(unicode=True, color=True) )
            else:
                print(profiler.output_text(unicode=True, color=True))


# Calling main function if the script is directly called (not imported as a library in another program)
if __name__ == "__main__":
    sys.exit(main())
# furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 25 | # 26 | #================================= 27 | # pyFileFixity Main Subcommands Facade API 28 | # by Stephen Larroque 29 | # License: MIT 30 | # Creation date: 2023-08-04 31 | #================================= 32 | # Inspired by Adam Johnson's template for a script with subcommands: https://adamj.eu/tech/2021/10/15/a-python-script-template-with-sub-commands-and-type-hints/ 33 | # 34 | 35 | from __future__ import annotations 36 | 37 | # Import tools for argument parsing and typing 38 | import argparse 39 | from collections.abc import Sequence 40 | import sys 41 | 42 | # Include the lib folder in the python import path to be able to do relative imports 43 | # DEPRECATED: unnecessary since PEP328, but need to use the "from .a import x" form, not "import .x" https://fortierq.github.io/python-import/ -- but note that editable mode is very fine and accepted nowadays, a subsequent PEP fixed the issue! 
44 | #import os, sys 45 | #thispathname = os.path.dirname(__file__) 46 | #sys.path.append(os.path.join(thispathname)) 47 | 48 | # Import all pyFileFixity subcommands tools 49 | from .rfigc import main as rfigc_main 50 | from .header_ecc import main as hecc_main 51 | from .structural_adaptive_ecc import main as saecc_main 52 | from .repair_ecc import main as recc_main 53 | from .replication_repair import main as replication_repair_main 54 | from .resiliency_tester import main as restest_main 55 | from .filetamper import main as filetamper_main 56 | from .ecc_speedtest import main as ecc_speedtest_main 57 | 58 | def main(argv: Sequence[str] | None = None) -> int: 59 | parser = argparse.ArgumentParser() 60 | subparsers = parser.add_subparsers(dest="subcommand", required=True) 61 | 62 | # Add sub-commands 63 | rfigc_parser = subparsers.add_parser("hash", aliases=["rfigc"], help="Check files integrity fast by hash, size, modification date or by data structure integrity.", add_help=False) # disable help, so that we can redefine it and propagate as an argument downstream to the called module 64 | rfigc_parser.add_argument('-h', '--help', action='store_true') # redefine help argument so that we can pass it downstream to submodules' argparse parsers 65 | 66 | hecc_parser = subparsers.add_parser("header", aliases=["header_ecc", "hecc"], help="Protect/repair files headers with error correction codes", add_help=False) 67 | hecc_parser.add_argument('-h', '--help', action='store_true') 68 | 69 | saecc_parser = subparsers.add_parser("whole", aliases=["structural_adaptive_ecc", "saecc", "protect", "repair"], help="Protect/repair whole files with error correction codes", add_help=False) 70 | saecc_parser.add_argument('-h', '--help', action='store_true') 71 | 72 | recc_parser = subparsers.add_parser("recover", aliases=["repair_ecc", "recc"], help="Utility to try to recover damaged ecc files using a failsafe mechanism, a sort of recovery mode (note: this does NOT recover your files, 
only the ecc files, which may then be used to recover your files!)", add_help=False) 73 | recc_parser.add_argument('-h', '--help', action='store_true') 74 | 75 | replication_repair_parser = subparsers.add_parser("dup", aliases=["replication_repair"], help="Repair files from multiple copies of various storage mediums using a majority vote", add_help=False) 76 | replication_repair_parser.add_argument('-h', '--help', action='store_true') 77 | 78 | restest_parser = subparsers.add_parser("restest", aliases=["resilience_tester"], help="Run tests to quantify robustness of a file protection scheme (can be used on any, not just pyFileFixity)", add_help=False) 79 | restest_parser.add_argument('-h', '--help', action='store_true') 80 | 81 | filetamper_parser = subparsers.add_parser("filetamper", help="Tamper files using various schemes", add_help=False) 82 | filetamper_parser.add_argument('-h', '--help', action='store_true') 83 | 84 | ecc_speedtest_parser = subparsers.add_parser("speedtest", aliases=["ecc_speedtest"], help="Run error correction encoding and decoding speedtests", add_help=False) 85 | ecc_speedtest_parser.add_argument('-h', '--help', action='store_true') 86 | 87 | # Parse known arguments, but we have almost none, this is done on purpose so that we can pass all arguments (except helps) downstream for submodules to handle with their own Argparse 88 | args, args_remainder = parser.parse_known_args(argv) # if argv is None, then parse_known_args() will fallback to sys.argv 89 | #print(type(args_remainder)) # DEBUGLINE 90 | #print(args) # DEBUGLINE 91 | 92 | if len(sys.argv) >= 2: 93 | # Prepare subarguments 94 | subargs = [] 95 | if args.help is True: 96 | # Manage custom case of manually propagating --help to downstream module, we prepend to the string of the remainder of arguments 97 | subargs.append("--help") 98 | # Add the rest of the arguments, so that the downstream module can handle them with their own Argparse parser 99 | subargs.extend(args_remainder) # 
args_remainder is a list, so we can extend subargs with it 100 | 101 | fullcommand = "pff.py " + args.subcommand 102 | 103 | if args.subcommand in ["hash", "rfigc"]: 104 | return rfigc_main(argv=subargs, command=fullcommand) 105 | elif args.subcommand in ["header", "header_ecc", "hecc"]: 106 | return hecc_main(argv=subargs, command=fullcommand) 107 | elif args.subcommand in ["whole", "structural_adaptive_ecc", "saecc", "protect", "repair"]: 108 | return saecc_main(argv=subargs, command=fullcommand) 109 | elif args.subcommand in ["recover", "repair_ecc", "recc"]: 110 | return recc_main(argv=subargs, command=fullcommand) 111 | elif args.subcommand in ["dup", "replication_repair"]: 112 | return replication_repair_main(argv=subargs, command=fullcommand) 113 | elif args.subcommand in ["restest", "resilience_tester"]: 114 | return restest_main(argv=subargs, command=fullcommand) 115 | elif args.subcommand in ["filetamper"]: 116 | return filetamper_main(argv=subargs, command=fullcommand) 117 | elif args.subcommand in ["speedtest", "ecc_speedtest"]: 118 | return ecc_speedtest_main(argv=subargs, command=fullcommand) 119 | else: 120 | # Unreachable 121 | raise NotImplementedError( 122 | f"Command {args.command} is not implemented (dev forgot!).", 123 | ) 124 | 125 | 126 | def subcommand1(string: str) -> int: 127 | # Implement behaviour 128 | 129 | return 0 130 | 131 | 132 | if __name__ == "__main__": 133 | raise SystemExit(main()) 134 | -------------------------------------------------------------------------------- /pyFileFixity/tests/test_header_ecc.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import itertools 6 | import hashlib 7 | 8 | import shutil 9 | 10 | from .. 
import header_ecc as hecc 11 | from ..lib.aux_funcs import get_next_entry 12 | from ..lib.eccman import compute_ecc_params, ECCMan 13 | from .aux_tests import check_eq_files, check_eq_dir, path_sample_files, tamper_file, find_next_entry, create_dir_if_not_exist, get_marker, dummy_ecc_file_gen 14 | 15 | from ..lib._compat import b 16 | 17 | from io import BytesIO 18 | 19 | def setup_module(): 20 | """ Initialize the tests by emptying the out directory """ 21 | outfolder = path_sample_files('output') 22 | shutil.rmtree(outfolder, ignore_errors=True) 23 | create_dir_if_not_exist(outfolder) 24 | 25 | def test_one_file(): 26 | """ hecc: test creation and verification of database for one file """ 27 | filein = path_sample_files('input', 'tuxsmall.jpg') 28 | filedb = path_sample_files('output', 'hecc_file.db') 29 | fileout = path_sample_files('output', 'tuxsmall.jpg') 30 | fileout_rec = path_sample_files('output', 'rectemp', True) # temporary folder where repaired files will be placed (we expect none so this should be temporary, empty folder) 31 | fileres = path_sample_files('results', 'test_header_ecc_test_one_file.db') 32 | # Generate an ecc file 33 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb)) == 0 34 | # Check that generated ecc file is correct 35 | startpos1 = next(find_next_entry(filedb, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins 36 | startpos2 = next(find_next_entry(fileres, get_marker(type=1))) 37 | assert check_eq_files(filedb, fileres, startpos1=startpos1, startpos2=startpos2) 38 | # Check that the ecc file correctly validates the correct files 39 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=3 -c --silent' % (filein, filedb, fileout_rec)) == 0 40 | 41 | def test_one_file_tamper(): 42 | """ hecc: test file repair """ 43 | filein = path_sample_files('input', 'tuxsmall.jpg') 44 | filedb = path_sample_files('output', 'hecc_tamper.db') 45 | fileout = 
path_sample_files('output', 'tuxsmall.jpg') 46 | fileout2 = path_sample_files('output', 'repaired/tuxsmall.jpg') 47 | fileout2_dir = path_sample_files('output', 'repaired') 48 | fileres = path_sample_files('results', 'test_header_ecc_test_one_file_tamper.db') 49 | create_dir_if_not_exist(fileout2_dir) 50 | # Generate an ecc file 51 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb)) == 0 52 | # Tamper the file 53 | shutil.copyfile(filein, fileout) # Copy it to avoid tampering the original 54 | tamper_file(fileout, 4, r'abcde') 55 | # Repair the file 56 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=3 -c --silent' % (fileout, filedb, fileout2_dir)) == 0 57 | # Check that the file was completely repaired 58 | assert check_eq_files(filein, fileout2) 59 | 60 | def test_dir(): 61 | """ hecc: test creation and verification of database for a full directory """ 62 | filein = path_sample_files('input', ) 63 | filedb = path_sample_files('output', 'hecc_dir.db') 64 | fileout = path_sample_files('output', ) 65 | fileout_rec = path_sample_files('output', 'rectemp', True) # temporary folder where repaired files will be placed (we expect none so this should be temporary, empty folder) 66 | fileres = path_sample_files('results', 'test_header_ecc_test_dir.db') 67 | # Generate an ecc file 68 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb)) == 0 69 | # Check that generated ecc file is correct 70 | startpos1 = next(find_next_entry(filedb, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins 71 | startpos2 = next(find_next_entry(fileres, get_marker(type=1))) 72 | assert check_eq_files(filedb, fileres, startpos1=startpos1, startpos2=startpos2) 73 | # Check that the ecc file correctly validates the correct files 74 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=3 -c --silent' % (filein, filedb, fileout_rec)) == 0 75 | 76 | def test_algo(): 77 | """ hecc: test 
algorithms equivalence """ 78 | filein = path_sample_files('input', 'tuxsmall.jpg') 79 | filedb = [path_sample_files('output', 'hecc_algo1.db'), 80 | path_sample_files('output', 'hecc_algo2.db'), 81 | path_sample_files('output', 'hecc_algo3.db'), 82 | ] 83 | fileres = path_sample_files('results', 'test_header_ecc_test_algo.db') 84 | fileout_rec = path_sample_files('output', 'rectemp', True) # temporary folder where repaired files will be placed (we expect none so this should be temporary, empty folder) 85 | # For each algorithm 86 | for i in range(len(filedb)): 87 | # Generate an ecc file 88 | assert hecc.main('-i "%s" -d "%s" --ecc_algo=%i -g -f --silent' % (filein, filedb[i], i+1)) == 0 89 | # Check file with this ecc algo 90 | assert hecc.main('-i "%s" -d "%s" -o "%s" --ecc_algo=%i -c --silent' % (filein, filedb[i], fileout_rec, i+1)) == 0 91 | for i in range(1, len(filedb)): 92 | # Check that generated ecc file is correct 93 | startpos1 = next(find_next_entry(filedb[0], get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins 94 | startpos2 = next(find_next_entry(filedb[i], get_marker(type=1))) 95 | assert check_eq_files(filedb[0], filedb[i], startpos1=startpos1, startpos2=startpos2) 96 | # Check against expected ecc file 97 | startpos1 = next(find_next_entry(filedb[0], get_marker(type=1))) 98 | startpos2 = next(find_next_entry(fileres, get_marker(type=1))) 99 | assert check_eq_files(filedb[0], fileres, startpos1=startpos1, startpos2=startpos2) 100 | 101 | def test_entry_fields(): 102 | """ hecc: test internal: entry_fields() """ 103 | ecc = dummy_ecc_file_gen(3) 104 | eccf = BytesIO(ecc) 105 | ecc_entry = get_next_entry(eccf, get_marker(1), only_coord=False) 106 | assert hecc.entry_fields(ecc_entry, field_delim=get_marker(2)) == {'ecc_field': b'hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_', 'filesize_ecc': b'filesize1_ecc', 'relfilepath_ecc': b'relfilepath1_ecc', 'relfilepath': b'file1.ext', 'filesize': b'filesize1'} 
107 | ecc_entry = get_next_entry(eccf, get_marker(1), only_coord=False) 108 | assert hecc.entry_fields(ecc_entry, field_delim=get_marker(2)) == {'ecc_field': b'hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_hash-ecc-entry_', 'filesize_ecc': b'filesize2_ecc', 'relfilepath_ecc': b'relfilepath2_ecc', 'relfilepath': b'file2.ext', 'filesize': b'filesize2'} 109 | 110 | def test_entry_assemble(): 111 | """ hecc: test internal: entry_assemble() """ 112 | class Hasher(object): 113 | """ Dummy Hasher """ 114 | def __len__(self): 115 | return 32 116 | tempfile = path_sample_files('output', 'hecc_entry_assemble.txt') 117 | with open(tempfile, 'wb') as tfile: 118 | tfile.write(b("Lorem ipsum\nAnd stuff and stuff and stuff\n"*20)) 119 | ecc = dummy_ecc_file_gen(3) 120 | eccf = BytesIO(ecc) 121 | ecc_entry = get_next_entry(eccf, get_marker(1), only_coord=False) 122 | entry_fields = hecc.entry_fields(ecc_entry, field_delim=get_marker(2)) 123 | ecc_params = compute_ecc_params(255, 0.5, Hasher()) 124 | out = hecc.entry_assemble(entry_fields, ecc_params, 10, tempfile, fileheader=None) 125 | assert out == [{'ecc': b'sh-ecc-entry_', 'message': b'Lorem ipsu', 'hash': b'hash-ecc-entry_hash-ecc-entry_ha'}] 126 | # TODO: check that several blocks can be assembled, currently we only check one block 127 | 128 | def test_compute_ecc_hash(): 129 | """ hecc: test internal: compute_ecc_hash() """ 130 | class Hasher(object): 131 | """ Dummy Hasher """ 132 | def hash(self, mes): 133 | return "dummyhsh" 134 | def __len__(self): 135 | return 8 136 | n = 20 137 | k = 11 138 | instring = "hello world!"*20 139 | header_size = 1024 140 | eccman = ECCMan(n, k, algo=3) 141 | out1 = hecc.compute_ecc_hash(eccman, Hasher(), instring[:header_size], 255, 0.5, message_size=None, as_string=False) 142 | assert out1 == [[b'dummyhsh', b'\x9b\x18\xeb\xc9z\x01c\xf2\x07'], [b'dummyhsh', b'\xa2Q\xc0Y\xae\xc3b\xd5\x81']] 143 | out2 = hecc.compute_ecc_hash(eccman, Hasher(), 
instring[:header_size], 255, 0.5, message_size=None, as_string=True) 144 | assert out2 == [b('dummyhsh\x9b\x18\xeb\xc9z\x01c\xf2\x07'), b('dummyhsh\xa2Q\xc0Y\xae\xc3b\xd5\x81')] 145 | -------------------------------------------------------------------------------- /pyFileFixity/tests/test_repair_ecc.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import itertools 6 | import hashlib 7 | 8 | import shutil 9 | 10 | from .. import repair_ecc as recc 11 | from .. import header_ecc as hecc 12 | from .. import structural_adaptive_ecc as saecc 13 | from .aux_tests import check_eq_files, check_eq_dir, path_sample_files, tamper_file, find_next_entry, create_dir_if_not_exist, get_marker 14 | 15 | def get_db(): 16 | return [path_sample_files('output', 'recc_file.db'), path_sample_files('output', 'recc_file.db_bak')] 17 | 18 | def get_db_idx(): 19 | return [path_sample_files('output', 'recc_file.db.idx'), path_sample_files('output', 'recc_file.db.idx_bak')] 20 | 21 | def get_db_sa(): 22 | return [path_sample_files('output', 'recc_file_sa.db'), path_sample_files('output', 'recc_file_sa.db_bak')] 23 | 24 | def get_db_sa_idx(): 25 | return [path_sample_files('output', 'recc_file_sa.db.idx'), path_sample_files('output', 'recc_file_sa.db.idx_bak')] 26 | 27 | def restore_files(type): 28 | """ Restore the backup files to clean before the test """ 29 | if type == 'hecc': 30 | filedb, filedb_bak = get_db() 31 | elif type == 'hecc_idx': 32 | filedb, filedb_bak = get_db_idx() 33 | elif type == 'saecc': 34 | filedb, filedb_bak = get_db_sa() 35 | elif type == 'saecc_idx': 36 | filedb, filedb_bak = get_db_sa_idx() 37 | os.remove(filedb) 38 | shutil.copyfile(filedb_bak, filedb) 39 | return 0 40 | 41 | def setup_module(): 42 | """ Initialize the tests by emptying the out directory """ 43 | outfolder = path_sample_files('output') 44 | shutil.rmtree(outfolder, ignore_errors=True) 
# (continuation of setup_module() from the previous packed line)
    create_dir_if_not_exist(outfolder)
    # Generate an header_ecc generated ecc file for the repair tests to use
    filein = path_sample_files('input')
    filedb, filedb_bak = get_db()
    filedb_idx, filedb_idx_bak = get_db_idx()
    hecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb))
    shutil.copyfile(filedb, filedb_bak) # keep a backup, we will reuse it for each test
    shutil.copyfile(filedb_idx, filedb_idx_bak)
    # Do the same with structural_adaptive_ecc
    filedb_sa, filedb_sa_bak = get_db_sa()
    filedb_sa_idx, filedb_sa_idx_bak = get_db_sa_idx()
    saecc.main('-i "%s" -d "%s" --ecc_algo=3 -g -f --silent' % (filein, filedb_sa))
    shutil.copyfile(filedb_sa, filedb_sa_bak) # keep a backup, we will reuse it for each test
    shutil.copyfile(filedb_sa_idx, filedb_sa_idx_bak)

def test_check():
    """ recc: check db and index files are the same as expected """
    # this also helps to check that restore_files() is working correctly since they are critical for other tests
    filedb, filedb_bak = get_db()
    filedb_sa, filedb_sa_bak = get_db_sa()
    #filedb_idx, filedb_idx_bak = get_db_idx()
    fileres = path_sample_files('results', 'test_repair_ecc_check.db')
    fileres_sa = path_sample_files('results', 'test_repair_ecc_sa_check.db')
    #fileres_idx = path_sample_files('results', 'test_repair_ecc_check.db.idx')
    # Recopy the original untampered files
    restore_files('hecc')
    restore_files('saecc')
    #restore_files('hecc_idx')
    # Check that generated files are correct (header_ecc generated)
    startpos1 = next(find_next_entry(filedb, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins
    startpos2 = next(find_next_entry(fileres, get_marker(type=1)))
    assert check_eq_files(filedb, fileres, startpos1=startpos1, startpos2=startpos2)
    # assert check_eq_files(filedb_idx, fileres_idx) # cannot check the index file because of the possibly differing comments in the header (this will offset the position of every markers, and thus the index file will be different)
    # Check that generated files are correct (structural_adaptive_ecc generated)
    startpos1 = next(find_next_entry(filedb_sa, get_marker(type=1))) # need to skip the comments, so we detect where the first entrymarker begins
    startpos2 = next(find_next_entry(fileres_sa, get_marker(type=1)))
    assert check_eq_files(filedb_sa, fileres_sa, startpos1=startpos1, startpos2=startpos2)

def test_repair_by_index():
    """ recc: tamper ecc file and repair by index file """
    filedb, filedb_bak = get_db()
    filedb_idx, filedb_idx_bak = get_db_idx()
    fileout = path_sample_files('output', 'recc_file_repaired_index.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('hecc')
    restore_files('hecc_idx')
    # Completely overwrite a few markers (hence they cannot be recovered by hamming)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*len(marker1))
    tamper_file(filedb, startpos2, "a"*len(marker1))
    tamper_file(filedb, startpos3, "a"*len(marker2))
    # Repair ecc file using index file
    assert recc.main('-i "%s" --index "%s" -o "%s" -t 0.0 -f --silent' % (filedb, filedb_idx, fileout)) == 0
    # Repaired file must be byte-identical to the pristine backup
    assert check_eq_files(filedb_bak, fileout)

def test_repair_by_hamming():
    """ recc: tamper ecc file and repair by hamming distance """
    filedb, filedb_bak = get_db()
    fileout = path_sample_files('output', 'recc_file_repaired.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('hecc')
    # Partially overwrite (30%) a few markers, so that they remain recoverable by hamming similarity (no index file is used here)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*int(len(marker1)*0.3))
    tamper_file(filedb, startpos2, "a"*int(len(marker1)*0.3))
    tamper_file(filedb, startpos3, "a"*int(len(marker2)*0.3))
    # Repair ecc file by hamming similarity
    assert recc.main('-i "%s" -o "%s" -t 0.3 -f --silent' % (filedb, fileout)) == 0
    assert check_eq_files(filedb_bak, fileout)

def test_tamper_index():
    """ recc: tamper index file and see if it can repair itself (hecc) """
    filedb, filedb_bak = get_db()
    filedb_idx, filedb_idx_bak = get_db_idx()
    fileout = path_sample_files('output', 'recc_file_repaired_index.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('hecc')
    restore_files('hecc_idx')
    # Completely overwrite a few markers (hence they cannot be recovered by hamming)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*len(marker1))
    tamper_file(filedb, startpos2, "a"*len(marker1))
    tamper_file(filedb, startpos3, "a"*len(marker2))
    # Tamper index file
    tamper_file(filedb_idx, 0, "abcd")
    tamper_file(filedb_idx, 9, "abcd")
    tamper_file(filedb_idx, 27, "abcd")
    assert recc.main('-i "%s" --index "%s" -o "%s" -t 0.0 -f --silent' % (filedb, filedb_idx, fileout)) == 0
    assert check_eq_files(filedb_bak, fileout)

def test_tamper_index_saecc():
    """ recc: tamper index file and see if it can repair itself (saecc) """
    filedb, filedb_bak = get_db_sa()
    filedb_idx, filedb_idx_bak = get_db_sa_idx()
    fileout = path_sample_files('output', 'recc_file_sa_repaired.db')
    marker1 = get_marker(type=1)
    marker2 = get_marker(type=2)
    restore_files('saecc')
    restore_files('saecc_idx')
    # Completely overwrite a few markers (hence they cannot be recovered by hamming)
    startpos1 = next(find_next_entry(filedb, marker1))
    startpos2 = next(find_next_entry(filedb, marker1, startpos1+len(marker1)))
    startpos3 = next(find_next_entry(filedb, marker2, startpos2+len(marker1)))
    tamper_file(filedb, startpos1, "a"*len(marker1))
    tamper_file(filedb, startpos2, "a"*len(marker1))
    tamper_file(filedb, startpos3, "a"*len(marker2))
    # Tamper index file
    tamper_file(filedb_idx, 0, "abcd")
    tamper_file(filedb_idx, 9, "abcd")
    tamper_file(filedb_idx, 27, "abcd")
    assert recc.main('-i "%s" --index "%s" -o "%s" -t 0.0 -f --silent' % (filedb, filedb_idx, fileout)) == 0
    assert check_eq_files(filedb_bak, fileout)
--------------------------------------------------------------------------------
/pyFileFixity/lib/profilers/pyinstrument/README.md:
--------------------------------------------------------------------------------
pyinstrument
============

A Python profiler that records the call stack of the executing code, instead
of just the final function in it.

[![Screenshot](screenshot.jpg)](https://raw.githubusercontent.com/joerick/pyinstrument/master/screenshot.jpg)

It uses a **statistical profiler**, meaning the code samples the stack
periodically (every 1 ms). This is lower overhead than event-
based profiling (as done by `profile` and `cProfile`).

This module is still very young, so I'd love any feedback/bug reports/pull
requests!
15 | 16 | Documentation 17 | ------------- 18 | 19 | * [Installation](#installation) 20 | * [Usage](#usage) 21 | * [Command-line](#command-line) 22 | * [Django](#django) 23 | * [Python](#python) 24 | * [Signal or setprofile mode?](#signal-or-setprofile-mode) 25 | * [Known issues](#known-issues) 26 | * [Changelog](#changelog) 27 | * [What's new in v0.13](#whats-new-in-v013) 28 | * [What's new in v0.12](#whats-new-in-v012) 29 | * [Further information](#further-information) 30 | * [Call stack profiling?](#call-stack-profiling) 31 | 32 | Installation 33 | ------------ 34 | 35 | pip install -e git+https://github.com/joerick/pyinstrument.git#egg=pyinstrument 36 | 37 | pyinstrument supports Python 2.7 and 3.3+. 38 | 39 | Usage 40 | ----- 41 | 42 | #### Command-line #### 43 | 44 | You can call pyinstrument directly from the command line. 45 | 46 | python -m pyinstrument [options] myscript.py [args...] 47 | 48 | Options: 49 | -h, --help show this help message and exit 50 | --setprofile run in setprofile mode, instead of signal mode 51 | --html output HTML instead of text 52 | -o OUTFILE, --outfile=OUTFILE 53 | save report to 54 | --unicode force unicode text output 55 | --no-unicode force ascii text output 56 | --color force ansi color text output 57 | --no-color force no color text output 58 | 59 | 60 | This will run `myscript.py` to completion or until you interrupt it, and 61 | then output the call tree. 62 | 63 | #### Django #### 64 | 65 | Add `pyinstrument.middleware.ProfilerMiddleware` to `MIDDLEWARE_CLASSES`. 66 | If you want to profile your middleware as well as your view (you probably 67 | do) then put it at the start of the list. 68 | 69 | ##### Per-request profiling ##### 70 | 71 | Add `?profile` to the end of the request URL to activate the profiler. 72 | Instead of seeing the output of your view, pyinstrument renders an HTML 73 | call tree for the view (as in the screenshot above). 
74 | 75 | ##### Using `PYINSTRUMENT_PROFILE_DIR` ##### 76 | 77 | If you're writing an API, it's not easy to change the URL when you want 78 | to profile something. In this case, add 79 | `PYINSTRUMENT_PROFILE_DIR = 'profiles'` to your settings.py. 80 | pyinstrument will profile every request and save the HTML output to the 81 | folder `profiles` in your working directory. 82 | 83 | #### Python #### 84 | 85 | ```python 86 | from pyinstrument import Profiler 87 | 88 | profiler = Profiler() # or Profiler(use_signal=False), see below 89 | profiler.start() 90 | 91 | # code you want to profile 92 | 93 | profiler.stop() 94 | 95 | print(profiler.output_text(unicode=True, color=True)) 96 | ``` 97 | 98 | You can omit the `unicode` and `color` flags if your output/terminal does 99 | not support them. 100 | 101 | Signal or setprofile mode? 102 | -------------------------- 103 | 104 | On Mac/Linux/Unix, pyinstrument can run in 'signal' mode. This uses 105 | OS-provided signals to interrupt the process every 1ms and record the stack. 106 | It gives much lower overhead (and thus accurate) readings than the standard 107 | Python [`sys.setprofile`][setprofile] style profilers. However, this can 108 | only profile the main thread. 109 | 110 | On Windows and on multi-threaded applications, a `setprofile` mode is 111 | available by passing `use_signal=False` to the Profiler constructor. It works 112 | exactly the same as the signal mode, but has higher overhead. See the below 113 | table for an example of the amount of overhead. 114 | 115 | [setprofile]: https://docs.python.org/2/library/sys.html#sys.setprofile 116 | 117 | This overhead is important because code that makes a lot of Python function 118 | calls will appear to take longer than code that does not. 

| Django template render × 4000 | Overhead
---------------------------|------------------------------:|---------:
Base | 1.46s |
| |
pyinstrument (signal) | 1.84s | 26%
cProfile | 2.18s | 49%
pyinstrument (setprofile) | 5.33s | 365%
profile | 25.39s | 1739%

To run in setprofile mode:

* Use flag `--setprofile` if using the command-line interface
* Use setting `PYINSTRUMENT_USE_SIGNAL = False` in Django
* Use argument `use_signal=False` in the constructor for the Python API

Known issues
------------

- When profiling Django, I'd recommend disabling django-debug-toolbar,
  django-devserver etc., as their instrumentation distorts timings.

- In signal mode, any calls to [`time.sleep`][pysleep] will return
  immediately. This is because of an implementation detail of `time.sleep`,
  but matches the behaviour of the C function [`sleep`][csleep].

- Some system calls can fail with `IOError` when being profiled in signal
  mode. If this happens to you, your only option is to run in setprofile
  mode.

[pysleep]: https://docs.python.org/2/library/time.html#time.sleep
[csleep]: http://pubs.opengroup.org/onlinepubs/009695399/functions/sleep.html

Changelog
---------

### What's new in v0.13 ###

- `pyinstrument` command. You can now profile python scripts from the shell
  by running `$ pyinstrument script.py`. This is now equivalent to
  `python -m pyinstrument`. Thanks @asmeurer!

### What's new in v0.12 ###

- Application code is highlighted in HTML traces to make it easier to spot

- Added `PYINSTRUMENT_PROFILE_DIR` option to the Django interface, which
  will log profiles of all requests to a file in the specified folder. Useful
  for profiling API calls.
168 | 169 | - Added `PYINSTRUMENT_USE_SIGNAL` option to the Django interface, for use 170 | when signal mode presents problems. 171 | 172 | Further information 173 | =================== 174 | 175 | Call stack profiling? 176 | --------------------- 177 | 178 | The standard Python profilers [`profile`][1] and [`cProfile`][2] produce 179 | output where time is totalled according to the time spent in each function. 180 | This is great, but it falls down when you profile code where most time is 181 | spent in framework code that you're not familiar with. 182 | 183 | [1]: http://docs.python.org/2/library/profile.html#module-profile 184 | [2]: http://docs.python.org/2/library/profile.html#module-cProfile 185 | 186 | Here's an example of profile output when using Django. 187 | 188 | 151940 function calls (147672 primitive calls) in 1.696 seconds 189 | 190 | Ordered by: cumulative time 191 | 192 | ncalls tottime percall cumtime percall filename:lineno(function) 193 | 1 0.000 0.000 1.696 1.696 profile:0( at 0x1053d6a30, file "./manage.py", line 2>) 194 | 1 0.001 0.001 1.693 1.693 manage.py:2() 195 | 1 0.000 0.000 1.586 1.586 __init__.py:394(execute_from_command_line) 196 | 1 0.000 0.000 1.586 1.586 __init__.py:350(execute) 197 | 1 0.000 0.000 1.142 1.142 __init__.py:254(fetch_command) 198 | 43 0.013 0.000 1.124 0.026 __init__.py:1() 199 | 388 0.008 0.000 1.062 0.003 re.py:226(_compile) 200 | 158 0.005 0.000 1.048 0.007 sre_compile.py:496(compile) 201 | 1 0.001 0.001 1.042 1.042 __init__.py:78(get_commands) 202 | 153 0.001 0.000 1.036 0.007 re.py:188(compile) 203 | 106/102 0.001 0.000 1.030 0.010 __init__.py:52(__getattr__) 204 | 1 0.000 0.000 1.029 1.029 __init__.py:31(_setup) 205 | 1 0.000 0.000 1.021 1.021 __init__.py:57(_configure_logging) 206 | 2 0.002 0.001 1.011 0.505 log.py:1() 207 | 208 | 209 | When you're using big frameworks like Django, it's very hard to understand how 210 | your own code relates to these traces. 

Pyinstrument records the entire stack, so tracking expensive calls is much
easier.
--------------------------------------------------------------------------------
/pyFileFixity/tests/test_resiliency_tester.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import sys
import os

import shutil

from .. import resiliency_tester as restest
from .aux_tests import path_sample_files, tamper_file, create_dir_if_not_exist, remove_if_exist

from ..lib._compat import _StringIO

def setup_module():
    """ Initialize the tests by emptying the out directory """
    outfolder = path_sample_files('output')
    shutil.rmtree(outfolder, ignore_errors=True)
    create_dir_if_not_exist(outfolder)

def test_parse_configfile():
    """ restest: test internal: parse_configfile() """
    # NOTE(review): the exact indentation inside this config literal could not
    # be recovered from the packed dump; flush-left matches the parsed
    # expectation asserted below -- confirm against version control.
    config = '''
before_tamper:
cmd1 -i "arg1" -o "arg2"
cmd2

tamper:
cmd3
cmd4

after_tamper:
cmd5
cmd6
# a comment

repair:
cmd7
cmd8
'''
    fconfig = _StringIO(config)
    parsed = restest.parse_configfile(fconfig)
    # Comments and blank lines are dropped; commands are grouped per section
    assert parsed == {'tamper': ['cmd3', 'cmd4'], 'after_tamper': ['cmd5', 'cmd6'], 'before_tamper': ['cmd1 -i "arg1" -o "arg2"', 'cmd2'], 'repair': ['cmd7', 'cmd8']}

def test_get_filename_no_ext():
    """ restest: test internal: get_filename_no_ext() """
    filepath = '/test/path/to/filename_no_ext.ext'
    res = restest.get_filename_no_ext(filepath)
    assert res == 'filename_no_ext'

def test_interpolate_dict():
    """ restest: test internal: interpolate_dict() """
    # Extra keys in the dict (var4) are simply ignored
    s = 'Some {var1} with {var2} makes for {var3} parties!'
    d = {'var1': 'wine', 'var2': 'beer', 'var3': 'fun', 'var4': 'Hidden'}
    res = restest.interpolate_dict(s, interp_args=d)
    assert res == 'Some wine with beer makes for fun parties!'

def test_get_dbfile():
    """ restest: test internal: get_dbfile() """
    res = restest.get_dbfile('databases', 10)
    assert 'databases' in res
    assert 'db10' in res

def test_diff_bytes_files():
    """ restest: test internal: diff_bytes_files() """
    filein = path_sample_files('input', 'tuxsmall.jpg')
    fileout1 = path_sample_files('output', 'bytes_tuxsmall1.jpg')
    fileout2 = path_sample_files('output', 'bytes_tuxsmall2.jpg')
    shutil.copy2(filein, fileout1)
    shutil.copy2(filein, fileout2)
    # Identical copies: 0 differing bytes out of the full file size
    res = restest.diff_bytes_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert res[0] == 0
    assert res[1] == os.stat(fileout1).st_size
    # Tamper 3 bytes, spread over two different blocks
    tamper_file(fileout2, 0, "X")
    tamper_file(fileout2, 4, "X")
    tamper_file(fileout2, 2000, "X")
    res = restest.diff_bytes_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert res[0] == 3

def test_diff_count_files():
    """ restest: test internal: diff_count_files() """
    filein = path_sample_files('input', 'tuxsmall.jpg')
    fileout1 = path_sample_files('output', 'count_tuxsmall1.jpg')
    fileout2 = path_sample_files('output', 'count_tuxsmall2.jpg')
    shutil.copy2(filein, fileout1)
    shutil.copy2(filein, fileout2)
    # Identical copies compare as equal (truthy result)
    res = restest.diff_count_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert res
    # Any tampered byte makes the comparison fail (falsy result)
    tamper_file(fileout2, 0, "X")
    tamper_file(fileout2, 4, "X")
    tamper_file(fileout2, 2000, "X")
    res = restest.diff_count_files(fileout1, fileout2, blocksize=1000, startpos1=0, startpos2=0)
    assert not res

def test_diff_bytes_dir():
    """ restest: test internal: diff_bytes_dir() """
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/bytes')
    fileout = path_sample_files('output', 'restest/bytes/tuxsmall.jpg')
    fileout2 = path_sample_files('output', 'restest/bytes/testaa.txt')
    remove_if_exist(dirout)
    shutil.copytree(dirin, dirout)

    # First compare the two folders that are identical
    res = restest.diff_bytes_dir(dirin, dirout)
    assert res[0] == 0

    # Tamper a few bytes of two files
    tamper_file(fileout, 0, "X")
    tamper_file(fileout, 4, "X")
    tamper_file(fileout, 2000, "X")
    tamper_file(fileout2, 0, "X")
    res = restest.diff_bytes_dir(dirin, dirout)
    assert res[0] == 4

    # Now remove a file altogether, its size should be added to the amount of differing bytes
    filesize = os.stat(fileout).st_size
    remove_if_exist(fileout)
    res = restest.diff_bytes_dir(dirin, dirout)
    assert res[0] == (filesize+1)

def test_diff_count_dir():
    """ restest: test internal: diff_count_dir() """
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/count')
    fileout = path_sample_files('output', 'restest/count/tuxsmall.jpg')
    fileout2 = path_sample_files('output', 'restest/count/testaa.txt')
    remove_if_exist(dirout)
    shutil.copytree(dirin, dirout)

    # First compare the two folders that are identical
    res = restest.diff_count_dir(dirin, dirout)
    assert res[0] == 0

    # Tamper a few bytes of two files
    tamper_file(fileout, 0, "X")
    tamper_file(fileout, 4, "X")
    tamper_file(fileout, 2000, "X")
    tamper_file(fileout2, 0, "X")
    res = restest.diff_count_dir(dirin, dirout)
    assert res[0] == 2

    # Now remove a file altogether, its size should be added to the amount of differing bytes
    filesize = os.stat(fileout).st_size
    remove_if_exist(fileout)
    res = restest.diff_count_dir(dirin, dirout)
    assert res[0] == 2

def test_compute_repair_power():
    """ restest: test internal: compute_repair_power() """
    # Note: be careful if you add tests here, the displayed value by print() may be rounded up! Use print(repr(compute_repair_power()))
    assert restest.compute_repair_power(0.3, 0.5) == 40.0
    assert restest.compute_repair_power(0.2, 0.8) == 75.0
    assert restest.compute_repair_power(0.6, 0.3) == -100.0
    assert restest.compute_repair_power(0.6, 0.0) == 0.6

def test_compute_diff_stats():
    """ restest: test internal: compute_diff_stats() """
    # NOTE(review): reuses the 'restest/count' folder names also used by
    # test_diff_count_dir(), but recreates the tree itself, so there is no
    # hard ordering dependency.
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/count')
    fileout = path_sample_files('output', 'restest/count/tuxsmall.jpg')
    fileout2 = path_sample_files('output', 'restest/count/testaa.txt')
    remove_if_exist(dirout)
    shutil.copytree(dirin, dirout)

    # First compare the two folders that are identical
    res = restest.compute_diff_stats(dirin, dirin, dirout)
    assert dict(res) == {'diff_bytes': (0, 92955), 'diff_bytes_prev': (0, 92955), 'diff_count': (0, 7), 'diff_count_prev': (0, 7), 'repair_power': 0, 'error': 0.0}

    # Tamper a few bytes of two files
    tamper_file(fileout, 0, "X")
    tamper_file(fileout, 4, "X")
    tamper_file(fileout, 2000, "X")
    tamper_file(fileout2, 0, "X")
    res = restest.compute_diff_stats(dirin, dirin, dirout)
    assert dict(res) == {'diff_bytes': (4, 92955), 'diff_bytes_prev': (4, 92955), 'diff_count': (2, 7), 'diff_count_prev': (2, 7), 'repair_power': 0, 'error': 0.0043031574417729005}

def test_stats_running_average():
    """ restest: test internal: stats_running_average() """
    stats1 = {'diff_bytes': (0, 92955), 'diff_bytes_prev': (0, 92955), 'diff_count': (0, 7), 'diff_count_prev': (0, 7), 'repair_power': 0, 'error': 0.0}
    stats2 = {'diff_bytes': (4, 92955), 'diff_bytes_prev': (4, 92955), 'diff_count': (2, 7), 'diff_count_prev': (2, 7), 'repair_power': 0, 'error': 0.5}
    # Third argument is the weight of the accumulated (first) stats
    assert restest.stats_running_average({"tamper": stats1}, {"tamper": stats2}, 1) == {'tamper': {'diff_count_prev': [1.0, 7.0], 'diff_count': [1.0, 7.0], 'diff_bytes_prev': [2.0, 92955.0], 'error': 0.25, 'repair_power': 0.0, 'diff_bytes': [2.0, 92955.0]}}
    assert restest.stats_running_average({"tamper": stats1}, {"tamper": stats2}, 3) == {'tamper': {'diff_count_prev': [0.5, 7.0], 'diff_count': [0.5, 7.0], 'diff_bytes_prev': [1.0, 92955.0], 'error': 0.125, 'repair_power': 0.0, 'diff_bytes': [1.0, 92955.0]}}

def test_main():
    """ restest: test main() """
    # Change directory so that the config's commands can access pyFileFixity scripts
    thispathname = os.path.dirname(__file__)
    sys.path.append(os.path.join(thispathname, '..'))
    # Setup paths
    dirin = path_sample_files('input')
    dirout = path_sample_files('output', 'restest/fulltest')
    configfile = path_sample_files('results', 'resiliency_tester_config_easy.cfg')
    configfile_hard = path_sample_files('results', 'resiliency_tester_config_hard.cfg')
    # Should be no error with the easy scenario (repair should be successful)
    assert restest.main("-i \"%s\" -o \"%s\" -c \"%s\" -f --silent" % (dirin, dirout, configfile)) == 0
    # Should be error with the hard scenario
    assert restest.main("-i \"%s\" -o \"%s\" -c \"%s\" -m 2 -f --silent" % (dirin, dirout, configfile_hard)) == 1
    # TODO: catch sys.stdout and check for the end stats?
--------------------------------------------------------------------------------