├── .gitattributes ├── .github └── workflows │ ├── continuous-integration.yml │ └── supply-chain.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CONTRIBUTE.md ├── ChangeLog ├── LICENSE.md ├── Makefile ├── README.md ├── demos ├── .gitignore ├── demo_fiwalk_diskimage.py ├── demo_mac_timeline.py ├── demo_mac_timeline_iter.py ├── demo_mac_timeline_objects.py ├── demo_piecewise.py ├── demo_plot_times.py ├── demo_readtimes.py ├── demo_registry_timeline.py ├── demo_sizes.py ├── demo_spark.py ├── spark │ └── demo_spark.py └── vmstats │ ├── Makefile │ ├── skeleton.css │ ├── vmstats.py │ ├── vmstats_decode.html │ ├── vmstats_decode.py │ └── vmstats_json.html ├── dfxml ├── __init__.py ├── bin │ ├── .gitignore │ ├── Extractor.py │ ├── Makefile │ ├── README.md │ ├── TCPFlowObjects.py │ ├── __init__.py │ ├── allocation_counter.py │ ├── break_out_diffs_by_anno.py │ ├── cat_fileobjects.py │ ├── cat_partitions.py │ ├── conftest.py │ ├── corpus_sync.py │ ├── dedup.py │ ├── deidentify_xml.py │ ├── dfxinfo.py │ ├── dfxml_tool.py │ ├── exp_slack.py │ ├── filesdb.py │ ├── hash_sectors.py │ ├── iblkfind.py │ ├── icarvingtruth.py │ ├── idifference.py │ ├── idifference2.py │ ├── iexport.py │ ├── iextract.py │ ├── igrep.py │ ├── ihistogram.py │ ├── imap.py │ ├── imicrosoft_redact.py │ ├── iredact-config.txt │ ├── iredact.py │ ├── ireport.py │ ├── iverify.py │ ├── make_differential_dfxml.py │ ├── mem_info.py │ ├── nsrl_rds.py │ ├── rdifference.py │ ├── report_silent_changes.py │ ├── summarize_differential_dfxml.py │ ├── tcpdiff.py │ ├── validate_dfxml.py │ ├── walk_to_dfxml.py │ ├── xdiff.py │ ├── xmirror.py │ └── xml2body.py ├── conftest.py ├── dfxml_html.py ├── fiwalk.py ├── histogram.py ├── objects.py ├── py.typed └── writer.py ├── samples ├── .gitignore ├── Makefile ├── README.md ├── difference_test_0.xml ├── difference_test_1.xml ├── difference_test_2.xml ├── difference_test_3.xml ├── fileobjectexample.xml ├── piecewise.xml ├── simple.xml └── tcpflow_zip_generic_header.xml ├── setup.cfg ├── setup.py └── tests ├── .gitignore ├── Makefile ├── README.md ├── make_differential_dfxml ├── .gitignore ├── Makefile ├── README.md ├── differential_dfxml_test_by_path_01.txt ├── differential_dfxml_test_by_path_23.txt ├── differential_dfxml_test_by_times_01.txt ├── differential_dfxml_test_by_times_23.txt └── test_differential_dfxml.py ├── misc_bin_tests ├── README.md ├── _pick_pythons.sh ├── _sane_defaults.sh ├── dfxml_test.py ├── iexport_test.py ├── paths.sh ├── test_cat_fileobjects.sh ├── test_dfxml_tool.sh ├── test_hfsj.sh ├── test_idifference.py ├── test_idifference_to_dfxml.sh ├── test_mac_timelines.sh ├── test_redact.sh └── test_regxml.sh ├── misc_object_tests ├── .gitignore ├── ByteRun_test.py ├── ByteRuns_test.py ├── CellObject_test.py ├── DFXMLObject_program_test.py ├── DiskImageObject_test.py ├── FileObject_allocation_test.py ├── FileObject_byte_run_facets_test.py ├── FileObject_externals_test.py ├── FileObject_from_stat_test.py ├── FileObject_test.py ├── LibraryObject_read_test.py ├── LibraryObject_write_test.py ├── Makefile ├── Makefile_test.py ├── PartitionObject_test.py ├── PartitionSystemObject_test.py ├── README.md ├── RegXMLObject_test.py ├── VolumeObject_externals_test.py ├── VolumeObject_hash_test.py ├── VolumeObject_test.py ├── diff_file_ignore_sample_dfxml_test.py ├── diff_file_ignore_test.py ├── diffing_ByteRuns_test.py ├── diffing_CellObject_test.py ├── diffing_FileObject_test.py ├── diffing_HiveObject_test.py ├── diffing_TimestampObject_test.py ├── diffing_VolumeObject_test.py ├── 
error_test.py ├── libtest.py ├── objects_test.py ├── storage_layers_test.py └── test_TCPFlowObjects.py ├── requirements.txt ├── test_objects.py ├── test_reads.py ├── test_version.py └── walk_to_dfxml ├── .gitignore ├── Makefile ├── README.md └── test_walk_to_dfxml.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | # NOTE: At the time this rule was written, all files tracked in this repository were known to be text files. From documentation on this file at git-scm.com, it seems possible this might trip up commiting a binary file in the future. 3 | * text=auto 4 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | # This file based on https://gist.github.com/mwouts/9842452d020c08faf9e84a3bba38a66f 2 | # See: https://help.github.com/en/actions/reference/software-installed-on-github-hosted-runners 3 | # 2020-06-22 - slg - customized 4 | # 2020-06-27 - slg - expanded to G++ for MacOS 5 | # 6 | name: CI (python) 7 | on: [push, pull_request] 8 | 9 | env: 10 | COVERAGE_OS: ubuntu-latest 11 | COVERAGE_PYTHON_VERSION: 3.13 12 | 13 | jobs: 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: ['ubuntu-latest', 'macos-latest'] 19 | python-version: ['3.9','3.13'] 20 | 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Install Python dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install pytest pytest-cov 34 | if [ -r requirements.txt ]; then pip install -r requirements.txt ; fi 35 | if [ -r requirements-dev.txt ]; then pip install -r requirements-dev.txt ; fi 36 | 37 | - name: Install xmllint on ubuntu 38 | if: runner.os == 'Linux' 39 | run: | 40 | sudo apt update 41 | sudo apt install --yes libxml2-utils 42 | 43 | - name: Pre-commit Checks 44 | run: | 45 | pip -q install pre-commit 46 | pre-commit run --all-files 47 | 48 | - name: Make check 49 | run: make check 50 | 51 | - name: Make check-tools 52 | run: make check-tools 53 | 54 | - name: Test with pytest 55 | run: pytest --cov=dfxml --cov-report=xml . 56 | 57 | - name: Upload to codecov.io 58 | if: matrix.os == env.COVERAGE_OS && matrix.python-version == env.COVERAGE_PYTHON_VERSION 59 | uses: codecov/codecov-action@v4 60 | with: 61 | token: ${{ secrets.CODECOV_TOKEN }} 62 | verbose: true 63 | files: ./coverage.xml 64 | -------------------------------------------------------------------------------- /.github/workflows/supply-chain.yml: -------------------------------------------------------------------------------- 1 | # Portions of this file contributed by NIST are governed by the 2 | # following statement: 3 | # 4 | # This software was developed at the National Institute of Standards 5 | # and Technology by employees of the Federal Government in the course 6 | # of their official duties. Pursuant to title 17 Section 105 of the 7 | # United States Code this software is not subject to copyright 8 | # protection and is in the public domain. 
NIST assumes no 9 | # responsibility whatsoever for its use by other parties, and makes 10 | # no guarantees, expressed or implied, about its quality, 11 | # reliability, or any other characteristic. 12 | # 13 | # We would appreciate acknowledgement if the software is used. 14 | 15 | # This workflow uses Make to review direct dependencies of this 16 | # repository. 17 | 18 | name: Supply Chain 19 | 20 | on: 21 | schedule: 22 | - cron: '15 5 * * 1,2,3,4,5' 23 | 24 | jobs: 25 | build: 26 | 27 | runs-on: ubuntu-latest 28 | strategy: 29 | matrix: 30 | python-version: 31 | - '3.9' 32 | - '3.13' 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | - name: Review dependencies 43 | run: make check-supply-chain 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *~ 3 | __pycache__ 4 | _deps 5 | python/demo.dfxml 6 | 7 | .DS_Store 8 | build 9 | python/demo.dfxml 10 | .cache 11 | .pytest_cache 12 | *.egg-info 13 | *.log 14 | .venv-pre-commit 15 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dependencies/dfxml_schema"] 2 | path = dependencies/dfxml_schema 3 | url = https://github.com/dfxml-working-group/dfxml_schema.git 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 25.1.0 4 | hooks: 5 | - id: black 6 | -------------------------------------------------------------------------------- /CONTRIBUTE.md: -------------------------------------------------------------------------------- 1 | # Contributing to DFXML's Python code base 2 | 3 | 4 | ## Pre-commit 5 | 6 | This project uses [the `pre-commit` tool](https://pre-commit.com/) for linting. 7 | 8 | `pre-commit` hooks into Git's commit machinery to run a set of linters and static analyzers over each change. To install `pre-commit` into Git's hooks, run one (not both) of the following sets of commands: 9 | 10 | ```bash 11 | pip install pre-commit 12 | pre-commit --version 13 | pre-commit install 14 | ``` 15 | 16 | Or: 17 | 18 | ```bash 19 | make 20 | ``` 21 | 22 | 23 | ## Installable tools versus in-place scripts 24 | 25 | The [`dfxml/bin/`](dfxml/bin/) directory contains scripts for interacting with DFXML. Some of the tools are installed in the command-line `$PATH` when the `dfxml` package is installed. 26 | 27 | If there is a request to add a tool to the package's installed-tools list, the tool should have these implemented: 28 | 1. A unit test suite that exercises the tool's command line features, such as flags, and `pytest` tests to confirm expected output. 29 | 2. A documentation page, preferably a README alongside the unit test suite. The documentation should include command-line usage. 30 | 3. A row in [`dfxml/bin/README.md`](dfxml/bin/README.md)'s table of installed tools, linking to the documentation. 31 | 4. The tool should be analyzed with a static type checker. See e.g. the target `check-mypy` in the [tests Makefile](tests/Makefile) that is run as part of CI. 
(Note this would be started by adding type signatures to the tool's functions.) 32 | 33 | 34 | ## Version management 35 | 36 | **Note that DFXML 1.0.2 DOES NOT YET follow SEMVER practices.** 37 | 38 | This project plans to adopt [SEMVER](https://semver.org/) to denote expected stability of its offered resources. The project *has not yet* adopted SEMVER; when it does, a note will be added to the README. 39 | 40 | Once a SEMVER-adherent major version is declared, backwards-incompatible commits will be merged into the `release-x.0.0` branch (where `x` is the next major version) instead of `develop`. 41 | 42 | Following SEMVER's `major.minor.patch` version designation: 43 | * The `major` version will increment on deploying changes that are backwards-incompatible with the prior major release. 44 | * The `minor` version will increment on new functionality being added. 45 | * The `patch` version will increment on new tests for existing functionality being added, or a bug being fixed, with some discretion to be used for any needed interface corrections. 46 | 47 | The following are this repository's policies on backwards compatibility for this project's resources. 48 | 49 | 50 | ### Version of the DFXML Python code base 51 | 52 | The overall package version of `dfxml` is stored in one location, the `__version__` variable of `dfxml/__init__.py`. 53 | 54 | Other resources may track their own version independently. 55 | 56 | 57 | ### Package resources 58 | 59 | The set of command-line tools offered in the package (defined in `setup.cfg`) is considered in-scope for backwards compatibility. 60 | 61 | 62 | ### Command-line functionality 63 | 64 | Tests that illustrate expected command-line behavior are available under the [`tests/`](tests/) directory. See the `Makefile`s under the directories named after the provided tools. Recipes that include activating a virtual environment (e.g. `source $(tests_srcdir)/venv/bin/activate`) show command line execution patterns. 65 | 66 | The command-line functionality demonstrated by the `Makefile`s under `tests/` is considered in-scope for backwards compatibility. 67 | 68 | 69 | ### Module functionality 70 | 71 | This project uses the [`pytest`](https://docs.pytest.org) framework to run unit tests. These tests encode the expected behaviors of command-line results, and of module functions. Tests generally follow a "Ground-truth comparison" model, where an expected set of results is compared to a computed set of results (generally, as `expected_X == computed_X`). 72 | 73 | The module functionality exercised by the `pytest` unit tests is considered in-scope for backwards compatibility. 74 | 75 | 76 | ## Merge model 77 | 78 | On adoption of SEMVER, this project will follow the `git-flow` merge model. In short: 79 | * The `main` branch will contain tagged release commits only. 80 | * The `develop` branch will be the target of Pull Requests for new features. 81 | * `release-x.y.z` branches will be made off of `develop` when a new release is to be tagged , and merged into `main` and back into `develop`. 82 | 83 | The above practice can be seen illustrated in the first figure on [this page](https://nvie.com/posts/a-successful-git-branching-model/). 
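As a concrete illustration of the "Ground-truth comparison" model described above, the following is a minimal sketch of a `pytest`-style test. The fixture path and expected values are hypothetical placeholders, not part of this repository's test suite:

```python
import dfxml.objects as Objects


def test_expected_filenames() -> None:
    # Hypothetical ground truth for a hypothetical DFXML fixture file.
    expected_filenames = {"example_1.txt", "example_2.txt"}

    # Compute the observed result by iterating the fixture's fileobjects.
    computed_filenames = set()
    for event, obj in Objects.iterparse("fixture.dfxml"):  # placeholder path
        if event != "end" or not isinstance(obj, Objects.FileObject):
            continue
        computed_filenames.add(obj.filename)

    assert expected_filenames == computed_filenames
```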
84 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # While SHELL would typically be set with ":=" assignment, some 15 | # environments do not have Bash at /bin/bash (e.g. FreeBSD stores Bash 16 | # at /usr/local/bin/bash). 17 | ifeq ($(shell basename $(SHELL)),sh) 18 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 19 | endif 20 | 21 | PYTHON3 ?= python3 22 | ifeq ($(PYTHON3),) 23 | $(error python3 not found) 24 | endif 25 | 26 | all: \ 27 | .venv-pre-commit/var/.pre-commit-built.log 28 | 29 | .PHONY: \ 30 | check-mypy \ 31 | check-supply-chain \ 32 | check-supply-chain-pre-commit 33 | 34 | .git_submodule_init.done.log: .gitmodules 35 | # Confirm dfxml_schema has been checked out at least once. 36 | test -r dependencies/dfxml_schema/dfxml.xsd \ 37 | || (git submodule init dependencies/dfxml_schema && git submodule update dependencies/dfxml_schema) 38 | test -r dependencies/dfxml_schema/dfxml.xsd 39 | touch $@ 40 | 41 | # This virtual environment is meant to be built once and then persist, even through 'make clean'. 42 | # If a recipe is written to remove this flag file, it should first run `pre-commit uninstall`. 43 | .venv-pre-commit/var/.pre-commit-built.log: 44 | rm -rf .venv-pre-commit 45 | test -r .pre-commit-config.yaml \ 46 | || (echo "ERROR:Makefile:pre-commit is expected to install for this repository, but .pre-commit-config.yaml does not seem to exist." >&2 ; exit 1) 47 | $(PYTHON3) -m venv \ 48 | .venv-pre-commit 49 | source .venv-pre-commit/bin/activate \ 50 | && pip install \ 51 | --upgrade \ 52 | pip \ 53 | setuptools \ 54 | wheel 55 | source .venv-pre-commit/bin/activate \ 56 | && pip install \ 57 | pre-commit 58 | source .venv-pre-commit/bin/activate \ 59 | && pre-commit install 60 | mkdir -p \ 61 | .venv-pre-commit/var 62 | touch $@ 63 | 64 | clean: 65 | find . -name '*~' -exec rm {} \; 66 | $(MAKE) \ 67 | --directory tests \ 68 | clean 69 | 70 | check: \ 71 | .git_submodule_init.done.log \ 72 | .venv-pre-commit/var/.pre-commit-built.log 73 | $(MAKE) \ 74 | PYTHON3=$(PYTHON3) \ 75 | SHELL=$(SHELL) \ 76 | --directory tests \ 77 | check 78 | 79 | check-mypy: \ 80 | .git_submodule_init.done.log 81 | $(MAKE) \ 82 | PYTHON3=$(PYTHON3) \ 83 | SHELL=$(SHELL) \ 84 | --directory tests \ 85 | check-mypy 86 | 87 | check-supply-chain: \ 88 | check-supply-chain-pre-commit \ 89 | check-mypy 90 | 91 | # Update pre-commit configuration and use the updated config file to 92 | # review code. Only have Make exit if 'pre-commit run' modifies files. 
93 | check-supply-chain-pre-commit: \ 94 | .venv-pre-commit/var/.pre-commit-built.log 95 | source .venv-pre-commit/bin/activate \ 96 | && pre-commit autoupdate 97 | git diff \ 98 | --exit-code \ 99 | .pre-commit-config.yaml \ 100 | || ( \ 101 | source .venv-pre-commit/bin/activate \ 102 | && pre-commit run \ 103 | --all-files \ 104 | --config .pre-commit-config.yaml \ 105 | ) \ 106 | || git diff \ 107 | --stat \ 108 | --exit-code \ 109 | || ( \ 110 | echo \ 111 | "WARNING:Makefile:pre-commit configuration can be updated. It appears the updated would change file formatting." \ 112 | >&2 \ 113 | ; exit 1 \ 114 | ) 115 | @git diff \ 116 | --exit-code \ 117 | .pre-commit-config.yaml \ 118 | || echo \ 119 | "INFO:Makefile:pre-commit configuration can be updated. It appears the update would not change file formatting." \ 120 | >&2 121 | 122 | check-tools: 123 | (cd tests/misc_object_tests;make check) 124 | -------------------------------------------------------------------------------- /demos/.gitignore: -------------------------------------------------------------------------------- 1 | *.dfxml 2 | *.xml 3 | -------------------------------------------------------------------------------- /demos/demo_fiwalk_diskimage.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """ 3 | This demo shows how to invoke Fiwalk as a subprocess, taking a disk image as 4 | input. Fiwalk's dfxml XML output is sent to an in-memory buffer, which is then 5 | written to an output file. Note that this may fail for very large disk images 6 | if the required buffer size exceeds available RAM! 7 | """ 8 | 9 | import io 10 | import sys 11 | 12 | from dfxml import fiwalk 13 | 14 | 15 | def writeDfxml(imageFile: str, outFile: str) -> None: 16 | """Generate filesystem metadata for disk image and and write resulting 17 | dfxml to file""" 18 | # Analyse image file 19 | with open(imageFile, "rb") as ifs: 20 | fwOutBuffer = fiwalk.fiwalk_xml_stream(imagefile=ifs) 21 | fwOut = fwOutBuffer.read() 22 | 23 | # Write dfxml to output file 24 | with io.open(outFile, "wb") as fOut: 25 | fOut.write(fwOut) 26 | 27 | 28 | def main() -> None: 29 | if len(sys.argv) < 3: 30 | print("Usage: {} ".format(sys.argv[0])) 31 | exit(1) 32 | imageFile = sys.argv[1] 33 | outFile = sys.argv[2] 34 | writeDfxml(imageFile, outFile) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /demos/demo_mac_timeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # produce a MAC-times timeline. 
3 | # works under either Python2 or Python3 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 8 | import dfxml 9 | 10 | timeline = [] 11 | 12 | 13 | def process(fi): 14 | if fi.mtime() != None: 15 | timeline.append([fi.mtime(), fi.filename(), " modified"]) 16 | if fi.crtime() != None: 17 | timeline.append([fi.crtime(), fi.filename(), " created"]) 18 | if fi.ctime() != None: 19 | timeline.append([fi.ctime(), fi.filename(), " changed"]) 20 | if fi.atime() != None: 21 | timeline.append([fi.atime(), fi.filename(), " accessed"]) 22 | 23 | 24 | def main(): 25 | if len(sys.argv) < 2: 26 | print("Usage: {} ".format(sys.argv[0])) 27 | exit(1) 28 | dfxml.read_dfxml(xmlfile=open(sys.argv[1], "rb"), callback=process) 29 | timeline.sort() 30 | for record in timeline: 31 | print("\t".join(map(str, record))) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /demos/demo_mac_timeline_iter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # produce a MAC-times timeline using the iterative DFXML interface. 17 | # works under either Python2 or Python3 18 | 19 | import os 20 | import sys 21 | 22 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 23 | import dfxml 24 | 25 | 26 | def main(): 27 | if len(sys.argv) < 2: 28 | print("Usage: {} ".format(sys.argv[0])) 29 | exit(1) 30 | 31 | timeline = [] 32 | 33 | for fi in dfxml.iter_dfxml(xmlfile=open(sys.argv[1], "rb")): 34 | if fi.mtime() != None: 35 | timeline.append([fi.mtime(), fi.filename(), " modified"]) 36 | if fi.crtime() != None: 37 | timeline.append([fi.crtime(), fi.filename(), " created"]) 38 | if fi.ctime() != None: 39 | timeline.append([fi.ctime(), fi.filename(), " changed"]) 40 | if fi.atime() != None: 41 | timeline.append([fi.atime(), fi.filename(), " accessed"]) 42 | 43 | timeline.sort() 44 | 45 | for record in timeline: 46 | print("\t".join(map(str, record))) 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /demos/demo_mac_timeline_objects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. 
For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # produce a MAC-times timeline using the DFXML Objects interface. 17 | # works under either Python2 or Python3 18 | 19 | import os 20 | import sys 21 | 22 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 23 | import dfxml 24 | import dfxml.objects as Objects 25 | 26 | 27 | def main(): 28 | if len(sys.argv) < 2: 29 | print("Usage: {} ".format(sys.argv[0])) 30 | exit(1) 31 | 32 | timeline = [] 33 | 34 | for event, obj in Objects.iterparse(sys.argv[1]): 35 | # Only work on FileObjects 36 | if not isinstance(obj, Objects.FileObject): 37 | continue 38 | if not obj.mtime is None: 39 | timeline.append([obj.mtime, obj.filename, " modified"]) 40 | if not obj.crtime is None: 41 | timeline.append([obj.crtime, obj.filename, " created"]) 42 | if not obj.ctime is None: 43 | timeline.append([obj.ctime, obj.filename, " changed"]) 44 | if not obj.atime is None: 45 | timeline.append([obj.atime, obj.filename, " accessed"]) 46 | 47 | timeline.sort() 48 | 49 | for record in timeline: 50 | print("\t".join(map(str, record))) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /demos/demo_piecewise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | 3 | # 4 | # Demo program that prints piecewise hashes and reports on co-occurrence of hashes. 
5 | # 6 | # Multimap from http://stackoverflow.com/questions/1731971/is-there-multimap-implementation-in-python 7 | 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 12 | import collections 13 | import math 14 | import sys 15 | 16 | import dfxml 17 | 18 | 19 | class SectorCorrelator: 20 | def __init__(self): 21 | self.hashdb = collections.defaultdict( 22 | list 23 | ) # key is the MD5 code, value is a list of matches 24 | self.files = 0 25 | self.sectors = 0 26 | 27 | def process(self, fi): 28 | """Process the objects as they are read from the XML file""" 29 | self.files += 1 30 | print(fi.filename()) 31 | for br in fi.byte_runs(): 32 | self.sectors += 1 33 | self.hashdb[br.hashdigest["md5"]].append((fi.filename(), br.file_offset)) 34 | 35 | def print_report(self): 36 | print("Files processed: {}".format(self.files)) 37 | print("Sectors processed: {}".format(self.sectors)) 38 | print("") 39 | print("The following duplicates were found:") 40 | print("Hash Filename Offset in file") 41 | for hash, ents in self.hashdb.items(): 42 | if len(ents) > 1: 43 | print("{} -- {} copies found".format(hash, len(ents))) 44 | for e in sorted(ents): 45 | print(" {} {:8,}".format(e[0], e[1])) 46 | print("") 47 | 48 | 49 | sc = SectorCorrelator() 50 | dfxml.read_dfxml(xmlfile=open(sys.argv[1], "rb"), callback=sc.process) 51 | sc.print_report() 52 | -------------------------------------------------------------------------------- /demos/demo_plot_times.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | import time 5 | 6 | import fiwalk 7 | 8 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 9 | import dfxml 10 | 11 | if __name__ == "__main__": 12 | import sys 13 | from optparse import OptionParser 14 | from sys import stdout 15 | 16 | parser = OptionParser() 17 | parser.usage = "%prog [options] (xmlfile or imagefile)" 18 | (options, args) = parser.parse_args() 19 | 20 | if not args: 21 | parser.print_usage() 22 | exit(1) 23 | 24 | sizes = [] 25 | dates = {} 26 | 27 | def callback(fi): 28 | sizes.append(fi.filesize()) 29 | for tag, val in fi.times().iteritems(): 30 | date = val.datetime() 31 | dates[date] = dates.get(date, 0) + 1 32 | 33 | fn = args[0] 34 | if fn.endswith(".xml"): 35 | fiwalk.fiwalk_using_sax(xmlfile=open(fn), callback=callback) 36 | else: 37 | fiwalk.fiwalk_using_sax(imagefile=open(fn), callback=callback) 38 | 39 | print("Here is the dates array:") 40 | for d in sorted(dates.keys()): 41 | print("{} {}".format(d, dates[d])) 42 | -------------------------------------------------------------------------------- /demos/demo_readtimes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Reads an fiwalk XML file and reports how many of the files are still in the image...""" 3 | 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 8 | import time 9 | 10 | import dfxml 11 | import dfxml.fiwalk as fiwalk 12 | 13 | 14 | def calc_jumps(fis, title): 15 | print(title) 16 | print("Count: %d" % (len(fis))) 17 | from histogram import histogram 18 | 19 | h = histogram() 20 | pos = 0 21 | backwards = 0 22 | prev_frag_count = 0 23 | for fi in fis: 24 | for i in range(0, len(fi.byte_runs())): 25 | run = fi.byte_runs()[i] 26 | try: 27 | sector = run.start_sector() 28 | if sector < pos: 29 | backwards += 1 30 | h.add((prev_frag_count, i)) 31 | pos = sector 
32 | except AttributeError: 33 | pass 34 | pref_frag_count = len(fi.byte_runs()) 35 | 36 | print("Backwards Jumps: %d" % backwards) 37 | print("Histogram of backwards:") 38 | h.print_top(10) 39 | 40 | 41 | if __name__ == "__main__": 42 | import sys 43 | from optparse import OptionParser 44 | from subprocess import PIPE, Popen 45 | 46 | global options 47 | 48 | parser = OptionParser() 49 | parser.add_option("-d", "--debug", help="prints debugging info", dest="debug") 50 | parser.add_option("-x", "--xmlfile", help="XML file (optional)") 51 | parser.add_option("-i", "--imagefile", help="image file (required)") 52 | parser.usage = "%prog [options] xmlfile diskimage" 53 | (options, args) = parser.parse_args() 54 | 55 | if not options.xmlfile or not options.imagefile: 56 | parser.print_help() 57 | sys.exit(1) 58 | 59 | # Read the redaction configuration file 60 | imagefile = open(options.imagefile, "r") 61 | if options.xmlfile: 62 | xmlfile = open(options.xmlfile, "r") 63 | else: 64 | xmlfile = None 65 | 66 | t0 = time.time() 67 | fis = fiwalk.fileobjects_using_sax(imagefile=imagefile, xmlfile=xmlfile) 68 | t1 = time.time() 69 | print("Time to read file objects: {} seconds".format(t1 - t0)) 70 | 71 | # Create a new array with just those that we can read 72 | def resident_file(fi): 73 | if len(fi.byte_runs()) == 0: 74 | return False 75 | if len(fi.byte_runs()) > 2: 76 | return False 77 | if hasattr(fi.byte_runs()[0], "uncompressed_len"): 78 | return False 79 | if not hasattr(fi.byte_runs()[0], "img_offset"): 80 | return False 81 | return True 82 | 83 | fis = filter(resident_file, fis) 84 | 85 | print("Native order: ") 86 | calc_jumps(fis, "Native Order") 87 | 88 | def sort_function(a, b): 89 | a0 = a.byte_runs()[0].start_sector() 90 | b0 = b.byte_runs()[0].start_sector() 91 | if a0 < b0: 92 | return -1 93 | if a0 == b0: 94 | return 0 95 | return 1 96 | 97 | fis.sort(sort_function) 98 | calc_jumps(fis, "Sorted Order") 99 | -------------------------------------------------------------------------------- /demos/demo_registry_timeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 6 | import dfxml 7 | 8 | timeline = [] 9 | 10 | 11 | def process(co): 12 | mtime = co.mtime() 13 | if mtime != None: 14 | timeline.append([co.mtime(), co.full_path(), " modified"]) 15 | 16 | 17 | def main(): 18 | if len(sys.argv) < 2: 19 | print("Usage: {} ".format(sys.argv[0])) 20 | exit(1) 21 | dfxml.read_regxml(xmlfile=open(sys.argv[1], "rb"), callback=process) 22 | timeline.sort() 23 | for record in timeline: 24 | print("\t".join(map(str, record))) 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /demos/demo_sizes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | 3 | # 4 | # Demo program that shows how to calculate the average size of file objects in a DFXML file 5 | # 6 | 7 | import collections 8 | import math 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 13 | import dfxml 14 | 15 | sums = collections.Counter() 16 | sum_of_squares = collections.Counter() 17 | count = collections.Counter() 18 | 19 | 20 | def func(fi): 21 | ext = fi.ext() 22 | count[ext] += 1 23 | sums[ext] += fi.filesize() 24 | sum_of_squares[ext] = fi.filesize() ** 2 25 | 26 | 27 
| dfxml.read_dfxml(xmlfile=open(sys.argv[1], "rb"), callback=func) 28 | fmt = "{:8} {:8} {:8} {:8} {:8}" 29 | print(fmt.format("Ext", "Count", "Total", "Average", "StdDev")) 30 | for ext in sums.keys(): 31 | print( 32 | fmt.format( 33 | ext, 34 | count[ext], 35 | sums[ext], 36 | sums[ext] / count[ext], 37 | math.sqrt(sum_of_squares[ext] / count[ext] - (sums[ext] / count[ext]) ** 2), 38 | ) 39 | ) 40 | -------------------------------------------------------------------------------- /demos/demo_spark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Shows how DFXML works with spark. 4 | # This program runs spark if it is not already running 5 | 6 | 7 | import os 8 | import sys 9 | 10 | sys.path.append("../python") 11 | from dfxml_writer import DFXMLWriter 12 | 13 | 14 | def spark_demo(): 15 | """A small spark program. Must be run under spark""" 16 | import operator 17 | 18 | from pyspark import SparkConf, SparkContext 19 | 20 | conf = SparkConf() 21 | sc = SparkContext(conf=conf) 22 | m = 1000000 23 | result = sc.parallelize(range(0, m + 1)).reduce(operator.add) 24 | print(f"The sum of the numbers 0 to {m} is {result}") 25 | assert result == 500000500000 26 | 27 | 28 | def run_spark(): 29 | # If we are running under spark, just call check_spark. 30 | # Otherwise, run recursively under spark-submit 31 | import os 32 | 33 | if "SPARK_ENV_LOADED" in os.environ: 34 | return # yea! Spark is running 35 | 36 | # 37 | # Re-run this script under spark, and then exit. 38 | # 39 | import subprocess 40 | 41 | r = subprocess.run(["spark-submit", __file__] + sys.argv[1:]) 42 | assert r.returncode == 0 43 | exit(0) 44 | 45 | 46 | if __name__ == "__main__": 47 | import argparse 48 | import time 49 | 50 | parser = argparse.ArgumentParser() 51 | args = parser.parse_args() 52 | 53 | run_spark() 54 | 55 | dfxml = DFXMLWriter( 56 | filename=f"demo_spark_{int(time.time())}.dfxml", prettyprint=True 57 | ) 58 | spark_demo() 59 | # DFXML file gets written automatically when program exits. 60 | exit(0) 61 | -------------------------------------------------------------------------------- /demos/spark/demo_spark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Shows how DFXML works with Spark. 4 | # This program runs Spark if it is not already running 5 | 6 | 7 | import os 8 | import sys 9 | 10 | sys.path.append("../python") 11 | from dfxml_writer import DFXMLWriter 12 | 13 | 14 | def spark_demo(): 15 | """A small Spark program. Must be run under Spark""" 16 | import operator 17 | 18 | from pyspark import SparkConf, SparkContext 19 | 20 | conf = SparkConf() 21 | sc = SparkContext(conf=conf) 22 | m = 1000000 23 | result = sc.parallelize(range(0, m + 1)).reduce(operator.add) 24 | print(f"The sum of the numbers 0 to {m} is {result}") 25 | assert result == 500000500000 26 | 27 | 28 | def run_spark(): 29 | # If we are running under Spark, just call check_spark. 30 | # Otherwise, run recursively under spark-submit 31 | import os 32 | 33 | if "SPARK_ENV_LOADED" in os.environ: 34 | return # yea! Spark is running 35 | 36 | # 37 | # Re-run this script under Spark, and then exit. 
38 | # 39 | import subprocess 40 | 41 | r = subprocess.run(["spark-submit", __file__] + sys.argv[1:]) 42 | assert r.returncode == 0 43 | exit(0) 44 | 45 | 46 | if __name__ == "__main__": 47 | import argparse 48 | import time 49 | 50 | parser = argparse.ArgumentParser() 51 | args = parser.parse_args() 52 | 53 | run_spark() 54 | 55 | dfxml = DFXMLWriter( 56 | filename=f"demo_spark_{int(time.time())}.dfxml", prettyprint=True 57 | ) 58 | spark_demo() 59 | # DFXML file gets written automatically when program exits. 60 | exit(0) 61 | -------------------------------------------------------------------------------- /demos/vmstats/Makefile: -------------------------------------------------------------------------------- 1 | all:vmstats_pretty.dfxml vmstatsN 2 | 3 | vmstats_pretty.dfxml: vmstats.py 4 | python3 vmstats.py --prettyprint vmstats_pretty.dfxml 5 | 6 | vmstatsN: vmstats.py 7 | python3 vmstats.py --repeat 24 --interval 10 vmstatsN-new.dfxml 8 | /bin/mv -f vmstatsN-new.dfxml vmstatsN.dfxml 9 | 10 | -------------------------------------------------------------------------------- /demos/vmstats/vmstats_decode.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | History for {{host}} 5 | 6 | 7 | 14 | 15 | 16 |
[vmstats_decode.html body: the HTML markup was stripped in this dump; the surviving template text follows.]
[PREV] {{host}} {{start_time}} [NEXT]
Stats: CPU Utilization: {{cpu_percent}} % | Mem Utilization: {{mem_percent}} %
Processes (CPU Time): PID, NAME, User, System
{% for ps in ps_list %} {{ps.pid}} {{ps.name}} {{ps.user}} {{ps.system}} {% endfor %}
-------------------------------------------------------------------------------- /demos/vmstats/vmstats_json.html: --------------------------------------------------------------------------------
[vmstats_json.html body: the HTML markup was stripped in this dump; the surviving page text follows.]
History for my host
[PREV] [NEXT]
Stats: Host: | Time: | CPU Utilization: | Mem Utilization: | Page: 1
Processes (CPU Time): PID, NAME, User, System, RSS
53 | 54 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /dfxml/bin/.gitignore: -------------------------------------------------------------------------------- 1 | .pytest_cache 2 | -------------------------------------------------------------------------------- /dfxml/bin/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | all: \ 17 | check 18 | 19 | .PHONY: \ 20 | check-cat_fileobjects \ 21 | check-dfxml_tool \ 22 | check-idifference-dfxml \ 23 | check-mac_timelines \ 24 | check-Objects \ 25 | clean-Objects 26 | 27 | #WORKING_DIR = $(shell pwd) 28 | 29 | # Export the directories to use 30 | #check: export PYTHONPATH = /home/user01/dfxml_python/ 31 | check: export DFXML_DIR = $(shell cd ../.. ; pwd) 32 | check: export PYTHONPATH = $(DFXML_DIR) 33 | check: export TOOL_DIR = $(shell pwd) 34 | check: export TEST_DIR = ./tests 35 | check: export DEMO_DIR = ../../demos 36 | check: export SAMPLE_DIR = ../../samples 37 | check: \ 38 | check-cat_fileobjects \ 39 | check-dfxml_tool \ 40 | check-idifference-dfxml \ 41 | check-mac_timelines \ 42 | check-Objects 43 | @echo "" 44 | @echo "Tests passed!" 45 | @echo "Clean up the test results with 'make clean'." 46 | 47 | doc: \ 48 | Objects.html 49 | 50 | Objects.html: \ 51 | Objects.py 52 | python3 -m pydoc -w Objects 53 | 54 | check-cat_fileobjects: 55 | $(TEST_DIR)/test_cat_fileobjects.sh 56 | 57 | check-dfxml_tool: 58 | $(TEST_DIR)/test_dfxml_tool.sh 59 | 60 | check-idifference-dfxml: 61 | $(TEST_DIR)/test_idifference_to_dfxml.sh 62 | 63 | # TODO: Investigate cause of state transition exception 64 | # 65 | # Skip this temporarily, since there is an unexpected state 66 | # transition exception thrown by line 4840 in python/dxml/object.py, 67 | # when the file samples/simple.xml is processed 68 | #check-mac_timelines: 69 | # ./tests/test_mac_timelines.sh 70 | 71 | clean: clean-Objects 72 | rm -f dfxml_tool_*xml 73 | rm -f cat_test_*.dfxml 74 | rm -f idifference_test.txt 75 | rm -f idifference_test*.dfxml 76 | rm -f demo_mac_timeline*.txt 77 | -------------------------------------------------------------------------------- /dfxml/bin/README.md: -------------------------------------------------------------------------------- 1 | # Tools for working with DFXML-files 2 | 3 | This directory contains scripts that can be run when the `dfxml` package is installed. Some of these tools are added to the shell's `PATH` when the `dfxml` package is installed. Others should be called in-place, e.g. with `python3 $PWD/allocation_counter.py`. 4 | 5 | 6 | ## Installed tools 7 | 8 | Some tools are provided as command-line programs when the `dfxml` module is installed. 
Their source is in this directory, with the suffix `.py`. The link in this table goes to the tool's documentation and testing directory. 9 | 10 | | Program name | Short description | 11 | |-------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------| 12 | | [`walk_to_dfxml`](../../tests/walk_to_dfxml/#walk_to_dfxml) | Fully walk the current working directory and record all files encountered. | 13 | | [`make_differential_dfxml`](../../tests/make_differential_dfxml/#make_differential_dfxml) | Produce a DFXML file denoting file system changes noted by two input DFXML files. | 14 | 15 | ## In-place scripts 16 | 17 | The following DFXML tools are provided in this directory: 18 | 19 | | Script name | Short description | 20 | |----------------------------|--------------------------------------------------------------------------------------| 21 | | `allocation_counter.py` | Produces a cross-tabulation of the allocation state of each file's inode and name. | 22 | | `cat_fileobjects.py` | Prints a new DFXML of all fileobjects in an input DFXML file to stdout. | 23 | | `cat_partitions.py` | Concatenates dfxml-files containing one partition each and prints result to stdout. | 24 | | `deidentify_xml.py` | Removes PII from filenames in a DFXML file. | 25 | | `dfxinfo.py` | Print a summary of a DFXML file - summary of all files, duplicate files, file types. | 26 | | `dfxml_gen.py` | generates DFXML. Based on the C generator. | 27 | | `dfxml_html.py` | A collection of functions for generating HTML. | 28 | | `Extractor.py` | Extracts files specified in a XML-file (or all) from an image to a target directory. | 29 | | `hash_sectors.py` | Outputs sector hashes for sectors with files matching a predicate. | 30 | | `iblkfind.py` | Outputs files, which are located in a given set of sectors. | 31 | | `icarvingtruth.py` | Finds the ground truth in a predefined series of disk images. | 32 | | `idifference.py` | Generates a report about what's different between two disk images. | 33 | | `igrep.py` | Find files in image, which contain the given string. | 34 | | `ihistogram.py` | Draws a quick histogram of the timestamps in an XML file. | 35 | | `imap.py` | Map image files and try to find "missing" data by comparing with the other imgs. | 36 | | `iredact.py` | Image redaction tool using rules described in the file. | 37 | | `ireport.py` | Generates stats from a DFXML file(s). | 38 | | `iverify.py` | Reads an XML file and image and verifies that the files are present. | 39 | | `rdifference.py` | Finds and reports differences in two Windows registry hive-files. | 40 | | `report_silent_changes.py` | Takes a differentially-annotated DFXML file and outputs subtle and 'silent' changes. 
| 41 | 42 | 43 | ### Work needed 44 | 45 | - `dfxml_tool.py` 46 | - `idifference2.py` 47 | - `iexport.py` 48 | - `exp_slack.py` 49 | - `validate_dfxml.py` 50 | - `nsrl_rds.py` 51 | - `corpus_sync.py` 52 | 53 | 54 | ### Uncategorized 55 | 56 | - `break_out_diffs_by_anno.py` 57 | - `mem_info.py` (no dependencies) 58 | -------------------------------------------------------------------------------- /dfxml/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfxml-working-group/dfxml_python/7897c419bdf376220955aea03a43f2b084d7d901/dfxml/bin/__init__.py -------------------------------------------------------------------------------- /dfxml/bin/allocation_counter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | For a disk image or DFXML file, this program produces a cross-tabulation of the allocation state of each file's inode and name. 18 | """ 19 | 20 | __version__ = "0.1.1" 21 | # Version 0.2.0: 22 | # * Tabular output in HTML 23 | # * Tabular output in LaTeX 24 | 25 | import collections 26 | import logging 27 | import os 28 | import sys 29 | import xml.etree.ElementTree as ET 30 | 31 | import dfxml.bin.make_differential_dfxml 32 | import dfxml.objects as Objects 33 | 34 | _logger = logging.getLogger(os.path.basename(__file__)) 35 | 36 | 37 | def main(): 38 | counter = collections.defaultdict(lambda: 0) 39 | prev_obj = None 40 | for event, obj in Objects.iterparse(args.input_image): 41 | if isinstance(obj, Objects.FileObject): 42 | if ( 43 | args.ignore_virtual_files 44 | and dfxml.bin.make_differential_dfxml.ignorable_name(obj.filename) 45 | ): 46 | continue 47 | counter[(obj.alloc_inode, obj.alloc_name)] += 1 48 | 49 | # Inspect weird data 50 | if args.debug and obj.alloc_inode is None and obj.alloc_name is None: 51 | _logger.debug("Encountered a file with all-null allocation.") 52 | _logger.debug("Event: %r." % event) 53 | _logger.debug( 54 | "Previous object: %s." % ET.tostring(prev_obj.to_Element()) 55 | ) 56 | _logger.debug("Current object: %s." % ET.tostring(obj.to_Element())) 57 | prev_obj = obj 58 | print(repr(counter)) 59 | 60 | 61 | if __name__ == "__main__": 62 | import argparse 63 | 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument( 66 | "--ignore-virtual-files", 67 | action="store_true", 68 | help="Use the same file-ignoring rules as make_differential_dfxml.py.", 69 | ) 70 | parser.add_argument( 71 | "-d", "--debug", action="store_true", help="Enable debug printing." 
72 | ) 73 | parser.add_argument("input_image", help="Disk image, or DFXML file.") 74 | args = parser.parse_args() 75 | 76 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 77 | 78 | main() 79 | -------------------------------------------------------------------------------- /dfxml/bin/break_out_diffs_by_anno.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | This program reads a DFXML file with differential annotations and produces a table. 18 | 19 | Columns: FileObject annotation (is it a new file? renamed? etc.). 20 | Rows: Counts of instances of a property being changed per FileObject annotation. One row per FileObject direct-child element. 21 | """ 22 | 23 | __version__ = "0.1.0" 24 | 25 | import collections 26 | import sys 27 | 28 | import dfxml.objects as Objects 29 | 30 | 31 | def main(): 32 | # Key: (annotation, histogram) 33 | hist = collections.defaultdict(int) 34 | for event, obj in Objects.iterparse(sys.argv[1]): 35 | if event != "end" or not isinstance(obj, Objects.FileObject): 36 | continue 37 | # Loop through annotations 38 | for anno in obj.annos: 39 | # Loop through diffs 40 | for diff in obj.diffs: 41 | hist[(anno, diff)] += 1 42 | 43 | annos = Objects.FileObject._diff_attr_names.keys() 44 | print( 45 | """ 46 | 47 | 48 | 49 | 50 | """ 51 | ) 52 | for anno in annos: 53 | print(" " % anno) 54 | print( 55 | """ 56 | 57 | 58 | 59 | 60 | """ 61 | ) 62 | for diff in sorted(Objects.FileObject._all_properties): 63 | print(" ") 64 | if diff in Objects.FileObject._incomparable_properties: 65 | continue 66 | print(" " % diff) 67 | for anno in annos: 68 | print(" " % hist[(anno, diff)]) 69 | print(" ") 70 | print( 71 | """ 72 | 73 |
Property%s
%s%d
74 | """ 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /dfxml/bin/cat_fileobjects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | Make a new DFXML file of all fileobjects in an input DFXML file. 18 | """ 19 | 20 | __version__ = "0.4.0" 21 | 22 | import logging 23 | import os 24 | import sys 25 | import xml.etree.ElementTree as ET 26 | 27 | import dfxml 28 | 29 | _logger = logging.getLogger(os.path.basename(__file__)) 30 | 31 | if sys.version < "3": 32 | _logger.error( 33 | "Due to Unicode issues with Python 2's ElementTree, Python 3 and up is required.\n" 34 | ) 35 | exit(1) 36 | 37 | 38 | def main(): 39 | print( 40 | """\ 41 | 42 | 46 | 47 | 48 | %s 49 | %s 50 | 51 | %s 52 | 53 | 54 | 55 | %s 56 | \ 57 | """ 58 | % ( 59 | dfxml.XMLNS_DFXML, 60 | dfxml.XMLNS_DELTA, 61 | dfxml.DFXML_VERSION, 62 | sys.argv[0], 63 | __version__, 64 | " ".join(sys.argv), 65 | args.filename, 66 | ) 67 | ) 68 | 69 | ET.register_namespace("delta", dfxml.XMLNS_DELTA) 70 | 71 | xs = [] 72 | for fi in dfxml.iter_dfxml( 73 | xmlfile=open(args.filename, "rb"), preserve_elements=True 74 | ): 75 | _logger.debug("Processing: %s" % str(fi)) 76 | if args.cache: 77 | xs.append(fi.xml_element) 78 | else: 79 | _logger.debug("Printing without cache: %s" % str(fi)) 80 | print(dfxml.ET_tostring(fi.xml_element, encoding="unicode")) 81 | if args.cache: 82 | for x in xs: 83 | _logger.debug("Printing with cache: %s" % str(fi)) 84 | print(dfxml.ET_tostring(x, encoding="unicode")) 85 | 86 | print("""""") 87 | 88 | 89 | if __name__ == "__main__": 90 | import argparse 91 | 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument("filename") 94 | parser.add_argument("--cache", action="store_true") 95 | parser.add_argument("--debug", action="store_true") 96 | args = parser.parse_args() 97 | 98 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 99 | 100 | main() 101 | -------------------------------------------------------------------------------- /dfxml/bin/conftest.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is empty, but it permits test discovery of the subdirectory. 
3 | # See: 4 | # https://stackoverflow.com/questions/10253826/path-issue-with-pytest-importerror-no-module-named-yadayadayada 5 | -------------------------------------------------------------------------------- /dfxml/bin/corpus_sync.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | # 3 | # sync corpus based on DFXML files 4 | 5 | from collections import defaultdict 6 | 7 | import dfxml 8 | import dfxml.fiwalk as fiwalk 9 | 10 | 11 | class CorpusDB: 12 | def __init__(self): 13 | self.all = [] 14 | self.md5db = defaultdict(list) # maps from 15 | self.pathdb = dict() 16 | 17 | def process_fi(self, fi): 18 | self.all.append(fi) 19 | self.md5db[fi.md5()].append(fi) 20 | self.pathdb[fi.filename()] = fi 21 | 22 | def ingest_dfxml(self, fname): 23 | fiwalk.fiwalk_using_sax( 24 | xmlfile=open(fname, "rb"), flags=fiwalk.ALLOC_ONLY, callback=self.process_fi 25 | ) 26 | 27 | def __iter__(self): 28 | return self.all.__iter__() 29 | 30 | def __delitem__(self, fi): 31 | self.all.remove(fi) 32 | self.md5db[fi.md5()].remove(fi) 33 | del self.pathdb[fi.filename()] 34 | 35 | 36 | if __name__ == "__main__": 37 | from copy import deepcopy 38 | from optparse import OptionParser 39 | 40 | parser = OptionParser() 41 | (options, args) = parser.parse_args() 42 | 43 | (fn1, fn2) = args[0:2] 44 | print("# Reading B - the master {}".format(fn1)) 45 | b = CorpusDB() 46 | b.ingest_dfxml(fn1) 47 | 48 | print("# Reading A - the current system {}".format(fn2)) 49 | a = CorpusDB() 50 | a.ingest_dfxml(fn2) 51 | 52 | print("# Files in A that should not be in B:") 53 | rmlist = [afi for afi in a if (afi.md5() not in b.md5db)] 54 | for afi in rmlist: 55 | print("rm {}".format(afi.filename())) 56 | del a[afi] 57 | 58 | fixups = [] 59 | for bfi in b: 60 | if bfi.filename() in a.pathdb and bfi.md5() == a.pathdb[bfi.filename()].md5(): 61 | continue 62 | if bfi.md5() not in a.md5db: 63 | print("get {}".format(bfi.filename())) 64 | continue 65 | 66 | afi = a.md5db[bfi.md5()][0] 67 | nfn = bfi.filename() + ".new" 68 | print("ln {} {}".format(afi.filename(), nfn)) 69 | fixups.append((nfn, bfi.filename())) 70 | 71 | for nfn, bfi_filename in fixups: 72 | print("mv {} {}".format(nfn, bfi_filename)) 73 | -------------------------------------------------------------------------------- /dfxml/bin/dedup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # dedup - detect and optionally remove duplicates based on a DFXML file 4 | 5 | import os 6 | import xml 7 | 8 | import dfxml 9 | 10 | 11 | class dedup: 12 | def __init__(self): 13 | from collections import defaultdict 14 | 15 | self.seen = defaultdict(list) 16 | self.files = 0 17 | self.md5s = 0 18 | 19 | def process(self, fi): 20 | self.files += 1 21 | if fi.md5(): 22 | self.seen[fi.md5()].append(fi.filename()) 23 | self.md5s += 1 24 | 25 | def find_dups(self, cb=None): 26 | for md5, names in self.seen.items(): 27 | if cb and len(names) > 1: 28 | cb(names) 29 | 30 | def report(self, func, cb): 31 | for md5, names in self.seen.items(): 32 | if func(names): 33 | cb(names) 34 | 35 | 36 | def process_dups(names): 37 | print("dups: ", names) 38 | 39 | 40 | if __name__ == "__main__": 41 | from argparse import ArgumentParser 42 | 43 | global options 44 | 45 | parser = ArgumentParser() 46 | parser.add_argument("dfxml", type=str) 47 | parser.add_argument("--verbose", action="store_true") 48 | parser.add_argument( 49 | "--prefix", type=str, help="Only output files with 
the given prefix" 50 | ) 51 | parser.add_argument( 52 | "--distinct", action="store_true", help="Report the distinct files" 53 | ) 54 | parser.add_argument( 55 | "--dups", 56 | action="store_true", 57 | help="Report the files that are dups, and give dup count", 58 | ) 59 | args = parser.parse_args() 60 | 61 | dobj = dedup() 62 | 63 | try: 64 | dfxml.read_dfxml(open(args.dfxml, "rb"), callback=dobj.process) 65 | except xml.parsers.expat.ExpatError: 66 | pass 67 | 68 | print( 69 | "Total files: {:,} total MD5s processed: {:,} Unique MD5s: {:,}".format( 70 | dobj.files, dobj.md5s, len(dobj.seen) 71 | ) 72 | ) 73 | 74 | if args.distinct: 75 | 76 | def report_distinct(names): 77 | if args.prefix and not names[0].startswith(args.prefix): 78 | return 79 | print("distinct: ", names[0]) 80 | 81 | dobj.report(lambda names: len(names) == 1, report_distinct) 82 | 83 | if args.dups: 84 | 85 | def report_dups(names): 86 | for name in names: 87 | if not args.prefix or name.startswith(args.prefix): 88 | print("dups: {} {}".format(name, len(names))) 89 | 90 | dobj.report(lambda names: len(names) > 1, report_dups) 91 | -------------------------------------------------------------------------------- /dfxml/bin/deidentify_xml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # deidentify_xml.py: 4 | # Given XML for a disk, remove information that might be personally identifying from filenames. 5 | # remember the mapping so that directory names don't get changed. 6 | # 7 | # 2012-10-27 slg - updated to Python3 8 | 9 | import typing 10 | 11 | private_dirs = ["home/", "usr/home", "Users"] 12 | ok_top_paths_win = ["program files/", "System", "Windows"] 13 | ok_top_paths_mac = [ 14 | "bin/", 15 | "usr", 16 | "etc", 17 | "private", 18 | "applications", 19 | "developer", 20 | "bin", 21 | "sbin", 22 | "lib", 23 | "dev", 24 | ] 25 | ok_top_paths = ok_top_paths_win + ok_top_paths_mac + ["$orphanfiles"] 26 | acceptable_extensions = ["exe", "dll", "sys", "com", "hlp"] 27 | 28 | import os 29 | import os.path 30 | import sys 31 | 32 | partdir: typing.Dict[str, str] = dict() 33 | 34 | 35 | def sanitize_part(part): 36 | """Sanitize a part of a pathname in a consistent manner""" 37 | if part not in partdir: 38 | partdir[part] = "P%07d" % (len(partdir) + 1) 39 | return partdir[part] 40 | 41 | 42 | def sanitize_filename(fname): 43 | """Given a filename, sanitize each part and return it.""" 44 | ofn = fname 45 | jfn = fname 46 | if jfn[0] == "/": 47 | jfn = jfn[1:] 48 | pathok = False 49 | for p in ok_top_paths: 50 | if jfn.lower().startswith(p): 51 | pathok = True 52 | 53 | if not pathok: 54 | # if the path is not okay, replace all of the parts 55 | # and the name up to the .ext 56 | parts = fname.split("/") 57 | parts[:-1] = [sanitize_part(s) for s in parts[:-1]] 58 | (root, ext) = os.path.splitext(parts[-1]) 59 | if ext not in acceptable_extensions: 60 | parts[-1] = sanitize_part(root) + ext 61 | fname = "/".join(parts) 62 | if ofn[0] == "/" and fname[0] != "/": 63 | fname = "/" + fname 64 | return fname 65 | 66 | 67 | class xml_sanitizer: 68 | """Read and write the XML, but sanitize the filename elements.""" 69 | 70 | def __init__(self, out): 71 | self.out = out 72 | self.cdata = "" 73 | 74 | def _start_element(self, name, attrs): 75 | """Handles the start of an element for the XPAT scanner""" 76 | s = ["<", name] 77 | if attrs: 78 | for a, v in attrs.items(): 79 | if '"' not in v: 80 | s += [" ", a, '="', v, '"'] 81 | else: 82 | s += [" ", a, "='", v, "'"] 
83 | s += [">"] 84 | self.out.write("".join(s)) 85 | self.cdata = "" # new element 86 | 87 | def _end_element(self, name): 88 | """Handles the end of an element for the XPAT scanner""" 89 | if name == "filename": 90 | self.cdata = sanitize_filename(self.cdata) 91 | if self.cdata == "\n": 92 | self.cdata = "" 93 | self.out.write("".join([self.cdata, ""])) 94 | self.cdata = "" 95 | 96 | def _char_data(self, data): 97 | """Handles XML data""" 98 | self.cdata += data 99 | 100 | def process_xml_stream(self, xml_stream): 101 | "Run the reader on a given XML input stream" 102 | import xml.parsers.expat 103 | 104 | p = xml.parsers.expat.ParserCreate() 105 | p.StartElementHandler = self._start_element 106 | p.EndElementHandler = self._end_element 107 | p.CharacterDataHandler = self._char_data 108 | p.ParseFile(xml_stream) 109 | 110 | 111 | if __name__ == "__main__": 112 | from optparse import OptionParser 113 | 114 | global options 115 | parser = OptionParser() 116 | parser.add_option("-t", "--test", help="Test a specific pathanme to sanitize") 117 | (options, args) = parser.parse_args() 118 | 119 | if options.test: 120 | if os.path.isdir(options.test): 121 | for dirpath, dirnames, filenames in os.walk(options.test): 122 | for filename in filenames: 123 | fn = dirpath + "/" + filename 124 | print("%s\n %s" % (fn, sanitize_filename(fn))) 125 | 126 | x = xml_sanitizer(sys.stdout) 127 | x.process_xml_stream(open(args[0], "rb")) 128 | -------------------------------------------------------------------------------- /dfxml/bin/exp_slack.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | # 3 | # exp_slack.py: experiment on the slack space 4 | # quantify slack space 5 | # 6 | # (c) Martin Mulazzani, 2012 7 | # Additions by Simson Garfinkel 8 | 9 | import os 10 | import re 11 | import sys 12 | 13 | import dfxml.fiwalk as fiwalk 14 | 15 | 16 | def proc(fi): 17 | # Skip the virtual files? 18 | if fi.filename()[0:1] in ["$"]: 19 | return 20 | if fi.has_contents() and fi.is_file(): 21 | outstring = ( 22 | str(fi.partition()) 23 | + "\t" 24 | + fi.filename() 25 | + "\t" 26 | + str(fi.filesize()) 27 | + "\t" 28 | + str(fi.times()) 29 | + "\n" 30 | ) 31 | f_out.write(outstring) 32 | 33 | 34 | if __name__ == "__main__": 35 | if len(sys.argv) != 2: 36 | print("usage: ./fast_slack.py ") 37 | sys.exit(1) 38 | 39 | # input 40 | file_name = sys.argv[1] 41 | f = open(file_name, "rb") 42 | 43 | # output is to stdout 44 | outfile = sys.stdout 45 | 46 | # find partition information, blocksize and filesystem 47 | # 1st partition has no. 
1, to correspond to fiwalk output 48 | partitioncounter = 0 49 | f.write( 50 | "********************************** PARTITIONS **********************************" 51 | ) 52 | f.write("\nNo\tBlocksize\tFilesystem\n") 53 | 54 | for line in f: 55 | if re.search("block_size", line): 56 | partitioncounter += 1 57 | f_out.write(str(partitioncounter)) 58 | f_out.write("\t") 59 | f_out.write(re.split(">|<", line)[2]) 60 | if re.search("ftype_str", line): 61 | f_out.write("\t\t") 62 | f_out.write(re.split(">|<", line)[2]) 63 | f_out.write("\n") 64 | 65 | f_out.write( 66 | "\n\n************************************* DATA *************************************\n" 67 | ) 68 | f_out.write("Partition\tFilename\tSize\tTimestamps\n") 69 | f.close() 70 | 71 | # re-open file for binary reading 72 | # file processing 73 | f = open(file_name, "rb") 74 | fiwalk.fiwalk_using_sax(xmlfile=f, callback=proc) 75 | -------------------------------------------------------------------------------- /dfxml/bin/filesdb.py: -------------------------------------------------------------------------------- 1 | # 2 | # filesdb 3 | # a module that holds a database of DFXML files 4 | # 5 | 6 | import sys 7 | from collections import defaultdict 8 | 9 | import dfxml 10 | 11 | 12 | class filesdb: 13 | def __init__(self, fname=None): 14 | self.sha1db = defaultdict(list) # fi's by hashdb 15 | self.md5db = defaultdict(list) # fi's by hashdb 16 | self.fnamedb = defaultdict(list) # fi's by fname 17 | self.dirs = defaultdict(list) # fi's by directory name 18 | self.fis = [] 19 | self.prefix = None 20 | self.delfix = None 21 | if fname: 22 | self.read(fname) 23 | 24 | def __iter__(self): 25 | """The iterator for filesdb iterates through all the files""" 26 | return self.fis.__iter__() 27 | 28 | def read(self, f): 29 | if type(f) == str: 30 | self.fname = f 31 | f = open(f, "rb") 32 | dfxml.read_dfxml(xmlfile=f, callback=self.pass1) 33 | 34 | def read_with_prefix(self, fname): 35 | if ":" in fname: 36 | (fmt, fname) = fname.split(":") 37 | if fmt[0] == "+": 38 | self.prefix = fmt[1:] 39 | if fmt[0] == "=": 40 | self.delfix = fmt[1:] 41 | if fmt[0] != "+" and fmt[0] != "=": 42 | self.prefix = fmt 43 | self.read(fname) 44 | 45 | def pass1(self, fi): 46 | """First pass for reading fi objects""" 47 | import os 48 | 49 | self.fis.append(fi) 50 | if fi.sha1(): 51 | self.sha1db[fi.sha1()].append(fi) 52 | if fi.md5(): 53 | self.md5db[fi.md5()].append(fi) 54 | if fi.filename(): 55 | fname = fi.filename() 56 | if self.delfix: 57 | if fname.startswith(self.delfix): 58 | fname = fname[len(self.delfix) :] 59 | if self.prefix: 60 | fname = self.prefix + fname 61 | self.sha1db[fname].append(fi) 62 | self.dirs[os.path.dirname(fname)].append(fi) 63 | 64 | def print_stats(self, f=sys.stdout): 65 | """Returns a text string of the stats""" 66 | ret = [ 67 | ["Total directories", len(self.dirs)], 68 | ["Total files", len(self.fis)], 69 | ["Total bytes", sum([int(fi.filesize()) for fi in self.fis])], 70 | ["Total sha1s", len(self.sha1db)], 71 | ["Total md5s", len(self.md5db)], 72 | ] 73 | print("\n".join(["{:20}: {:14,}".format(a[0], a[1]) for a in ret])) 74 | 75 | mtime_min = [fi.mtime() for fi in self.fis] 76 | # print('mtime=',len(mtime_min)) 77 | # flt = list(filter(lambda a:a!=None,mtime_min)) 78 | # print('flt=',flt,len(flt)) 79 | 80 | # print('mtime_min=',mtime_min) 81 | # print(['ctime range',mtime_min]) 82 | # exit(0) 83 | 84 | def del_dirs(self, targetdb): 85 | """Given a targetdb, provide the dirs to get there.""" 86 | return 
set(self.dirs.keys()).difference(set(targetdb.dirs.keys())) 87 | 88 | def del_files(self, targetdb): 89 | """Given an targetdb, provide the files needed to get there.""" 90 | return set(self.filesdb).difference(set(db.filesdb)) 91 | 92 | def new_dirs(self, db): 93 | """Given an older db, provide the dirs that are new.""" 94 | return set(db.dirs.keys()).difference(set(self.dirs.keys())) 95 | 96 | def search(self, mfi, hash=False, name=False): 97 | """Return the matching fis""" 98 | if hash and not name: 99 | return self.md5db[mfi.md5()] 100 | if name and not hash: 101 | return self.fnamedb[mfi.filename()] 102 | if hash and name: 103 | return filter( 104 | lambda fi: fi.filename() == mfi.filename(), self.md5db[mfi.md5()] 105 | ) 106 | return [] 107 | 108 | 109 | # 110 | # test program. Reads a database and dumps it. 111 | # 112 | if __name__ == "__main__": 113 | from argparse import ArgumentParser 114 | 115 | parser = ArgumentParser( 116 | description="Test the files database with one or more DFXML files" 117 | ) 118 | parser.add_argument("xmlfiles", help="XML files to process", nargs="+") 119 | 120 | args = parser.parse_args() 121 | db = filesdb() 122 | for fn in args.xmlfiles: 123 | db.read(fn) 124 | db.print_stats() 125 | -------------------------------------------------------------------------------- /dfxml/bin/iblkfind.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Usage: iblkfind imagefile.iso s1 [s2 s3 ...] ... 3 | 4 | Reports the files in which sectors s1, s2, s3... are located. 5 | """ 6 | import sys 7 | 8 | import dfxml 9 | 10 | if __name__ == "__main__": 11 | from optparse import OptionParser 12 | 13 | parser = OptionParser() 14 | parser.usage = "%prog [options] imagefile-or-xmlfile s1 [s2 s3 s3 ...]" 15 | parser.add_option( 16 | "--offset", help="values are byte offsets, not sectors", action="store_true" 17 | ) 18 | parser.add_option("--blocksize", help="specify sector blockszie", default=512) 19 | (options, args) = parser.parse_args() 20 | 21 | if len(args) < 1: 22 | parser.print_help() 23 | sys.exit(1) 24 | fn = args[0] 25 | 26 | print(args) 27 | print("Processing %s" % fn) 28 | print("Searching for %s" % ", ".join(args[1:])) 29 | 30 | divisor = 1 31 | if options.offset: 32 | divisor = options.blocksize 33 | 34 | sectors = set([int(s) / divisor for s in args[1:]]) 35 | 36 | def process(fi): 37 | for s in sectors: 38 | if fi.has_sector(s): 39 | print("%d\t%s" % (s, fi.filename())) 40 | 41 | if not fn.endswith(".xml"): 42 | print("iblkfind requires an XML file") 43 | exit(1) 44 | dfxml.read_dfxml(xmlfile=open(args[0], "rb"), callback=process) 45 | -------------------------------------------------------------------------------- /dfxml/bin/iexport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """iexport.py: export the unallocated spaces.""" 3 | 4 | 5 | class Run: 6 | """Keeps track of a single run""" 7 | 8 | def __init__(self, start, len): 9 | self.start = start 10 | self.len = len 11 | self.end = start + len - 1 12 | 13 | def __str__(self): 14 | return "Run<%d--%d> (len %d)" % (self.start, self.end, self.len) 15 | 16 | def contains(self, b): 17 | """Returns true if b is inside self.""" 18 | print( 19 | "%d <= %d <= %d = %s" 20 | % (self.start, b, self.end, (self.start <= b <= self.end)) 21 | ) 22 | return self.start <= b <= self.end 23 | 24 | def intersects_run(self, r): 25 | """Return true if self intersects r. 
This may be because r.start is 26 | inside the run, r.end is inside the run, or self is inside the run.""" 27 | return self.contains(r.start) or self.contains(r.end) or r.contains(self.start) 28 | 29 | def contains_run(self, r): 30 | """Returns true if self completely contains r""" 31 | return self.contains(r.start) and self.contains(r.end) 32 | 33 | 34 | class RunDB: 35 | """The RunDB maintains a list of all the runs in a disk image. The 36 | RunDB is created with a single run that represents all of the sectors 37 | in the disk image. Runs can then be removed, which causes existing 38 | runs to be split. Finally all of the remaining runs can be removed.""" 39 | 40 | def __init__(self, start, len): 41 | self.runs = [Run(start, len)] 42 | 43 | def __str__(self): 44 | return "RunDB\n" + "\n".join([str(p) for p in self.runs]) 45 | 46 | def intersecting_runs(self, r): 47 | """Return a list of all the Runs that intersect with r. 48 | This may be because r.start is inside the run, r.end is inside 49 | the run, because the run completely encloses r, or because r completely 50 | encloses the run.""" 51 | return filter(lambda x: x.intersects_run(r), self.runs) 52 | 53 | def remove(self, r): 54 | """Remove run r""" 55 | for p in self.intersecting_runs(r): 56 | self.runs.remove(p) 57 | 58 | # if P is completely inside r, just remove it 59 | if r.contains_run(p): 60 | continue 61 | 62 | # Split p into before and after r; add the non-zero pieces 63 | before_len = r.start - p.start 64 | if before_len > 0: 65 | self.runs.append(Run(p.start, before_len)) 66 | after_len = p.end - r.end 67 | if after_len > 0: 68 | self.runs.append(Run(r.end, after_len)) 69 | -------------------------------------------------------------------------------- /dfxml/bin/iextract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import datetime 4 | import os 5 | import os.path 6 | import sys 7 | import zipfile 8 | 9 | import dfxml 10 | import dfxml.fiwalk as fiwalk 11 | 12 | if __name__ == "__main__": 13 | from optparse import OptionParser 14 | 15 | parser = OptionParser() 16 | parser.add_option( 17 | "-x", 18 | "--xml", 19 | dest="xmlfilename", 20 | help="Already-created DFXML file for imagefile", 21 | ) 22 | parser.usage = "%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... in imagefile and write to zipfile" 23 | (options, args) = parser.parse_args() 24 | 25 | if len(args) < 3: 26 | parser.print_help() 27 | exit(1) 28 | 29 | imagefilename = args[0] 30 | xmlfilename = options.xmlfilename 31 | xmlfh = None 32 | if xmlfilename != None: 33 | xmlfh = open(xmlfilename, "r") 34 | zipfilename = args[1] 35 | targets = set([fn.lower() for fn in args[2:]]) 36 | zfile = zipfile.ZipFile(zipfilename, "w", allowZip64=True) 37 | 38 | def proc(fi): 39 | basename = os.path.basename(fi.filename()).lower() 40 | if basename in targets: 41 | info = zipfile.ZipInfo( 42 | fi.filename(), 43 | datetime.datetime.fromtimestamp(fi.mtime().timestamp()).utctimetuple(), 44 | ) 45 | info.internal_attr = 1 46 | info.external_attr = 2175008768 # specifies mode 0644 47 | zfile.writestr(info, fi.contents()) 48 | 49 | fiwalk.fiwalk_using_sax(imagefile=open(imagefilename), xmlfile=xmlfh, callback=proc) 50 | -------------------------------------------------------------------------------- /dfxml/bin/igrep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Usage: igrep imagefile.iso string ... 
3 | 4 | Reports the files in which files have the string. 5 | """ 6 | import fiwalk 7 | 8 | import dfxml 9 | 10 | if __name__ == "__main__": 11 | import sys 12 | from optparse import OptionParser 13 | 14 | parser = OptionParser() 15 | parser.usage = "%prog [options] image.iso s1" 16 | parser.add_option("-d", "--debug", help="debug", action="store_true") 17 | (options, args) = parser.parse_args() 18 | 19 | if len(args) != 2: 20 | parser.print_help() 21 | sys.exit(1) 22 | 23 | (imagefn, data) = args 24 | 25 | def process(fi): 26 | offset = fi.contents().find(data) 27 | if offset > 0: 28 | print("%s (offset=%d)" % (fi.filename(), offset)) 29 | 30 | fiwalk.fiwalk_using_sax(imagefile=open(imagefn), callback=process) 31 | -------------------------------------------------------------------------------- /dfxml/bin/ihistogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Draw a quick histogram of the timestamps on the hard drive""" 3 | 4 | import matplotlib 5 | 6 | matplotlib.use("agg.pdf") 7 | 8 | 9 | import datetime 10 | import time 11 | 12 | from matplotlib.dates import ( 13 | MONDAY, 14 | SATURDAY, 15 | DateFormatter, 16 | MonthLocator, 17 | WeekdayLocator, 18 | ) 19 | from pylab import * 20 | 21 | import dfxml.fiwalk as fiwalk 22 | 23 | 24 | def get_dates_and_counts(times): 25 | from datetime import date 26 | 27 | data = {} 28 | for t in times: 29 | gm = time.gmtime(t) 30 | d = date(gm[0], gm[1], gm[2]) 31 | data[d] = data.get(d, 0) + 1 32 | 33 | # Create a list of key,val items so you can sort by date 34 | dates_and_counts = [(date, count) for date, count in data.items()] 35 | dates_and_counts = sorted(dates_and_counts) 36 | return dates_and_counts 37 | 38 | 39 | def version1(times): 40 | import pylab 41 | 42 | pylab.grid() 43 | pylab.hist(times, 100) 44 | pylab.show() 45 | 46 | 47 | def version2(times): 48 | # see http://mail.python.org/pipermail/python-list/2003-November/236559.html 49 | # http://www.gossamer-threads.com/lists/python/python/665014 50 | from matplotlib.pylab import ( 51 | bar, 52 | gca, 53 | plot, 54 | plot_date, 55 | savefig, 56 | show, 57 | title, 58 | xlabel, 59 | ylabel, 60 | ) 61 | 62 | dates_and_counts = get_dates_and_counts(times) 63 | dates, counts = zip(*dates_and_counts) 64 | # bar(dates,counts) 65 | plot_date(dates, counts) 66 | xlabel("Date") 67 | ylabel("count") 68 | show() 69 | 70 | 71 | def version3(times): 72 | import datetime 73 | 74 | import matplotlib 75 | import matplotlib.dates as mdates 76 | import matplotlib.mlab as mlab 77 | import matplotlib.pyplot as pyplot 78 | import numpy as np 79 | 80 | dates_and_counts = get_dates_and_counts(times) 81 | dates, counts = zip(*dates_and_counts) 82 | 83 | years = mdates.YearLocator() # every year 84 | months = mdates.MonthLocator() # every month 85 | yearsFmt = mdates.DateFormatter("%Y") 86 | 87 | fig = pyplot.figure() 88 | ax = fig.add_subplot(111) 89 | ax.bar(dates, counts) 90 | 91 | ax.set_ylabel("file count") 92 | ax.set_xlabel("file modification time (mtime)") 93 | 94 | # ax.set_yscale('log') 95 | 96 | # Format the ticks 97 | 98 | ax.xaxis.set_major_locator(years) 99 | ax.xaxis.set_major_formatter(yearsFmt) 100 | # ax.xaxis.set_minor_locator(months) 101 | 102 | datemin = datetime.date(min(dates).year, 1, 1) 103 | datemax = datetime.date(max(dates).year, 1, 1) 104 | ax.set_xlim(datemin, datemax) 105 | ax.set_ylim(0, max(counts)) 106 | 107 | # format the coords message box 108 | def price(x): 109 | return "$%1.2f" % x 110 | 111 | ax.format_xdata 
= mdates.DateFormatter("%Y-%m-%d") 112 | ax.format_ydata = price 113 | ax.grid(True) 114 | 115 | # rotates and right aligns the x labels, and moves the bottom of the 116 | # axes up to make room for them 117 | fig.autofmt_xdate() 118 | plt.savefig("hist.pdf", format="pdf") 119 | 120 | print("dates:", dates) 121 | print("num dates:", len(dates)) 122 | 123 | 124 | if __name__ == "__main__": 125 | import sys 126 | from optparse import OptionParser 127 | from sys import stdout 128 | 129 | parser = OptionParser() 130 | parser.usage = "%prog [options] xmlfile " 131 | (options, args) = parser.parse_args() 132 | 133 | import time 134 | 135 | times = [] 136 | for fi in fiwalk.fileobjects_using_sax(xmlfile=open(args[0])): 137 | try: 138 | times.append(fi.mtime()) 139 | except KeyError: 140 | pass 141 | 142 | version3(times) 143 | -------------------------------------------------------------------------------- /dfxml/bin/imap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Usage: imap imagefile0.iso imagefile1.iso imagefile2.iso ... 3 | 4 | Produces a map of imagefile0.iso, using the other image files as "hints" for missing 5 | data. Only reports files that have been allocated; deleted files are reported only if 6 | they can be found allocated in another file. 7 | """ 8 | import dfxml.fiwalk as fiwalk 9 | 10 | ################################################################ 11 | if __name__ == "__main__": 12 | import sys 13 | from optparse import OptionParser 14 | from sys import stdout 15 | 16 | parser = OptionParser() 17 | parser.usage = "%prog [options] image.iso " 18 | parser.add_option("-d", "--debug", help="debug", action="store_true") 19 | (options, args) = parser.parse_args() 20 | 21 | if len(args) < 1: 22 | parser.print_help() 23 | sys.exit(1) 24 | 25 | imagefile = open(args[0], "r") 26 | annotated_runs = [] 27 | # TODO - This debug statement needs to moved to somewhere appropriate after an image read. 28 | # if options.debug: print("Read %d file objects from %s" % (len(fileobjects),imagefile.name)) 29 | 30 | def cb(fi): 31 | if options.debug: 32 | print("Read " + str(fi)) 33 | fragment_num = 1 34 | for run in fi.byte_runs(): 35 | annotated_runs.append((run.img_offset, run, fragment_num, fi)) 36 | fragment_num += 1 37 | 38 | fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=cb) 39 | 40 | next_sector = 0 41 | 42 | for ip, run, fragment_num, fi in sorted(annotated_runs): 43 | extra = "" 44 | fragment = "" 45 | start_sector = run.img_offset / 512 46 | sector_count = int(run.bytes / 512) 47 | partial = run.bytes % 512 48 | 49 | if not fi.allocated(): 50 | print("***") 51 | 52 | if not fi.file_present(): # it's not here! 
53 | continue 54 | 55 | if partial > 0: 56 | sector_count += 1 57 | extra = "(%3d bytes slack)" % (512 - partial) 58 | 59 | if fi.fragments() > 2: 60 | fragment = "fragment %d" % fragment_num 61 | 62 | if next_sector != start_sector: 63 | print( 64 | " <-- %5d unallocated sectors @ sector %5d -->" 65 | % (start_sector - next_sector, next_sector) 66 | ) 67 | 68 | print( 69 | "[ %6d -> %6d sectors %18s ] %s %s " 70 | % (start_sector, sector_count, extra, fi.filename(), fragment) 71 | ) 72 | 73 | next_sector = start_sector + sector_count 74 | -------------------------------------------------------------------------------- /dfxml/bin/iredact-config.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Paths to the disk image and fiwalk XML output 3 | # 4 | IMAGEFILE /home/bcadmin/Desktop/jowork.raw.raw 5 | XMLFILE /home/bcadmin/Desktop/jofiwalk.xml 6 | 7 | # 8 | # Redaction patterns 9 | # 10 | #FILEPAT *.dll FUZZ 11 | #FILEPAT *.com FUZZ 12 | FILEPAT *.exe FUZZ 13 | 14 | # 15 | # Other examples 16 | # 17 | #KEY 100200300400 18 | #MD5 db06069ef1c9f40986ffa06db4fe8fd7 FILL 0x44 19 | #FILENAME file3.txt ENCRYPT 20 | #FILEPAT file*.txt ENCRYPT 21 | #CONTAINS This FILL 0x44 22 | #FILEPAT *Spotlight* FILL 0x44 23 | 24 | # 25 | # Uncomment this line to actually commit the redaction: 26 | # 27 | COMMIT 28 | 29 | 30 | -------------------------------------------------------------------------------- /dfxml/bin/iverify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Reads an fiwalk XML file and reports how many of the files are still in the image...""" 3 | 4 | import hashlib 5 | import os.path 6 | import sys 7 | 8 | from dfxml import fiwalk 9 | 10 | present = [] 11 | not_present = [] 12 | 13 | 14 | def process_fi(fi): 15 | print("process file", fi.filename()) 16 | if fi.filesize() == 0: 17 | return 18 | try: 19 | if fi.file_present(): 20 | present.append(fi) 21 | return 22 | else: 23 | not_present.append(fi) 24 | return 25 | except ValueError(e): 26 | sys.stderr.write(str(e) + "\n") 27 | 28 | 29 | def main(): 30 | import sys 31 | from optparse import OptionParser 32 | from subprocess import PIPE, Popen 33 | 34 | global options 35 | 36 | parser = OptionParser() 37 | parser.add_option("-d", "--debug", help="prints debugging info", dest="debug") 38 | parser.add_option("-g", "--ground", help="ground truth XML file", dest="ground") 39 | parser.usage = "%prog [options] image.iso" 40 | (options, args) = parser.parse_args() 41 | 42 | if not options.ground: 43 | parser.print_help() 44 | sys.exit(1) 45 | 46 | # Read the XML file 47 | reader = fiwalk.fileobject_reader() 48 | reader.set_imagefilename(args[0]) 49 | reader.process_xml_stream(open(options.ground, "r"), process_fi) 50 | 51 | if len(present) == 0: 52 | print("None of the files are present in the image") 53 | sys.exit(0) 54 | 55 | if len(not_present) == 0: 56 | print("All of the files are present in the image") 57 | sys.exit(0) 58 | 59 | print("\n\n") 60 | print("Present in image:") 61 | print("=================") 62 | print("\n".join([fi.filename() for fi in present])) 63 | 64 | print("\n") 65 | print("Not Present or altered in image:") 66 | print("=====================") 67 | for fi in not_present: 68 | print(fi.filename()) 69 | 70 | 71 | ################################################################ 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- 
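The tools above (`imap.py`, `iverify.py`, and earlier `corpus_sync.py` and `exp_slack.py`) all share the same stream-processing pattern: `fiwalk.fiwalk_using_sax()` parses fiwalk DFXML (given either an XML file or a disk image) and hands each resulting fileobject to a callback. A minimal sketch of that pattern, using a placeholder DFXML path and only accessors already used by those scripts:

```python
import dfxml.fiwalk as fiwalk


def report(fi):
    # Called once per fileobject parsed from the DFXML stream.
    if fi.allocated() and fi.filesize():
        print("%s\t%d" % (fi.filename(), fi.filesize()))


# "image.dfxml" is a placeholder for an existing fiwalk DFXML report.
with open("image.dfxml", "rb") as xmlfh:
    fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=report)
```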
/dfxml/bin/mem_info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | mem_info.py: report the memory used by a program that wrote results to a dfxml file 4 | """ 5 | 6 | 7 | import sys 8 | import xml.etree.ElementTree as ET 9 | 10 | 11 | def fmt(n): 12 | if args.h: 13 | for p, let in reversed((3, "K"), (6, "M"), (9, "G"), (12, "T"), (15, "P")): 14 | if n > 10**p: 15 | return f"{n/10**p}{let}" 16 | return n 17 | 18 | 19 | def process_dfxml(dfxml): 20 | root = ET.parse(dfxml) 21 | start_time = root.find(".//start_time").text[0:19].replace("T", " ") 22 | command_line = " ".join(root.find(".//command_line").text.split()[1:]) 23 | maxrss = 0 24 | for e in root.findall(".//rusage/maxrss"): 25 | maxrss += int(e.text) 26 | print(start_time, fmt(maxrss), command_line) 27 | 28 | 29 | if __name__ == "__main__": 30 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 31 | 32 | parser = ArgumentParser( 33 | formatter_class=ArgumentDefaultsHelpFormatter, 34 | description="report memory utilization from DFXML file", 35 | ) 36 | parser.add_argument("--h", help="human format", action="store_true") 37 | parser.add_argument("dfxml", nargs="*") 38 | args = parser.parse_args() 39 | bad_files = [] 40 | for fname in args.dfxml: 41 | try: 42 | process_dfxml(fname) 43 | except ET.ParseError as e: 44 | bad_files.append(fname) 45 | if bad_files: 46 | print("Could not read:", file=sys.stderr) 47 | print("\n".join(bad_files), file=sys.stderr) 48 | -------------------------------------------------------------------------------- /dfxml/bin/nsrl_rds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Demonstrates how to communicate with NPS NSRL RDS 4 | # 5 | 6 | RDS_SERVER = "https://domex.nps.edu/www-noauth/nsrl_rds.cgi" 7 | 8 | import xmlrpclib 9 | 10 | if __name__ == "__main__": 11 | print("Demonstration of NSRL RDS service at %s\n" % RDS_SERVER) 12 | print("") 13 | p = xmlrpclib.ServerProxy(RDS_SERVER) 14 | try: 15 | avail = p.available() 16 | except xmlrpclib.ProtocolError as e: 17 | print("Cannot access " + RDS_SERVER) 18 | print(e) 19 | raise RuntimeError 20 | 21 | print("Available RDS sets: %s " % avail) 22 | 23 | md5_val = "EB714443AA2FC1A3D16E39EB8007A0B2" 24 | 25 | # Build a search term 26 | search = {"db": avail[0], "md5": md5_val} # pick the first search term 27 | 28 | print("Here are the files with a md5 of " + md5_val) 29 | ret = p.search(search) 30 | fields = ret["fields"] 31 | for row in ret["result"]: 32 | for a, b in zip(fields, row): 33 | print(a, "=", b) 34 | print("") 35 | 36 | print( 37 | "Now we will do a query for multiple MD5 values. You can do this by specifying\n" 38 | + "a value as an array." 
39 | ) 40 | searchm = { 41 | "db": avail[0], 42 | "md5": ["EB714443AA2FC1A3D16E39EB8007A0B2", "9B3702B0E788C6D62996392FE3C9786A"], 43 | } 44 | print("sending:", searchm) 45 | ret = p.search(searchm) 46 | print("got:", ret) 47 | fields = ret["fields"] 48 | for row in ret["result"]: 49 | for a, b in zip(fields, row): 50 | print(a, "=", b) 51 | print("") 52 | -------------------------------------------------------------------------------- /dfxml/bin/report_silent_changes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | This program takes a differentially-annotated DFXML file as input, and outputs a DFXML document that contains 'Silent' changes. For instance, a changed checksum with no changed timestamps would be 'Silent.' 18 | """ 19 | 20 | __version__ = "0.2.2" 21 | 22 | import logging 23 | import os 24 | import sys 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | import make_differential_dfxml 29 | 30 | import dfxml.objects as Objects 31 | 32 | 33 | def main(): 34 | d = Objects.DFXMLObject("1.2.0") 35 | d.program = sys.argv[0] 36 | d.program_version = __version__ 37 | d.command_line = " ".join(sys.argv) 38 | d.dc["type"] = "File system silent-change report" 39 | d.add_creator_library( 40 | "Python", ".".join(map(str, sys.version_info[0:3])) 41 | ) # A bit of a bend, but gets the major version information out. 42 | d.add_creator_library("Objects.py", Objects.__version__) 43 | d.add_creator_library("dfxml.py", Objects.dfxml.__version__) 44 | 45 | current_appender = d 46 | tally = 0 47 | for event, obj in Objects.iterparse(args.infile): 48 | if event == "start": 49 | # Inherit namespaces 50 | if isinstance(obj, Objects.DFXMLObject): 51 | for prefix, url in obj.iter_namespaces(): 52 | d.add_namespace(prefix, url) 53 | # Group files by volume 54 | elif isinstance(obj, Objects.VolumeObject): 55 | d.append(obj) 56 | current_appender = obj 57 | elif event == "end": 58 | if isinstance(obj, Objects.VolumeObject): 59 | current_appender = d 60 | elif isinstance(obj, Objects.FileObject): 61 | if "_changed" not in obj.diffs: 62 | if "_modified" in obj.diffs or "_renamed" in obj.diffs: 63 | current_appender.append(obj) 64 | tally += 1 65 | print(d.to_dfxml()) 66 | _logger.info("Found %d suspiciously-changed files." 
% tally) 67 | 68 | 69 | if __name__ == "__main__": 70 | import argparse 71 | 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument("-d", "--debug", action="store_true") 74 | parser.add_argument("infile") 75 | args = parser.parse_args() 76 | 77 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 78 | 79 | if not args.infile.endswith("xml"): 80 | raise Exception( 81 | "Input file should be a DFXML file, and should end with 'xml': %r." 82 | % args.infile 83 | ) 84 | 85 | if not os.path.exists(args.infile): 86 | raise Exception("Input file does not exist: %r." % args.infile) 87 | 88 | main() 89 | -------------------------------------------------------------------------------- /dfxml/bin/tcpdiff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed in whole or in part by employees of the 4 | # Federal Government in the course of their official duties, and with 5 | # other Federal assistance. Pursuant to title 17 Section 105 of the 6 | # United States Code portions of this software authored by Federal 7 | # employees are not subject to copyright protection within the United 8 | # States. For portions not authored by Federal employees, the Federal 9 | # Government has been granted unlimited rights, and no claim to 10 | # copyright is made. The Federal Government assumes no responsibility 11 | # whatsoever for its use by other parties, and makes no guarantees, 12 | # expressed or implied, about its quality, reliability, or any other 13 | # characteristic. 14 | # 15 | # We would appreciate acknowledgement if the software is used. 16 | 17 | """tcpdiff.py 18 | 19 | Generates a report about what's different between two tcp DFXML files 20 | produced by tcpflow. 21 | 22 | Process: 23 | 24 | """ 25 | 26 | import sys 27 | import time 28 | 29 | if sys.version_info < (3, 1): 30 | raise RuntimeError("rdifference.py requires Python 3.1 or above") 31 | 32 | import dfxml 33 | import dfxml.dfxml_html as dfxml_html 34 | import dfxml.fiwalk as fiwalk 35 | 36 | 37 | def ptime(t): 38 | """Print the time in the requested format. T is a dfxml time value""" 39 | global options 40 | if t is None: 41 | return None 42 | elif options.timestamp: 43 | return str(t.timestamp()) 44 | else: 45 | return str(t.iso8601()) 46 | 47 | 48 | def dprint(x): 49 | "Debug print" 50 | global options 51 | if options.debug: 52 | print(x) 53 | 54 | 55 | # 56 | # This program keeps track of the current and previous TCP connections in a single 57 | # object called "FlowState". Another way to do that would have been to have 58 | # the instance built from the XML file and then have another function that compares 59 | # them. 
60 | # 61 | 62 | 63 | class FlowState: 64 | def __init__(self, fname): 65 | self.options = options 66 | self.connections = set() 67 | self.process(fname) 68 | 69 | def process(self, fname): 70 | self.fname = fname 71 | dfxml.read_dfxml(xmlfile=open(fname, "rb"), callback=self.process_fi) 72 | 73 | def process_fi(self, fi): 74 | self.connections.add(fi) 75 | 76 | def report(self): 77 | dfxml_html.header() 78 | dfxml_html.h1("DFXML file:" + self.current_fname) 79 | dfxml_html.table(["Total Connections", str(len(self.connections))]) 80 | 81 | 82 | if __name__ == "__main__": 83 | from copy import deepcopy 84 | from optparse import OptionParser 85 | 86 | global options 87 | 88 | parser = OptionParser() 89 | parser.usage = "%prog [options] file1 file2 (files MUST be tcpflow DFXML files)" 90 | parser.add_option("-d", "--debug", help="debug", action="store_true") 91 | 92 | (options, args) = parser.parse_args() 93 | 94 | if len(args) != 2: 95 | parser.print_help() 96 | sys.exit(1) 97 | 98 | a = FlowState(fname=args[0]) 99 | a.report() 100 | 101 | b = FlowState(fname=args[1]) 102 | b.report() 103 | 104 | print("Difference:") 105 | -------------------------------------------------------------------------------- /dfxml/bin/validate_dfxml.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | from optparse import OptionParser 4 | from sys import stdout 5 | 6 | import dfxml.fiwalk as fiwalk 7 | 8 | 9 | def demo_dfxml_time_bug(filename): 10 | parser = OptionParser() 11 | parser.usage = "%prog% [options] xmlfile " 12 | (options, args) = parser.parse_args() 13 | for fi in fiwalk.fileobjects_using_sax(xmlfile=open(filename, "rb")): 14 | fsize = fi.filesize() 15 | try: 16 | mt = fi.mtime() 17 | print("Type of mt:", type(mt)) 18 | print("Normal mtime:") 19 | print(mt) 20 | except KeyboardInterrupt: 21 | raise 22 | except: 23 | raise RuntimeException("Abnormal mtime for file with size {}".format(fsize)) 24 | 25 | 26 | if __name__ == "__main__": 27 | filename = sys.argv[1] 28 | demo_dfxml_time_bug(filename) 29 | -------------------------------------------------------------------------------- /dfxml/bin/xdiff.py: -------------------------------------------------------------------------------- 1 | # 2 | # Report the difference between two dfxml files 3 | # 4 | import sys 5 | 6 | from filesdb import filesdb 7 | 8 | import dfxml 9 | 10 | # 11 | # test program. Reads a database and dumps it. 12 | # 13 | if __name__ == "__main__": 14 | from argparse import ArgumentParser 15 | 16 | parser = ArgumentParser( 17 | description="Test the files database with one or more DFXML files" 18 | ) 19 | parser.add_argument("xmlfiles", help="XML files to process", nargs="+") 20 | 21 | args = parser.parse_args() 22 | db0 = None 23 | for fn in args.xmlfiles: 24 | db1 = filesdb() 25 | db1.fname = fn 26 | db1.read(fn) 27 | print("{} stats:".format(fn)) 28 | db1.print_stats(sys.stdout) 29 | if db0: 30 | print("") 31 | print("Difference from {}".format(db0.fname)) 32 | db0 = db1 33 | -------------------------------------------------------------------------------- /dfxml/bin/xmirror.py: -------------------------------------------------------------------------------- 1 | # 2 | # Using two XML files make the current system look like the master 3 | # 4 | import sys 5 | 6 | from filesdb import filesdb 7 | 8 | import dfxml 9 | 10 | # 11 | # test program. Reads a database and dumps it. 
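# Illustrative invocation (paths are hypothetical):
#
#   python xmirror.py --local local.dfxml +/mnt/master/:master.dfxml
#
# The optional "+prefix:" / "=prefix:" syntax on master files is interpreted by
# filesdb.read_with_prefix (imported above): "+" prepends the given prefix to
# each filename read from that DFXML file, while "=" strips it instead.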
12 | # 13 | if __name__ == "__main__": 14 | from argparse import ArgumentParser 15 | 16 | parser = ArgumentParser(description="Make the local system look like the master") 17 | parser.add_argument("--commit", help="Actually do the job", action="store_true") 18 | parser.add_argument( 19 | "--local", 20 | help="specifies an XML file that describes the local system (required)", 21 | ) 22 | parser.add_argument( 23 | "masterfiles", 24 | help="XML files to process. Files may be prefixed with an [xml] path", 25 | nargs="+", 26 | ) 27 | 28 | args = parser.parse_args() 29 | 30 | if not args.local: 31 | parser.print_help() 32 | exit(1) 33 | 34 | masterdb = filesdb() 35 | for fn in args.masterfiles: 36 | masterdb.read_with_prefix(fn) 37 | print("Master stats:") 38 | masterdb.print_stats(sys.stdout) 39 | print("\n") 40 | print("Local mirror stats:") 41 | ldb = filesdb() 42 | ldb.read_with_prefix(args.local) 43 | 44 | # Create new directories if needed 45 | for newdir in ldb.new_dirs(masterdb): 46 | print("mkdir {}".format(newdir)) 47 | 48 | keep_files = [] 49 | mv_files = [] 50 | rm_files = [] 51 | 52 | def process_fi(fi): 53 | # If hash is same and name is the same, ignore: 54 | for nfi in masterdb.search(fi, hash=True, name=True): 55 | keep_files.append(fi.filename()) 56 | return # in the database 57 | 58 | # If hash is same and name is different, move it 59 | for nfi in masterdb.search(fi, hash=True): 60 | mv_files.append((fi.filename(), nfi.filename())) 61 | return 62 | 63 | # If name is same and hash is different, erase it 64 | for nfi in masterdb.search(fi, name=True): 65 | rm_files.append(fi.filename()) 66 | return 67 | 68 | # Otherwise, erase the hash 69 | rm_files.append(fi.filename()) 70 | return 71 | 72 | # Rename files that need to be renamed 73 | for fi in ldb: 74 | process_fi(fi) 75 | 76 | print("Files to keep: {:12,}".format(len(keep_files))) 77 | print("Files to rm: {:12,}".format(len(rm_files))) 78 | print("Files to mv: {:12,}".format(len(mv_files))) 79 | -------------------------------------------------------------------------------- /dfxml/conftest.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is empty, but it permits test discovery of the subdirectory. 3 | # See: 4 | # https://stackoverflow.com/questions/10253826/path-issue-with-pytest-importerror-no-module-named-yadayadayada 5 | -------------------------------------------------------------------------------- /dfxml/dfxml_html.py: -------------------------------------------------------------------------------- 1 | # This software was developed in whole or in part by employees of the 2 | # Federal Government in the course of their official duties, and with 3 | # other Federal assistance. Pursuant to title 17 Section 105 of the 4 | # United States Code portions of this software authored by Federal 5 | # employees are not subject to copyright protection within the United 6 | # States. For portions not authored by Federal employees, the Federal 7 | # Government has been granted unlimited rights, and no claim to 8 | # copyright is made. The Federal Government assumes no responsibility 9 | # whatsoever for its use by other parties, and makes no guarantees, 10 | # expressed or implied, about its quality, reliability, or any other 11 | # characteristic. 12 | # 13 | # We would appreciate acknowledgement if the software is used. 
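# Example usage (sketch): callers such as tcpdiff.py import this module and
# call header()/h1()/table(); the module-level `html` flag below selects HTML
# rather than plain-text output.
#
#   import dfxml.dfxml_html as dfxml_html
#   dfxml_html.html = True  # leave False (the default) for plain text
#   dfxml_html.header()
#   dfxml_html.h1("Report")
#   dfxml_html.table([["Total Connections", "42"]])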
14 | 15 | # dfxml_html.py: 16 | # A collection of functions for generating HTML 17 | 18 | html = False 19 | 20 | 21 | def header(): 22 | if html: 23 | print( 24 | """ 25 | 26 | 27 | 32 | """ 33 | ) 34 | 35 | 36 | def h1(title): 37 | global options 38 | if html: 39 | print("

<h1>%s</h1>" % title) 40 | return 41 | print("\n\n%s\n" % title) 42 | 43 | 44 | def h2(title): 45 | global options 46 | if html: 47 | print("<h2>%s</h2>" % title) 48 | return 49 | print("\n%s\n" % title) 50 | 51 |
52 | def table(rows, styles=None, break_on_change=False): 53 | import sys 54 | 55 | global options 56 | 57 | def alldigits(x): 58 | if not isinstance(x, str): 59 | return False 60 | for ch in x: 61 | if ch.isdigit() == False: 62 | return False 63 | return True 64 |
65 | def fmt(x): 66 | if x == None: 67 | return "" 68 | if type(x) == int: 69 | return "%12d" % x 70 | if alldigits(x): 71 | return "%12d" % int(x) 72 | if isinstance(x, str): 73 | return x 74 | return str(x) 75 |
76 | if html: 77 | print("<table>") 78 | for row in rows: 79 | print("<tr>") 80 | if not styles: 81 | styles = [""] * len(rows) 82 | for col, style in zip(row, styles): 83 | sys.stdout.write("<td %s>%s</td>" % (style, col)) 84 | print("</tr>") 85 | print("</table>
") 86 | return 87 | lastRowCol0 = None 88 | for row in rows: 89 | if row[0] != lastRowCol0: 90 | sys.stdout.write("\n") 91 | lastRowCol0 = row[0] 92 | try: 93 | line = "\t".join([fmt(col) for col in row]) 94 | sys.stdout.write(line) 95 | sys.stdout.write("\n") 96 | except UnicodeEncodeError: 97 | # Fall back to manual join 98 | for col in row: 99 | for ch in fmt(col): 100 | try: 101 | sys.stdout.write(ch) 102 | except UnicodeEncodeError: 103 | sys.stdout.write("?") 104 | sys.stdout.write("\t") 105 | print("(UNICODE ERROR)") 106 | -------------------------------------------------------------------------------- /dfxml/py.typed: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology by employees of the Federal Government in the course 3 | # of their official duties. Pursuant to title 17 Section 105 of the 4 | # United States Code this software is not subject to copyright 5 | # protection and is in the public domain. NIST assumes no 6 | # responsibility whatsoever for its use by other parties, and makes 7 | # no guarantees, expressed or implied, about its quality, 8 | # reliability, or any other characteristic. 9 | # 10 | # We would appreciate acknowledgement if the software is used. 11 | 12 | # This file is defined to support PEP 561: 13 | # https://www.python.org/dev/peps/pep-0561/ 14 | -------------------------------------------------------------------------------- /samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.err.log 2 | *.validates.log 3 | -------------------------------------------------------------------------------- /samples/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | SHELL ?= /bin/bash 15 | 16 | XMLLINT ?= $(shell which xmllint) 17 | ifeq ($(XMLLINT),) 18 | $(error XMLLINT not found) 19 | endif 20 | 21 | SAMPLE_FILES__PASS := \ 22 | difference_test_0.xml \ 23 | difference_test_1.xml \ 24 | difference_test_2.xml \ 25 | difference_test_3.xml \ 26 | fileobjectexample.xml 27 | 28 | # TODO Any remaining issues with the upstream tool should be resolved. 
29 | SAMPLE_FILES__SKIP := \ 30 | piecewise.xml \ 31 | simple.xml 32 | 33 | SAMPLE_FILES := \ 34 | $(SAMPLE_FILES__FAIL) \ 35 | $(SAMPLE_FILES__SKIP) 36 | 37 | VALIDATES_LOG_FILES__PASS := $(foreach sample_file,$(SAMPLE_FILES__PASS),$(sample_file).validates.log) 38 | 39 | VALIDATES_LOG_FILES__SKIP := $(foreach sample_file,$(SAMPLE_FILES__SKIP),$(sample_file).validates.log) 40 | 41 | VALIDATES_LOG_FILES := \ 42 | $(VALIDATES_LOG_FILES__PASS) \ 43 | $(VALIDATES_LOG_FILES__SKIP) 44 | 45 | all: 46 | 47 | .PHONY: \ 48 | check-TODO 49 | 50 | %.validates.log: \ 51 | % \ 52 | ../schema/dfxml.xsd 53 | $(XMLLINT) \ 54 | --noout \ 55 | --schema ../schema/dfxml.xsd \ 56 | $< \ 57 | 2> $<.err.log 58 | touch $@ 59 | 60 | ../schema/dfxml.xsd: 61 | @echo "dfxml.xsd not found. To check out the DFXML schema (necessary to run unit tests in the /samples directory), please run 'make schema-init' in the repository root." >&2 62 | exit 2 63 | 64 | check: \ 65 | $(VALIDATES_LOG_FILES__PASS) 66 | 67 | check-TODO: \ 68 | $(VALIDATES_LOG_FILES__SKIP) 69 | 70 | clean: 71 | @rm -f *.err.log *.validates.log 72 | -------------------------------------------------------------------------------- /samples/README.md: -------------------------------------------------------------------------------- 1 | # Sample DFXML 2 | This directory contains sample DFXML files. The Makefile here runs tests for conformance against the DFXML Schema with `make check`. 3 | 4 | Not all of these files are currently conformant; these can be seen with `make --keep-going check-TODO`. 5 | -------------------------------------------------------------------------------- /samples/difference_test_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | Sample 9 | 10 | 11 | vi 12 | 8.0 13 | 14 | vi pre.xml 15 | 16 | 17 | 18 | 19 | i_will_be_deleted.txt 20 | r 21 | 20 22 | 123456 23 | 2013-01-01T00:00:00Z 24 | 2013-01-01T00:00:00Z 25 | 2013-01-01T00:00:00Z 26 | 27 | 28 | 29 | e834b5c2f64759832fb33ec53c8b5028 30 | 9125cb87b8f0035c22d3efad2b0473367cc456ca 31 | c75d73927a6ca221ccc71c4f4dee9286fce2b5cf7122950c73157cbf821af07f 32 | 33 | 34 | i_will_be_modified.txt 35 | r 36 | 22 37 | 123457 38 | 2013-01-01T00:00:00Z 39 | 2013-01-01T00:00:00Z 40 | 2013-01-01T00:00:00Z 41 | 42 | 43 | 44 | e91577092351461d7800ef7b870a2bcf 45 | 44e426344f15bd7621ca2f9ffea70d29752dccda 46 | 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 47 | 48 | 49 | i_will_be_accessed.txt 50 | r 51 | 12 52 | 123458 53 | 2013-01-01T00:00:00Z 54 | 2013-01-01T00:00:00Z 55 | 2013-01-01T00:00:00Z 56 | 57 | 58 | 59 | f3a8f17b47f1fe899805c25b8f5a26b0 60 | b439e832cb243e18f6bfc21ca0150de3ef4c6f27 61 | 3c4ace963a2a069a92d8abaa7c77d88e118758eff65c5180fed6534e75889bf3 62 | 63 | 64 | -------------------------------------------------------------------------------- /samples/difference_test_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | Sample 9 | 10 | 11 | vi 12 | 8.0 13 | 14 | vi post.xml 15 | 16 | 17 | 18 | 19 | i_am_new.txt 20 | r 21 | 40 22 | 123459 23 | 2013-05-16T21:01:00Z 24 | 2013-05-16T21:01:00Z 25 | 2013-05-16T21:01:00Z 26 | 27 | 28 | 29 | 55b228770d96e4dbd1b218f4f07d8aae 30 | 8632a06e80eefbaf702ac6a44e633937e2be7186 31 | 77f380ce33609d55f8b874833c4495282fdf54869912822cde05c68090a60a18 32 | 33 | 34 | i_will_be_modified.txt 35 | r 36 | 23 37 | 123457 38 | 2013-05-16T20:59:00Z 39 | 2013-05-16T20:59:00Z 40 | 2013-05-16T20:59:00Z 41 | 42 | 43 | 44 | a6d9ebd95bcd3602b757ea63f9dd02ab 45 
| 1e087807678a33ebbde2624341184c14303675a3 46 | e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281 47 | 48 | 49 | i_will_be_accessed.txt 50 | r 51 | 12 52 | 123458 53 | 2013-01-01T00:00:00Z 54 | 2013-01-01T00:00:00Z 55 | 2013-05-16T21:00:00Z 56 | 57 | 58 | 59 | f3a8f17b47f1fe899805c25b8f5a26b0 60 | b439e832cb243e18f6bfc21ca0150de3ef4c6f27 61 | 3c4ace963a2a069a92d8abaa7c77d88e118758eff65c5180fed6534e75889bf3 62 | 63 | 64 | -------------------------------------------------------------------------------- /samples/fileobjectexample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /samples/simple.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | Hash List 8 | 9 | 10 | MD5DEEP 11 | 4.0.0_beta2-002 12 | 13 | GCC 4.2 14 | 15 | 16 | Darwin 17 | 11.3.0 18 | Darwin Kernel Version 11.3.0: Thu Jan 12 18:47:41 PST 2012; root:xnu-1699.24.23~1/RELEASE_X86_64 19 | Mucha.local 20 | x86_64 21 | md5deep -dp512 /Users/simsong/uploads/einstein template.jpg /Users/simsong/uploads/image1.jpg /Users/simsong/uploads/image2.jpg /Users/simsong/uploads/image3.jpg 22 | 502 23 | 2012-02-23T16:35:11Z 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | /Users/simsong/uploads/image2.jpg 37 | 12833 38 | 2012-02-22T03:53:05Z 39 | 2012-02-22T03:53:05Z 40 | 2012-02-23T16:34:27Z 41 | d7ced55e7d7f5b9995fc3cbac7942155 42 | 43 | 44 | /Users/simsong/uploads/image1.jpg 45 | 12551 46 | 2012-02-22T03:53:54Z 47 | 2012-02-22T03:53:54Z 48 | 2012-02-23T16:34:27Z 49 | 3bb144b5abc65312099f79caa69ff94f 50 | 51 | 52 | /Users/simsong/uploads/image3.jpg 53 | 12545 54 | 2012-02-22T03:55:38Z 55 | 2012-02-22T03:55:38Z 56 | 2012-02-23T16:34:27Z 57 | 6377d89ab3165a3fe24b390b513f47d7 58 | 59 | 60 | /Users/simsong/uploads/einstein template.jpg 61 | 43819 62 | 2012-02-22T03:54:19Z 63 | 2012-02-22T03:54:19Z 64 | 2012-02-23T16:34:27Z 65 | 702da00183448a42f5a861c95973f4f3 66 | 67 | 68 | 0.008982 69 | 0.003041 70 | 1069056 71 | 391 72 | 0 73 | 0 74 | 0 75 | 0 76 | 0.006578 77 | 78 | 79 | -------------------------------------------------------------------------------- /samples/tcpflow_zip_generic_header.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | Sample 9 | 10 | 11 | vi 12 | 8.0 13 | 14 | vi tcpflow_zip_generic_header.xml 15 | 16 | 17 | 18 | ../../tcpflow/tests/airsnort-linux-browser_page_load.pcap 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 205.134.188.162.00080-008.030.072.112.38568 29 | 4135 30 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = dfxml 3 | version = attr: dfxml.__version__ 4 | url = https://github.com/dfxml-working-group/dfxml_python 5 | classifiers = 6 | License :: Public Domain 7 | Programming Language :: Python :: 3 8 | 9 | [options] 10 | include_package_data = true 11 | packages = find: 12 | python_requires = >=3.9 13 | 14 | # See CONTRIBUTE.md before adding a console script line. 
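# Sketch: after an editable install (pip install -e .), each console_scripts
# entry below becomes a command on PATH; the file names here are placeholders.
#
#   walk_to_dfxml > current.dfxml
#   make_differential_dfxml previous.dfxml current.dfxml > deltas.dfxml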
15 | [options.entry_points] 16 | console_scripts = 17 | make_differential_dfxml = dfxml.bin.make_differential_dfxml:main 18 | walk_to_dfxml = dfxml.bin.walk_to_dfxml:main 19 | 20 | [options.package_data] 21 | dfxml = py.typed 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed in whole or in part by employees of the 4 | # Federal Government in the course of their official duties, and with 5 | # other Federal assistance. Pursuant to title 17 Section 105 of the 6 | # United States Code portions of this software authored by Federal 7 | # employees are not subject to copyright protection within the United 8 | # States. For portions not authored by Federal employees, the Federal 9 | # Government has been granted unlimited rights, and no claim to 10 | # copyright is made. The Federal Government assumes no responsibility 11 | # whatsoever for its use by other parties, and makes no guarantees, 12 | # expressed or implied, about its quality, reliability, or any other 13 | # characteristic. 14 | # 15 | # We would appreciate acknowledgement if the software is used. 16 | 17 | import setuptools 18 | 19 | setuptools.setup() 20 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # Bash selection is described in the top-level Makefile. 15 | ifeq ($(shell basename $(SHELL)),sh) 16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 17 | endif 18 | 19 | top_srcdir := $(shell cd .. ; pwd) 20 | 21 | PYTHON3 ?= python3 22 | ifeq ($(PYTHON3),) 23 | $(error python3 not found) 24 | endif 25 | 26 | all: \ 27 | all-make_differential_dfxml \ 28 | all-walk_to_dfxml 29 | 30 | .PHONY: \ 31 | all-make_differential_dfxml \ 32 | all-walk_to_dfxml \ 33 | check-mypy \ 34 | check-mypy-stricter 35 | 36 | all-make_differential_dfxml: \ 37 | .venv.done.log 38 | $(MAKE) \ 39 | --directory make_differential_dfxml 40 | 41 | all-walk_to_dfxml: \ 42 | .venv.done.log 43 | $(MAKE) \ 44 | --directory walk_to_dfxml 45 | 46 | .venv.done.log: \ 47 | $(top_srcdir)/setup.cfg \ 48 | $(top_srcdir)/setup.py \ 49 | requirements.txt 50 | rm -rf venv 51 | $(PYTHON3) -m venv \ 52 | venv 53 | source venv/bin/activate \ 54 | && pip install \ 55 | --upgrade \ 56 | pip \ 57 | setuptools 58 | source venv/bin/activate \ 59 | && cd $(top_srcdir) \ 60 | && pip install \ 61 | --editable \ 62 | . 
63 | source venv/bin/activate \ 64 | && pip install \ 65 | --requirement requirements.txt 66 | touch $@ 67 | 68 | check: \ 69 | all-make_differential_dfxml \ 70 | all-walk_to_dfxml \ 71 | check-mypy 72 | source venv/bin/activate \ 73 | && pytest \ 74 | --log-level=DEBUG 75 | 76 | #TODO - Type-checking would best be done against all of ../dfxml, when someone finds some time to do so. 77 | check-mypy: \ 78 | check-mypy-stricter 79 | source venv/bin/activate \ 80 | && mypy \ 81 | ../dfxml/bin/idifference.py \ 82 | ../dfxml/bin/summarize_differential_dfxml.py \ 83 | ../dfxml/__init__.py \ 84 | ../dfxml/fiwalk.py \ 85 | ../dfxml/objects.py \ 86 | misc_bin_tests \ 87 | misc_object_tests 88 | @echo "INFO:tests/Makefile:mypy is currently run against a subset of the dfxml directory." >&2 89 | 90 | #TODO - Strict type-checking is another long-term goal, likewise eventually done against all of ../dfxml. 91 | check-mypy-stricter: \ 92 | .venv.done.log 93 | source venv/bin/activate \ 94 | && mypy \ 95 | ../demos/demo_fiwalk_diskimage.py \ 96 | ../dfxml/bin/idifference2.py \ 97 | ../dfxml/bin/make_differential_dfxml.py \ 98 | ../dfxml/bin/walk_to_dfxml.py \ 99 | make_differential_dfxml \ 100 | walk_to_dfxml \ 101 | *.py 102 | 103 | clean: 104 | @$(MAKE) \ 105 | --directory misc_object_tests \ 106 | clean 107 | @$(MAKE) \ 108 | --directory make_differential_dfxml \ 109 | clean 110 | @$(MAKE) \ 111 | --directory walk_to_dfxml \ 112 | clean 113 | @rm -f \ 114 | .venv.done.log 115 | @rm -rf \ 116 | venv 117 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Contents of this directory test the functionality of `dfxml` as an importable Python module. 2 | 3 | Running `make check` in this directory will build a Python virtual environment, install the top source directory into that virtual environment as a module, and then run further tests with `pytest`. 4 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/.gitignore: -------------------------------------------------------------------------------- 1 | *.dfxml 2 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # Bash selection is described in the top-level Makefile. 15 | ifeq ($(shell basename $(SHELL)),sh) 16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 17 | endif 18 | 19 | top_srcdir := $(shell cd ../.. 
; pwd) 20 | 21 | tests_srcdir := $(top_srcdir)/tests 22 | 23 | all: \ 24 | differential_dfxml_test_by_path_01.txt \ 25 | differential_dfxml_test_by_path_23.txt \ 26 | differential_dfxml_test_by_times_01.txt \ 27 | differential_dfxml_test_by_times_23.txt 28 | 29 | check: \ 30 | differential_dfxml_test_by_path_01.txt \ 31 | differential_dfxml_test_by_path_23.txt \ 32 | differential_dfxml_test_by_times_01.txt \ 33 | differential_dfxml_test_by_times_23.txt 34 | source $(tests_srcdir)/venv/bin/activate \ 35 | && pytest \ 36 | --log-level=DEBUG 37 | 38 | clean: 39 | @rm -f \ 40 | *.dfxml \ 41 | *.txt 42 | @#Restore Git-tracked version of these files, so deletions aren't accidentally committed. 43 | @git checkout \ 44 | -- \ 45 | differential_dfxml_test_by_path_01.txt \ 46 | differential_dfxml_test_by_path_23.txt \ 47 | differential_dfxml_test_by_times_01.txt \ 48 | differential_dfxml_test_by_times_23.txt \ 49 | || true 50 | 51 | differential_dfxml_test_01.dfxml: \ 52 | $(tests_srcdir)/.venv.done.log \ 53 | $(top_srcdir)/dfxml/bin/make_differential_dfxml.py \ 54 | $(top_srcdir)/samples/difference_test_0.xml \ 55 | $(top_srcdir)/samples/difference_test_1.xml 56 | rm -f \ 57 | __$@ \ 58 | _$@ 59 | source $(tests_srcdir)/venv/bin/activate \ 60 | && make_differential_dfxml \ 61 | $(top_srcdir)/samples/difference_test_0.xml \ 62 | $(top_srcdir)/samples/difference_test_1.xml \ 63 | > __$@ 64 | xmllint \ 65 | --format \ 66 | __$@ \ 67 | > _$@ 68 | rm __$@ 69 | mv _$@ $@ 70 | 71 | differential_dfxml_test_by_path_01.txt: \ 72 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 73 | differential_dfxml_test_01.dfxml 74 | source $(tests_srcdir)/venv/bin/activate \ 75 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 76 | --debug \ 77 | --sort-by path \ 78 | differential_dfxml_test_01.dfxml \ 79 | > _$@ 80 | mv _$@ $@ 81 | 82 | differential_dfxml_test_by_times_01.txt: \ 83 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 84 | differential_dfxml_test_01.dfxml 85 | source $(tests_srcdir)/venv/bin/activate \ 86 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 87 | --debug \ 88 | --sort-by times \ 89 | differential_dfxml_test_01.dfxml \ 90 | > _$@ 91 | mv _$@ $@ 92 | 93 | differential_dfxml_test_23.dfxml: \ 94 | $(tests_srcdir)/.venv.done.log \ 95 | $(top_srcdir)/dfxml/bin/make_differential_dfxml.py \ 96 | $(top_srcdir)/samples/difference_test_2.xml \ 97 | $(top_srcdir)/samples/difference_test_3.xml 98 | rm -f \ 99 | __$@ \ 100 | _$@ 101 | source $(tests_srcdir)/venv/bin/activate \ 102 | && make_differential_dfxml \ 103 | $(top_srcdir)/samples/difference_test_2.xml \ 104 | $(top_srcdir)/samples/difference_test_3.xml \ 105 | > __$@ 106 | xmllint \ 107 | --format \ 108 | __$@ \ 109 | > _$@ 110 | rm __$@ 111 | mv _$@ $@ 112 | 113 | differential_dfxml_test_by_path_23.txt: \ 114 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 115 | differential_dfxml_test_23.dfxml 116 | source $(tests_srcdir)/venv/bin/activate \ 117 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 118 | --debug \ 119 | --sort-by path \ 120 | differential_dfxml_test_23.dfxml \ 121 | > _$@ 122 | mv _$@ $@ 123 | 124 | differential_dfxml_test_by_times_23.txt: \ 125 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 126 | differential_dfxml_test_23.dfxml 127 | source $(tests_srcdir)/venv/bin/activate \ 128 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 129 | --debug \ 130 | --sort-by times \ 131 | differential_dfxml_test_23.dfxml \ 
132 | > _$@ 133 | mv _$@ $@ 134 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/README.md: -------------------------------------------------------------------------------- 1 | # `make_differential_dfxml` 2 | 3 | *Source*: [`../../dfxml/bin/make_differential_dfxml.py`](../../dfxml/bin/make_differential_dfxml.py) 4 | 5 | This command takes as input two DFXML files, and outputs a DFXML document showing differential annotations. Output is sent to `stdout`. 6 | 7 | This tool was introduced in [Nelson et al., DFRWS 2014](https://doi.org/10.1016/j.diin.2014.05.004). 8 | 9 | 10 | ## Usage 11 | 12 | ```bash 13 | make_differential_dfxml input_1.dfxml input_2.dfxml > deltas.dfxml 14 | ``` 15 | 16 | If one is using the [DFXML Objects module](../../dfxml/objects.py), the differentially-annotated DFXML can be analyzed by referring to each encountered `FileObject`'s property `.annos`. See e.g. [`summarize_differential_dfxml.py`](../../dfxml/bin/summarize_differential_dfxml.py)'s output for [changes scoped to single file systems](differential_dfxml_test_by_path_01.txt), or [changes that cross file systems](differential_dfxml_test_by_times_23.txt). 17 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_path_01.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2013-05-16T21:01:00Z i_am_new.txt 40 6 | 7 | 8 | Deleted files: 9 | ============== 10 | 2013-01-01T00:00:00Z i_will_be_deleted.txt 20 11 | 12 | 13 | Renamed files: 14 | ============== 15 | 16 | 17 | Files with modified contents: 18 | ============================= 19 | 20 | i_will_be_modified.txt mtime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 21 | i_will_be_modified.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 22 | i_will_be_modified.txt ctime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 23 | i_will_be_modified.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=22)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=23)]) 24 | i_will_be_modified.txt filesize changed, 22 23 25 | i_will_be_modified.txt md5 changed, e91577092351461d7800ef7b870a2bcf a6d9ebd95bcd3602b757ea63f9dd02ab 26 | i_will_be_modified.txt sha1 changed, 44e426344f15bd7621ca2f9ffea70d29752dccda 1e087807678a33ebbde2624341184c14303675a3 27 | i_will_be_modified.txt sha256 changed, 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281 28 | 29 | 30 | Files with changed properties: 31 | ============================== 32 | 33 | i_will_be_accessed.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T21:00:00Z 34 | i_will_be_accessed.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=34512, file_offset=0, len=12)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=12)]) 35 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_path_23.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2007-08-09T12:34:58Z CHANGE___content_and_mtime 4097 6 | 2007-08-09T12:35:00Z CHANGE___erased___replaced_by_other_partition_file 4097 7 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4098 8 | 2007-08-09T12:34:56Z 
CHANGE___move_from_P1G_to_P2G 4097 9 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 10 | 2007-08-09T12:34:57Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 11 | 2007-08-09T12:34:59Z CHANGE___new_file 4097 12 | 2007-08-09T05:34:56-07:00 CHANGE___timestamp_changes_format_only 4097 13 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 14 | 2007-08-09T12:34:56Z NO_CHANGE 4097 15 | 2007-08-09T12:34:56Z _CHANGE___move_from_P1M_to_P3G___change_name 4097 16 | 2007-08-09T12:34:56Z _CHANGE___renamed 4097 17 | 18 | 19 | Deleted files: 20 | ============== 21 | 2007-08-09T12:34:56Z CHANGE___content_and_mtime 4097 22 | 2007-08-09T12:34:56Z CHANGE___erased 4097 23 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_other_partition_file 4097 24 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4097 25 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097 26 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 27 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 28 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_name 4097 29 | 2007-08-09T12:34:56Z CHANGE___moved_to_erased_P1G_file 4097 30 | 2007-08-09T12:34:56Z CHANGE___renamed 4097 31 | 2007-08-09T12:34:56Z CHANGE___renamed_to_erased_sibling___change_checksum_and_mtime 4097 32 | 2007-08-09T12:34:56Z CHANGE___timestamp_changes_format_only 4097 33 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 34 | 2007-08-09T12:34:56Z NO_CHANGE 4097 35 | 36 | 37 | Renamed files: 38 | ============== 39 | 40 | 41 | Files with modified contents: 42 | ============================= 43 | 44 | 45 | Files with changed properties: 46 | ============================== 47 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_times_01.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2013-05-16T21:01:00Z i_am_new.txt 40 6 | 7 | 8 | Deleted files: 9 | ============== 10 | 2013-01-01T00:00:00Z i_will_be_deleted.txt 20 11 | 12 | 13 | Renamed files: 14 | ============== 15 | 16 | 17 | Files with modified contents: 18 | ============================= 19 | 20 | i_will_be_modified.txt mtime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 21 | i_will_be_modified.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 22 | i_will_be_modified.txt ctime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 23 | i_will_be_modified.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=22)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=23)]) 24 | i_will_be_modified.txt filesize changed, 22 23 25 | i_will_be_modified.txt md5 changed, e91577092351461d7800ef7b870a2bcf a6d9ebd95bcd3602b757ea63f9dd02ab 26 | i_will_be_modified.txt sha1 changed, 44e426344f15bd7621ca2f9ffea70d29752dccda 1e087807678a33ebbde2624341184c14303675a3 27 | i_will_be_modified.txt sha256 changed, 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281 28 | 29 | 30 | Files with changed properties: 31 | ============================== 32 | 33 | i_will_be_accessed.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T21:00:00Z 34 | i_will_be_accessed.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=34512, file_offset=0, len=12)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=12)]) 35 | 
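The plain-text summaries in this directory are generated by `summarize_differential_dfxml.py` from the differential DFXML. As the README above notes, the same annotations can also be tallied directly through each encountered `FileObject`'s `.annos` property. The following is a minimal illustrative sketch of that kind of analysis, not a replacement for the summarize script: it assumes `Objects.iterparse` (the streaming interface used by the bundled scripts) yields each `FileObject` once as an `(event, object)` pair, and that differential annotations appear as short strings (for example "new" or "deleted") whose exact vocabulary is defined by `objects.py`, not here.

```python
#!/usr/bin/env python3
"""Tally differential annotations in DFXML from make_differential_dfxml (illustrative sketch)."""

import collections
import sys

import dfxml.objects as Objects


def tally_annotations(path):
    # Map each annotation string to the set of affected file names.
    tally = collections.defaultdict(set)
    for (event, obj) in Objects.iterparse(path):
        if not isinstance(obj, Objects.FileObject):
            continue
        for anno in obj.annos:
            tally[anno].add(obj.filename)
    return tally


if __name__ == "__main__":
    for (anno, filenames) in sorted(tally_annotations(sys.argv[1]).items()):
        print("%s\t%d" % (anno, len(filenames)))
```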
-------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_times_23.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2007-08-09T05:34:56-07:00 CHANGE___timestamp_changes_format_only 4097 6 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4098 7 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097 8 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 9 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 10 | 2007-08-09T12:34:56Z NO_CHANGE 4097 11 | 2007-08-09T12:34:56Z _CHANGE___move_from_P1M_to_P3G___change_name 4097 12 | 2007-08-09T12:34:56Z _CHANGE___renamed 4097 13 | 2007-08-09T12:34:57Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 14 | 2007-08-09T12:34:58Z CHANGE___content_and_mtime 4097 15 | 2007-08-09T12:34:59Z CHANGE___new_file 4097 16 | 2007-08-09T12:35:00Z CHANGE___erased___replaced_by_other_partition_file 4097 17 | 18 | 19 | Deleted files: 20 | ============== 21 | 2007-08-09T12:34:56Z CHANGE___content_and_mtime 4097 22 | 2007-08-09T12:34:56Z CHANGE___erased 4097 23 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_other_partition_file 4097 24 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4097 25 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097 26 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 27 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 28 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_name 4097 29 | 2007-08-09T12:34:56Z CHANGE___moved_to_erased_P1G_file 4097 30 | 2007-08-09T12:34:56Z CHANGE___renamed 4097 31 | 2007-08-09T12:34:56Z CHANGE___renamed_to_erased_sibling___change_checksum_and_mtime 4097 32 | 2007-08-09T12:34:56Z CHANGE___timestamp_changes_format_only 4097 33 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 34 | 2007-08-09T12:34:56Z NO_CHANGE 4097 35 | 36 | 37 | Renamed files: 38 | ============== 39 | 40 | 41 | Files with modified contents: 42 | ============================= 43 | 44 | 45 | Files with changed properties: 46 | ============================== 47 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/README.md: -------------------------------------------------------------------------------- 1 | The tests in this directory needed to be moved to address a new behavior in a deployed static type checker. The intent is to empty this directory, moving its tests to appropriate locations under `/tests`. 2 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/_pick_pythons.sh: -------------------------------------------------------------------------------- 1 | 2 | # This software was developed at the National Institute of Standards 3 | # and Technology in whole or in part by employees of the Federal 4 | # Government in the course of their official duties. Pursuant to 5 | # title 17 Section 105 of the United States Code portions of this 6 | # software authored by NIST employees are not subject to copyright 7 | # protection and are in the public domain. For portions not authored 8 | # by NIST employees, NIST has been granted unlimited rights. NIST 9 | # assumes no responsibility whatsoever for its use by other parties, 10 | # and makes no guarantees, expressed or implied, about its quality, 11 | # reliability, or any other characteristic. 
12 | # 13 | # We would appreciate acknowledgement if the software is used. 14 | 15 | # This script is meant to be included in Bash scripts that need a Python v2 and v3. 16 | # An autotool configure script would also suffice. 17 | # The 'or echo' statements keep the subshell from returning an error exit status on missing a Python version. 18 | # 19 | # This script defines two variables, PYTHON2 and PYTHON3, providing the highest-available Python binary for each major version. 20 | # 21 | 22 | PYTHON2=`which python2` 23 | 24 | PYTHON3=`which python3.6 2>/dev/null || echo` 25 | if [ -z "$PYTHON3" ]; then 26 | PYTHON3=`which python3.5 2>/dev/null || echo` 27 | if [ -z "$PYTHON3" ]; then 28 | PYTHON3=`which python3.4 2>/dev/null || echo` 29 | if [ -z "$PYTHON3" ]; then 30 | PYTHON3=`which python3 2>/dev/null || echo` 31 | if [ -z "$PYTHON3" ]; then 32 | echo "Error: Could not find a python3 executable." >&2 33 | exit 1 34 | fi 35 | fi 36 | fi 37 | fi 38 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/_sane_defaults.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SCRIPT_DIR="$1" 4 | 5 | # Guarantee sane defaults 6 | if [ -z ${TEST_DIR} ]; 7 | then 8 | TEST_DIR="${SCRIPT_DIR}" 9 | fi 10 | 11 | if [ -z ${TOOL_DIR} ]; 12 | then 13 | TOOL_DIR="$(dirname ${SCRIPT_DIR})" 14 | fi 15 | 16 | if [ -z ${SAMPLE_DIR} ]; 17 | then 18 | SAMPLE_DIR="$(dirname $(dirname ${SCRIPT_DIR}))/samples" 19 | fi 20 | 21 | if [ -z ${PYTHONPATH} ]; 22 | then 23 | PYTHONPATH="$(dirname $(dirname ${SCRIPT_DIR}))" 24 | export PYTHONPATH; 25 | fi 26 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/iexport_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from dfxml.bin.iexport import * 5 | 6 | 7 | def test_iexport(): 8 | r1 = Run(0, 1000) 9 | r2 = Run(50, 60) 10 | assert r1.intersects_run(r2) 11 | assert r2.intersects_run(r1) 12 | 13 | disk = RunDB(0, 1000) 14 | print(disk) 15 | disk.remove(Run(50, 60)) 16 | disk.remove(Run(0, 10)) 17 | disk.remove(Run(40, 20)) 18 | print(disk) 19 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/paths.sh: -------------------------------------------------------------------------------- 1 | source tests/_pick_pythons.sh 2 | 3 | #DEMO_DIR=../demos 4 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_cat_fileobjects.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 
15 | 16 | # Determine script location 17 | SCRIPT="$(realpath $0)" 18 | SCRIPT_DIR="$(dirname ${SCRIPT})" 19 | 20 | # Guarantee sane defaults 21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR} 22 | 23 | # Choose python interpreter 24 | . ${TEST_DIR}/_pick_pythons.sh 25 | 26 | XMLLINT=`which xmllint` 27 | 28 | # Halt on error 29 | set -e 30 | # Display all executed commands 31 | set -x 32 | 33 | #NOTE: Python2's ETree does not understand the "unicode" output encoding. 34 | #"$PYTHON2" cat_fileobjects.py ../${SAMPLE_DIR}/simple.xml 35 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py --debug ${SAMPLE_DIR}/simple.xml >cat_test_nocache.dfxml 36 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py --debug --cache ${SAMPLE_DIR}/simple.xml >cat_test_cache.dfxml 37 | 38 | #This checks that the XML structure wasn't changed by cache cleaning. Only the tail is hashed because the head contains metadata. 39 | subj0="x$(tail -n 10 cat_test_nocache.dfxml | openssl dgst -sha1)" 40 | subj1="x$(tail -n 10 cat_test_cache.dfxml | openssl dgst -sha1)" 41 | test "$subj0" != "x" 42 | test "$subj1" != "x" 43 | test "$subj0" == "$subj1" 44 | 45 | if [ -x "$XMLLINT" ]; then 46 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py ${SAMPLE_DIR}/simple.xml | "$XMLLINT" - 47 | else 48 | echo "Warning: xmllint not found. Skipped check for if generated DFXML is valid XML." >&2 49 | fi 50 | 51 | test $(grep '&2 50 | # "$PYTHON2" ../dfxml_tool.py $x ../src > dfxml_tool_p2_${iter}.dfxml 51 | "$PYTHON3" ${TOOL_DIR}/dfxml_tool.py $x -- ../samples > dfxml_tool_p3_${iter}.dfxml 52 | iter=$(($iter+1)) 53 | done 54 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_hfsj.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | hdiutil create -size 10m -fs HFS+J -nospotlight -attach -volname image -ov -layout NONE \ 4 | -imagekey diskimage-class=CRawDiskImage image.dmg 5 | echo "This is file 1 - snarf" > /Volumes/image/file1.txt 6 | echo "This is file 2 - snarf" > /Volumes/image/file2.txt 7 | sync 8 | hdiutil detach /Volumes/image 9 | cp image.dmg image.gen0.dmg 10 | echo "look for file1 and file2:" 11 | strings -o image.dmg | grep snarf 12 | echo "mount the disk and overwrite the contents of file2" 13 | hdiutil attach image.dmg 14 | echo "New file 1 contents - snarf" | dd of=/Volumes/image/file1.txt 15 | echo "" 16 | echo "===file1.txt===" 17 | cat /Volumes/image/file1.txt 18 | echo "" 19 | echo "===file2.txt===" 20 | cat /Volumes/image/file2.txt 21 | echo "" 22 | hdiutil detach /Volumes/image 23 | cp image.dmg image.gen1.dmg 24 | strings -o image.dmg | grep snarf 25 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_idifference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Test script. Evaluates idifference.py on a sequence of disk images. 
4 | """ 5 | 6 | import os 7 | import subprocess 8 | import sys 9 | 10 | if __name__ == "__main__": 11 | from optparse import OptionParser 12 | 13 | parser = OptionParser() 14 | parser.usage = "%prog [options] dfxml_sequence_list.txt output_zip" 15 | parser.add_option( 16 | "-p", "--prefix", help="prepend prefix to every test image path", dest="prefix" 17 | ) 18 | parser.add_option( 19 | "-v", 20 | "--verbose", 21 | help="verbose output: print call to difference program", 22 | dest="verbose", 23 | action="store_true", 24 | ) 25 | parser.add_option( 26 | "-d", 27 | "--diff-program", 28 | help="use this path to the diff program", 29 | dest="diff_program", 30 | ) 31 | # parser.add_option("-z", "--zap", help="Zap output directory (erases if present)" dest="zap") 32 | 33 | (options, args) = parser.parse_args() 34 | if len(args) < 2: 35 | parser.print_help() 36 | sys.exit(1) 37 | 38 | prefix = "" 39 | if options.prefix: 40 | prefix = options.prefix 41 | # Convert file contents to list 42 | files = [prefix + x.strip() for x in open(args[0], "r")] 43 | 44 | # Verify we'll run at least one difference 45 | if len(files) < 2: 46 | sys.stderr.write("Differencing requires 2 or more files.\n") 47 | 48 | # Check that the list lines actually point to files 49 | for f in files: 50 | assert os.path.isfile(f) 51 | 52 | # Run differences 53 | if options.diff_program: 54 | diff_program = options.diff_program 55 | else: 56 | diff_program = os.path.dirname(sys.argv[0]) + "/idifference.py" 57 | 58 | diff_command = [ 59 | "python", 60 | diff_program, 61 | "--zipfile=" + args[1], 62 | "--imagefile", 63 | ] + files 64 | if options.verbose: 65 | print(" ".join(diff_command)) 66 | subprocess.call(diff_command) 67 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_idifference_to_dfxml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # Determine script location 17 | SCRIPT="$(realpath $0)" 18 | SCRIPT_DIR="$(dirname ${SCRIPT})" 19 | 20 | # Guarantee sane defaults 21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR} 22 | 23 | # Choose python interpreter 24 | . ${TEST_DIR}/_pick_pythons.sh 25 | 26 | XMLLINT=`which xmllint` 27 | 28 | # Halt on error 29 | set -e 30 | # Display all executed commands 31 | set -x 32 | 33 | #Ensure the non-XML output doesn't fail, first. 34 | "$PYTHON3" ${TOOL_DIR}/idifference.py --summary ${SAMPLE_DIR}/difference_test_[01].xml > idifference_test.txt 35 | 36 | #Generate XML output. 37 | "$PYTHON3" ${TOOL_DIR}/idifference.py --xml idifference_test.dfxml ${SAMPLE_DIR}/difference_test_[01].xml 38 | if [ ! -x "$XMLLINT" ]; then 39 | echo "Error: xmllint not found. Can't check for whether generated DFXML is valid XML. 
Install libxml2 (or possibly xmlutils) to complete these unit tests." >&2 40 | exit 1 41 | fi 42 | 43 | "$XMLLINT" --format idifference_test.dfxml >idifference_test_formatted.dfxml 44 | 45 | _check_counts() { 46 | #Check expected number of fileobjects appears 47 | test 4 == $(grep ' idifference_test_cat.dfxml 59 | "$XMLLINT" --format idifference_test_cat.dfxml >idifference_test_cat_formatted.dfxml 60 | _check_counts idifference_test_cat_formatted.dfxml 61 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_mac_timelines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # Determine script location 17 | SCRIPT="$(realpath $0)" 18 | SCRIPT_DIR="$(dirname ${SCRIPT})" 19 | 20 | # Guarantee sane defaults 21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR} 22 | 23 | # Choose python interpreter 24 | source ${TEST_DIR}/_pick_pythons.sh 25 | 26 | # Halt on error 27 | set -e 28 | # Display all executed commands 29 | set -x 30 | 31 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p2.txt 32 | test 12 == $(cat demo_mac_timeline_simple_p2.txt | wc -l) 33 | 34 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p3.txt 35 | test 12 == $(cat demo_mac_timeline_simple_p3.txt | wc -l) 36 | 37 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p2.txt 38 | test 12 == $(cat demo_mac_timeline_iter_simple_p2.txt | wc -l) 39 | 40 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p3.txt 41 | test 12 == $(cat demo_mac_timeline_iter_simple_p3.txt | wc -l) 42 | 43 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/simple.xml >demo_mac_timeline_objects_simple_p2.txt 44 | test 12 == $(cat demo_mac_timeline_iter_simple_p2.txt | wc -l) 45 | 46 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/simple.xml >demo_mac_timeline_objects_simple_p3.txt 47 | test 12 == $(cat demo_mac_timeline_iter_simple_p3.txt | wc -l) 48 | 49 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline.py ../samples/difference_test_1.xml >demo_mac_timeline_dt1.txt 50 | test 9 == $(cat demo_mac_timeline_dt1.txt | wc -l) 51 | 52 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/difference_test_1.xml >demo_mac_timeline_iter_dt1.txt 53 | test 9 == $(cat demo_mac_timeline_iter_dt1.txt | wc -l) 54 | 55 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/difference_test_1.xml >demo_mac_timeline_objects_dt1.txt 56 | test 9 == $(cat demo_mac_timeline_objects_dt1.txt | wc -l) 57 | -------------------------------------------------------------------------------- 
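The timeline checks above only count lines emitted by the demo scripts; the demos themselves define the exact output format that those line counts assert against. For orientation, here is a minimal sketch of the underlying idea, assuming `Objects.iterparse` yields each `FileObject` once and that the timestamp properties stringify to their ISO-8601 form (as the expected-output listings elsewhere in this test tree suggest). It is not a substitute for the demos exercised above.

```python
#!/usr/bin/env python3
"""Print a rough MAC timeline from a DFXML file (illustrative sketch)."""

import sys

import dfxml.objects as Objects


def main(path):
    events = []
    for (event, obj) in Objects.iterparse(path):
        if not isinstance(obj, Objects.FileObject):
            continue
        for (letter, timestamp) in [("m", obj.mtime), ("a", obj.atime), ("c", obj.ctime)]:
            if timestamp is None:
                continue
            # Sort on the string form; ISO-8601 timestamps in a single time zone
            # representation sort chronologically.
            events.append((str(timestamp), letter, obj.filename or ""))
    for (timestamp, letter, filename) in sorted(events):
        print("%s\t%s\t%s" % (timestamp, letter, filename))


if __name__ == "__main__":
    main(sys.argv[1])
```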
/tests/misc_bin_tests/test_redact.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | /bin/rm -f testdisk.dmg redact.cfg 3 | hdiutil create -size 1m -fs MS-DOS -nospotlight -attach -volname testdisk testdisk.dmg 4 | echo "This is the zero file. FILE0001." > /Volumes/TESTDISK/file0.txt 5 | echo "This is the first file. FILE0001." > /Volumes/TESTDISK/file1.txt 6 | echo "This is the second file. FILE0002." > /Volumes/TESTDISK/file2.txt 7 | echo "This is the third file. FILE0003." > /Volumes/TESTDISK/file3.txt 8 | echo "This is the fourth file. FILE0004." > /Volumes/TESTDISK/file4.txt 9 | echo "This is the fifth file. FILE0005." > /Volumes/TESTDISK/file5.txt 10 | echo "This is the dixth file. FILE0006." > /Volumes/TESTDISK/file6.txt 11 | hdiutil detach /Volumes/TESTDISK 12 | cat > redact.cfg < None: 30 | dobj = Objects.DFXMLObject() 31 | fobj = Objects.FileObject() 32 | dobj.append(fobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | fobj_reconst = dobj_reconst.files[0] 38 | assert fobj == fobj_reconst 39 | except: 40 | _logger.debug("tmp_filename = %r." % tmp_filename) 41 | raise 42 | os.remove(tmp_filename) 43 | 44 | 45 | def test_blank_file_object_filename() -> None: 46 | dobj = Objects.DFXMLObject() 47 | fobj = Objects.FileObject() 48 | dobj.append(fobj) 49 | 50 | fobj.filename = "" 51 | 52 | # Do file I/O round trip. 53 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 54 | try: 55 | fobj_reconst = dobj_reconst.files[0] 56 | assert fobj == fobj_reconst 57 | except: 58 | _logger.debug("tmp_filename = %r." % tmp_filename) 59 | raise 60 | os.remove(tmp_filename) 61 | -------------------------------------------------------------------------------- /tests/misc_object_tests/LibraryObject_read_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology by employees of the Federal Government in the course 3 | # of their official duties. Pursuant to title 17 Section 105 of the 4 | # United States Code this software is not subject to copyright 5 | # protection and is in the public domain. NIST assumes no 6 | # responsibility whatsoever for its use by other parties, and makes 7 | # no guarantees, expressed or implied, about its quality, 8 | # reliability, or any other characteristic. 9 | # 10 | # We would appreciate acknowledgement if the software is used. 11 | 12 | """ 13 | Run test against DFXML file generated by the _write counterpart script. 14 | """ 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import dfxml 23 | import dfxml.objects as Objects 24 | 25 | if __name__ == "__main__": 26 | logging.basicConfig(level=logging.DEBUG) 27 | _logger = logging.getLogger(os.path.basename(__file__)) 28 | 29 | dobj = Objects.parse(sys.argv[1]) 30 | 31 | _logger.debug("dobj.creator_libraries = %r." 
% dobj.creator_libraries) 32 | 33 | assert Objects.LibraryObject("libfoo", "1.2.3") in dobj.creator_libraries 34 | assert Objects.LibraryObject("libbaz", "4.5") in dobj.build_libraries 35 | 36 | found = None 37 | for library in dobj.creator_libraries: 38 | if library.relaxed_eq(Objects.LibraryObject("libfoo")): 39 | found = True 40 | break 41 | assert found 42 | -------------------------------------------------------------------------------- /tests/misc_object_tests/LibraryObject_write_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology by employees of the Federal Government in the course 3 | # of their official duties. Pursuant to title 17 Section 105 of the 4 | # United States Code this software is not subject to copyright 5 | # protection and is in the public domain. NIST assumes no 6 | # responsibility whatsoever for its use by other parties, and makes 7 | # no guarantees, expressed or implied, about its quality, 8 | # reliability, or any other characteristic. 9 | # 10 | # We would appreciate acknowledgement if the software is used. 11 | 12 | __version__ = "0.1.1" 13 | 14 | import logging 15 | import os 16 | import sys 17 | 18 | import dfxml 19 | import dfxml.objects as Objects 20 | 21 | if __name__ == "__main__": 22 | logging.basicConfig(level=logging.DEBUG) 23 | _logger = logging.getLogger(os.path.basename(__file__)) 24 | 25 | lobj = Objects.LibraryObject() 26 | 27 | _logger.debug("lobj = %r" % lobj) 28 | _logger.debug("lobj.to_Element() = %r" % lobj.to_Element()) 29 | 30 | dobj = Objects.DFXMLObject() 31 | dobj.add_creator_library(lobj) 32 | dobj.add_creator_library("libfoo", "1.2.3") 33 | dobj.add_creator_library( 34 | "Python", ".".join(map(str, sys.version_info[0:3])) 35 | ) # A bit of a bend, but gets the major version information out. 36 | try: 37 | dobj.add_creator_library("libbar", None) 38 | except ValueError: 39 | _logger.info("Caught expected value error from passing in incorrect types.") 40 | pass 41 | dobj.add_build_library("libbaz", "4.5") 42 | 43 | with open(sys.argv[1], "w") as fh: 44 | dobj.print_dfxml(fh) 45 | -------------------------------------------------------------------------------- /tests/misc_object_tests/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # Bash selection is described in the top-level Makefile. 17 | ifeq ($(shell basename $(SHELL)),sh) 18 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 19 | endif 20 | 21 | top_srcdir := $(shell cd ../.. 
; pwd) 22 | 23 | PYTHON3 ?= python3 24 | 25 | OBJECTS := $(top_srcdir)/dfxml/objects.py 26 | 27 | SAMPLES_DIR := $(top_srcdir)/samples 28 | 29 | TOOLS_DIR := $(top_srcdir)/dfxml/bin 30 | 31 | all: \ 32 | check 33 | 34 | .PHONY: \ 35 | check-diff_file_ignore-py3 \ 36 | check-versioned 37 | 38 | check: \ 39 | check-diff_file_ignore-py3 \ 40 | check-libraries-py3 \ 41 | check-versioned 42 | source $(top_srcdir)/tests/venv/bin/activate \ 43 | && $(PYTHON3) "$(TOOLS_DIR)/cat_partitions.py" \ 44 | 12345678:$(SAMPLES_DIR)/difference_test_0.xml \ 45 | 87654321:$(SAMPLES_DIR)/difference_test_1.xml \ 46 | > __cat_patterns_test.sh.dfxml 47 | xmllint \ 48 | --format \ 49 | __cat_patterns_test.sh.dfxml \ 50 | > _cat_patterns_test.sh.dfxml 51 | rm \ 52 | __cat_patterns_test.sh.dfxml 53 | mv \ 54 | _cat_patterns_test.sh.dfxml \ 55 | cat_patterns_test.sh.dfxml 56 | 57 | check-diff_file_ignore-py3: \ 58 | diff_file_ignore_sample-py3.dfxml \ 59 | diff_file_ignore_test.py 60 | source $(top_srcdir)/tests/venv/bin/activate \ 61 | && $(PYTHON3) diff_file_ignore_test.py --debug diff_file_ignore_sample-py3.dfxml 62 | 63 | check-libraries-py3: 64 | source $(top_srcdir)/tests/venv/bin/activate \ 65 | && $(PYTHON3) LibraryObject_write_test.py LibraryObject_py3_test.dfxml 66 | source $(top_srcdir)/tests/venv/bin/activate \ 67 | && $(PYTHON3) LibraryObject_read_test.py LibraryObject_py3_test.dfxml 68 | 69 | check-versioned: 70 | $(PYTHON3) $(OBJECTS) 71 | source $(top_srcdir)/tests/venv/bin/activate \ 72 | && $(PYTHON3) DFXMLObject_program_test.py \ 73 | $(SAMPLES_DIR)/difference_test_0.xml \ 74 | vi \ 75 | 8.0 76 | 77 | clean: 78 | rm -f difference_counts_test.py-d* 79 | rm -f cat_partitions_test.sh.dfxml 80 | rm -f diff_file_ignore_sample-py3.dfxml 81 | rm -f LibraryObject_py3_test.dfxml 82 | rm -f *~ 83 | 84 | 85 | diff_file_ignore_sample-py3.dfxml: \ 86 | $(OBJECTS) \ 87 | diff_file_ignore_sample_dfxml_test.py 88 | rm -f _$@ 89 | source $(top_srcdir)/tests/venv/bin/activate \ 90 | && $(PYTHON3) diff_file_ignore_sample_dfxml_test.py --debug _$@ 91 | mv _$@ $@ 92 | -------------------------------------------------------------------------------- /tests/misc_object_tests/Makefile_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed in whole or in part by employees of the 4 | # Federal Government in the course of their official duties, and with 5 | # other Federal assistance. Pursuant to title 17 Section 105 of the 6 | # United States Code portions of this software authored by Federal 7 | # employees are not subject to copyright protection within the United 8 | # States. For portions not authored by Federal employees, the Federal 9 | # Government has been granted unlimited rights, and no claim to 10 | # copyright is made. The Federal Government assumes no responsibility 11 | # whatsoever for its use by other parties, and makes no guarantees, 12 | # expressed or implied, about its quality, reliability, or any other 13 | # characteristic. 14 | # 15 | # We would appreciate acknowledgement if the software is used. 16 | 17 | # run 'make check' and 'make clean' under py.test 18 | 19 | # TODO Some of the tests in the Makefile are currently known to be redundantly called when using py.test. 
20 | 21 | import os 22 | import subprocess 23 | import sys 24 | 25 | 26 | def test_make_all(): 27 | if sys.platform == "win32": 28 | return # don't run on win32 29 | os.chdir(os.path.dirname(__file__)) 30 | subprocess.call(["make", "check"]) 31 | 32 | 33 | def test_make_clean(): 34 | if sys.platform == "win32": 35 | return # don't run on win32 36 | os.chdir(os.path.dirname(__file__)) 37 | subprocess.call(["make", "clean"]) 38 | -------------------------------------------------------------------------------- /tests/misc_object_tests/PartitionObject_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import libtest 23 | 24 | import dfxml.objects as Objects 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | 29 | def test_empty_object(): 30 | dobj = Objects.DFXMLObject() 31 | pobj = Objects.PartitionObject() 32 | dobj.append(pobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | pobj_reconst = dobj_reconst.partitions[0] 38 | except: 39 | _logger.debug("tmp_filename = %r." % tmp_filename) 40 | raise 41 | os.remove(tmp_filename) 42 | 43 | 44 | def test_cfreds_macwd_properties(): 45 | """ 46 | These were drawn from a CFReDS sample Mac disk image. 47 | """ 48 | dobj = Objects.DFXMLObject() 49 | pobj = Objects.PartitionObject() 50 | dobj.append(pobj) 51 | 52 | pobj.ptype_str = "Apple_Boot" 53 | pobj.partition_index = 8 54 | 55 | # Do file I/O round trip. 56 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 57 | try: 58 | pobj_reconst = dobj_reconst.partitions[0] 59 | assert pobj_reconst.ptype_str == "Apple_Boot" 60 | assert pobj_reconst.partition_index == "8" 61 | except: 62 | _logger.debug("tmp_filename = %r." % tmp_filename) 63 | raise 64 | os.remove(tmp_filename) 65 | 66 | 67 | def test_bsd_disklabel_properties(): 68 | """ 69 | These were drawn from a BSD Disk Label sample image. 70 | """ 71 | dobj = Objects.DFXMLObject() 72 | pobj_a = Objects.PartitionObject() 73 | pobj_c = Objects.PartitionObject() 74 | dobj.append(pobj_a) 75 | dobj.append(pobj_c) 76 | 77 | pobj_a.partition_index = "a" 78 | pobj_c.partition_index = "c" 79 | 80 | # Do file I/O round trip. 81 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 82 | try: 83 | pobj_a_reconst = dobj_reconst.partitions[0] 84 | pobj_c_reconst = dobj_reconst.partitions[1] 85 | assert pobj_a_reconst.partition_index == "a" 86 | assert pobj_c_reconst.partition_index == "c" 87 | except: 88 | _logger.debug("tmp_filename = %r." 
% tmp_filename) 89 | raise 90 | os.remove(tmp_filename) 91 | -------------------------------------------------------------------------------- /tests/misc_object_tests/PartitionSystemObject_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import libtest 23 | 24 | import dfxml.objects as Objects 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | 29 | def test_empty_object(): 30 | dobj = Objects.DFXMLObject() 31 | psobj = Objects.PartitionSystemObject() 32 | dobj.append(psobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | psobj_reconst = dobj_reconst.partition_systems[0] 38 | except: 39 | _logger.debug("tmp_filename = %r." % tmp_filename) 40 | raise 41 | os.remove(tmp_filename) 42 | 43 | 44 | def test_error_element_order(): 45 | dobj = Objects.DFXMLObject() 46 | psobj = Objects.PartitionSystemObject() 47 | fobj = Objects.FileObject() 48 | 49 | psobj.pstype_str = "gpt" 50 | 51 | # The error element should come after the fileobject stream. 52 | psobj.error = "foo" 53 | 54 | # Add a unallocated file object found floating in the partition system. 55 | fobj.alloc_inode = False 56 | fobj.alloc_name = False 57 | 58 | dobj.append(psobj) 59 | psobj.append(fobj) 60 | 61 | el = dobj.to_Element() 62 | 63 | # Confirm error comes after file stream. 64 | assert el[-1][0].tag.endswith("pstype_str") 65 | assert el[-1][-2].tag.endswith("fileobject") 66 | assert el[-1][-1].tag.endswith("error") 67 | 68 | # Do file I/O round trip. 69 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 70 | psobj_reconst = dobj_reconst.partition_systems[0] 71 | try: 72 | assert psobj_reconst.pstype_str == "gpt" 73 | assert psobj_reconst.error == "foo" 74 | except: 75 | _logger.debug("tmp_filename = %r." % tmp_filename) 76 | raise 77 | os.remove(tmp_filename) 78 | -------------------------------------------------------------------------------- /tests/misc_object_tests/README.md: -------------------------------------------------------------------------------- 1 | The tests in this directory needed to be moved to address a new behavior in a deployed static type checker. The intent is to empty this directory, moving its tests to appropriate locations under `/tests`. 
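Several of the object tests in this directory round-trip a `DFXMLObject` through a temporary file with `libtest.file_round_trip_dfxmlobject`, whose source is not reproduced in this listing. The sketch below shows what such a helper could look like using only interfaces demonstrated by the tests themselves (`DFXMLObject.print_dfxml` writing to a file handle, and `Objects.parse` reading a path); the actual implementation in `libtest.py` may differ.

```python
import os
import tempfile

import dfxml.objects as Objects


def file_round_trip_dfxmlobject(dobj):
    """Serialize dobj to a temporary file and parse it back (sketch).

    Returns (tmp_filename, reconstituted_object).  The caller removes
    tmp_filename once its assertions pass, as the tests in this
    directory do.
    """
    (fd, tmp_filename) = tempfile.mkstemp(suffix=".dfxml")
    with os.fdopen(fd, "w") as fh:
        dobj.print_dfxml(fh)
    dobj_reconst = Objects.parse(tmp_filename)
    return (tmp_filename, dobj_reconst)
```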
2 | -------------------------------------------------------------------------------- /tests/misc_object_tests/RegXMLObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import os 17 | import sys 18 | 19 | import diffing_CellObject_test 20 | import diffing_HiveObject_test 21 | 22 | import dfxml.objects as Objects 23 | 24 | 25 | def test_all(): 26 | ro = Objects.RegXMLObject(version="0.2") 27 | ho = Objects.HiveObject() 28 | ho.append(diffing_CellObject_test.get_co()) 29 | ho.append(diffing_CellObject_test.get_nco()) 30 | ro.append(diffing_HiveObject_test.get_ho()) 31 | ro.print_regxml() 32 | -------------------------------------------------------------------------------- /tests/misc_object_tests/VolumeObject_hash_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 
15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import dfxml.objects as Objects 23 | 24 | 25 | def test_all(): 26 | logging.basicConfig(level=logging.DEBUG) 27 | _logger = logging.getLogger(os.path.basename(__file__)) 28 | 29 | s0 = set() 30 | 31 | v0 = Objects.VolumeObject() 32 | v1 = Objects.VolumeObject() 33 | 34 | s0.add(v0) 35 | s0.add(v1) 36 | 37 | _logger.debug("len(s0) = %r" % len(s0)) 38 | assert len(s0) == 2 39 | 40 | f0 = Objects.FileObject() 41 | f1 = Objects.FileObject() 42 | f0.volume_object = v0 43 | f1.volume_object = v0 44 | 45 | s1 = set() 46 | s1.add(f0.volume_object) 47 | s1.add(f1.volume_object) 48 | _logger.debug("len(s1) = %r" % len(s1)) 49 | assert len(s1) == 1 50 | -------------------------------------------------------------------------------- /tests/misc_object_tests/VolumeObject_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import libtest 23 | 24 | import dfxml.objects as Objects 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | 29 | def test_empty_object(): 30 | dobj = Objects.DFXMLObject() 31 | vobj = Objects.VolumeObject() 32 | dobj.append(vobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | vobj_reconst = dobj_reconst.volumes[0] 38 | except: 39 | _logger.debug("tmp_filename = %r." % tmp_filename) 40 | raise 41 | os.remove(tmp_filename) 42 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml.objects as Objects 21 | 22 | 23 | def main(): 24 | dobj = Objects.DFXMLObject() 25 | dobj.diff_file_ignores.add("atime") 26 | dobj.diff_file_ignores.add("crtime") 27 | with open(args.out_dfxml, "w") as fh: 28 | dobj.print_dfxml(fh) 29 | 30 | 31 | if __name__ == "__main__": 32 | import argparse 33 | 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument("-d", "--debug", action="store_true") 36 | parser.add_argument("out_dfxml") 37 | args = parser.parse_args() 38 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 39 | main() 40 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diff_file_ignore_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml.objects as Objects 21 | 22 | 23 | def main(): 24 | dobj = Objects.parse(args.in_dfxml) 25 | assert not dobj is None 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | _logger.debug("dobj.diff_file_ignores = %r." % dobj.diff_file_ignores) 28 | assert "atime" in dobj.diff_file_ignores 29 | assert "crtime" in dobj.diff_file_ignores 30 | 31 | 32 | if __name__ == "__main__": 33 | import argparse 34 | 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument("-d", "--debug", action="store_true") 37 | parser.add_argument("in_dfxml") 38 | args = parser.parse_args() 39 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 40 | main() 41 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_ByteRuns_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
13 | 14 | __version__ = "0.1.1" 15 | 16 | import copy 17 | import logging 18 | import os 19 | import sys 20 | 21 | import dfxml.objects as Objects 22 | 23 | 24 | def get_brs(): 25 | logging.basicConfig(level=logging.DEBUG) 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | br = Objects.ByteRun() 29 | br.file_offset = 4128 30 | br.len = 133 31 | brs = Objects.ByteRuns() 32 | brs.append(br) 33 | return brs 34 | 35 | 36 | def test_all(): 37 | logging.basicConfig(level=logging.DEBUG) 38 | _logger = logging.getLogger(os.path.basename(__file__)) 39 | brs = get_brs() 40 | cbrs1 = copy.deepcopy(brs) 41 | 42 | _logger.debug("brs = %r." % brs) 43 | _logger.debug("cbrs1 = %r." % cbrs1) 44 | assert cbrs1 == brs 45 | 46 | cbrs1[0].file_offset += 133 47 | _logger.debug("cbrs1 = %r." % cbrs1) 48 | assert cbrs1 != brs 49 | 50 | cbrs2 = copy.deepcopy(brs) 51 | cbrs2[0].type = "unknown" 52 | assert cbrs2 != brs 53 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_CellObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import diffing_ByteRuns_test 21 | 22 | import dfxml.objects as Objects 23 | 24 | 25 | def get_co(): 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | co = Objects.CellObject() 28 | _logger.debug("co = %r" % co) 29 | _logger.debug("co.to_regxml() = %r" % co.to_regxml()) 30 | 31 | co.root = 1 32 | co.cellpath = "\\Deleted_root" 33 | co.basename = "Deleted_root" 34 | co.name_type = "k" 35 | co.alloc = 1 36 | co.mtime = "2009-01-23T01:23:45Z" 37 | co.mtime.prec = "100ns" 38 | co.byte_runs = diffing_ByteRuns_test.get_brs() 39 | _logger.debug("co = %r" % co) 40 | _logger.debug("co.to_regxml() = %r" % co.to_regxml()) 41 | return co 42 | 43 | 44 | def get_nco(): 45 | _logger = logging.getLogger(os.path.basename(__file__)) 46 | co = get_co() 47 | coe = co.to_Element() 48 | nco = Objects.CellObject() 49 | nco.populate_from_Element(coe) 50 | diffs = co.compare_to_other(nco) 51 | _logger.debug("nco.to_regxml() = %r" % nco.to_regxml()) 52 | _logger.debug("diffs = %r" % diffs) 53 | assert co == nco 54 | 55 | # Modify 56 | nco.basename = "(Doubled)" 57 | nco.root = False 58 | nco.original_cellobject = co 59 | nco.compare_to_original() 60 | assert nco.diffs == set(["basename", "root"]) 61 | _logger.debug("nco.to_regxml() = %r" % nco.to_regxml()) 62 | _logger.debug("nco.diffs = %r" % nco.diffs) 63 | return nco 64 | 65 | 66 | def test_all(): 67 | logging.basicConfig(level=logging.DEBUG) 68 | 69 | get_nco() 70 | 71 | 72 | if __name__ == "__main__": 73 | test_all() 74 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_FileObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
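# Orientation note (illustrative sketch): the diffing tests in this directory
# exercise two related interfaces.  compare_to_other() returns the set of
# differing property names directly, while compare_to_original() records them
# on the newer object's .diffs property, e.g.:
#
#     f_new.original_fileobject = f_old
#     f_new.compare_to_original()
#     assert "mtime" in f_new.diffs
#
# Here f_old and f_new are hypothetical FileObjects that differ only in mtime.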
13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml.objects as Objects 21 | 22 | 23 | def test_all(): 24 | logging.basicConfig(level=logging.DEBUG) 25 | _logger = logging.getLogger(os.path.basename(__file__)) 26 | 27 | f0 = Objects.FileObject() 28 | 29 | fo = Objects.FileObject() 30 | pfo = Objects.FileObject() 31 | pfo.inode = 234 32 | f0.parent_object = pfo 33 | f0.filename = "test file" 34 | f0.error = "Neither a real file, nor real error" 35 | f0.partition = 2 36 | f0.id = 235 37 | f0.name_type = "r" 38 | f0.filesize = 1234 39 | f0.unalloc = 0 40 | f0.unused = 0 41 | f0.orphan = 0 42 | f0.compressed = 1 43 | f0.inode = 6543 44 | f0.libmagic = "data" 45 | f0.meta_type = 8 46 | f0.mode = 755 47 | f0.nlink = 1 48 | f0.uid = "S-1-234-etc" 49 | f0.gid = "S-2-234-etc" 50 | f0.mtime = "1999-12-31T12:34:56Z" 51 | f0.ctime = "1998-12-31T12:34:56Z" 52 | f0.atime = "1997-12-31T12:34:56Z" 53 | f0.crtime = "1996-12-31T12:34:56Z" 54 | f0.seq = 3 55 | f0.dtime = "1995-12-31T12:34:56Z" 56 | f0.bkup_time = "1994-12-31T12:34:56Z" 57 | f0.link_target = "Nonexistent file" 58 | f0.libmagic = "Some kind of compressed" 59 | f0.md5 = "db72d20e83d0ae39771403bc4cdde040" 60 | f0.sha1 = "866e1f426b2380aaf74a091aa0f39f62ae8a2de7" 61 | f0.sha256 = "4bc5996997ab9196b2d998b05ef302ed1dc167d74ec881533ee35008b5168630" 62 | f0.sha384 = "2ec378692eeae4b855f58832664f95bb85411caac8dcebe7cd3916e915559d3f0ccda688a1fad1e3f47801fe15298ac0" 63 | # fo.brs = brs #TODO 64 | _logger.debug("f0 = %r" % f0) 65 | _logger.debug("f0.to_dfxml() = %r" % f0.to_dfxml()) 66 | 67 | e0 = f0.to_Element() 68 | _logger.debug("e0 = %r" % e0) 69 | 70 | # f1 = eval(repr(f0)) #TODO The recursive evals cause namespace confusion (Objects.foo); replace the next two lines when that's settled. 71 | f1 = Objects.FileObject() 72 | f1.populate_from_Element(e0) 73 | 74 | f2 = Objects.FileObject() 75 | f2.populate_from_Element(e0) 76 | 77 | # The id property should not be included in the comparisons 78 | f1.id = 111 79 | f1.alloc = False 80 | 81 | f2.mtime = "2999-12-31T12:34:56Z" 82 | f2.md5 = "593c8fe4a2236f3eeba7f4577b663876" 83 | f2.sha1 = "0c0c20c03bdb8913da8ea120bd59ba5f596deceb" 84 | f2.sha256 = "4f6dcb46e0f7b0ad748d083f6e92d7df586d0298a94acc3795287ff156614540" 85 | f2.sha384 = "2af87ca47d01989009caf3927a84be215528a53629dd935a828921ac0a4b22202bcba20d38fdd16d719b8c4241fcdacb" 86 | 87 | _logger.debug("f1 = %r" % f1) 88 | d01 = f0.compare_to_other(f1) 89 | _logger.debug("d01 = %r" % d01) 90 | assert d01 == set(["alloc"]) or d01 == set(["alloc", "unalloc"]) 91 | 92 | d02 = f0.compare_to_other(f2) 93 | 94 | _logger.debug("d02 = %r" % d02) 95 | assert d02 == set(["mtime", "md5", "sha1", "sha256", "sha384"]) 96 | 97 | f2.original_fileobject = f0 98 | f2.compare_to_original() 99 | _logger.debug("f2.diffs = %r" % f2.diffs) 100 | assert f2.diffs == d02 101 | 102 | # TODO include byte_runs 103 | 104 | 105 | if __name__ == "__main__": 106 | test_all() 107 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_HiveObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. 
Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import copy 17 | import logging 18 | import os 19 | import sys 20 | 21 | import dfxml.objects as Objects 22 | 23 | 24 | def get_ho(): 25 | ho = Objects.HiveObject() 26 | ho.mtime = "2010-01-02T03:45:00Z" 27 | return ho 28 | 29 | 30 | def test_all(): 31 | _logger = logging.getLogger(os.path.basename(__file__)) 32 | logging.basicConfig(level=logging.DEBUG) 33 | 34 | ho = get_ho() 35 | 36 | hoc = copy.deepcopy(ho) 37 | 38 | diffs = hoc.compare_to_other(ho) 39 | _logger.debug(repr(diffs)) 40 | assert len(diffs) == 0 41 | 42 | hoc.mtime = "2011-01-02T03:45:00Z" 43 | 44 | diffs = hoc.compare_to_other(ho) 45 | _logger.debug(repr(diffs)) 46 | assert len(diffs) == 1 47 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_TimestampObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import copy 17 | import logging 18 | import os 19 | import sys 20 | 21 | import dfxml.objects as Objects 22 | 23 | 24 | def test_all(): 25 | t0 = Objects.TimestampObject() 26 | t0.name = "mtime" 27 | t0.prec = "2s" 28 | 29 | t1 = copy.deepcopy(t0) 30 | 31 | assert t0 == t1 32 | 33 | t0e = t0.to_Element() 34 | t2 = Objects.TimestampObject() 35 | t2.populate_from_Element(t0e) 36 | 37 | assert t0 == t2 38 | 39 | t2.prec = "100" 40 | 41 | assert t0 != t2 42 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_VolumeObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. 
NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.0" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml 21 | import dfxml.objects as Objects 22 | 23 | 24 | def test_all(): 25 | logging.basicConfig(level=logging.DEBUG) 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | v0 = Objects.VolumeObject() 29 | 30 | v0.sector_size = 512 31 | v0.block_size = 4096 32 | v0.partition_offset = 32256 33 | v0.ftype = -1 34 | assert v0.ftype == -1 35 | v0.ftype_str = 1 36 | v0.block_count = 100000 37 | v0.allocated_only = False 38 | v0.first_block = 0 39 | v0.last_block = v0.block_count 40 | 41 | _logger.debug(repr(v0)) 42 | v1 = eval("Objects." + repr(v0)) 43 | 44 | e0 = v0.to_Element() 45 | _logger.debug("e0 = %r" % e0) 46 | 47 | v2 = Objects.VolumeObject() 48 | v2.populate_from_Element(e0) 49 | 50 | v1.block_size = 512 51 | v2.partition_offset = v0.partition_offset + v0.block_count * v0.block_size 52 | 53 | d01 = v0.compare_to_other(v1) 54 | d02 = v0.compare_to_other(v2) 55 | 56 | _logger.debug("d01 = %r" % d01) 57 | assert d01 == set(["block_size"]) 58 | 59 | _logger.debug("d02 = %r" % d02) 60 | assert d02 == set(["partition_offset"]) 61 | -------------------------------------------------------------------------------- /tests/misc_object_tests/objects_test.py: -------------------------------------------------------------------------------- 1 | # Unit tests for objects 2 | 3 | 4 | __version__ = "0.1.1" 5 | 6 | import os 7 | import sys 8 | 9 | from dfxml.objects import * 10 | from dfxml.objects import _intcast, _logger, _qsplit 11 | 12 | 13 | def test_all(): 14 | assert _intcast(-1) == -1 15 | assert _intcast("-1") == -1 16 | assert _qsplit("{http://www.w3.org/2001/XMLSchema}all") == ( 17 | "http://www.w3.org/2001/XMLSchema", 18 | "all", 19 | ) 20 | assert _qsplit("http://www.w3.org/2001/XMLSchema}all") == ( 21 | None, 22 | "http://www.w3.org/2001/XMLSchema}all", 23 | ) 24 | 25 | fi = FileObject() 26 | 27 | # Check property setting 28 | fi.mtime = "1999-12-31T23:59:59Z" 29 | _logger.debug("fi = %r" % fi) 30 | 31 | # Check bad property setting 32 | failed = None 33 | try: 34 | fi.mtime = "Not a timestamp" 35 | failed = False 36 | except: 37 | failed = True 38 | _logger.debug("fi = %r" % fi) 39 | _logger.debug("failed = %r" % failed) 40 | assert failed == True 41 | 42 | t0 = TimestampObject(prec="100ns", name="mtime") 43 | _logger.debug("t0 = %r" % t0) 44 | assert t0.prec[0] == 100 45 | assert t0.prec[1] == "ns" 46 | t1 = TimestampObject("2009-01-23T01:23:45Z", prec="2", name="atime") 47 | _logger.debug("t1 = %r" % t1) 48 | assert t1.prec[0] == 2 49 | assert t1.prec[1] == "s" 50 | -------------------------------------------------------------------------------- /tests/misc_object_tests/test_TCPFlowObjects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. 
NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | import logging 15 | import os.path 16 | import pathlib 17 | import sys 18 | 19 | import pytest 20 | 21 | # TODO - It seems TCPFlowObjects might be better served from /dfxml instead of /dfxml/bin. 22 | import dfxml.bin.TCPFlowObjects 23 | import dfxml.objects as Objects 24 | 25 | 26 | @pytest.fixture 27 | def top_srcdir() -> pathlib.Path: 28 | srcdir = pathlib.Path(__file__).parent 29 | return srcdir / ".." / ".." 30 | 31 | 32 | def test_TCPFlowObjects(top_srcdir: pathlib.Path) -> None: 33 | path_to_sample = top_srcdir / "samples" / "tcpflow_zip_generic_header.xml" 34 | assert ( 35 | path_to_sample.exists() 36 | ), "Hard-coded path from test to sample is no longer valid." 37 | 38 | for event, obj in Objects.iterparse(str(path_to_sample)): 39 | if not isinstance(obj, Objects.FileObject): 40 | continue 41 | results = dfxml.bin.TCPFlowObjects.scanner_results_from_FileObject(obj) 42 | assert len(results) == 1 43 | # TODO - This could do with a better presentation in relation to the pytest framework. 44 | print("Flow name: %r." % obj.filename) 45 | for result in results: 46 | result.print_report() 47 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | mypy 2 | pytest 3 | -------------------------------------------------------------------------------- /tests/test_reads.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | import os 15 | 16 | import pytest 17 | 18 | import dfxml 19 | import dfxml.objects 20 | 21 | 22 | def nop(x: object) -> None: 23 | pass 24 | 25 | 26 | @pytest.fixture 27 | def top_srcdir() -> str: 28 | srcdir = os.path.dirname(__file__) 29 | retval = os.path.join(srcdir, "..") 30 | assert os.path.isdir( 31 | os.path.join(retval, "samples") 32 | ), "Hard-coded expected path not found, '${top_srcdir}/samples/'." 33 | return retval 34 | 35 | 36 | @pytest.fixture 37 | def difference_test_0_filepath(top_srcdir: str) -> str: 38 | retval = os.path.join(top_srcdir, "samples", "difference_test_0.xml") 39 | assert os.path.exists( 40 | retval 41 | ), "Hard-coded path to file did not find expected file, '${top_srcdir}/samples/difference_test_0.xml'." 42 | return retval 43 | 44 | 45 | def test_read_dfxml(difference_test_0_filepath: str) -> None: 46 | """ 47 | This test confirms that the DFXML pip-managed packaging exposes the dfxml package and the objects.py module. 
48 | """ 49 | with open(difference_test_0_filepath, "rb") as fh: 50 | dfxml.read_dfxml(fh, callback=nop) 51 | 52 | 53 | def test_objects_iterparse(difference_test_0_filepath: str) -> None: 54 | """ 55 | This test confirms that the DFXML pip-managed packaging exposes the dfxml package's objects.py module. 56 | """ 57 | for event, obj in dfxml.objects.iterparse(difference_test_0_filepath): 58 | pass 59 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | import dfxml 15 | 16 | 17 | def test_version() -> None: 18 | assert not dfxml.__version__ is None 19 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/.gitignore: -------------------------------------------------------------------------------- 1 | *.dfxml 2 | walk_ignore_test/ 3 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # Bash selection is described in the top-level Makefile. 15 | ifeq ($(shell basename $(SHELL)),sh) 16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 17 | endif 18 | 19 | top_srcdir := $(shell cd ../.. 
; pwd) 20 | 21 | tests_srcdir := $(top_srcdir)/tests 22 | 23 | all: \ 24 | walk_ignore_genprops.dfxml \ 25 | walk_ignore_hashes.dfxml 26 | 27 | .scaffolding.done.log: 28 | rm -rf walk_ignore_test 29 | mkdir -p walk_ignore_test/foo/bar/baz 30 | echo 'contents c' > walk_ignore_test/foo/bar/baz/c 31 | echo 'contents b' > walk_ignore_test/foo/bar/b 32 | echo 'contents a' > walk_ignore_test/foo/a 33 | touch $@ 34 | 35 | check: \ 36 | walk_ignore_genprops.dfxml \ 37 | walk_ignore_hashes.dfxml 38 | source $(tests_srcdir)/venv/bin/activate \ 39 | && pytest \ 40 | --log-level=DEBUG 41 | 42 | clean: 43 | @rm -f \ 44 | .scaffolding.done.log \ 45 | *.dfxml 46 | @rm -rf \ 47 | walk_ignore_test/ 48 | 49 | walk_ignore_genprops.dfxml: \ 50 | $(tests_srcdir)/.venv.done.log \ 51 | $(top_srcdir)/dfxml/bin/walk_to_dfxml.py \ 52 | .scaffolding.done.log 53 | rm -f \ 54 | __$@ \ 55 | _$@ 56 | source $(tests_srcdir)/venv/bin/activate \ 57 | && cd walk_ignore_test \ 58 | && walk_to_dfxml \ 59 | -i atime \ 60 | -i ctime \ 61 | -i crtime \ 62 | -i gid \ 63 | -i inode \ 64 | -i mtime@d \ 65 | -i uid \ 66 | > ../__$@ 67 | xmllint \ 68 | --format \ 69 | __$@ \ 70 | > _$@ 71 | rm __$@ 72 | mv _$@ $@ 73 | 74 | walk_ignore_hashes.dfxml: \ 75 | $(tests_srcdir)/.venv.done.log \ 76 | $(top_srcdir)/dfxml/bin/walk_to_dfxml.py \ 77 | .scaffolding.done.log 78 | rm -f \ 79 | __$@ \ 80 | _$@ 81 | source $(tests_srcdir)/venv/bin/activate \ 82 | && cd walk_ignore_test \ 83 | && walk_to_dfxml \ 84 | --ignore-hashes \ 85 | > ../__$@ 86 | xmllint \ 87 | --format \ 88 | __$@ \ 89 | > _$@ 90 | rm __$@ 91 | mv _$@ $@ 92 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/README.md: -------------------------------------------------------------------------------- 1 | # `walk_to_dfxml` 2 | 3 | *Source*: [`../../dfxml/bin/walk_to_dfxml.py`](../../dfxml/bin/walk_to_dfxml.py) 4 | 5 | This command walks a directory, producing a `<fileobject>` for each encountered file and directory, and then recurses into each directory. Output is sent to `stdout`. 6 | 7 | File characteristics are drawn from: 8 | * the path 9 | * hashes of the file contents for regular files (i.e., not directories, not device files; also, not soft links) 10 | * the `stat` structure for the file 11 | * the referenced path (for soft links) 12 | 13 | Any directory that can be navigated to can be characterized with this script. This has been tested from the root directory of an (offline) Linux system's root-filesystem partition. The tool can handle the `/dev` directory without issue. 14 | 15 | This tool can be used to walk a network file system, such as a share. However, be aware that if hashing is enabled, the tool will read the file contents over the network. 16 | 17 | 18 | ## Usage 19 | 20 | ```bash 21 | cd .../my_directory 22 | walk_to_dfxml > /tmp/my_directory.dfxml 23 | ``` 24 | 25 | This will record all characteristics available for each file in and below `.../my_directory`. 26 | 27 | Output should be captured outside of the present working directory, such as in the parent directory. Note that this command will include the hash of `output.dfxml` itself, which is an empty file at the moment it is hashed: 28 | 29 | ```bash 30 | cd .../my_directory 31 | walk_to_dfxml > output.dfxml 32 | ``` 33 | 34 | The `-i` (`--ignore`) flag causes the named file characteristic to be omitted from the output. E.g.
this command will not collect access time: 35 | 36 | ```bash 37 | walk_to_dfxml -i atime > /tmp/walk.dfxml 38 | ``` 39 | 40 | (Testing: See the [`Makefile`](Makefile) recipe for `walk_ignore_genprops.dfxml`, which is tested in [`test_walk_to_dfxml.py`](test_walk_to_dfxml.py)'s function `test_walk_ignore_genprops`.) 41 | 42 | The program can run without gathering any file hashes, by using the `--ignore-hashes` flag: 43 | 44 | ```bash 45 | walk_to_dfxml --ignore-hashes > /tmp/walk.dfxml 46 | ``` 47 | 48 | (Testing: See the [`Makefile`](Makefile) recipe for `walk_ignore_hashes.dfxml`, which is tested in [`test_walk_to_dfxml.py`](test_walk_to_dfxml.py)'s function `test_walk_ignore_hashes`.) 49 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/test_walk_to_dfxml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.2.0" 17 | 18 | import logging 19 | import os 20 | 21 | import pytest 22 | 23 | import dfxml.objects as Objects 24 | 25 | _logger = logging.getLogger(os.path.basename(__file__)) 26 | 27 | 28 | @pytest.fixture 29 | def srcdir() -> str: 30 | retval = os.path.dirname(__file__) 31 | return retval 32 | 33 | 34 | def test_walk_ignore_genprops(srcdir: str) -> None: 35 | files_encountered = 0 36 | for event, obj in Objects.iterparse( 37 | os.path.join(srcdir, "walk_ignore_genprops.dfxml") 38 | ): 39 | if not isinstance(obj, Objects.FileObject): 40 | continue 41 | files_encountered += 1 42 | for propname in ["atime", "ctime", "crtime", "gid", "inode", "mtime", "uid"]: 43 | try: 44 | assert ( 45 | getattr(obj, propname) is None 46 | ), "Found property that should have been ignored." 47 | except: 48 | if propname == "mtime" and obj.name_type != "d": 49 | continue 50 | _logger.error("obj.filename = %r.", obj.filename) 51 | _logger.error("propname = %r.", propname) 52 | raise 53 | assert files_encountered > 0, "Encountered no files in walk_ignore_genprops.dfxml." 54 | 55 | 56 | def test_walk_ignore_hashes(srcdir: str) -> None: 57 | files_encountered = 0 58 | for event, obj in Objects.iterparse( 59 | os.path.join(srcdir, "walk_ignore_hashes.dfxml") 60 | ): 61 | if not isinstance(obj, Objects.FileObject): 62 | continue 63 | files_encountered += 1 64 | for propname in Objects.FileObject._hash_properties: 65 | try: 66 | assert ( 67 | getattr(obj, propname) is None 68 | ), "Found hash property when none was expected." 69 | except: 70 | _logger.error("obj.filename = %r.", obj.filename) 71 | _logger.error("propname = %r.", propname) 72 | raise 73 | assert files_encountered > 0, "Encountered no files in walk_ignore_hashes.dfxml." 74 | --------------------------------------------------------------------------------
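
The test module above shows the project's own pattern for reading `walk_to_dfxml` output back in with `dfxml.objects.iterparse`. As a supplementary illustration, the following minimal sketch applies the same iteration pattern outside the test harness; the input path `walk.dfxml` is an assumed placeholder for any output captured as shown in the README, not a file tracked in this repository.

```python
#!/usr/bin/env python3
"""
Minimal consumption sketch: summarize a DFXML document produced by
walk_to_dfxml, mirroring the iteration pattern used in test_walk_to_dfxml.py.
The path "walk.dfxml" is an assumed placeholder.
"""

import dfxml.objects as Objects


def summarize(dfxml_path: str) -> None:
    fileobject_count = 0
    regular_file_bytes = 0
    for event, obj in Objects.iterparse(dfxml_path):
        # walk_to_dfxml emits a FileObject per encountered file, directory, or link.
        if not isinstance(obj, Objects.FileObject):
            continue
        fileobject_count += 1
        # name_type "r" marks regular files; filesize may be absent if ignored.
        if obj.name_type == "r" and obj.filesize is not None:
            regular_file_bytes += obj.filesize
    print(
        "Encountered %d fileobjects; %d bytes in regular files."
        % (fileobject_count, regular_file_bytes)
    )


if __name__ == "__main__":
    summarize("walk.dfxml")
```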