├── .gitattributes ├── .github └── workflows │ ├── continuous-integration.yml │ └── supply-chain.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CONTRIBUTE.md ├── ChangeLog ├── LICENSE.md ├── Makefile ├── README.md ├── demos ├── .gitignore ├── demo_fiwalk_diskimage.py ├── demo_mac_timeline.py ├── demo_mac_timeline_iter.py ├── demo_mac_timeline_objects.py ├── demo_piecewise.py ├── demo_plot_times.py ├── demo_readtimes.py ├── demo_registry_timeline.py ├── demo_sizes.py ├── demo_spark.py ├── spark │ └── demo_spark.py └── vmstats │ ├── Makefile │ ├── skeleton.css │ ├── vmstats.py │ ├── vmstats_decode.html │ ├── vmstats_decode.py │ └── vmstats_json.html ├── dfxml ├── __init__.py ├── bin │ ├── .gitignore │ ├── Extractor.py │ ├── Makefile │ ├── README.md │ ├── TCPFlowObjects.py │ ├── __init__.py │ ├── allocation_counter.py │ ├── break_out_diffs_by_anno.py │ ├── cat_fileobjects.py │ ├── cat_partitions.py │ ├── conftest.py │ ├── corpus_sync.py │ ├── dedup.py │ ├── deidentify_xml.py │ ├── dfxinfo.py │ ├── dfxml_tool.py │ ├── exp_slack.py │ ├── filesdb.py │ ├── hash_sectors.py │ ├── iblkfind.py │ ├── icarvingtruth.py │ ├── idifference.py │ ├── idifference2.py │ ├── iexport.py │ ├── iextract.py │ ├── igrep.py │ ├── ihistogram.py │ ├── imap.py │ ├── imicrosoft_redact.py │ ├── iredact-config.txt │ ├── iredact.py │ ├── ireport.py │ ├── iverify.py │ ├── make_differential_dfxml.py │ ├── mem_info.py │ ├── nsrl_rds.py │ ├── rdifference.py │ ├── report_silent_changes.py │ ├── summarize_differential_dfxml.py │ ├── tcpdiff.py │ ├── validate_dfxml.py │ ├── walk_to_dfxml.py │ ├── xdiff.py │ ├── xmirror.py │ └── xml2body.py ├── conftest.py ├── dfxml_html.py ├── fiwalk.py ├── histogram.py ├── objects.py ├── py.typed └── writer.py ├── samples ├── .gitignore ├── Makefile ├── README.md ├── difference_test_0.xml ├── difference_test_1.xml ├── difference_test_2.xml ├── difference_test_3.xml ├── fileobjectexample.xml ├── piecewise.xml ├── simple.xml └── tcpflow_zip_generic_header.xml ├── setup.cfg ├── setup.py └── tests ├── .gitignore ├── Makefile ├── README.md ├── make_differential_dfxml ├── .gitignore ├── Makefile ├── README.md ├── differential_dfxml_test_by_path_01.txt ├── differential_dfxml_test_by_path_23.txt ├── differential_dfxml_test_by_times_01.txt ├── differential_dfxml_test_by_times_23.txt └── test_differential_dfxml.py ├── misc_bin_tests ├── README.md ├── _pick_pythons.sh ├── _sane_defaults.sh ├── dfxml_test.py ├── iexport_test.py ├── paths.sh ├── test_cat_fileobjects.sh ├── test_dfxml_tool.sh ├── test_hfsj.sh ├── test_idifference.py ├── test_idifference_to_dfxml.sh ├── test_mac_timelines.sh ├── test_redact.sh └── test_regxml.sh ├── misc_object_tests ├── .gitignore ├── ByteRun_test.py ├── ByteRuns_test.py ├── CellObject_test.py ├── DFXMLObject_program_test.py ├── DiskImageObject_test.py ├── FileObject_allocation_test.py ├── FileObject_byte_run_facets_test.py ├── FileObject_externals_test.py ├── FileObject_from_stat_test.py ├── FileObject_test.py ├── LibraryObject_read_test.py ├── LibraryObject_write_test.py ├── Makefile ├── Makefile_test.py ├── PartitionObject_test.py ├── PartitionSystemObject_test.py ├── README.md ├── RegXMLObject_test.py ├── VolumeObject_externals_test.py ├── VolumeObject_hash_test.py ├── VolumeObject_test.py ├── diff_file_ignore_sample_dfxml_test.py ├── diff_file_ignore_test.py ├── diffing_ByteRuns_test.py ├── diffing_CellObject_test.py ├── diffing_FileObject_test.py ├── diffing_HiveObject_test.py ├── diffing_TimestampObject_test.py ├── diffing_VolumeObject_test.py ├── 
error_test.py ├── libtest.py ├── objects_test.py ├── storage_layers_test.py └── test_TCPFlowObjects.py ├── requirements.txt ├── test_objects.py ├── test_reads.py ├── test_version.py └── walk_to_dfxml ├── .gitignore ├── Makefile ├── README.md └── test_walk_to_dfxml.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | # NOTE: At the time this rule was written, all files tracked in this repository were known to be text files. From documentation on this file at git-scm.com, it seems possible this might trip up commiting a binary file in the future. 3 | * text=auto 4 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | # This file based on https://gist.github.com/mwouts/9842452d020c08faf9e84a3bba38a66f 2 | # See: https://help.github.com/en/actions/reference/software-installed-on-github-hosted-runners 3 | # 2020-06-22 - slg - customized 4 | # 2020-06-27 - slg - expanded to G++ for MacOS 5 | # 6 | name: CI (python) 7 | on: [push, pull_request] 8 | 9 | env: 10 | COVERAGE_OS: ubuntu-latest 11 | COVERAGE_PYTHON_VERSION: 3.13 12 | 13 | jobs: 14 | build: 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: ['ubuntu-latest', 'macos-latest'] 19 | python-version: ['3.9','3.13'] 20 | 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v4 24 | 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | 30 | - name: Install Python dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install pytest pytest-cov 34 | if [ -r requirements.txt ]; then pip install -r requirements.txt ; fi 35 | if [ -r requirements-dev.txt ]; then pip install -r requirements-dev.txt ; fi 36 | 37 | - name: Install xmllint on ubuntu 38 | if: runner.os == 'Linux' 39 | run: | 40 | sudo apt update 41 | sudo apt install --yes libxml2-utils 42 | 43 | - name: Pre-commit Checks 44 | run: | 45 | pip -q install pre-commit 46 | pre-commit run --all-files 47 | 48 | - name: Make check 49 | run: make check 50 | 51 | - name: Make check-tools 52 | run: make check-tools 53 | 54 | - name: Test with pytest 55 | run: pytest --cov=dfxml --cov-report=xml . 56 | 57 | - name: Upload to codecov.io 58 | if: matrix.os == env.COVERAGE_OS && matrix.python-version == env.COVERAGE_PYTHON_VERSION 59 | uses: codecov/codecov-action@v4 60 | with: 61 | token: ${{ secrets.CODECOV_TOKEN }} 62 | verbose: true 63 | files: ./coverage.xml 64 | -------------------------------------------------------------------------------- /.github/workflows/supply-chain.yml: -------------------------------------------------------------------------------- 1 | # Portions of this file contributed by NIST are governed by the 2 | # following statement: 3 | # 4 | # This software was developed at the National Institute of Standards 5 | # and Technology by employees of the Federal Government in the course 6 | # of their official duties. Pursuant to title 17 Section 105 of the 7 | # United States Code this software is not subject to copyright 8 | # protection and is in the public domain. 
NIST assumes no 9 | # responsibility whatsoever for its use by other parties, and makes 10 | # no guarantees, expressed or implied, about its quality, 11 | # reliability, or any other characteristic. 12 | # 13 | # We would appreciate acknowledgement if the software is used. 14 | 15 | # This workflow uses Make to review direct dependencies of this 16 | # repository. 17 | 18 | name: Supply Chain 19 | 20 | on: 21 | schedule: 22 | - cron: '15 5 * * 1,2,3,4,5' 23 | 24 | jobs: 25 | build: 26 | 27 | runs-on: ubuntu-latest 28 | strategy: 29 | matrix: 30 | python-version: 31 | - '3.9' 32 | - '3.13' 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@v5 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | - name: Review dependencies 43 | run: make check-supply-chain 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *~ 3 | __pycache__ 4 | _deps 5 | python/demo.dfxml 6 | 7 | .DS_Store 8 | build 9 | python/demo.dfxml 10 | .cache 11 | .pytest_cache 12 | *.egg-info 13 | *.log 14 | .venv-pre-commit 15 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dependencies/dfxml_schema"] 2 | path = dependencies/dfxml_schema 3 | url = https://github.com/dfxml-working-group/dfxml_schema.git 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 25.1.0 4 | hooks: 5 | - id: black 6 | -------------------------------------------------------------------------------- /CONTRIBUTE.md: -------------------------------------------------------------------------------- 1 | # Contributing to DFXML's Python code base 2 | 3 | 4 | ## Pre-commit 5 | 6 | This project uses [the `pre-commit` tool](https://pre-commit.com/) for linting. 7 | 8 | `pre-commit` hooks into Git's commit machinery to run a set of linters and static analyzers over each change. To install `pre-commit` into Git's hooks, run one (not both) of the following sets of commands: 9 | 10 | ```bash 11 | pip install pre-commit 12 | pre-commit --version 13 | pre-commit install 14 | ``` 15 | 16 | Or: 17 | 18 | ```bash 19 | make 20 | ``` 21 | 22 | 23 | ## Installable tools versus in-place scripts 24 | 25 | The [`dfxml/bin/`](dfxml/bin/) directory contains scripts for interacting with DFXML. Some of the tools are installed in the command-line `$PATH` when the `dfxml` package is installed. 26 | 27 | If there is a request to add a tool to the package's installed-tools list, the tool should have these implemented: 28 | 1. A unit test suite that exercises the tool's command line features, such as flags, and `pytest` tests to confirm expected output. 29 | 2. A documentation page, preferably a README alongside the unit test suite. The documentation should include command-line usage. 30 | 3. A row in [`dfxml/bin/README.md`](dfxml/bin/README.md)'s table of installed tools, linking to the documentation. 31 | 4. The tool should be analyzed with a static type checker. See e.g. the target `check-mypy` in the [tests Makefile](tests/Makefile) that is run as part of CI. 
(Note this would be started by adding type signatures to the tool's functions.) 32 | 33 | 34 | ## Version management 35 | 36 | **Note that DFXML 1.0.2 DOES NOT YET follow SEMVER practices.** 37 | 38 | This project plans to adopt [SEMVER](https://semver.org/) to denote expected stability of its offered resources. The project *has not yet* adopted SEMVER; when it does, a note will be added to the README. 39 | 40 | Once a SEMVER-adherent major version is declared, backwards-incompatible commits will be merged into the `release-x.0.0` branch (where `x` is the next major version) instead of `develop`. 41 | 42 | Following SEMVER's `major.minor.patch` version designation: 43 | * The `major` version will increment on deploying changes that are backwards-incompatible with the prior major release. 44 | * The `minor` version will increment on new functionality being added. 45 | * The `patch` version will increment on new tests for existing functionality being added, or a bug being fixed, with some discretion to be used for any needed interface corrections. 46 | 47 | The following are this repository's policies on backwards compatibility for this project's resources. 48 | 49 | 50 | ### Version of the DFXML Python code base 51 | 52 | The overall package version of `dfxml` is stored in one location, the `__version__` variable of `dfxml/__init__.py`. 53 | 54 | Other resources may track their own version independently. 55 | 56 | 57 | ### Package resources 58 | 59 | The set of command-line tools offered in the package (defined in `setup.cfg`) is considered in-scope for backwards compatibility. 60 | 61 | 62 | ### Command-line functionality 63 | 64 | Tests that illustrate expected command-line behavior are available under the [`tests/`](tests/) directory. See the `Makefile`s under the directories named after the provided tools. Recipes that include activating a virtual environment (e.g. `source $(tests_srcdir)/venv/bin/activate`) show command line execution patterns. 65 | 66 | The command-line functionality demonstrated by the `Makefile`s under `tests/` is considered in-scope for backwards compatibility. 67 | 68 | 69 | ### Module functionality 70 | 71 | This project uses the [`pytest`](https://docs.pytest.org) framework to run unit tests. These tests encode the expected behaviors of command-line results, and of module functions. Tests generally follow a "Ground-truth comparison" model, where an expected set of results is compared to a computed set of results (generally, as `expected_X == computed_X`). 72 | 73 | The module functionality exercised by the `pytest` unit tests is considered in-scope for backwards compatibility. 74 | 75 | 76 | ## Merge model 77 | 78 | On adoption of SEMVER, this project will follow the `git-flow` merge model. In short: 79 | * The `main` branch will contain tagged release commits only. 80 | * The `develop` branch will be the target of Pull Requests for new features. 81 | * `release-x.y.z` branches will be made off of `develop` when a new release is to be tagged , and merged into `main` and back into `develop`. 82 | 83 | The above practice can be seen illustrated in the first figure on [this page](https://nvie.com/posts/a-successful-git-branching-model/). 
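As a concrete illustration of the "Ground-truth comparison" model described above, the following is a minimal sketch of a `pytest`-style test. The fixture path and expected values are hypothetical placeholders, not part of this repository's test suite:

```python
import dfxml.objects as Objects


def test_expected_filenames() -> None:
    # Hypothetical ground truth for a hypothetical DFXML fixture file.
    expected_filenames = {"example_1.txt", "example_2.txt"}

    # Compute the observed result by iterating the fixture's fileobjects.
    computed_filenames = set()
    for event, obj in Objects.iterparse("fixture.dfxml"):  # placeholder path
        if event != "end" or not isinstance(obj, Objects.FileObject):
            continue
        computed_filenames.add(obj.filename)

    assert expected_filenames == computed_filenames
```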
84 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # While SHELL would typically be set with ":=" assignment, some 15 | # environments do not have Bash at /bin/bash (e.g. FreeBSD stores Bash 16 | # at /usr/local/bin/bash). 17 | ifeq ($(shell basename $(SHELL)),sh) 18 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 19 | endif 20 | 21 | PYTHON3 ?= python3 22 | ifeq ($(PYTHON3),) 23 | $(error python3 not found) 24 | endif 25 | 26 | all: \ 27 | .venv-pre-commit/var/.pre-commit-built.log 28 | 29 | .PHONY: \ 30 | check-mypy \ 31 | check-supply-chain \ 32 | check-supply-chain-pre-commit 33 | 34 | .git_submodule_init.done.log: .gitmodules 35 | # Confirm dfxml_schema has been checked out at least once. 36 | test -r dependencies/dfxml_schema/dfxml.xsd \ 37 | || (git submodule init dependencies/dfxml_schema && git submodule update dependencies/dfxml_schema) 38 | test -r dependencies/dfxml_schema/dfxml.xsd 39 | touch $@ 40 | 41 | # This virtual environment is meant to be built once and then persist, even through 'make clean'. 42 | # If a recipe is written to remove this flag file, it should first run `pre-commit uninstall`. 43 | .venv-pre-commit/var/.pre-commit-built.log: 44 | rm -rf .venv-pre-commit 45 | test -r .pre-commit-config.yaml \ 46 | || (echo "ERROR:Makefile:pre-commit is expected to install for this repository, but .pre-commit-config.yaml does not seem to exist." >&2 ; exit 1) 47 | $(PYTHON3) -m venv \ 48 | .venv-pre-commit 49 | source .venv-pre-commit/bin/activate \ 50 | && pip install \ 51 | --upgrade \ 52 | pip \ 53 | setuptools \ 54 | wheel 55 | source .venv-pre-commit/bin/activate \ 56 | && pip install \ 57 | pre-commit 58 | source .venv-pre-commit/bin/activate \ 59 | && pre-commit install 60 | mkdir -p \ 61 | .venv-pre-commit/var 62 | touch $@ 63 | 64 | clean: 65 | find . -name '*~' -exec rm {} \; 66 | $(MAKE) \ 67 | --directory tests \ 68 | clean 69 | 70 | check: \ 71 | .git_submodule_init.done.log \ 72 | .venv-pre-commit/var/.pre-commit-built.log 73 | $(MAKE) \ 74 | PYTHON3=$(PYTHON3) \ 75 | SHELL=$(SHELL) \ 76 | --directory tests \ 77 | check 78 | 79 | check-mypy: \ 80 | .git_submodule_init.done.log 81 | $(MAKE) \ 82 | PYTHON3=$(PYTHON3) \ 83 | SHELL=$(SHELL) \ 84 | --directory tests \ 85 | check-mypy 86 | 87 | check-supply-chain: \ 88 | check-supply-chain-pre-commit \ 89 | check-mypy 90 | 91 | # Update pre-commit configuration and use the updated config file to 92 | # review code. Only have Make exit if 'pre-commit run' modifies files. 
93 | check-supply-chain-pre-commit: \ 94 | .venv-pre-commit/var/.pre-commit-built.log 95 | source .venv-pre-commit/bin/activate \ 96 | && pre-commit autoupdate 97 | git diff \ 98 | --exit-code \ 99 | .pre-commit-config.yaml \ 100 | || ( \ 101 | source .venv-pre-commit/bin/activate \ 102 | && pre-commit run \ 103 | --all-files \ 104 | --config .pre-commit-config.yaml \ 105 | ) \ 106 | || git diff \ 107 | --stat \ 108 | --exit-code \ 109 | || ( \ 110 | echo \ 111 | "WARNING:Makefile:pre-commit configuration can be updated. It appears the updated would change file formatting." \ 112 | >&2 \ 113 | ; exit 1 \ 114 | ) 115 | @git diff \ 116 | --exit-code \ 117 | .pre-commit-config.yaml \ 118 | || echo \ 119 | "INFO:Makefile:pre-commit configuration can be updated. It appears the update would not change file formatting." \ 120 | >&2 121 | 122 | check-tools: 123 | (cd tests/misc_object_tests;make check) 124 | -------------------------------------------------------------------------------- /demos/.gitignore: -------------------------------------------------------------------------------- 1 | *.dfxml 2 | *.xml 3 | -------------------------------------------------------------------------------- /demos/demo_fiwalk_diskimage.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | """ 3 | This demo shows how to invoke Fiwalk as a subprocess, taking a disk image as 4 | input. Fiwalk's dfxml XML output is sent to an in-memory buffer, which is then 5 | written to an output file. Note that this may fail for very large disk images 6 | if the required buffer size exceeds available RAM! 7 | """ 8 | 9 | import io 10 | import sys 11 | 12 | from dfxml import fiwalk 13 | 14 | 15 | def writeDfxml(imageFile: str, outFile: str) -> None: 16 | """Generate filesystem metadata for disk image and and write resulting 17 | dfxml to file""" 18 | # Analyse image file 19 | with open(imageFile, "rb") as ifs: 20 | fwOutBuffer = fiwalk.fiwalk_xml_stream(imagefile=ifs) 21 | fwOut = fwOutBuffer.read() 22 | 23 | # Write dfxml to output file 24 | with io.open(outFile, "wb") as fOut: 25 | fOut.write(fwOut) 26 | 27 | 28 | def main() -> None: 29 | if len(sys.argv) < 3: 30 | print("Usage: {} ".format(sys.argv[0])) 31 | exit(1) 32 | imageFile = sys.argv[1] 33 | outFile = sys.argv[2] 34 | writeDfxml(imageFile, outFile) 35 | 36 | 37 | if __name__ == "__main__": 38 | main() 39 | -------------------------------------------------------------------------------- /demos/demo_mac_timeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # produce a MAC-times timeline. 
3 | # works under either Python2 or Python3 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 8 | import dfxml 9 | 10 | timeline = [] 11 | 12 | 13 | def process(fi): 14 | if fi.mtime() != None: 15 | timeline.append([fi.mtime(), fi.filename(), " modified"]) 16 | if fi.crtime() != None: 17 | timeline.append([fi.crtime(), fi.filename(), " created"]) 18 | if fi.ctime() != None: 19 | timeline.append([fi.ctime(), fi.filename(), " changed"]) 20 | if fi.atime() != None: 21 | timeline.append([fi.atime(), fi.filename(), " accessed"]) 22 | 23 | 24 | def main(): 25 | if len(sys.argv) < 2: 26 | print("Usage: {} ".format(sys.argv[0])) 27 | exit(1) 28 | dfxml.read_dfxml(xmlfile=open(sys.argv[1], "rb"), callback=process) 29 | timeline.sort() 30 | for record in timeline: 31 | print("\t".join(map(str, record))) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /demos/demo_mac_timeline_iter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # produce a MAC-times timeline using the iterative DFXML interface. 17 | # works under either Python2 or Python3 18 | 19 | import os 20 | import sys 21 | 22 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 23 | import dfxml 24 | 25 | 26 | def main(): 27 | if len(sys.argv) < 2: 28 | print("Usage: {} ".format(sys.argv[0])) 29 | exit(1) 30 | 31 | timeline = [] 32 | 33 | for fi in dfxml.iter_dfxml(xmlfile=open(sys.argv[1], "rb")): 34 | if fi.mtime() != None: 35 | timeline.append([fi.mtime(), fi.filename(), " modified"]) 36 | if fi.crtime() != None: 37 | timeline.append([fi.crtime(), fi.filename(), " created"]) 38 | if fi.ctime() != None: 39 | timeline.append([fi.ctime(), fi.filename(), " changed"]) 40 | if fi.atime() != None: 41 | timeline.append([fi.atime(), fi.filename(), " accessed"]) 42 | 43 | timeline.sort() 44 | 45 | for record in timeline: 46 | print("\t".join(map(str, record))) 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /demos/demo_mac_timeline_objects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. 
For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # produce a MAC-times timeline using the DFXML Objects interface. 17 | # works under either Python2 or Python3 18 | 19 | import os 20 | import sys 21 | 22 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 23 | import dfxml 24 | import dfxml.objects as Objects 25 | 26 | 27 | def main(): 28 | if len(sys.argv) < 2: 29 | print("Usage: {} ".format(sys.argv[0])) 30 | exit(1) 31 | 32 | timeline = [] 33 | 34 | for event, obj in Objects.iterparse(sys.argv[1]): 35 | # Only work on FileObjects 36 | if not isinstance(obj, Objects.FileObject): 37 | continue 38 | if not obj.mtime is None: 39 | timeline.append([obj.mtime, obj.filename, " modified"]) 40 | if not obj.crtime is None: 41 | timeline.append([obj.crtime, obj.filename, " created"]) 42 | if not obj.ctime is None: 43 | timeline.append([obj.ctime, obj.filename, " changed"]) 44 | if not obj.atime is None: 45 | timeline.append([obj.atime, obj.filename, " accessed"]) 46 | 47 | timeline.sort() 48 | 49 | for record in timeline: 50 | print("\t".join(map(str, record))) 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /demos/demo_piecewise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | 3 | # 4 | # Demo program that prints piecewise hashes and reports on co-occurrence of hashes. 
5 | # 6 | # Multimap from http://stackoverflow.com/questions/1731971/is-there-multimap-implementation-in-python 7 | 8 | import os 9 | import sys 10 | 11 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 12 | import collections 13 | import math 14 | import sys 15 | 16 | import dfxml 17 | 18 | 19 | class SectorCorrelator: 20 | def __init__(self): 21 | self.hashdb = collections.defaultdict( 22 | list 23 | ) # key is the MD5 code, value is a list of matches 24 | self.files = 0 25 | self.sectors = 0 26 | 27 | def process(self, fi): 28 | """Process the objects as they are read from the XML file""" 29 | self.files += 1 30 | print(fi.filename()) 31 | for br in fi.byte_runs(): 32 | self.sectors += 1 33 | self.hashdb[br.hashdigest["md5"]].append((fi.filename(), br.file_offset)) 34 | 35 | def print_report(self): 36 | print("Files processed: {}".format(self.files)) 37 | print("Sectors processed: {}".format(self.sectors)) 38 | print("") 39 | print("The following duplicates were found:") 40 | print("Hash Filename Offset in file") 41 | for hash, ents in self.hashdb.items(): 42 | if len(ents) > 1: 43 | print("{} -- {} copies found".format(hash, len(ents))) 44 | for e in sorted(ents): 45 | print(" {} {:8,}".format(e[0], e[1])) 46 | print("") 47 | 48 | 49 | sc = SectorCorrelator() 50 | dfxml.read_dfxml(xmlfile=open(sys.argv[1], "rb"), callback=sc.process) 51 | sc.print_report() 52 | -------------------------------------------------------------------------------- /demos/demo_plot_times.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | import time 5 | 6 | import fiwalk 7 | 8 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 9 | import dfxml 10 | 11 | if __name__ == "__main__": 12 | import sys 13 | from optparse import OptionParser 14 | from sys import stdout 15 | 16 | parser = OptionParser() 17 | parser.usage = "%prog [options] (xmlfile or imagefile)" 18 | (options, args) = parser.parse_args() 19 | 20 | if not args: 21 | parser.print_usage() 22 | exit(1) 23 | 24 | sizes = [] 25 | dates = {} 26 | 27 | def callback(fi): 28 | sizes.append(fi.filesize()) 29 | for tag, val in fi.times().iteritems(): 30 | date = val.datetime() 31 | dates[date] = dates.get(date, 0) + 1 32 | 33 | fn = args[0] 34 | if fn.endswith(".xml"): 35 | fiwalk.fiwalk_using_sax(xmlfile=open(fn), callback=callback) 36 | else: 37 | fiwalk.fiwalk_using_sax(imagefile=open(fn), callback=callback) 38 | 39 | print("Here is the dates array:") 40 | for d in sorted(dates.keys()): 41 | print("{} {}".format(d, dates[d])) 42 | -------------------------------------------------------------------------------- /demos/demo_readtimes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Reads an fiwalk XML file and reports how many of the files are still in the image...""" 3 | 4 | import os 5 | import sys 6 | 7 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 8 | import time 9 | 10 | import dfxml 11 | import dfxml.fiwalk as fiwalk 12 | 13 | 14 | def calc_jumps(fis, title): 15 | print(title) 16 | print("Count: %d" % (len(fis))) 17 | from histogram import histogram 18 | 19 | h = histogram() 20 | pos = 0 21 | backwards = 0 22 | prev_frag_count = 0 23 | for fi in fis: 24 | for i in range(0, len(fi.byte_runs())): 25 | run = fi.byte_runs()[i] 26 | try: 27 | sector = run.start_sector() 28 | if sector < pos: 29 | backwards += 1 30 | h.add((prev_frag_count, i)) 31 | pos = sector 
32 | except AttributeError: 33 | pass 34 | pref_frag_count = len(fi.byte_runs()) 35 | 36 | print("Backwards Jumps: %d" % backwards) 37 | print("Histogram of backwards:") 38 | h.print_top(10) 39 | 40 | 41 | if __name__ == "__main__": 42 | import sys 43 | from optparse import OptionParser 44 | from subprocess import PIPE, Popen 45 | 46 | global options 47 | 48 | parser = OptionParser() 49 | parser.add_option("-d", "--debug", help="prints debugging info", dest="debug") 50 | parser.add_option("-x", "--xmlfile", help="XML file (optional)") 51 | parser.add_option("-i", "--imagefile", help="image file (required)") 52 | parser.usage = "%prog [options] xmlfile diskimage" 53 | (options, args) = parser.parse_args() 54 | 55 | if not options.xmlfile or not options.imagefile: 56 | parser.print_help() 57 | sys.exit(1) 58 | 59 | # Read the redaction configuration file 60 | imagefile = open(options.imagefile, "r") 61 | if options.xmlfile: 62 | xmlfile = open(options.xmlfile, "r") 63 | else: 64 | xmlfile = None 65 | 66 | t0 = time.time() 67 | fis = fiwalk.fileobjects_using_sax(imagefile=imagefile, xmlfile=xmlfile) 68 | t1 = time.time() 69 | print("Time to read file objects: {} seconds".format(t1 - t0)) 70 | 71 | # Create a new array with just those that we can read 72 | def resident_file(fi): 73 | if len(fi.byte_runs()) == 0: 74 | return False 75 | if len(fi.byte_runs()) > 2: 76 | return False 77 | if hasattr(fi.byte_runs()[0], "uncompressed_len"): 78 | return False 79 | if not hasattr(fi.byte_runs()[0], "img_offset"): 80 | return False 81 | return True 82 | 83 | fis = filter(resident_file, fis) 84 | 85 | print("Native order: ") 86 | calc_jumps(fis, "Native Order") 87 | 88 | def sort_function(a, b): 89 | a0 = a.byte_runs()[0].start_sector() 90 | b0 = b.byte_runs()[0].start_sector() 91 | if a0 < b0: 92 | return -1 93 | if a0 == b0: 94 | return 0 95 | return 1 96 | 97 | fis.sort(sort_function) 98 | calc_jumps(fis, "Sorted Order") 99 | -------------------------------------------------------------------------------- /demos/demo_registry_timeline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 6 | import dfxml 7 | 8 | timeline = [] 9 | 10 | 11 | def process(co): 12 | mtime = co.mtime() 13 | if mtime != None: 14 | timeline.append([co.mtime(), co.full_path(), " modified"]) 15 | 16 | 17 | def main(): 18 | if len(sys.argv) < 2: 19 | print("Usage: {} ".format(sys.argv[0])) 20 | exit(1) 21 | dfxml.read_regxml(xmlfile=open(sys.argv[1], "rb"), callback=process) 22 | timeline.sort() 23 | for record in timeline: 24 | print("\t".join(map(str, record))) 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /demos/demo_sizes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | 3 | # 4 | # Demo program that shows how to calculate the average size of file objects in a DFXML file 5 | # 6 | 7 | import collections 8 | import math 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.join(os.path.dirname(__file__), "..")) 13 | import dfxml 14 | 15 | sums = collections.Counter() 16 | sum_of_squares = collections.Counter() 17 | count = collections.Counter() 18 | 19 | 20 | def func(fi): 21 | ext = fi.ext() 22 | count[ext] += 1 23 | sums[ext] += fi.filesize() 24 | sum_of_squares[ext] = fi.filesize() ** 2 25 | 26 | 27 
| dfxml.read_dfxml(xmlfile=open(sys.argv[1], "rb"), callback=func) 28 | fmt = "{:8} {:8} {:8} {:8} {:8}" 29 | print(fmt.format("Ext", "Count", "Total", "Average", "StdDev")) 30 | for ext in sums.keys(): 31 | print( 32 | fmt.format( 33 | ext, 34 | count[ext], 35 | sums[ext], 36 | sums[ext] / count[ext], 37 | math.sqrt(sum_of_squares[ext] / count[ext] - (sums[ext] / count[ext]) ** 2), 38 | ) 39 | ) 40 | -------------------------------------------------------------------------------- /demos/demo_spark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Shows how DFXML works with spark. 4 | # This program runs spark if it is not already running 5 | 6 | 7 | import os 8 | import sys 9 | 10 | sys.path.append("../python") 11 | from dfxml_writer import DFXMLWriter 12 | 13 | 14 | def spark_demo(): 15 | """A small spark program. Must be run under spark""" 16 | import operator 17 | 18 | from pyspark import SparkConf, SparkContext 19 | 20 | conf = SparkConf() 21 | sc = SparkContext(conf=conf) 22 | m = 1000000 23 | result = sc.parallelize(range(0, m + 1)).reduce(operator.add) 24 | print(f"The sum of the numbers 0 to {m} is {result}") 25 | assert result == 500000500000 26 | 27 | 28 | def run_spark(): 29 | # If we are running under spark, just call check_spark. 30 | # Otherwise, run recursively under spark-submit 31 | import os 32 | 33 | if "SPARK_ENV_LOADED" in os.environ: 34 | return # yea! Spark is running 35 | 36 | # 37 | # Re-run this script under spark, and then exit. 38 | # 39 | import subprocess 40 | 41 | r = subprocess.run(["spark-submit", __file__] + sys.argv[1:]) 42 | assert r.returncode == 0 43 | exit(0) 44 | 45 | 46 | if __name__ == "__main__": 47 | import argparse 48 | import time 49 | 50 | parser = argparse.ArgumentParser() 51 | args = parser.parse_args() 52 | 53 | run_spark() 54 | 55 | dfxml = DFXMLWriter( 56 | filename=f"demo_spark_{int(time.time())}.dfxml", prettyprint=True 57 | ) 58 | spark_demo() 59 | # DFXML file gets written automatically when program exits. 60 | exit(0) 61 | -------------------------------------------------------------------------------- /demos/spark/demo_spark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Shows how DFXML works with Spark. 4 | # This program runs Spark if it is not already running 5 | 6 | 7 | import os 8 | import sys 9 | 10 | sys.path.append("../python") 11 | from dfxml_writer import DFXMLWriter 12 | 13 | 14 | def spark_demo(): 15 | """A small Spark program. Must be run under Spark""" 16 | import operator 17 | 18 | from pyspark import SparkConf, SparkContext 19 | 20 | conf = SparkConf() 21 | sc = SparkContext(conf=conf) 22 | m = 1000000 23 | result = sc.parallelize(range(0, m + 1)).reduce(operator.add) 24 | print(f"The sum of the numbers 0 to {m} is {result}") 25 | assert result == 500000500000 26 | 27 | 28 | def run_spark(): 29 | # If we are running under Spark, just call check_spark. 30 | # Otherwise, run recursively under spark-submit 31 | import os 32 | 33 | if "SPARK_ENV_LOADED" in os.environ: 34 | return # yea! Spark is running 35 | 36 | # 37 | # Re-run this script under Spark, and then exit. 
38 | # 39 | import subprocess 40 | 41 | r = subprocess.run(["spark-submit", __file__] + sys.argv[1:]) 42 | assert r.returncode == 0 43 | exit(0) 44 | 45 | 46 | if __name__ == "__main__": 47 | import argparse 48 | import time 49 | 50 | parser = argparse.ArgumentParser() 51 | args = parser.parse_args() 52 | 53 | run_spark() 54 | 55 | dfxml = DFXMLWriter( 56 | filename=f"demo_spark_{int(time.time())}.dfxml", prettyprint=True 57 | ) 58 | spark_demo() 59 | # DFXML file gets written automatically when program exits. 60 | exit(0) 61 | -------------------------------------------------------------------------------- /demos/vmstats/Makefile: -------------------------------------------------------------------------------- 1 | all:vmstats_pretty.dfxml vmstatsN 2 | 3 | vmstats_pretty.dfxml: vmstats.py 4 | python3 vmstats.py --prettyprint vmstats_pretty.dfxml 5 | 6 | vmstatsN: vmstats.py 7 | python3 vmstats.py --repeat 24 --interval 10 vmstatsN-new.dfxml 8 | /bin/mv -f vmstatsN-new.dfxml vmstatsN.dfxml 9 | 10 | -------------------------------------------------------------------------------- /demos/vmstats/vmstats_decode.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | History for {{host}} 5 | 6 | 7 | 14 | 15 | 16 |
[vmstats_decode.html body: the HTML markup was stripped in this dump; the surviving template text follows.]
[PREV] {{host}} {{start_time}} [NEXT]
Stats: CPU Utilization: {{cpu_percent}} % | Mem Utilization: {{mem_percent}} %
Processes (CPU Time): PID, NAME, User, System
{% for ps in ps_list %} {{ps.pid}} {{ps.name}} {{ps.user}} {{ps.system}} {% endfor %}
-------------------------------------------------------------------------------- /demos/vmstats/vmstats_json.html: --------------------------------------------------------------------------------
[vmstats_json.html body: the HTML markup was stripped in this dump; the surviving page text follows.]
History for my host
[PREV] [NEXT]
Stats: Host: | Time: | CPU Utilization: | Mem Utilization: | Page: 1
Processes (CPU Time): PID, NAME, User, System, RSS
53 | 54 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /dfxml/bin/.gitignore: -------------------------------------------------------------------------------- 1 | .pytest_cache 2 | -------------------------------------------------------------------------------- /dfxml/bin/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | all: \ 17 | check 18 | 19 | .PHONY: \ 20 | check-cat_fileobjects \ 21 | check-dfxml_tool \ 22 | check-idifference-dfxml \ 23 | check-mac_timelines \ 24 | check-Objects \ 25 | clean-Objects 26 | 27 | #WORKING_DIR = $(shell pwd) 28 | 29 | # Export the directories to use 30 | #check: export PYTHONPATH = /home/user01/dfxml_python/ 31 | check: export DFXML_DIR = $(shell cd ../.. ; pwd) 32 | check: export PYTHONPATH = $(DFXML_DIR) 33 | check: export TOOL_DIR = $(shell pwd) 34 | check: export TEST_DIR = ./tests 35 | check: export DEMO_DIR = ../../demos 36 | check: export SAMPLE_DIR = ../../samples 37 | check: \ 38 | check-cat_fileobjects \ 39 | check-dfxml_tool \ 40 | check-idifference-dfxml \ 41 | check-mac_timelines \ 42 | check-Objects 43 | @echo "" 44 | @echo "Tests passed!" 45 | @echo "Clean up the test results with 'make clean'." 46 | 47 | doc: \ 48 | Objects.html 49 | 50 | Objects.html: \ 51 | Objects.py 52 | python3 -m pydoc -w Objects 53 | 54 | check-cat_fileobjects: 55 | $(TEST_DIR)/test_cat_fileobjects.sh 56 | 57 | check-dfxml_tool: 58 | $(TEST_DIR)/test_dfxml_tool.sh 59 | 60 | check-idifference-dfxml: 61 | $(TEST_DIR)/test_idifference_to_dfxml.sh 62 | 63 | # TODO: Investigate cause of state transition exception 64 | # 65 | # Skip this temporarily, since there is an unexpected state 66 | # transition exception thrown by line 4840 in python/dxml/object.py, 67 | # when the file samples/simple.xml is processed 68 | #check-mac_timelines: 69 | # ./tests/test_mac_timelines.sh 70 | 71 | clean: clean-Objects 72 | rm -f dfxml_tool_*xml 73 | rm -f cat_test_*.dfxml 74 | rm -f idifference_test.txt 75 | rm -f idifference_test*.dfxml 76 | rm -f demo_mac_timeline*.txt 77 | -------------------------------------------------------------------------------- /dfxml/bin/README.md: -------------------------------------------------------------------------------- 1 | # Tools for working with DFXML-files 2 | 3 | This directory contains scripts that can be run when the `dfxml` package is installed. Some of these tools are added to the shell's `PATH` when the `dfxml` package is installed. Others should be called in-place, e.g. with `python3 $PWD/allocation_counter.py`. 4 | 5 | 6 | ## Installed tools 7 | 8 | Some tools are provided as command-line programs when the `dfxml` module is installed. 
Their source is in this directory, with the suffix `.py`. The link in this table goes to the tool's documentation and testing directory. 9 | 10 | | Program name | Short description | 11 | |-------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------| 12 | | [`walk_to_dfxml`](../../tests/walk_to_dfxml/#walk_to_dfxml) | Fully walk the current working directory and record all files encountered. | 13 | | [`make_differential_dfxml`](../../tests/make_differential_dfxml/#make_differential_dfxml) | Produce a DFXML file denoting file system changes noted by two input DFXML files. | 14 | 15 | ## In-place scripts 16 | 17 | The following DFXML tools are provided in this directory: 18 | 19 | | Script name | Short description | 20 | |----------------------------|--------------------------------------------------------------------------------------| 21 | | `allocation_counter.py` | Produces a cross-tabulation of the allocation state of each file's inode and name. | 22 | | `cat_fileobjects.py` | Prints a new DFXML of all fileobjects in an input DFXML file to stdout. | 23 | | `cat_partitions.py` | Concatenates dfxml-files containing one partition each and prints result to stdout. | 24 | | `deidentify_xml.py` | Removes PII from filenames in a DFXML file. | 25 | | `dfxinfo.py` | Print a summary of a DFXML file - summary of all files, duplicate files, file types. | 26 | | `dfxml_gen.py` | generates DFXML. Based on the C generator. | 27 | | `dfxml_html.py` | A collection of functions for generating HTML. | 28 | | `Extractor.py` | Extracts files specified in a XML-file (or all) from an image to a target directory. | 29 | | `hash_sectors.py` | Outputs sector hashes for sectors with files matching a predicate. | 30 | | `iblkfind.py` | Outputs files, which are located in a given set of sectors. | 31 | | `icarvingtruth.py` | Finds the ground truth in a predefined series of disk images. | 32 | | `idifference.py` | Generates a report about what's different between two disk images. | 33 | | `igrep.py` | Find files in image, which contain the given string. | 34 | | `ihistogram.py` | Draws a quick histogram of the timestamps in an XML file. | 35 | | `imap.py` | Map image files and try to find "missing" data by comparing with the other imgs. | 36 | | `iredact.py` | Image redaction tool using rules described in the file. | 37 | | `ireport.py` | Generates stats from a DFXML file(s). | 38 | | `iverify.py` | Reads an XML file and image and verifies that the files are present. | 39 | | `rdifference.py` | Finds and reports differences in two Windows registry hive-files. | 40 | | `report_silent_changes.py` | Takes a differentially-annotated DFXML file and outputs subtle and 'silent' changes. 
| 41 | 42 | 43 | ### Work needed 44 | 45 | - `dfxml_tool.py` 46 | - `idifference2.py` 47 | - `iexport.py` 48 | - `exp_slack.py` 49 | - `validate_dfxml.py` 50 | - `nsrl_rds.py` 51 | - `corpus_sync.py` 52 | 53 | 54 | ### Uncategorized 55 | 56 | - `break_out_diffs_by_anno.py` 57 | - `mem_info.py` (no dependencies) 58 | -------------------------------------------------------------------------------- /dfxml/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dfxml-working-group/dfxml_python/7897c419bdf376220955aea03a43f2b084d7d901/dfxml/bin/__init__.py -------------------------------------------------------------------------------- /dfxml/bin/allocation_counter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | For a disk image or DFXML file, this program produces a cross-tabulation of the allocation state of each file's inode and name. 18 | """ 19 | 20 | __version__ = "0.1.1" 21 | # Version 0.2.0: 22 | # * Tabular output in HTML 23 | # * Tabular output in LaTeX 24 | 25 | import collections 26 | import logging 27 | import os 28 | import sys 29 | import xml.etree.ElementTree as ET 30 | 31 | import dfxml.bin.make_differential_dfxml 32 | import dfxml.objects as Objects 33 | 34 | _logger = logging.getLogger(os.path.basename(__file__)) 35 | 36 | 37 | def main(): 38 | counter = collections.defaultdict(lambda: 0) 39 | prev_obj = None 40 | for event, obj in Objects.iterparse(args.input_image): 41 | if isinstance(obj, Objects.FileObject): 42 | if ( 43 | args.ignore_virtual_files 44 | and dfxml.bin.make_differential_dfxml.ignorable_name(obj.filename) 45 | ): 46 | continue 47 | counter[(obj.alloc_inode, obj.alloc_name)] += 1 48 | 49 | # Inspect weird data 50 | if args.debug and obj.alloc_inode is None and obj.alloc_name is None: 51 | _logger.debug("Encountered a file with all-null allocation.") 52 | _logger.debug("Event: %r." % event) 53 | _logger.debug( 54 | "Previous object: %s." % ET.tostring(prev_obj.to_Element()) 55 | ) 56 | _logger.debug("Current object: %s." % ET.tostring(obj.to_Element())) 57 | prev_obj = obj 58 | print(repr(counter)) 59 | 60 | 61 | if __name__ == "__main__": 62 | import argparse 63 | 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument( 66 | "--ignore-virtual-files", 67 | action="store_true", 68 | help="Use the same file-ignoring rules as make_differential_dfxml.py.", 69 | ) 70 | parser.add_argument( 71 | "-d", "--debug", action="store_true", help="Enable debug printing." 
72 | ) 73 | parser.add_argument("input_image", help="Disk image, or DFXML file.") 74 | args = parser.parse_args() 75 | 76 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 77 | 78 | main() 79 | -------------------------------------------------------------------------------- /dfxml/bin/break_out_diffs_by_anno.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | This program reads a DFXML file with differential annotations and produces a table. 18 | 19 | Columns: FileObject annotation (is it a new file? renamed? etc.). 20 | Rows: Counts of instances of a property being changed per FileObject annotation. One row per FileObject direct-child element. 21 | """ 22 | 23 | __version__ = "0.1.0" 24 | 25 | import collections 26 | import sys 27 | 28 | import dfxml.objects as Objects 29 | 30 | 31 | def main(): 32 | # Key: (annotation, histogram) 33 | hist = collections.defaultdict(int) 34 | for event, obj in Objects.iterparse(sys.argv[1]): 35 | if event != "end" or not isinstance(obj, Objects.FileObject): 36 | continue 37 | # Loop through annotations 38 | for anno in obj.annos: 39 | # Loop through diffs 40 | for diff in obj.diffs: 41 | hist[(anno, diff)] += 1 42 | 43 | annos = Objects.FileObject._diff_attr_names.keys() 44 | print( 45 | """ 46 | 47 | 48 | 49 | 50 | """ 51 | ) 52 | for anno in annos: 53 | print(" " % anno) 54 | print( 55 | """ 56 | 57 | 58 | 59 | 60 | """ 61 | ) 62 | for diff in sorted(Objects.FileObject._all_properties): 63 | print(" ") 64 | if diff in Objects.FileObject._incomparable_properties: 65 | continue 66 | print(" " % diff) 67 | for anno in annos: 68 | print(" " % hist[(anno, diff)]) 69 | print(" ") 70 | print( 71 | """ 72 | 73 |
Property%s
%s%d
74 | """ 75 | ) 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /dfxml/bin/cat_fileobjects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | Make a new DFXML file of all fileobjects in an input DFXML file. 18 | """ 19 | 20 | __version__ = "0.4.0" 21 | 22 | import logging 23 | import os 24 | import sys 25 | import xml.etree.ElementTree as ET 26 | 27 | import dfxml 28 | 29 | _logger = logging.getLogger(os.path.basename(__file__)) 30 | 31 | if sys.version < "3": 32 | _logger.error( 33 | "Due to Unicode issues with Python 2's ElementTree, Python 3 and up is required.\n" 34 | ) 35 | exit(1) 36 | 37 | 38 | def main(): 39 | print( 40 | """\ 41 | 42 | 46 | 47 | 48 | %s 49 | %s 50 | 51 | %s 52 | 53 | 54 | 55 | %s 56 | \ 57 | """ 58 | % ( 59 | dfxml.XMLNS_DFXML, 60 | dfxml.XMLNS_DELTA, 61 | dfxml.DFXML_VERSION, 62 | sys.argv[0], 63 | __version__, 64 | " ".join(sys.argv), 65 | args.filename, 66 | ) 67 | ) 68 | 69 | ET.register_namespace("delta", dfxml.XMLNS_DELTA) 70 | 71 | xs = [] 72 | for fi in dfxml.iter_dfxml( 73 | xmlfile=open(args.filename, "rb"), preserve_elements=True 74 | ): 75 | _logger.debug("Processing: %s" % str(fi)) 76 | if args.cache: 77 | xs.append(fi.xml_element) 78 | else: 79 | _logger.debug("Printing without cache: %s" % str(fi)) 80 | print(dfxml.ET_tostring(fi.xml_element, encoding="unicode")) 81 | if args.cache: 82 | for x in xs: 83 | _logger.debug("Printing with cache: %s" % str(fi)) 84 | print(dfxml.ET_tostring(x, encoding="unicode")) 85 | 86 | print("""""") 87 | 88 | 89 | if __name__ == "__main__": 90 | import argparse 91 | 92 | parser = argparse.ArgumentParser() 93 | parser.add_argument("filename") 94 | parser.add_argument("--cache", action="store_true") 95 | parser.add_argument("--debug", action="store_true") 96 | args = parser.parse_args() 97 | 98 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 99 | 100 | main() 101 | -------------------------------------------------------------------------------- /dfxml/bin/conftest.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is empty, but it permits test discovery of the subdirectory. 
3 | # See: 4 | # https://stackoverflow.com/questions/10253826/path-issue-with-pytest-importerror-no-module-named-yadayadayada 5 | -------------------------------------------------------------------------------- /dfxml/bin/corpus_sync.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | # 3 | # sync corpus based on DFXML files 4 | 5 | from collections import defaultdict 6 | 7 | import dfxml 8 | import dfxml.fiwalk as fiwalk 9 | 10 | 11 | class CorpusDB: 12 | def __init__(self): 13 | self.all = [] 14 | self.md5db = defaultdict(list) # maps from 15 | self.pathdb = dict() 16 | 17 | def process_fi(self, fi): 18 | self.all.append(fi) 19 | self.md5db[fi.md5()].append(fi) 20 | self.pathdb[fi.filename()] = fi 21 | 22 | def ingest_dfxml(self, fname): 23 | fiwalk.fiwalk_using_sax( 24 | xmlfile=open(fname, "rb"), flags=fiwalk.ALLOC_ONLY, callback=self.process_fi 25 | ) 26 | 27 | def __iter__(self): 28 | return self.all.__iter__() 29 | 30 | def __delitem__(self, fi): 31 | self.all.remove(fi) 32 | self.md5db[fi.md5()].remove(fi) 33 | del self.pathdb[fi.filename()] 34 | 35 | 36 | if __name__ == "__main__": 37 | from copy import deepcopy 38 | from optparse import OptionParser 39 | 40 | parser = OptionParser() 41 | (options, args) = parser.parse_args() 42 | 43 | (fn1, fn2) = args[0:2] 44 | print("# Reading B - the master {}".format(fn1)) 45 | b = CorpusDB() 46 | b.ingest_dfxml(fn1) 47 | 48 | print("# Reading A - the current system {}".format(fn2)) 49 | a = CorpusDB() 50 | a.ingest_dfxml(fn2) 51 | 52 | print("# Files in A that should not be in B:") 53 | rmlist = [afi for afi in a if (afi.md5() not in b.md5db)] 54 | for afi in rmlist: 55 | print("rm {}".format(afi.filename())) 56 | del a[afi] 57 | 58 | fixups = [] 59 | for bfi in b: 60 | if bfi.filename() in a.pathdb and bfi.md5() == a.pathdb[bfi.filename()].md5(): 61 | continue 62 | if bfi.md5() not in a.md5db: 63 | print("get {}".format(bfi.filename())) 64 | continue 65 | 66 | afi = a.md5db[bfi.md5()][0] 67 | nfn = bfi.filename() + ".new" 68 | print("ln {} {}".format(afi.filename(), nfn)) 69 | fixups.append((nfn, bfi.filename())) 70 | 71 | for nfn, bfi_filename in fixups: 72 | print("mv {} {}".format(nfn, bfi_filename)) 73 | -------------------------------------------------------------------------------- /dfxml/bin/dedup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # dedup - detect and optionally remove duplicates based on a DFXML file 4 | 5 | import os 6 | import xml 7 | 8 | import dfxml 9 | 10 | 11 | class dedup: 12 | def __init__(self): 13 | from collections import defaultdict 14 | 15 | self.seen = defaultdict(list) 16 | self.files = 0 17 | self.md5s = 0 18 | 19 | def process(self, fi): 20 | self.files += 1 21 | if fi.md5(): 22 | self.seen[fi.md5()].append(fi.filename()) 23 | self.md5s += 1 24 | 25 | def find_dups(self, cb=None): 26 | for md5, names in self.seen.items(): 27 | if cb and len(names) > 1: 28 | cb(names) 29 | 30 | def report(self, func, cb): 31 | for md5, names in self.seen.items(): 32 | if func(names): 33 | cb(names) 34 | 35 | 36 | def process_dups(names): 37 | print("dups: ", names) 38 | 39 | 40 | if __name__ == "__main__": 41 | from argparse import ArgumentParser 42 | 43 | global options 44 | 45 | parser = ArgumentParser() 46 | parser.add_argument("dfxml", type=str) 47 | parser.add_argument("--verbose", action="store_true") 48 | parser.add_argument( 49 | "--prefix", type=str, help="Only output files with 
the given prefix" 50 | ) 51 | parser.add_argument( 52 | "--distinct", action="store_true", help="Report the distinct files" 53 | ) 54 | parser.add_argument( 55 | "--dups", 56 | action="store_true", 57 | help="Report the files that are dups, and give dup count", 58 | ) 59 | args = parser.parse_args() 60 | 61 | dobj = dedup() 62 | 63 | try: 64 | dfxml.read_dfxml(open(args.dfxml, "rb"), callback=dobj.process) 65 | except xml.parsers.expat.ExpatError: 66 | pass 67 | 68 | print( 69 | "Total files: {:,} total MD5s processed: {:,} Unique MD5s: {:,}".format( 70 | dobj.files, dobj.md5s, len(dobj.seen) 71 | ) 72 | ) 73 | 74 | if args.distinct: 75 | 76 | def report_distinct(names): 77 | if args.prefix and not names[0].startswith(args.prefix): 78 | return 79 | print("distinct: ", names[0]) 80 | 81 | dobj.report(lambda names: len(names) == 1, report_distinct) 82 | 83 | if args.dups: 84 | 85 | def report_dups(names): 86 | for name in names: 87 | if not args.prefix or name.startswith(args.prefix): 88 | print("dups: {} {}".format(name, len(names))) 89 | 90 | dobj.report(lambda names: len(names) > 1, report_dups) 91 | -------------------------------------------------------------------------------- /dfxml/bin/deidentify_xml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # deidentify_xml.py: 4 | # Given XML for a disk, remove information that might be personally identifying from filenames. 5 | # remember the mapping so that directory names don't get changed. 6 | # 7 | # 2012-10-27 slg - updated to Python3 8 | 9 | import typing 10 | 11 | private_dirs = ["home/", "usr/home", "Users"] 12 | ok_top_paths_win = ["program files/", "System", "Windows"] 13 | ok_top_paths_mac = [ 14 | "bin/", 15 | "usr", 16 | "etc", 17 | "private", 18 | "applications", 19 | "developer", 20 | "bin", 21 | "sbin", 22 | "lib", 23 | "dev", 24 | ] 25 | ok_top_paths = ok_top_paths_win + ok_top_paths_mac + ["$orphanfiles"] 26 | acceptable_extensions = ["exe", "dll", "sys", "com", "hlp"] 27 | 28 | import os 29 | import os.path 30 | import sys 31 | 32 | partdir: typing.Dict[str, str] = dict() 33 | 34 | 35 | def sanitize_part(part): 36 | """Sanitize a part of a pathname in a consistent manner""" 37 | if part not in partdir: 38 | partdir[part] = "P%07d" % (len(partdir) + 1) 39 | return partdir[part] 40 | 41 | 42 | def sanitize_filename(fname): 43 | """Given a filename, sanitize each part and return it.""" 44 | ofn = fname 45 | jfn = fname 46 | if jfn[0] == "/": 47 | jfn = jfn[1:] 48 | pathok = False 49 | for p in ok_top_paths: 50 | if jfn.lower().startswith(p): 51 | pathok = True 52 | 53 | if not pathok: 54 | # if the path is not okay, replace all of the parts 55 | # and the name up to the .ext 56 | parts = fname.split("/") 57 | parts[:-1] = [sanitize_part(s) for s in parts[:-1]] 58 | (root, ext) = os.path.splitext(parts[-1]) 59 | if ext not in acceptable_extensions: 60 | parts[-1] = sanitize_part(root) + ext 61 | fname = "/".join(parts) 62 | if ofn[0] == "/" and fname[0] != "/": 63 | fname = "/" + fname 64 | return fname 65 | 66 | 67 | class xml_sanitizer: 68 | """Read and write the XML, but sanitize the filename elements.""" 69 | 70 | def __init__(self, out): 71 | self.out = out 72 | self.cdata = "" 73 | 74 | def _start_element(self, name, attrs): 75 | """Handles the start of an element for the XPAT scanner""" 76 | s = ["<", name] 77 | if attrs: 78 | for a, v in attrs.items(): 79 | if '"' not in v: 80 | s += [" ", a, '="', v, '"'] 81 | else: 82 | s += [" ", a, "='", v, "'"] 
83 | s += [">"] 84 | self.out.write("".join(s)) 85 | self.cdata = "" # new element 86 | 87 | def _end_element(self, name): 88 | """Handles the end of an element for the XPAT scanner""" 89 | if name == "filename": 90 | self.cdata = sanitize_filename(self.cdata) 91 | if self.cdata == "\n": 92 | self.cdata = "" 93 | self.out.write("".join([self.cdata, ""])) 94 | self.cdata = "" 95 | 96 | def _char_data(self, data): 97 | """Handles XML data""" 98 | self.cdata += data 99 | 100 | def process_xml_stream(self, xml_stream): 101 | "Run the reader on a given XML input stream" 102 | import xml.parsers.expat 103 | 104 | p = xml.parsers.expat.ParserCreate() 105 | p.StartElementHandler = self._start_element 106 | p.EndElementHandler = self._end_element 107 | p.CharacterDataHandler = self._char_data 108 | p.ParseFile(xml_stream) 109 | 110 | 111 | if __name__ == "__main__": 112 | from optparse import OptionParser 113 | 114 | global options 115 | parser = OptionParser() 116 | parser.add_option("-t", "--test", help="Test a specific pathanme to sanitize") 117 | (options, args) = parser.parse_args() 118 | 119 | if options.test: 120 | if os.path.isdir(options.test): 121 | for dirpath, dirnames, filenames in os.walk(options.test): 122 | for filename in filenames: 123 | fn = dirpath + "/" + filename 124 | print("%s\n %s" % (fn, sanitize_filename(fn))) 125 | 126 | x = xml_sanitizer(sys.stdout) 127 | x.process_xml_stream(open(args[0], "rb")) 128 | -------------------------------------------------------------------------------- /dfxml/bin/exp_slack.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.2 2 | # 3 | # exp_slack.py: experiment on the slack space 4 | # quantify slack space 5 | # 6 | # (c) Martin Mulazzani, 2012 7 | # Additions by Simson Garfinkel 8 | 9 | import os 10 | import re 11 | import sys 12 | 13 | import dfxml.fiwalk as fiwalk 14 | 15 | 16 | def proc(fi): 17 | # Skip the virtual files? 18 | if fi.filename()[0:1] in ["$"]: 19 | return 20 | if fi.has_contents() and fi.is_file(): 21 | outstring = ( 22 | str(fi.partition()) 23 | + "\t" 24 | + fi.filename() 25 | + "\t" 26 | + str(fi.filesize()) 27 | + "\t" 28 | + str(fi.times()) 29 | + "\n" 30 | ) 31 | f_out.write(outstring) 32 | 33 | 34 | if __name__ == "__main__": 35 | if len(sys.argv) != 2: 36 | print("usage: ./fast_slack.py ") 37 | sys.exit(1) 38 | 39 | # input 40 | file_name = sys.argv[1] 41 | f = open(file_name, "rb") 42 | 43 | # output is to stdout 44 | outfile = sys.stdout 45 | 46 | # find partition information, blocksize and filesystem 47 | # 1st partition has no. 
1, to correspond to fiwalk output 48 | partitioncounter = 0 49 | f.write( 50 | "********************************** PARTITIONS **********************************" 51 | ) 52 | f.write("\nNo\tBlocksize\tFilesystem\n") 53 | 54 | for line in f: 55 | if re.search("block_size", line): 56 | partitioncounter += 1 57 | f_out.write(str(partitioncounter)) 58 | f_out.write("\t") 59 | f_out.write(re.split(">|<", line)[2]) 60 | if re.search("ftype_str", line): 61 | f_out.write("\t\t") 62 | f_out.write(re.split(">|<", line)[2]) 63 | f_out.write("\n") 64 | 65 | f_out.write( 66 | "\n\n************************************* DATA *************************************\n" 67 | ) 68 | f_out.write("Partition\tFilename\tSize\tTimestamps\n") 69 | f.close() 70 | 71 | # re-open file for binary reading 72 | # file processing 73 | f = open(file_name, "rb") 74 | fiwalk.fiwalk_using_sax(xmlfile=f, callback=proc) 75 | -------------------------------------------------------------------------------- /dfxml/bin/filesdb.py: -------------------------------------------------------------------------------- 1 | # 2 | # filesdb 3 | # a module that holds a database of DFXML files 4 | # 5 | 6 | import sys 7 | from collections import defaultdict 8 | 9 | import dfxml 10 | 11 | 12 | class filesdb: 13 | def __init__(self, fname=None): 14 | self.sha1db = defaultdict(list) # fi's by hashdb 15 | self.md5db = defaultdict(list) # fi's by hashdb 16 | self.fnamedb = defaultdict(list) # fi's by fname 17 | self.dirs = defaultdict(list) # fi's by directory name 18 | self.fis = [] 19 | self.prefix = None 20 | self.delfix = None 21 | if fname: 22 | self.read(fname) 23 | 24 | def __iter__(self): 25 | """The iterator for filesdb iterates through all the files""" 26 | return self.fis.__iter__() 27 | 28 | def read(self, f): 29 | if type(f) == str: 30 | self.fname = f 31 | f = open(f, "rb") 32 | dfxml.read_dfxml(xmlfile=f, callback=self.pass1) 33 | 34 | def read_with_prefix(self, fname): 35 | if ":" in fname: 36 | (fmt, fname) = fname.split(":") 37 | if fmt[0] == "+": 38 | self.prefix = fmt[1:] 39 | if fmt[0] == "=": 40 | self.delfix = fmt[1:] 41 | if fmt[0] != "+" and fmt[0] != "=": 42 | self.prefix = fmt 43 | self.read(fname) 44 | 45 | def pass1(self, fi): 46 | """First pass for reading fi objects""" 47 | import os 48 | 49 | self.fis.append(fi) 50 | if fi.sha1(): 51 | self.sha1db[fi.sha1()].append(fi) 52 | if fi.md5(): 53 | self.md5db[fi.md5()].append(fi) 54 | if fi.filename(): 55 | fname = fi.filename() 56 | if self.delfix: 57 | if fname.startswith(self.delfix): 58 | fname = fname[len(self.delfix) :] 59 | if self.prefix: 60 | fname = self.prefix + fname 61 | self.sha1db[fname].append(fi) 62 | self.dirs[os.path.dirname(fname)].append(fi) 63 | 64 | def print_stats(self, f=sys.stdout): 65 | """Returns a text string of the stats""" 66 | ret = [ 67 | ["Total directories", len(self.dirs)], 68 | ["Total files", len(self.fis)], 69 | ["Total bytes", sum([int(fi.filesize()) for fi in self.fis])], 70 | ["Total sha1s", len(self.sha1db)], 71 | ["Total md5s", len(self.md5db)], 72 | ] 73 | print("\n".join(["{:20}: {:14,}".format(a[0], a[1]) for a in ret])) 74 | 75 | mtime_min = [fi.mtime() for fi in self.fis] 76 | # print('mtime=',len(mtime_min)) 77 | # flt = list(filter(lambda a:a!=None,mtime_min)) 78 | # print('flt=',flt,len(flt)) 79 | 80 | # print('mtime_min=',mtime_min) 81 | # print(['ctime range',mtime_min]) 82 | # exit(0) 83 | 84 | def del_dirs(self, targetdb): 85 | """Given a targetdb, provide the dirs to get there.""" 86 | return 
set(self.dirs.keys()).difference(set(targetdb.dirs.keys())) 87 | 88 | def del_files(self, targetdb): 89 | """Given an targetdb, provide the files needed to get there.""" 90 | return set(self.filesdb).difference(set(db.filesdb)) 91 | 92 | def new_dirs(self, db): 93 | """Given an older db, provide the dirs that are new.""" 94 | return set(db.dirs.keys()).difference(set(self.dirs.keys())) 95 | 96 | def search(self, mfi, hash=False, name=False): 97 | """Return the matching fis""" 98 | if hash and not name: 99 | return self.md5db[mfi.md5()] 100 | if name and not hash: 101 | return self.fnamedb[mfi.filename()] 102 | if hash and name: 103 | return filter( 104 | lambda fi: fi.filename() == mfi.filename(), self.md5db[mfi.md5()] 105 | ) 106 | return [] 107 | 108 | 109 | # 110 | # test program. Reads a database and dumps it. 111 | # 112 | if __name__ == "__main__": 113 | from argparse import ArgumentParser 114 | 115 | parser = ArgumentParser( 116 | description="Test the files database with one or more DFXML files" 117 | ) 118 | parser.add_argument("xmlfiles", help="XML files to process", nargs="+") 119 | 120 | args = parser.parse_args() 121 | db = filesdb() 122 | for fn in args.xmlfiles: 123 | db.read(fn) 124 | db.print_stats() 125 | -------------------------------------------------------------------------------- /dfxml/bin/iblkfind.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Usage: iblkfind imagefile.iso s1 [s2 s3 ...] ... 3 | 4 | Reports the files in which sectors s1, s2, s3... are located. 5 | """ 6 | import sys 7 | 8 | import dfxml 9 | 10 | if __name__ == "__main__": 11 | from optparse import OptionParser 12 | 13 | parser = OptionParser() 14 | parser.usage = "%prog [options] imagefile-or-xmlfile s1 [s2 s3 s3 ...]" 15 | parser.add_option( 16 | "--offset", help="values are byte offsets, not sectors", action="store_true" 17 | ) 18 | parser.add_option("--blocksize", help="specify sector blockszie", default=512) 19 | (options, args) = parser.parse_args() 20 | 21 | if len(args) < 1: 22 | parser.print_help() 23 | sys.exit(1) 24 | fn = args[0] 25 | 26 | print(args) 27 | print("Processing %s" % fn) 28 | print("Searching for %s" % ", ".join(args[1:])) 29 | 30 | divisor = 1 31 | if options.offset: 32 | divisor = options.blocksize 33 | 34 | sectors = set([int(s) / divisor for s in args[1:]]) 35 | 36 | def process(fi): 37 | for s in sectors: 38 | if fi.has_sector(s): 39 | print("%d\t%s" % (s, fi.filename())) 40 | 41 | if not fn.endswith(".xml"): 42 | print("iblkfind requires an XML file") 43 | exit(1) 44 | dfxml.read_dfxml(xmlfile=open(args[0], "rb"), callback=process) 45 | -------------------------------------------------------------------------------- /dfxml/bin/iexport.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """iexport.py: export the unallocated spaces.""" 3 | 4 | 5 | class Run: 6 | """Keeps track of a single run""" 7 | 8 | def __init__(self, start, len): 9 | self.start = start 10 | self.len = len 11 | self.end = start + len - 1 12 | 13 | def __str__(self): 14 | return "Run<%d--%d> (len %d)" % (self.start, self.end, self.len) 15 | 16 | def contains(self, b): 17 | """Returns true if b is inside self.""" 18 | print( 19 | "%d <= %d <= %d = %s" 20 | % (self.start, b, self.end, (self.start <= b <= self.end)) 21 | ) 22 | return self.start <= b <= self.end 23 | 24 | def intersects_run(self, r): 25 | """Return true if self intersects r. 
This may be because r.start is 26 | inside the run, r.end is inside the run, or self is inside the run.""" 27 | return self.contains(r.start) or self.contains(r.end) or r.contains(self.start) 28 | 29 | def contains_run(self, r): 30 | """Returns true if self completely contains r""" 31 | return self.contains(r.start) and self.contains(r.end) 32 | 33 | 34 | class RunDB: 35 | """The RunDB maintains a list of all the runs in a disk image. The 36 | RunDB is created with a single run that represents all of the sectors 37 | in the disk image. Runs can then be removed, which causes existing 38 | runs to be split. Finally all of the remaining runs can be removed.""" 39 | 40 | def __init__(self, start, len): 41 | self.runs = [Run(start, len)] 42 | 43 | def __str__(self): 44 | return "RunDB\n" + "\n".join([str(p) for p in self.runs]) 45 | 46 | def intersecting_runs(self, r): 47 | """Return a list of all the Runs that intersect with r. 48 | This may be because r.start is inside the run, r.end is inside 49 | the run, because the run completely encloses r, or because r completely 50 | encloses the run.""" 51 | return filter(lambda x: x.intersects_run(r), self.runs) 52 | 53 | def remove(self, r): 54 | """Remove run r""" 55 | for p in self.intersecting_runs(r): 56 | self.runs.remove(p) 57 | 58 | # if P is completely inside r, just remove it 59 | if r.contains_run(p): 60 | continue 61 | 62 | # Split p into before and after r; add the non-zero pieces 63 | before_len = r.start - p.start 64 | if before_len > 0: 65 | self.runs.append(Run(p.start, before_len)) 66 | after_len = p.end - r.end 67 | if after_len > 0: 68 | self.runs.append(Run(r.end, after_len)) 69 | -------------------------------------------------------------------------------- /dfxml/bin/iextract.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import datetime 4 | import os 5 | import os.path 6 | import sys 7 | import zipfile 8 | 9 | import dfxml 10 | import dfxml.fiwalk as fiwalk 11 | 12 | if __name__ == "__main__": 13 | from optparse import OptionParser 14 | 15 | parser = OptionParser() 16 | parser.add_option( 17 | "-x", 18 | "--xml", 19 | dest="xmlfilename", 20 | help="Already-created DFXML file for imagefile", 21 | ) 22 | parser.usage = "%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... in imagefile and write to zipfile" 23 | (options, args) = parser.parse_args() 24 | 25 | if len(args) < 3: 26 | parser.print_help() 27 | exit(1) 28 | 29 | imagefilename = args[0] 30 | xmlfilename = options.xmlfilename 31 | xmlfh = None 32 | if xmlfilename != None: 33 | xmlfh = open(xmlfilename, "r") 34 | zipfilename = args[1] 35 | targets = set([fn.lower() for fn in args[2:]]) 36 | zfile = zipfile.ZipFile(zipfilename, "w", allowZip64=True) 37 | 38 | def proc(fi): 39 | basename = os.path.basename(fi.filename()).lower() 40 | if basename in targets: 41 | info = zipfile.ZipInfo( 42 | fi.filename(), 43 | datetime.datetime.fromtimestamp(fi.mtime().timestamp()).utctimetuple(), 44 | ) 45 | info.internal_attr = 1 46 | info.external_attr = 2175008768 # specifies mode 0644 47 | zfile.writestr(info, fi.contents()) 48 | 49 | fiwalk.fiwalk_using_sax(imagefile=open(imagefilename), xmlfile=xmlfh, callback=proc) 50 | -------------------------------------------------------------------------------- /dfxml/bin/igrep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Usage: igrep imagefile.iso string ... 
3 | 4 | Reports the files in which files have the string. 5 | """ 6 | import fiwalk 7 | 8 | import dfxml 9 | 10 | if __name__ == "__main__": 11 | import sys 12 | from optparse import OptionParser 13 | 14 | parser = OptionParser() 15 | parser.usage = "%prog [options] image.iso s1" 16 | parser.add_option("-d", "--debug", help="debug", action="store_true") 17 | (options, args) = parser.parse_args() 18 | 19 | if len(args) != 2: 20 | parser.print_help() 21 | sys.exit(1) 22 | 23 | (imagefn, data) = args 24 | 25 | def process(fi): 26 | offset = fi.contents().find(data) 27 | if offset > 0: 28 | print("%s (offset=%d)" % (fi.filename(), offset)) 29 | 30 | fiwalk.fiwalk_using_sax(imagefile=open(imagefn), callback=process) 31 | -------------------------------------------------------------------------------- /dfxml/bin/ihistogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Draw a quick histogram of the timestamps on the hard drive""" 3 | 4 | import matplotlib 5 | 6 | matplotlib.use("agg.pdf") 7 | 8 | 9 | import datetime 10 | import time 11 | 12 | from matplotlib.dates import ( 13 | MONDAY, 14 | SATURDAY, 15 | DateFormatter, 16 | MonthLocator, 17 | WeekdayLocator, 18 | ) 19 | from pylab import * 20 | 21 | import dfxml.fiwalk as fiwalk 22 | 23 | 24 | def get_dates_and_counts(times): 25 | from datetime import date 26 | 27 | data = {} 28 | for t in times: 29 | gm = time.gmtime(t) 30 | d = date(gm[0], gm[1], gm[2]) 31 | data[d] = data.get(d, 0) + 1 32 | 33 | # Create a list of key,val items so you can sort by date 34 | dates_and_counts = [(date, count) for date, count in data.items()] 35 | dates_and_counts = sorted(dates_and_counts) 36 | return dates_and_counts 37 | 38 | 39 | def version1(times): 40 | import pylab 41 | 42 | pylab.grid() 43 | pylab.hist(times, 100) 44 | pylab.show() 45 | 46 | 47 | def version2(times): 48 | # see http://mail.python.org/pipermail/python-list/2003-November/236559.html 49 | # http://www.gossamer-threads.com/lists/python/python/665014 50 | from matplotlib.pylab import ( 51 | bar, 52 | gca, 53 | plot, 54 | plot_date, 55 | savefig, 56 | show, 57 | title, 58 | xlabel, 59 | ylabel, 60 | ) 61 | 62 | dates_and_counts = get_dates_and_counts(times) 63 | dates, counts = zip(*dates_and_counts) 64 | # bar(dates,counts) 65 | plot_date(dates, counts) 66 | xlabel("Date") 67 | ylabel("count") 68 | show() 69 | 70 | 71 | def version3(times): 72 | import datetime 73 | 74 | import matplotlib 75 | import matplotlib.dates as mdates 76 | import matplotlib.mlab as mlab 77 | import matplotlib.pyplot as pyplot 78 | import numpy as np 79 | 80 | dates_and_counts = get_dates_and_counts(times) 81 | dates, counts = zip(*dates_and_counts) 82 | 83 | years = mdates.YearLocator() # every year 84 | months = mdates.MonthLocator() # every month 85 | yearsFmt = mdates.DateFormatter("%Y") 86 | 87 | fig = pyplot.figure() 88 | ax = fig.add_subplot(111) 89 | ax.bar(dates, counts) 90 | 91 | ax.set_ylabel("file count") 92 | ax.set_xlabel("file modification time (mtime)") 93 | 94 | # ax.set_yscale('log') 95 | 96 | # Format the ticks 97 | 98 | ax.xaxis.set_major_locator(years) 99 | ax.xaxis.set_major_formatter(yearsFmt) 100 | # ax.xaxis.set_minor_locator(months) 101 | 102 | datemin = datetime.date(min(dates).year, 1, 1) 103 | datemax = datetime.date(max(dates).year, 1, 1) 104 | ax.set_xlim(datemin, datemax) 105 | ax.set_ylim(0, max(counts)) 106 | 107 | # format the coords message box 108 | def price(x): 109 | return "$%1.2f" % x 110 | 111 | ax.format_xdata 
= mdates.DateFormatter("%Y-%m-%d") 112 | ax.format_ydata = price 113 | ax.grid(True) 114 | 115 | # rotates and right aligns the x labels, and moves the bottom of the 116 | # axes up to make room for them 117 | fig.autofmt_xdate() 118 | plt.savefig("hist.pdf", format="pdf") 119 | 120 | print("dates:", dates) 121 | print("num dates:", len(dates)) 122 | 123 | 124 | if __name__ == "__main__": 125 | import sys 126 | from optparse import OptionParser 127 | from sys import stdout 128 | 129 | parser = OptionParser() 130 | parser.usage = "%prog [options] xmlfile " 131 | (options, args) = parser.parse_args() 132 | 133 | import time 134 | 135 | times = [] 136 | for fi in fiwalk.fileobjects_using_sax(xmlfile=open(args[0])): 137 | try: 138 | times.append(fi.mtime()) 139 | except KeyError: 140 | pass 141 | 142 | version3(times) 143 | -------------------------------------------------------------------------------- /dfxml/bin/imap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Usage: imap imagefile0.iso imagefile1.iso imagefile2.iso ... 3 | 4 | Produces a map of imagefile0.iso, using the other image files as "hints" for missing 5 | data. Only reports files that have been allocated; deleted files are reported only if 6 | they can be found allocated in another file. 7 | """ 8 | import dfxml.fiwalk as fiwalk 9 | 10 | ################################################################ 11 | if __name__ == "__main__": 12 | import sys 13 | from optparse import OptionParser 14 | from sys import stdout 15 | 16 | parser = OptionParser() 17 | parser.usage = "%prog [options] image.iso " 18 | parser.add_option("-d", "--debug", help="debug", action="store_true") 19 | (options, args) = parser.parse_args() 20 | 21 | if len(args) < 1: 22 | parser.print_help() 23 | sys.exit(1) 24 | 25 | imagefile = open(args[0], "r") 26 | annotated_runs = [] 27 | # TODO - This debug statement needs to moved to somewhere appropriate after an image read. 28 | # if options.debug: print("Read %d file objects from %s" % (len(fileobjects),imagefile.name)) 29 | 30 | def cb(fi): 31 | if options.debug: 32 | print("Read " + str(fi)) 33 | fragment_num = 1 34 | for run in fi.byte_runs(): 35 | annotated_runs.append((run.img_offset, run, fragment_num, fi)) 36 | fragment_num += 1 37 | 38 | fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=cb) 39 | 40 | next_sector = 0 41 | 42 | for ip, run, fragment_num, fi in sorted(annotated_runs): 43 | extra = "" 44 | fragment = "" 45 | start_sector = run.img_offset / 512 46 | sector_count = int(run.bytes / 512) 47 | partial = run.bytes % 512 48 | 49 | if not fi.allocated(): 50 | print("***") 51 | 52 | if not fi.file_present(): # it's not here! 
53 | continue 54 | 55 | if partial > 0: 56 | sector_count += 1 57 | extra = "(%3d bytes slack)" % (512 - partial) 58 | 59 | if fi.fragments() > 2: 60 | fragment = "fragment %d" % fragment_num 61 | 62 | if next_sector != start_sector: 63 | print( 64 | " <-- %5d unallocated sectors @ sector %5d -->" 65 | % (start_sector - next_sector, next_sector) 66 | ) 67 | 68 | print( 69 | "[ %6d -> %6d sectors %18s ] %s %s " 70 | % (start_sector, sector_count, extra, fi.filename(), fragment) 71 | ) 72 | 73 | next_sector = start_sector + sector_count 74 | -------------------------------------------------------------------------------- /dfxml/bin/iredact-config.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Paths to the disk image and fiwalk XML output 3 | # 4 | IMAGEFILE /home/bcadmin/Desktop/jowork.raw.raw 5 | XMLFILE /home/bcadmin/Desktop/jofiwalk.xml 6 | 7 | # 8 | # Redaction patterns 9 | # 10 | #FILEPAT *.dll FUZZ 11 | #FILEPAT *.com FUZZ 12 | FILEPAT *.exe FUZZ 13 | 14 | # 15 | # Other examples 16 | # 17 | #KEY 100200300400 18 | #MD5 db06069ef1c9f40986ffa06db4fe8fd7 FILL 0x44 19 | #FILENAME file3.txt ENCRYPT 20 | #FILEPAT file*.txt ENCRYPT 21 | #CONTAINS This FILL 0x44 22 | #FILEPAT *Spotlight* FILL 0x44 23 | 24 | # 25 | # Uncomment this line to actually commit the redaction: 26 | # 27 | COMMIT 28 | 29 | 30 | -------------------------------------------------------------------------------- /dfxml/bin/iverify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """Reads an fiwalk XML file and reports how many of the files are still in the image...""" 3 | 4 | import hashlib 5 | import os.path 6 | import sys 7 | 8 | from dfxml import fiwalk 9 | 10 | present = [] 11 | not_present = [] 12 | 13 | 14 | def process_fi(fi): 15 | print("process file", fi.filename()) 16 | if fi.filesize() == 0: 17 | return 18 | try: 19 | if fi.file_present(): 20 | present.append(fi) 21 | return 22 | else: 23 | not_present.append(fi) 24 | return 25 | except ValueError(e): 26 | sys.stderr.write(str(e) + "\n") 27 | 28 | 29 | def main(): 30 | import sys 31 | from optparse import OptionParser 32 | from subprocess import PIPE, Popen 33 | 34 | global options 35 | 36 | parser = OptionParser() 37 | parser.add_option("-d", "--debug", help="prints debugging info", dest="debug") 38 | parser.add_option("-g", "--ground", help="ground truth XML file", dest="ground") 39 | parser.usage = "%prog [options] image.iso" 40 | (options, args) = parser.parse_args() 41 | 42 | if not options.ground: 43 | parser.print_help() 44 | sys.exit(1) 45 | 46 | # Read the XML file 47 | reader = fiwalk.fileobject_reader() 48 | reader.set_imagefilename(args[0]) 49 | reader.process_xml_stream(open(options.ground, "r"), process_fi) 50 | 51 | if len(present) == 0: 52 | print("None of the files are present in the image") 53 | sys.exit(0) 54 | 55 | if len(not_present) == 0: 56 | print("All of the files are present in the image") 57 | sys.exit(0) 58 | 59 | print("\n\n") 60 | print("Present in image:") 61 | print("=================") 62 | print("\n".join([fi.filename() for fi in present])) 63 | 64 | print("\n") 65 | print("Not Present or altered in image:") 66 | print("=====================") 67 | for fi in not_present: 68 | print(fi.filename()) 69 | 70 | 71 | ################################################################ 72 | if __name__ == "__main__": 73 | main() 74 | -------------------------------------------------------------------------------- 
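The tools above (`imap.py`, `iverify.py`, and earlier `corpus_sync.py` and `exp_slack.py`) all share the same stream-processing pattern: `fiwalk.fiwalk_using_sax()` parses fiwalk DFXML (given either an XML file or a disk image) and hands each resulting fileobject to a callback. A minimal sketch of that pattern, using a placeholder DFXML path and only accessors already used by those scripts:

```python
import dfxml.fiwalk as fiwalk


def report(fi):
    # Called once per fileobject parsed from the DFXML stream.
    if fi.allocated() and fi.filesize():
        print("%s\t%d" % (fi.filename(), fi.filesize()))


# "image.dfxml" is a placeholder for an existing fiwalk DFXML report.
with open("image.dfxml", "rb") as xmlfh:
    fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=report)
```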
/dfxml/bin/mem_info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | mem_info.py: report the memory used by a program that wrote results to a dfxml file 4 | """ 5 | 6 | 7 | import sys 8 | import xml.etree.ElementTree as ET 9 | 10 | 11 | def fmt(n): 12 | if args.h: 13 | for p, let in reversed((3, "K"), (6, "M"), (9, "G"), (12, "T"), (15, "P")): 14 | if n > 10**p: 15 | return f"{n/10**p}{let}" 16 | return n 17 | 18 | 19 | def process_dfxml(dfxml): 20 | root = ET.parse(dfxml) 21 | start_time = root.find(".//start_time").text[0:19].replace("T", " ") 22 | command_line = " ".join(root.find(".//command_line").text.split()[1:]) 23 | maxrss = 0 24 | for e in root.findall(".//rusage/maxrss"): 25 | maxrss += int(e.text) 26 | print(start_time, fmt(maxrss), command_line) 27 | 28 | 29 | if __name__ == "__main__": 30 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 31 | 32 | parser = ArgumentParser( 33 | formatter_class=ArgumentDefaultsHelpFormatter, 34 | description="report memory utilization from DFXML file", 35 | ) 36 | parser.add_argument("--h", help="human format", action="store_true") 37 | parser.add_argument("dfxml", nargs="*") 38 | args = parser.parse_args() 39 | bad_files = [] 40 | for fname in args.dfxml: 41 | try: 42 | process_dfxml(fname) 43 | except ET.ParseError as e: 44 | bad_files.append(fname) 45 | if bad_files: 46 | print("Could not read:", file=sys.stderr) 47 | print("\n".join(bad_files), file=sys.stderr) 48 | -------------------------------------------------------------------------------- /dfxml/bin/nsrl_rds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Demonstrates how to communicate with NPS NSRL RDS 4 | # 5 | 6 | RDS_SERVER = "https://domex.nps.edu/www-noauth/nsrl_rds.cgi" 7 | 8 | import xmlrpclib 9 | 10 | if __name__ == "__main__": 11 | print("Demonstration of NSRL RDS service at %s\n" % RDS_SERVER) 12 | print("") 13 | p = xmlrpclib.ServerProxy(RDS_SERVER) 14 | try: 15 | avail = p.available() 16 | except xmlrpclib.ProtocolError as e: 17 | print("Cannot access " + RDS_SERVER) 18 | print(e) 19 | raise RuntimeError 20 | 21 | print("Available RDS sets: %s " % avail) 22 | 23 | md5_val = "EB714443AA2FC1A3D16E39EB8007A0B2" 24 | 25 | # Build a search term 26 | search = {"db": avail[0], "md5": md5_val} # pick the first search term 27 | 28 | print("Here are the files with a md5 of " + md5_val) 29 | ret = p.search(search) 30 | fields = ret["fields"] 31 | for row in ret["result"]: 32 | for a, b in zip(fields, row): 33 | print(a, "=", b) 34 | print("") 35 | 36 | print( 37 | "Now we will do a query for multiple MD5 values. You can do this by specifying\n" 38 | + "a value as an array." 
39 | ) 40 | searchm = { 41 | "db": avail[0], 42 | "md5": ["EB714443AA2FC1A3D16E39EB8007A0B2", "9B3702B0E788C6D62996392FE3C9786A"], 43 | } 44 | print("sending:", searchm) 45 | ret = p.search(searchm) 46 | print("got:", ret) 47 | fields = ret["fields"] 48 | for row in ret["result"]: 49 | for a, b in zip(fields, row): 50 | print(a, "=", b) 51 | print("") 52 | -------------------------------------------------------------------------------- /dfxml/bin/report_silent_changes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | """ 17 | This program takes a differentially-annotated DFXML file as input, and outputs a DFXML document that contains 'Silent' changes. For instance, a changed checksum with no changed timestamps would be 'Silent.' 18 | """ 19 | 20 | __version__ = "0.2.2" 21 | 22 | import logging 23 | import os 24 | import sys 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | import make_differential_dfxml 29 | 30 | import dfxml.objects as Objects 31 | 32 | 33 | def main(): 34 | d = Objects.DFXMLObject("1.2.0") 35 | d.program = sys.argv[0] 36 | d.program_version = __version__ 37 | d.command_line = " ".join(sys.argv) 38 | d.dc["type"] = "File system silent-change report" 39 | d.add_creator_library( 40 | "Python", ".".join(map(str, sys.version_info[0:3])) 41 | ) # A bit of a bend, but gets the major version information out. 42 | d.add_creator_library("Objects.py", Objects.__version__) 43 | d.add_creator_library("dfxml.py", Objects.dfxml.__version__) 44 | 45 | current_appender = d 46 | tally = 0 47 | for event, obj in Objects.iterparse(args.infile): 48 | if event == "start": 49 | # Inherit namespaces 50 | if isinstance(obj, Objects.DFXMLObject): 51 | for prefix, url in obj.iter_namespaces(): 52 | d.add_namespace(prefix, url) 53 | # Group files by volume 54 | elif isinstance(obj, Objects.VolumeObject): 55 | d.append(obj) 56 | current_appender = obj 57 | elif event == "end": 58 | if isinstance(obj, Objects.VolumeObject): 59 | current_appender = d 60 | elif isinstance(obj, Objects.FileObject): 61 | if "_changed" not in obj.diffs: 62 | if "_modified" in obj.diffs or "_renamed" in obj.diffs: 63 | current_appender.append(obj) 64 | tally += 1 65 | print(d.to_dfxml()) 66 | _logger.info("Found %d suspiciously-changed files." 
% tally) 67 | 68 | 69 | if __name__ == "__main__": 70 | import argparse 71 | 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument("-d", "--debug", action="store_true") 74 | parser.add_argument("infile") 75 | args = parser.parse_args() 76 | 77 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 78 | 79 | if not args.infile.endswith("xml"): 80 | raise Exception( 81 | "Input file should be a DFXML file, and should end with 'xml': %r." 82 | % args.infile 83 | ) 84 | 85 | if not os.path.exists(args.infile): 86 | raise Exception("Input file does not exist: %r." % args.infile) 87 | 88 | main() 89 | -------------------------------------------------------------------------------- /dfxml/bin/tcpdiff.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed in whole or in part by employees of the 4 | # Federal Government in the course of their official duties, and with 5 | # other Federal assistance. Pursuant to title 17 Section 105 of the 6 | # United States Code portions of this software authored by Federal 7 | # employees are not subject to copyright protection within the United 8 | # States. For portions not authored by Federal employees, the Federal 9 | # Government has been granted unlimited rights, and no claim to 10 | # copyright is made. The Federal Government assumes no responsibility 11 | # whatsoever for its use by other parties, and makes no guarantees, 12 | # expressed or implied, about its quality, reliability, or any other 13 | # characteristic. 14 | # 15 | # We would appreciate acknowledgement if the software is used. 16 | 17 | """tcpdiff.py 18 | 19 | Generates a report about what's different between two tcp DFXML files 20 | produced by tcpflow. 21 | 22 | Process: 23 | 24 | """ 25 | 26 | import sys 27 | import time 28 | 29 | if sys.version_info < (3, 1): 30 | raise RuntimeError("rdifference.py requires Python 3.1 or above") 31 | 32 | import dfxml 33 | import dfxml.dfxml_html as dfxml_html 34 | import dfxml.fiwalk as fiwalk 35 | 36 | 37 | def ptime(t): 38 | """Print the time in the requested format. T is a dfxml time value""" 39 | global options 40 | if t is None: 41 | return None 42 | elif options.timestamp: 43 | return str(t.timestamp()) 44 | else: 45 | return str(t.iso8601()) 46 | 47 | 48 | def dprint(x): 49 | "Debug print" 50 | global options 51 | if options.debug: 52 | print(x) 53 | 54 | 55 | # 56 | # This program keeps track of the current and previous TCP connections in a single 57 | # object called "FlowState". Another way to do that would have been to have 58 | # the instance built from the XML file and then have another function that compares 59 | # them. 
60 | # 61 | 62 | 63 | class FlowState: 64 | def __init__(self, fname): 65 | self.options = options 66 | self.connections = set() 67 | self.process(fname) 68 | 69 | def process(self, fname): 70 | self.fname = fname 71 | dfxml.read_dfxml(xmlfile=open(fname, "rb"), callback=self.process_fi) 72 | 73 | def process_fi(self, fi): 74 | self.connections.add(fi) 75 | 76 | def report(self): 77 | dfxml_html.header() 78 | dfxml_html.h1("DFXML file:" + self.current_fname) 79 | dfxml_html.table(["Total Connections", str(len(self.connections))]) 80 | 81 | 82 | if __name__ == "__main__": 83 | from copy import deepcopy 84 | from optparse import OptionParser 85 | 86 | global options 87 | 88 | parser = OptionParser() 89 | parser.usage = "%prog [options] file1 file2 (files MUST be tcpflow DFXML files)" 90 | parser.add_option("-d", "--debug", help="debug", action="store_true") 91 | 92 | (options, args) = parser.parse_args() 93 | 94 | if len(args) != 2: 95 | parser.print_help() 96 | sys.exit(1) 97 | 98 | a = FlowState(fname=args[0]) 99 | a.report() 100 | 101 | b = FlowState(fname=args[1]) 102 | b.report() 103 | 104 | print("Difference:") 105 | -------------------------------------------------------------------------------- /dfxml/bin/validate_dfxml.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | from optparse import OptionParser 4 | from sys import stdout 5 | 6 | import dfxml.fiwalk as fiwalk 7 | 8 | 9 | def demo_dfxml_time_bug(filename): 10 | parser = OptionParser() 11 | parser.usage = "%prog% [options] xmlfile " 12 | (options, args) = parser.parse_args() 13 | for fi in fiwalk.fileobjects_using_sax(xmlfile=open(filename, "rb")): 14 | fsize = fi.filesize() 15 | try: 16 | mt = fi.mtime() 17 | print("Type of mt:", type(mt)) 18 | print("Normal mtime:") 19 | print(mt) 20 | except KeyboardInterrupt: 21 | raise 22 | except: 23 | raise RuntimeException("Abnormal mtime for file with size {}".format(fsize)) 24 | 25 | 26 | if __name__ == "__main__": 27 | filename = sys.argv[1] 28 | demo_dfxml_time_bug(filename) 29 | -------------------------------------------------------------------------------- /dfxml/bin/xdiff.py: -------------------------------------------------------------------------------- 1 | # 2 | # Report the difference between two dfxml files 3 | # 4 | import sys 5 | 6 | from filesdb import filesdb 7 | 8 | import dfxml 9 | 10 | # 11 | # test program. Reads a database and dumps it. 12 | # 13 | if __name__ == "__main__": 14 | from argparse import ArgumentParser 15 | 16 | parser = ArgumentParser( 17 | description="Test the files database with one or more DFXML files" 18 | ) 19 | parser.add_argument("xmlfiles", help="XML files to process", nargs="+") 20 | 21 | args = parser.parse_args() 22 | db0 = None 23 | for fn in args.xmlfiles: 24 | db1 = filesdb() 25 | db1.fname = fn 26 | db1.read(fn) 27 | print("{} stats:".format(fn)) 28 | db1.print_stats(sys.stdout) 29 | if db0: 30 | print("") 31 | print("Difference from {}".format(db0.fname)) 32 | db0 = db1 33 | -------------------------------------------------------------------------------- /dfxml/bin/xmirror.py: -------------------------------------------------------------------------------- 1 | # 2 | # Using two XML files make the current system look like the master 3 | # 4 | import sys 5 | 6 | from filesdb import filesdb 7 | 8 | import dfxml 9 | 10 | # 11 | # test program. Reads a database and dumps it. 
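# Illustrative invocation (paths are hypothetical):
#
#   python xmirror.py --local local.dfxml +/mnt/master/:master.dfxml
#
# The optional "+prefix:" / "=prefix:" syntax on master files is interpreted by
# filesdb.read_with_prefix (imported above): "+" prepends the given prefix to
# each filename read from that DFXML file, while "=" strips it instead.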
12 | # 13 | if __name__ == "__main__": 14 | from argparse import ArgumentParser 15 | 16 | parser = ArgumentParser(description="Make the local system look like the master") 17 | parser.add_argument("--commit", help="Actually do the job", action="store_true") 18 | parser.add_argument( 19 | "--local", 20 | help="specifies an XML file that describes the local system (required)", 21 | ) 22 | parser.add_argument( 23 | "masterfiles", 24 | help="XML files to process. Files may be prefixed with an [xml] path", 25 | nargs="+", 26 | ) 27 | 28 | args = parser.parse_args() 29 | 30 | if not args.local: 31 | parser.print_help() 32 | exit(1) 33 | 34 | masterdb = filesdb() 35 | for fn in args.masterfiles: 36 | masterdb.read_with_prefix(fn) 37 | print("Master stats:") 38 | masterdb.print_stats(sys.stdout) 39 | print("\n") 40 | print("Local mirror stats:") 41 | ldb = filesdb() 42 | ldb.read_with_prefix(args.local) 43 | 44 | # Create new directories if needed 45 | for newdir in ldb.new_dirs(masterdb): 46 | print("mkdir {}".format(newdir)) 47 | 48 | keep_files = [] 49 | mv_files = [] 50 | rm_files = [] 51 | 52 | def process_fi(fi): 53 | # If hash is same and name is the same, ignore: 54 | for nfi in masterdb.search(fi, hash=True, name=True): 55 | keep_files.append(fi.filename()) 56 | return # in the database 57 | 58 | # If hash is same and name is different, move it 59 | for nfi in masterdb.search(fi, hash=True): 60 | mv_files.append((fi.filename(), nfi.filename())) 61 | return 62 | 63 | # If name is same and hash is different, erase it 64 | for nfi in masterdb.search(fi, name=True): 65 | rm_files.append(fi.filename()) 66 | return 67 | 68 | # Otherwise, erase the hash 69 | rm_files.append(fi.filename()) 70 | return 71 | 72 | # Rename files that need to be renamed 73 | for fi in ldb: 74 | process_fi(fi) 75 | 76 | print("Files to keep: {:12,}".format(len(keep_files))) 77 | print("Files to rm: {:12,}".format(len(rm_files))) 78 | print("Files to mv: {:12,}".format(len(mv_files))) 79 | -------------------------------------------------------------------------------- /dfxml/conftest.py: -------------------------------------------------------------------------------- 1 | # 2 | # This file is empty, but it permits test discovery of the subdirectory. 3 | # See: 4 | # https://stackoverflow.com/questions/10253826/path-issue-with-pytest-importerror-no-module-named-yadayadayada 5 | -------------------------------------------------------------------------------- /dfxml/dfxml_html.py: -------------------------------------------------------------------------------- 1 | # This software was developed in whole or in part by employees of the 2 | # Federal Government in the course of their official duties, and with 3 | # other Federal assistance. Pursuant to title 17 Section 105 of the 4 | # United States Code portions of this software authored by Federal 5 | # employees are not subject to copyright protection within the United 6 | # States. For portions not authored by Federal employees, the Federal 7 | # Government has been granted unlimited rights, and no claim to 8 | # copyright is made. The Federal Government assumes no responsibility 9 | # whatsoever for its use by other parties, and makes no guarantees, 10 | # expressed or implied, about its quality, reliability, or any other 11 | # characteristic. 12 | # 13 | # We would appreciate acknowledgement if the software is used. 
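# Example usage (sketch): callers such as tcpdiff.py import this module and
# call header()/h1()/table(); the module-level `html` flag below selects HTML
# rather than plain-text output.
#
#   import dfxml.dfxml_html as dfxml_html
#   dfxml_html.html = True  # leave False (the default) for plain text
#   dfxml_html.header()
#   dfxml_html.h1("Report")
#   dfxml_html.table([["Total Connections", "42"]])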
14 | 15 | # dfxml_html.py: 16 | # A collection of functions for generating HTML 17 | 18 | html = False 19 | 20 | 21 | def header(): 22 | if html: 23 | print( 24 | """ 25 | 26 | 27 | 32 | """ 33 | ) 34 | 35 | 36 | def h1(title): 37 | global options 38 | if html: 39 | print("

<h1>%s</h1>" % title) 40 | return 41 | print("\n\n%s\n" % title) 42 | 43 | 44 | def h2(title): 45 | global options 46 | if html: 47 | print("<h2>%s</h2>" % title) 48 | return 49 | print("\n%s\n" % title) 50 | 51 |
52 | def table(rows, styles=None, break_on_change=False): 53 | import sys 54 | 55 | global options 56 | 57 | def alldigits(x): 58 | if not isinstance(x, str): 59 | return False 60 | for ch in x: 61 | if ch.isdigit() == False: 62 | return False 63 | return True 64 |
65 | def fmt(x): 66 | if x == None: 67 | return "" 68 | if type(x) == int: 69 | return "%12d" % x 70 | if alldigits(x): 71 | return "%12d" % int(x) 72 | if isinstance(x, str): 73 | return x 74 | return str(x) 75 |
76 | if html: 77 | print("<table>") 78 | for row in rows: 79 | print("<tr>") 80 | if not styles: 81 | styles = [""] * len(rows) 82 | for col, style in zip(row, styles): 83 | sys.stdout.write("<td %s>%s</td>" % (style, col)) 84 | print("</tr>") 85 | print("</table>
") 86 | return 87 | lastRowCol0 = None 88 | for row in rows: 89 | if row[0] != lastRowCol0: 90 | sys.stdout.write("\n") 91 | lastRowCol0 = row[0] 92 | try: 93 | line = "\t".join([fmt(col) for col in row]) 94 | sys.stdout.write(line) 95 | sys.stdout.write("\n") 96 | except UnicodeEncodeError: 97 | # Fall back to manual join 98 | for col in row: 99 | for ch in fmt(col): 100 | try: 101 | sys.stdout.write(ch) 102 | except UnicodeEncodeError: 103 | sys.stdout.write("?") 104 | sys.stdout.write("\t") 105 | print("(UNICODE ERROR)") 106 | -------------------------------------------------------------------------------- /dfxml/py.typed: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology by employees of the Federal Government in the course 3 | # of their official duties. Pursuant to title 17 Section 105 of the 4 | # United States Code this software is not subject to copyright 5 | # protection and is in the public domain. NIST assumes no 6 | # responsibility whatsoever for its use by other parties, and makes 7 | # no guarantees, expressed or implied, about its quality, 8 | # reliability, or any other characteristic. 9 | # 10 | # We would appreciate acknowledgement if the software is used. 11 | 12 | # This file is defined to support PEP 561: 13 | # https://www.python.org/dev/peps/pep-0561/ 14 | -------------------------------------------------------------------------------- /samples/.gitignore: -------------------------------------------------------------------------------- 1 | *.err.log 2 | *.validates.log 3 | -------------------------------------------------------------------------------- /samples/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | SHELL ?= /bin/bash 15 | 16 | XMLLINT ?= $(shell which xmllint) 17 | ifeq ($(XMLLINT),) 18 | $(error XMLLINT not found) 19 | endif 20 | 21 | SAMPLE_FILES__PASS := \ 22 | difference_test_0.xml \ 23 | difference_test_1.xml \ 24 | difference_test_2.xml \ 25 | difference_test_3.xml \ 26 | fileobjectexample.xml 27 | 28 | # TODO Any remaining issues with the upstream tool should be resolved. 
29 | SAMPLE_FILES__SKIP := \ 30 | piecewise.xml \ 31 | simple.xml 32 | 33 | SAMPLE_FILES := \ 34 | $(SAMPLE_FILES__FAIL) \ 35 | $(SAMPLE_FILES__SKIP) 36 | 37 | VALIDATES_LOG_FILES__PASS := $(foreach sample_file,$(SAMPLE_FILES__PASS),$(sample_file).validates.log) 38 | 39 | VALIDATES_LOG_FILES__SKIP := $(foreach sample_file,$(SAMPLE_FILES__SKIP),$(sample_file).validates.log) 40 | 41 | VALIDATES_LOG_FILES := \ 42 | $(VALIDATES_LOG_FILES__PASS) \ 43 | $(VALIDATES_LOG_FILES__SKIP) 44 | 45 | all: 46 | 47 | .PHONY: \ 48 | check-TODO 49 | 50 | %.validates.log: \ 51 | % \ 52 | ../schema/dfxml.xsd 53 | $(XMLLINT) \ 54 | --noout \ 55 | --schema ../schema/dfxml.xsd \ 56 | $< \ 57 | 2> $<.err.log 58 | touch $@ 59 | 60 | ../schema/dfxml.xsd: 61 | @echo "dfxml.xsd not found. To check out the DFXML schema (necessary to run unit tests in the /samples directory), please run 'make schema-init' in the repository root." >&2 62 | exit 2 63 | 64 | check: \ 65 | $(VALIDATES_LOG_FILES__PASS) 66 | 67 | check-TODO: \ 68 | $(VALIDATES_LOG_FILES__SKIP) 69 | 70 | clean: 71 | @rm -f *.err.log *.validates.log 72 | -------------------------------------------------------------------------------- /samples/README.md: -------------------------------------------------------------------------------- 1 | # Sample DFXML 2 | This directory contains sample DFXML files. The Makefile here runs tests for conformance against the DFXML Schema with `make check`. 3 | 4 | Not all of these files are currently conformant; these can be seen with `make --keep-going check-TODO`. 5 | -------------------------------------------------------------------------------- /samples/difference_test_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | Sample 9 | 10 | 11 | vi 12 | 8.0 13 | 14 | vi pre.xml 15 | 16 | 17 | 18 | 19 | i_will_be_deleted.txt 20 | r 21 | 20 22 | 123456 23 | 2013-01-01T00:00:00Z 24 | 2013-01-01T00:00:00Z 25 | 2013-01-01T00:00:00Z 26 | 27 | 28 | 29 | e834b5c2f64759832fb33ec53c8b5028 30 | 9125cb87b8f0035c22d3efad2b0473367cc456ca 31 | c75d73927a6ca221ccc71c4f4dee9286fce2b5cf7122950c73157cbf821af07f 32 | 33 | 34 | i_will_be_modified.txt 35 | r 36 | 22 37 | 123457 38 | 2013-01-01T00:00:00Z 39 | 2013-01-01T00:00:00Z 40 | 2013-01-01T00:00:00Z 41 | 42 | 43 | 44 | e91577092351461d7800ef7b870a2bcf 45 | 44e426344f15bd7621ca2f9ffea70d29752dccda 46 | 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 47 | 48 | 49 | i_will_be_accessed.txt 50 | r 51 | 12 52 | 123458 53 | 2013-01-01T00:00:00Z 54 | 2013-01-01T00:00:00Z 55 | 2013-01-01T00:00:00Z 56 | 57 | 58 | 59 | f3a8f17b47f1fe899805c25b8f5a26b0 60 | b439e832cb243e18f6bfc21ca0150de3ef4c6f27 61 | 3c4ace963a2a069a92d8abaa7c77d88e118758eff65c5180fed6534e75889bf3 62 | 63 | 64 | -------------------------------------------------------------------------------- /samples/difference_test_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | Sample 9 | 10 | 11 | vi 12 | 8.0 13 | 14 | vi post.xml 15 | 16 | 17 | 18 | 19 | i_am_new.txt 20 | r 21 | 40 22 | 123459 23 | 2013-05-16T21:01:00Z 24 | 2013-05-16T21:01:00Z 25 | 2013-05-16T21:01:00Z 26 | 27 | 28 | 29 | 55b228770d96e4dbd1b218f4f07d8aae 30 | 8632a06e80eefbaf702ac6a44e633937e2be7186 31 | 77f380ce33609d55f8b874833c4495282fdf54869912822cde05c68090a60a18 32 | 33 | 34 | i_will_be_modified.txt 35 | r 36 | 23 37 | 123457 38 | 2013-05-16T20:59:00Z 39 | 2013-05-16T20:59:00Z 40 | 2013-05-16T20:59:00Z 41 | 42 | 43 | 44 | a6d9ebd95bcd3602b757ea63f9dd02ab 45 
| 1e087807678a33ebbde2624341184c14303675a3 46 | e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281 47 | 48 | 49 | i_will_be_accessed.txt 50 | r 51 | 12 52 | 123458 53 | 2013-01-01T00:00:00Z 54 | 2013-01-01T00:00:00Z 55 | 2013-05-16T21:00:00Z 56 | 57 | 58 | 59 | f3a8f17b47f1fe899805c25b8f5a26b0 60 | b439e832cb243e18f6bfc21ca0150de3ef4c6f27 61 | 3c4ace963a2a069a92d8abaa7c77d88e118758eff65c5180fed6534e75889bf3 62 | 63 | 64 | -------------------------------------------------------------------------------- /samples/fileobjectexample.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /samples/simple.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | Hash List 8 | 9 | 10 | MD5DEEP 11 | 4.0.0_beta2-002 12 | 13 | GCC 4.2 14 | 15 | 16 | Darwin 17 | 11.3.0 18 | Darwin Kernel Version 11.3.0: Thu Jan 12 18:47:41 PST 2012; root:xnu-1699.24.23~1/RELEASE_X86_64 19 | Mucha.local 20 | x86_64 21 | md5deep -dp512 /Users/simsong/uploads/einstein template.jpg /Users/simsong/uploads/image1.jpg /Users/simsong/uploads/image2.jpg /Users/simsong/uploads/image3.jpg 22 | 502 23 | 2012-02-23T16:35:11Z 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | /Users/simsong/uploads/image2.jpg 37 | 12833 38 | 2012-02-22T03:53:05Z 39 | 2012-02-22T03:53:05Z 40 | 2012-02-23T16:34:27Z 41 | d7ced55e7d7f5b9995fc3cbac7942155 42 | 43 | 44 | /Users/simsong/uploads/image1.jpg 45 | 12551 46 | 2012-02-22T03:53:54Z 47 | 2012-02-22T03:53:54Z 48 | 2012-02-23T16:34:27Z 49 | 3bb144b5abc65312099f79caa69ff94f 50 | 51 | 52 | /Users/simsong/uploads/image3.jpg 53 | 12545 54 | 2012-02-22T03:55:38Z 55 | 2012-02-22T03:55:38Z 56 | 2012-02-23T16:34:27Z 57 | 6377d89ab3165a3fe24b390b513f47d7 58 | 59 | 60 | /Users/simsong/uploads/einstein template.jpg 61 | 43819 62 | 2012-02-22T03:54:19Z 63 | 2012-02-22T03:54:19Z 64 | 2012-02-23T16:34:27Z 65 | 702da00183448a42f5a861c95973f4f3 66 | 67 | 68 | 0.008982 69 | 0.003041 70 | 1069056 71 | 391 72 | 0 73 | 0 74 | 0 75 | 0 76 | 0.006578 77 | 78 | 79 | -------------------------------------------------------------------------------- /samples/tcpflow_zip_generic_header.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | Sample 9 | 10 | 11 | vi 12 | 8.0 13 | 14 | vi tcpflow_zip_generic_header.xml 15 | 16 | 17 | 18 | ../../tcpflow/tests/airsnort-linux-browser_page_load.pcap 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 205.134.188.162.00080-008.030.072.112.38568 29 | 4135 30 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = dfxml 3 | version = attr: dfxml.__version__ 4 | url = https://github.com/dfxml-working-group/dfxml_python 5 | classifiers = 6 | License :: Public Domain 7 | Programming Language :: Python :: 3 8 | 9 | [options] 10 | include_package_data = true 11 | packages = find: 12 | python_requires = >=3.9 13 | 14 | # See CONTRIBUTE.md before adding a console script line. 
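# Sketch: after an editable install (pip install -e .), each console_scripts
# entry below becomes a command on PATH; the file names here are placeholders.
#
#   walk_to_dfxml > current.dfxml
#   make_differential_dfxml previous.dfxml current.dfxml > deltas.dfxml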
15 | [options.entry_points] 16 | console_scripts = 17 | make_differential_dfxml = dfxml.bin.make_differential_dfxml:main 18 | walk_to_dfxml = dfxml.bin.walk_to_dfxml:main 19 | 20 | [options.package_data] 21 | dfxml = py.typed 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed in whole or in part by employees of the 4 | # Federal Government in the course of their official duties, and with 5 | # other Federal assistance. Pursuant to title 17 Section 105 of the 6 | # United States Code portions of this software authored by Federal 7 | # employees are not subject to copyright protection within the United 8 | # States. For portions not authored by Federal employees, the Federal 9 | # Government has been granted unlimited rights, and no claim to 10 | # copyright is made. The Federal Government assumes no responsibility 11 | # whatsoever for its use by other parties, and makes no guarantees, 12 | # expressed or implied, about its quality, reliability, or any other 13 | # characteristic. 14 | # 15 | # We would appreciate acknowledgement if the software is used. 16 | 17 | import setuptools 18 | 19 | setuptools.setup() 20 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | -------------------------------------------------------------------------------- /tests/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # Bash selection is described in the top-level Makefile. 15 | ifeq ($(shell basename $(SHELL)),sh) 16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 17 | endif 18 | 19 | top_srcdir := $(shell cd .. ; pwd) 20 | 21 | PYTHON3 ?= python3 22 | ifeq ($(PYTHON3),) 23 | $(error python3 not found) 24 | endif 25 | 26 | all: \ 27 | all-make_differential_dfxml \ 28 | all-walk_to_dfxml 29 | 30 | .PHONY: \ 31 | all-make_differential_dfxml \ 32 | all-walk_to_dfxml \ 33 | check-mypy \ 34 | check-mypy-stricter 35 | 36 | all-make_differential_dfxml: \ 37 | .venv.done.log 38 | $(MAKE) \ 39 | --directory make_differential_dfxml 40 | 41 | all-walk_to_dfxml: \ 42 | .venv.done.log 43 | $(MAKE) \ 44 | --directory walk_to_dfxml 45 | 46 | .venv.done.log: \ 47 | $(top_srcdir)/setup.cfg \ 48 | $(top_srcdir)/setup.py \ 49 | requirements.txt 50 | rm -rf venv 51 | $(PYTHON3) -m venv \ 52 | venv 53 | source venv/bin/activate \ 54 | && pip install \ 55 | --upgrade \ 56 | pip \ 57 | setuptools 58 | source venv/bin/activate \ 59 | && cd $(top_srcdir) \ 60 | && pip install \ 61 | --editable \ 62 | . 
63 | source venv/bin/activate \ 64 | && pip install \ 65 | --requirement requirements.txt 66 | touch $@ 67 | 68 | check: \ 69 | all-make_differential_dfxml \ 70 | all-walk_to_dfxml \ 71 | check-mypy 72 | source venv/bin/activate \ 73 | && pytest \ 74 | --log-level=DEBUG 75 | 76 | #TODO - Type-checking would best be done against all of ../dfxml, when someone finds some time to do so. 77 | check-mypy: \ 78 | check-mypy-stricter 79 | source venv/bin/activate \ 80 | && mypy \ 81 | ../dfxml/bin/idifference.py \ 82 | ../dfxml/bin/summarize_differential_dfxml.py \ 83 | ../dfxml/__init__.py \ 84 | ../dfxml/fiwalk.py \ 85 | ../dfxml/objects.py \ 86 | misc_bin_tests \ 87 | misc_object_tests 88 | @echo "INFO:tests/Makefile:mypy is currently run against a subset of the dfxml directory." >&2 89 | 90 | #TODO - Strict type-checking is another long-term goal, likewise eventually done against all of ../dfxml. 91 | check-mypy-stricter: \ 92 | .venv.done.log 93 | source venv/bin/activate \ 94 | && mypy \ 95 | ../demos/demo_fiwalk_diskimage.py \ 96 | ../dfxml/bin/idifference2.py \ 97 | ../dfxml/bin/make_differential_dfxml.py \ 98 | ../dfxml/bin/walk_to_dfxml.py \ 99 | make_differential_dfxml \ 100 | walk_to_dfxml \ 101 | *.py 102 | 103 | clean: 104 | @$(MAKE) \ 105 | --directory misc_object_tests \ 106 | clean 107 | @$(MAKE) \ 108 | --directory make_differential_dfxml \ 109 | clean 110 | @$(MAKE) \ 111 | --directory walk_to_dfxml \ 112 | clean 113 | @rm -f \ 114 | .venv.done.log 115 | @rm -rf \ 116 | venv 117 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | Contents of this directory test the functionality of `dfxml` as an importable Python module. 2 | 3 | Running `make check` in this directory will build a Python virtual environment, install the top source directory into that virtual environment as a module, and then run further tests with `pytest`. 4 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/.gitignore: -------------------------------------------------------------------------------- 1 | *.dfxml 2 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # Bash selection is described in the top-level Makefile. 15 | ifeq ($(shell basename $(SHELL)),sh) 16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 17 | endif 18 | 19 | top_srcdir := $(shell cd ../.. 
; pwd) 20 | 21 | tests_srcdir := $(top_srcdir)/tests 22 | 23 | all: \ 24 | differential_dfxml_test_by_path_01.txt \ 25 | differential_dfxml_test_by_path_23.txt \ 26 | differential_dfxml_test_by_times_01.txt \ 27 | differential_dfxml_test_by_times_23.txt 28 | 29 | check: \ 30 | differential_dfxml_test_by_path_01.txt \ 31 | differential_dfxml_test_by_path_23.txt \ 32 | differential_dfxml_test_by_times_01.txt \ 33 | differential_dfxml_test_by_times_23.txt 34 | source $(tests_srcdir)/venv/bin/activate \ 35 | && pytest \ 36 | --log-level=DEBUG 37 | 38 | clean: 39 | @rm -f \ 40 | *.dfxml \ 41 | *.txt 42 | @#Restore Git-tracked version of these files, so deletions aren't accidentally committed. 43 | @git checkout \ 44 | -- \ 45 | differential_dfxml_test_by_path_01.txt \ 46 | differential_dfxml_test_by_path_23.txt \ 47 | differential_dfxml_test_by_times_01.txt \ 48 | differential_dfxml_test_by_times_23.txt \ 49 | || true 50 | 51 | differential_dfxml_test_01.dfxml: \ 52 | $(tests_srcdir)/.venv.done.log \ 53 | $(top_srcdir)/dfxml/bin/make_differential_dfxml.py \ 54 | $(top_srcdir)/samples/difference_test_0.xml \ 55 | $(top_srcdir)/samples/difference_test_1.xml 56 | rm -f \ 57 | __$@ \ 58 | _$@ 59 | source $(tests_srcdir)/venv/bin/activate \ 60 | && make_differential_dfxml \ 61 | $(top_srcdir)/samples/difference_test_0.xml \ 62 | $(top_srcdir)/samples/difference_test_1.xml \ 63 | > __$@ 64 | xmllint \ 65 | --format \ 66 | __$@ \ 67 | > _$@ 68 | rm __$@ 69 | mv _$@ $@ 70 | 71 | differential_dfxml_test_by_path_01.txt: \ 72 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 73 | differential_dfxml_test_01.dfxml 74 | source $(tests_srcdir)/venv/bin/activate \ 75 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 76 | --debug \ 77 | --sort-by path \ 78 | differential_dfxml_test_01.dfxml \ 79 | > _$@ 80 | mv _$@ $@ 81 | 82 | differential_dfxml_test_by_times_01.txt: \ 83 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 84 | differential_dfxml_test_01.dfxml 85 | source $(tests_srcdir)/venv/bin/activate \ 86 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 87 | --debug \ 88 | --sort-by times \ 89 | differential_dfxml_test_01.dfxml \ 90 | > _$@ 91 | mv _$@ $@ 92 | 93 | differential_dfxml_test_23.dfxml: \ 94 | $(tests_srcdir)/.venv.done.log \ 95 | $(top_srcdir)/dfxml/bin/make_differential_dfxml.py \ 96 | $(top_srcdir)/samples/difference_test_2.xml \ 97 | $(top_srcdir)/samples/difference_test_3.xml 98 | rm -f \ 99 | __$@ \ 100 | _$@ 101 | source $(tests_srcdir)/venv/bin/activate \ 102 | && make_differential_dfxml \ 103 | $(top_srcdir)/samples/difference_test_2.xml \ 104 | $(top_srcdir)/samples/difference_test_3.xml \ 105 | > __$@ 106 | xmllint \ 107 | --format \ 108 | __$@ \ 109 | > _$@ 110 | rm __$@ 111 | mv _$@ $@ 112 | 113 | differential_dfxml_test_by_path_23.txt: \ 114 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 115 | differential_dfxml_test_23.dfxml 116 | source $(tests_srcdir)/venv/bin/activate \ 117 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 118 | --debug \ 119 | --sort-by path \ 120 | differential_dfxml_test_23.dfxml \ 121 | > _$@ 122 | mv _$@ $@ 123 | 124 | differential_dfxml_test_by_times_23.txt: \ 125 | $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 126 | differential_dfxml_test_23.dfxml 127 | source $(tests_srcdir)/venv/bin/activate \ 128 | && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \ 129 | --debug \ 130 | --sort-by times \ 131 | differential_dfxml_test_23.dfxml \ 
132 | > _$@ 133 | mv _$@ $@ 134 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/README.md: -------------------------------------------------------------------------------- 1 | # `make_differential_dfxml` 2 | 3 | *Source*: [`../../dfxml/bin/make_differential_dfxml.py`](../../dfxml/bin/make_differential_dfxml.py) 4 | 5 | This command takes as input two DFXML files, and outputs a DFXML document showing differential annotations. Output is sent to `stdout`. 6 | 7 | This tool was introduced in [Nelson et al., DFRWS 2014](https://doi.org/10.1016/j.diin.2014.05.004). 8 | 9 | 10 | ## Usage 11 | 12 | ```bash 13 | make_differential_dfxml input_1.dfxml input_2.dfxml > deltas.dfxml 14 | ``` 15 | 16 | If one is using the [DFXML Objects module](../../dfxml/objects.py), the differentially-annotated DFXML can be analyzed by referring to each encountered `FileObject`'s property `.annos`. See e.g. [`summarize_differential_dfxml.py`](../../dfxml/bin/summarize_differential_dfxml.py)'s output for [changes scoped to single file systems](differential_dfxml_test_by_path_01.txt), or [changes that cross file systems](differential_dfxml_test_by_times_23.txt). 17 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_path_01.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2013-05-16T21:01:00Z i_am_new.txt 40 6 | 7 | 8 | Deleted files: 9 | ============== 10 | 2013-01-01T00:00:00Z i_will_be_deleted.txt 20 11 | 12 | 13 | Renamed files: 14 | ============== 15 | 16 | 17 | Files with modified contents: 18 | ============================= 19 | 20 | i_will_be_modified.txt mtime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 21 | i_will_be_modified.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 22 | i_will_be_modified.txt ctime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 23 | i_will_be_modified.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=22)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=23)]) 24 | i_will_be_modified.txt filesize changed, 22 23 25 | i_will_be_modified.txt md5 changed, e91577092351461d7800ef7b870a2bcf a6d9ebd95bcd3602b757ea63f9dd02ab 26 | i_will_be_modified.txt sha1 changed, 44e426344f15bd7621ca2f9ffea70d29752dccda 1e087807678a33ebbde2624341184c14303675a3 27 | i_will_be_modified.txt sha256 changed, 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281 28 | 29 | 30 | Files with changed properties: 31 | ============================== 32 | 33 | i_will_be_accessed.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T21:00:00Z 34 | i_will_be_accessed.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=34512, file_offset=0, len=12)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=12)]) 35 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_path_23.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2007-08-09T12:34:58Z CHANGE___content_and_mtime 4097 6 | 2007-08-09T12:35:00Z CHANGE___erased___replaced_by_other_partition_file 4097 7 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4098 8 | 2007-08-09T12:34:56Z 
CHANGE___move_from_P1G_to_P2G 4097 9 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 10 | 2007-08-09T12:34:57Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 11 | 2007-08-09T12:34:59Z CHANGE___new_file 4097 12 | 2007-08-09T05:34:56-07:00 CHANGE___timestamp_changes_format_only 4097 13 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 14 | 2007-08-09T12:34:56Z NO_CHANGE 4097 15 | 2007-08-09T12:34:56Z _CHANGE___move_from_P1M_to_P3G___change_name 4097 16 | 2007-08-09T12:34:56Z _CHANGE___renamed 4097 17 | 18 | 19 | Deleted files: 20 | ============== 21 | 2007-08-09T12:34:56Z CHANGE___content_and_mtime 4097 22 | 2007-08-09T12:34:56Z CHANGE___erased 4097 23 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_other_partition_file 4097 24 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4097 25 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097 26 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 27 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 28 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_name 4097 29 | 2007-08-09T12:34:56Z CHANGE___moved_to_erased_P1G_file 4097 30 | 2007-08-09T12:34:56Z CHANGE___renamed 4097 31 | 2007-08-09T12:34:56Z CHANGE___renamed_to_erased_sibling___change_checksum_and_mtime 4097 32 | 2007-08-09T12:34:56Z CHANGE___timestamp_changes_format_only 4097 33 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 34 | 2007-08-09T12:34:56Z NO_CHANGE 4097 35 | 36 | 37 | Renamed files: 38 | ============== 39 | 40 | 41 | Files with modified contents: 42 | ============================= 43 | 44 | 45 | Files with changed properties: 46 | ============================== 47 | -------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_times_01.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2013-05-16T21:01:00Z i_am_new.txt 40 6 | 7 | 8 | Deleted files: 9 | ============== 10 | 2013-01-01T00:00:00Z i_will_be_deleted.txt 20 11 | 12 | 13 | Renamed files: 14 | ============== 15 | 16 | 17 | Files with modified contents: 18 | ============================= 19 | 20 | i_will_be_modified.txt mtime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 21 | i_will_be_modified.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 22 | i_will_be_modified.txt ctime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z 23 | i_will_be_modified.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=22)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=23)]) 24 | i_will_be_modified.txt filesize changed, 22 23 25 | i_will_be_modified.txt md5 changed, e91577092351461d7800ef7b870a2bcf a6d9ebd95bcd3602b757ea63f9dd02ab 26 | i_will_be_modified.txt sha1 changed, 44e426344f15bd7621ca2f9ffea70d29752dccda 1e087807678a33ebbde2624341184c14303675a3 27 | i_will_be_modified.txt sha256 changed, 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281 28 | 29 | 30 | Files with changed properties: 31 | ============================== 32 | 33 | i_will_be_accessed.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T21:00:00Z 34 | i_will_be_accessed.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=34512, file_offset=0, len=12)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=12)]) 35 | 
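The plain-text summaries in this directory are generated by `summarize_differential_dfxml.py` from the differential DFXML. As the README above notes, the same annotations can also be tallied directly through each encountered `FileObject`'s `.annos` property. The following is a minimal illustrative sketch of that kind of analysis, not a replacement for the summarize script: it assumes `Objects.iterparse` (the streaming interface used by the bundled scripts) yields each `FileObject` once as an `(event, object)` pair, and that differential annotations appear as short strings (for example "new" or "deleted") whose exact vocabulary is defined by `objects.py`, not here.

```python
#!/usr/bin/env python3
"""Tally differential annotations in DFXML from make_differential_dfxml (illustrative sketch)."""

import collections
import sys

import dfxml.objects as Objects


def tally_annotations(path):
    # Map each annotation string to the set of affected file names.
    tally = collections.defaultdict(set)
    for (event, obj) in Objects.iterparse(path):
        if not isinstance(obj, Objects.FileObject):
            continue
        for anno in obj.annos:
            tally[anno].add(obj.filename)
    return tally


if __name__ == "__main__":
    for (anno, filenames) in sorted(tally_annotations(sys.argv[1]).items()):
        print("%s\t%d" % (anno, len(filenames)))
```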
-------------------------------------------------------------------------------- /tests/make_differential_dfxml/differential_dfxml_test_by_times_23.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | New files: 4 | ========== 5 | 2007-08-09T05:34:56-07:00 CHANGE___timestamp_changes_format_only 4097 6 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4098 7 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097 8 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 9 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 10 | 2007-08-09T12:34:56Z NO_CHANGE 4097 11 | 2007-08-09T12:34:56Z _CHANGE___move_from_P1M_to_P3G___change_name 4097 12 | 2007-08-09T12:34:56Z _CHANGE___renamed 4097 13 | 2007-08-09T12:34:57Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 14 | 2007-08-09T12:34:58Z CHANGE___content_and_mtime 4097 15 | 2007-08-09T12:34:59Z CHANGE___new_file 4097 16 | 2007-08-09T12:35:00Z CHANGE___erased___replaced_by_other_partition_file 4097 17 | 18 | 19 | Deleted files: 20 | ============== 21 | 2007-08-09T12:34:56Z CHANGE___content_and_mtime 4097 22 | 2007-08-09T12:34:56Z CHANGE___erased 4097 23 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_other_partition_file 4097 24 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4097 25 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097 26 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097 27 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097 28 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_name 4097 29 | 2007-08-09T12:34:56Z CHANGE___moved_to_erased_P1G_file 4097 30 | 2007-08-09T12:34:56Z CHANGE___renamed 4097 31 | 2007-08-09T12:34:56Z CHANGE___renamed_to_erased_sibling___change_checksum_and_mtime 4097 32 | 2007-08-09T12:34:56Z CHANGE___timestamp_changes_format_only 4097 33 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097 34 | 2007-08-09T12:34:56Z NO_CHANGE 4097 35 | 36 | 37 | Renamed files: 38 | ============== 39 | 40 | 41 | Files with modified contents: 42 | ============================= 43 | 44 | 45 | Files with changed properties: 46 | ============================== 47 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/README.md: -------------------------------------------------------------------------------- 1 | The tests in this directory needed to be moved to address a new behavior in a deployed static type checker. The intent is to empty this directory, moving its tests to appropriate locations under `/tests`. 2 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/_pick_pythons.sh: -------------------------------------------------------------------------------- 1 | 2 | # This software was developed at the National Institute of Standards 3 | # and Technology in whole or in part by employees of the Federal 4 | # Government in the course of their official duties. Pursuant to 5 | # title 17 Section 105 of the United States Code portions of this 6 | # software authored by NIST employees are not subject to copyright 7 | # protection and are in the public domain. For portions not authored 8 | # by NIST employees, NIST has been granted unlimited rights. NIST 9 | # assumes no responsibility whatsoever for its use by other parties, 10 | # and makes no guarantees, expressed or implied, about its quality, 11 | # reliability, or any other characteristic. 
12 | # 13 | # We would appreciate acknowledgement if the software is used. 14 | 15 | # This script is meant to be included in Bash scripts that need a Python v2 and v3. 16 | # An autotool configure script would also suffice. 17 | # The 'or echo' statements keep the subshell from returning an error exit status on missing a Python version. 18 | # 19 | # This script defines two variables, PYTHON2 and PYTHON3, providing the highest-available Python binary for each major version. 20 | # 21 | 22 | PYTHON2=`which python2` 23 | 24 | PYTHON3=`which python3.6 2>/dev/null || echo` 25 | if [ -z "$PYTHON3" ]; then 26 | PYTHON3=`which python3.5 2>/dev/null || echo` 27 | if [ -z "$PYTHON3" ]; then 28 | PYTHON3=`which python3.4 2>/dev/null || echo` 29 | if [ -z "$PYTHON3" ]; then 30 | PYTHON3=`which python3 2>/dev/null || echo` 31 | if [ -z "$PYTHON3" ]; then 32 | echo "Error: Could not find a python3 executable." >&2 33 | exit 1 34 | fi 35 | fi 36 | fi 37 | fi 38 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/_sane_defaults.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SCRIPT_DIR="$1" 4 | 5 | # Guarantee sane defaults 6 | if [ -z ${TEST_DIR} ]; 7 | then 8 | TEST_DIR="${SCRIPT_DIR}" 9 | fi 10 | 11 | if [ -z ${TOOL_DIR} ]; 12 | then 13 | TOOL_DIR="$(dirname ${SCRIPT_DIR})" 14 | fi 15 | 16 | if [ -z ${SAMPLE_DIR} ]; 17 | then 18 | SAMPLE_DIR="$(dirname $(dirname ${SCRIPT_DIR}))/samples" 19 | fi 20 | 21 | if [ -z ${PYTHONPATH} ]; 22 | then 23 | PYTHONPATH="$(dirname $(dirname ${SCRIPT_DIR}))" 24 | export PYTHONPATH; 25 | fi 26 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/iexport_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from dfxml.bin.iexport import * 5 | 6 | 7 | def test_iexport(): 8 | r1 = Run(0, 1000) 9 | r2 = Run(50, 60) 10 | assert r1.intersects_run(r2) 11 | assert r2.intersects_run(r1) 12 | 13 | disk = RunDB(0, 1000) 14 | print(disk) 15 | disk.remove(Run(50, 60)) 16 | disk.remove(Run(0, 10)) 17 | disk.remove(Run(40, 20)) 18 | print(disk) 19 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/paths.sh: -------------------------------------------------------------------------------- 1 | source tests/_pick_pythons.sh 2 | 3 | #DEMO_DIR=../demos 4 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_cat_fileobjects.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 
15 | 16 | # Determine script location 17 | SCRIPT="$(realpath $0)" 18 | SCRIPT_DIR="$(dirname ${SCRIPT})" 19 | 20 | # Guarantee sane defaults 21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR} 22 | 23 | # Choose python interpreter 24 | . ${TEST_DIR}/_pick_pythons.sh 25 | 26 | XMLLINT=`which xmllint` 27 | 28 | # Halt on error 29 | set -e 30 | # Display all executed commands 31 | set -x 32 | 33 | #NOTE: Python2's ETree does not understand the "unicode" output encoding. 34 | #"$PYTHON2" cat_fileobjects.py ../${SAMPLE_DIR}/simple.xml 35 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py --debug ${SAMPLE_DIR}/simple.xml >cat_test_nocache.dfxml 36 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py --debug --cache ${SAMPLE_DIR}/simple.xml >cat_test_cache.dfxml 37 | 38 | #This checks that the XML structure wasn't changed by cache cleaning. Only the tail is hashed because the head contains metadata. 39 | subj0="x$(tail -n 10 cat_test_nocache.dfxml | openssl dgst -sha1)" 40 | subj1="x$(tail -n 10 cat_test_cache.dfxml | openssl dgst -sha1)" 41 | test "$subj0" != "x" 42 | test "$subj1" != "x" 43 | test "$subj0" == "$subj1" 44 | 45 | if [ -x "$XMLLINT" ]; then 46 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py ${SAMPLE_DIR}/simple.xml | "$XMLLINT" - 47 | else 48 | echo "Warning: xmllint not found. Skipped check for if generated DFXML is valid XML." >&2 49 | fi 50 | 51 | test $(grep '&2 50 | # "$PYTHON2" ../dfxml_tool.py $x ../src > dfxml_tool_p2_${iter}.dfxml 51 | "$PYTHON3" ${TOOL_DIR}/dfxml_tool.py $x -- ../samples > dfxml_tool_p3_${iter}.dfxml 52 | iter=$(($iter+1)) 53 | done 54 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_hfsj.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | hdiutil create -size 10m -fs HFS+J -nospotlight -attach -volname image -ov -layout NONE \ 4 | -imagekey diskimage-class=CRawDiskImage image.dmg 5 | echo "This is file 1 - snarf" > /Volumes/image/file1.txt 6 | echo "This is file 2 - snarf" > /Volumes/image/file2.txt 7 | sync 8 | hdiutil detach /Volumes/image 9 | cp image.dmg image.gen0.dmg 10 | echo "look for file1 and file2:" 11 | strings -o image.dmg | grep snarf 12 | echo "mount the disk and overwrite the contents of file2" 13 | hdiutil attach image.dmg 14 | echo "New file 1 contents - snarf" | dd of=/Volumes/image/file1.txt 15 | echo "" 16 | echo "===file1.txt===" 17 | cat /Volumes/image/file1.txt 18 | echo "" 19 | echo "===file2.txt===" 20 | cat /Volumes/image/file2.txt 21 | echo "" 22 | hdiutil detach /Volumes/image 23 | cp image.dmg image.gen1.dmg 24 | strings -o image.dmg | grep snarf 25 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_idifference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Test script. Evaluates idifference.py on a sequence of disk images. 
4 | """ 5 | 6 | import os 7 | import subprocess 8 | import sys 9 | 10 | if __name__ == "__main__": 11 | from optparse import OptionParser 12 | 13 | parser = OptionParser() 14 | parser.usage = "%prog [options] dfxml_sequence_list.txt output_zip" 15 | parser.add_option( 16 | "-p", "--prefix", help="prepend prefix to every test image path", dest="prefix" 17 | ) 18 | parser.add_option( 19 | "-v", 20 | "--verbose", 21 | help="verbose output: print call to difference program", 22 | dest="verbose", 23 | action="store_true", 24 | ) 25 | parser.add_option( 26 | "-d", 27 | "--diff-program", 28 | help="use this path to the diff program", 29 | dest="diff_program", 30 | ) 31 | # parser.add_option("-z", "--zap", help="Zap output directory (erases if present)" dest="zap") 32 | 33 | (options, args) = parser.parse_args() 34 | if len(args) < 2: 35 | parser.print_help() 36 | sys.exit(1) 37 | 38 | prefix = "" 39 | if options.prefix: 40 | prefix = options.prefix 41 | # Convert file contents to list 42 | files = [prefix + x.strip() for x in open(args[0], "r")] 43 | 44 | # Verify we'll run at least one difference 45 | if len(files) < 2: 46 | sys.stderr.write("Differencing requires 2 or more files.\n") 47 | 48 | # Check that the list lines actually point to files 49 | for f in files: 50 | assert os.path.isfile(f) 51 | 52 | # Run differences 53 | if options.diff_program: 54 | diff_program = options.diff_program 55 | else: 56 | diff_program = os.path.dirname(sys.argv[0]) + "/idifference.py" 57 | 58 | diff_command = [ 59 | "python", 60 | diff_program, 61 | "--zipfile=" + args[1], 62 | "--imagefile", 63 | ] + files 64 | if options.verbose: 65 | print(" ".join(diff_command)) 66 | subprocess.call(diff_command) 67 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_idifference_to_dfxml.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # Determine script location 17 | SCRIPT="$(realpath $0)" 18 | SCRIPT_DIR="$(dirname ${SCRIPT})" 19 | 20 | # Guarantee sane defaults 21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR} 22 | 23 | # Choose python interpreter 24 | . ${TEST_DIR}/_pick_pythons.sh 25 | 26 | XMLLINT=`which xmllint` 27 | 28 | # Halt on error 29 | set -e 30 | # Display all executed commands 31 | set -x 32 | 33 | #Ensure the non-XML output doesn't fail, first. 34 | "$PYTHON3" ${TOOL_DIR}/idifference.py --summary ${SAMPLE_DIR}/difference_test_[01].xml > idifference_test.txt 35 | 36 | #Generate XML output. 37 | "$PYTHON3" ${TOOL_DIR}/idifference.py --xml idifference_test.dfxml ${SAMPLE_DIR}/difference_test_[01].xml 38 | if [ ! -x "$XMLLINT" ]; then 39 | echo "Error: xmllint not found. Can't check for whether generated DFXML is valid XML. 
Install libxml2 (or possibly xmlutils) to complete these unit tests." >&2 40 | exit 1 41 | fi 42 | 43 | "$XMLLINT" --format idifference_test.dfxml >idifference_test_formatted.dfxml 44 | 45 | _check_counts() { 46 | #Check expected number of fileobjects appears 47 | test 4 == $(grep ' idifference_test_cat.dfxml 59 | "$XMLLINT" --format idifference_test_cat.dfxml >idifference_test_cat_formatted.dfxml 60 | _check_counts idifference_test_cat_formatted.dfxml 61 | -------------------------------------------------------------------------------- /tests/misc_bin_tests/test_mac_timelines.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # Determine script location 17 | SCRIPT="$(realpath $0)" 18 | SCRIPT_DIR="$(dirname ${SCRIPT})" 19 | 20 | # Guarantee sane defaults 21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR} 22 | 23 | # Choose python interpreter 24 | source ${TEST_DIR}/_pick_pythons.sh 25 | 26 | # Halt on error 27 | set -e 28 | # Display all executed commands 29 | set -x 30 | 31 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p2.txt 32 | test 12 == $(cat demo_mac_timeline_simple_p2.txt | wc -l) 33 | 34 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p3.txt 35 | test 12 == $(cat demo_mac_timeline_simple_p3.txt | wc -l) 36 | 37 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p2.txt 38 | test 12 == $(cat demo_mac_timeline_iter_simple_p2.txt | wc -l) 39 | 40 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p3.txt 41 | test 12 == $(cat demo_mac_timeline_iter_simple_p3.txt | wc -l) 42 | 43 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/simple.xml >demo_mac_timeline_objects_simple_p2.txt 44 | test 12 == $(cat demo_mac_timeline_iter_simple_p2.txt | wc -l) 45 | 46 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/simple.xml >demo_mac_timeline_objects_simple_p3.txt 47 | test 12 == $(cat demo_mac_timeline_iter_simple_p3.txt | wc -l) 48 | 49 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline.py ../samples/difference_test_1.xml >demo_mac_timeline_dt1.txt 50 | test 9 == $(cat demo_mac_timeline_dt1.txt | wc -l) 51 | 52 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/difference_test_1.xml >demo_mac_timeline_iter_dt1.txt 53 | test 9 == $(cat demo_mac_timeline_iter_dt1.txt | wc -l) 54 | 55 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/difference_test_1.xml >demo_mac_timeline_objects_dt1.txt 56 | test 9 == $(cat demo_mac_timeline_objects_dt1.txt | wc -l) 57 | -------------------------------------------------------------------------------- 
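The timeline checks above only count lines emitted by the demo scripts; the demos themselves define the exact output format that those line counts assert against. For orientation, here is a minimal sketch of the underlying idea, assuming `Objects.iterparse` yields each `FileObject` once and that the timestamp properties stringify to their ISO-8601 form (as the expected-output listings elsewhere in this test tree suggest). It is not a substitute for the demos exercised above.

```python
#!/usr/bin/env python3
"""Print a rough MAC timeline from a DFXML file (illustrative sketch)."""

import sys

import dfxml.objects as Objects


def main(path):
    events = []
    for (event, obj) in Objects.iterparse(path):
        if not isinstance(obj, Objects.FileObject):
            continue
        for (letter, timestamp) in [("m", obj.mtime), ("a", obj.atime), ("c", obj.ctime)]:
            if timestamp is None:
                continue
            # Sort on the string form; ISO-8601 timestamps in a single time zone
            # representation sort chronologically.
            events.append((str(timestamp), letter, obj.filename or ""))
    for (timestamp, letter, filename) in sorted(events):
        print("%s\t%s\t%s" % (timestamp, letter, filename))


if __name__ == "__main__":
    main(sys.argv[1])
```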
/tests/misc_bin_tests/test_redact.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | /bin/rm -f testdisk.dmg redact.cfg 3 | hdiutil create -size 1m -fs MS-DOS -nospotlight -attach -volname testdisk testdisk.dmg 4 | echo "This is the zero file. FILE0001." > /Volumes/TESTDISK/file0.txt 5 | echo "This is the first file. FILE0001." > /Volumes/TESTDISK/file1.txt 6 | echo "This is the second file. FILE0002." > /Volumes/TESTDISK/file2.txt 7 | echo "This is the third file. FILE0003." > /Volumes/TESTDISK/file3.txt 8 | echo "This is the fourth file. FILE0004." > /Volumes/TESTDISK/file4.txt 9 | echo "This is the fifth file. FILE0005." > /Volumes/TESTDISK/file5.txt 10 | echo "This is the dixth file. FILE0006." > /Volumes/TESTDISK/file6.txt 11 | hdiutil detach /Volumes/TESTDISK 12 | cat > redact.cfg < None: 30 | dobj = Objects.DFXMLObject() 31 | fobj = Objects.FileObject() 32 | dobj.append(fobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | fobj_reconst = dobj_reconst.files[0] 38 | assert fobj == fobj_reconst 39 | except: 40 | _logger.debug("tmp_filename = %r." % tmp_filename) 41 | raise 42 | os.remove(tmp_filename) 43 | 44 | 45 | def test_blank_file_object_filename() -> None: 46 | dobj = Objects.DFXMLObject() 47 | fobj = Objects.FileObject() 48 | dobj.append(fobj) 49 | 50 | fobj.filename = "" 51 | 52 | # Do file I/O round trip. 53 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 54 | try: 55 | fobj_reconst = dobj_reconst.files[0] 56 | assert fobj == fobj_reconst 57 | except: 58 | _logger.debug("tmp_filename = %r." % tmp_filename) 59 | raise 60 | os.remove(tmp_filename) 61 | -------------------------------------------------------------------------------- /tests/misc_object_tests/LibraryObject_read_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology by employees of the Federal Government in the course 3 | # of their official duties. Pursuant to title 17 Section 105 of the 4 | # United States Code this software is not subject to copyright 5 | # protection and is in the public domain. NIST assumes no 6 | # responsibility whatsoever for its use by other parties, and makes 7 | # no guarantees, expressed or implied, about its quality, 8 | # reliability, or any other characteristic. 9 | # 10 | # We would appreciate acknowledgement if the software is used. 11 | 12 | """ 13 | Run test against DFXML file generated by the _write counterpart script. 14 | """ 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import dfxml 23 | import dfxml.objects as Objects 24 | 25 | if __name__ == "__main__": 26 | logging.basicConfig(level=logging.DEBUG) 27 | _logger = logging.getLogger(os.path.basename(__file__)) 28 | 29 | dobj = Objects.parse(sys.argv[1]) 30 | 31 | _logger.debug("dobj.creator_libraries = %r." 
% dobj.creator_libraries) 32 | 33 | assert Objects.LibraryObject("libfoo", "1.2.3") in dobj.creator_libraries 34 | assert Objects.LibraryObject("libbaz", "4.5") in dobj.build_libraries 35 | 36 | found = None 37 | for library in dobj.creator_libraries: 38 | if library.relaxed_eq(Objects.LibraryObject("libfoo")): 39 | found = True 40 | break 41 | assert found 42 | -------------------------------------------------------------------------------- /tests/misc_object_tests/LibraryObject_write_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology by employees of the Federal Government in the course 3 | # of their official duties. Pursuant to title 17 Section 105 of the 4 | # United States Code this software is not subject to copyright 5 | # protection and is in the public domain. NIST assumes no 6 | # responsibility whatsoever for its use by other parties, and makes 7 | # no guarantees, expressed or implied, about its quality, 8 | # reliability, or any other characteristic. 9 | # 10 | # We would appreciate acknowledgement if the software is used. 11 | 12 | __version__ = "0.1.1" 13 | 14 | import logging 15 | import os 16 | import sys 17 | 18 | import dfxml 19 | import dfxml.objects as Objects 20 | 21 | if __name__ == "__main__": 22 | logging.basicConfig(level=logging.DEBUG) 23 | _logger = logging.getLogger(os.path.basename(__file__)) 24 | 25 | lobj = Objects.LibraryObject() 26 | 27 | _logger.debug("lobj = %r" % lobj) 28 | _logger.debug("lobj.to_Element() = %r" % lobj.to_Element()) 29 | 30 | dobj = Objects.DFXMLObject() 31 | dobj.add_creator_library(lobj) 32 | dobj.add_creator_library("libfoo", "1.2.3") 33 | dobj.add_creator_library( 34 | "Python", ".".join(map(str, sys.version_info[0:3])) 35 | ) # A bit of a bend, but gets the major version information out. 36 | try: 37 | dobj.add_creator_library("libbar", None) 38 | except ValueError: 39 | _logger.info("Caught expected value error from passing in incorrect types.") 40 | pass 41 | dobj.add_build_library("libbaz", "4.5") 42 | 43 | with open(sys.argv[1], "w") as fh: 44 | dobj.print_dfxml(fh) 45 | -------------------------------------------------------------------------------- /tests/misc_object_tests/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | # Bash selection is described in the top-level Makefile. 17 | ifeq ($(shell basename $(SHELL)),sh) 18 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 19 | endif 20 | 21 | top_srcdir := $(shell cd ../.. 
; pwd) 22 | 23 | PYTHON3 ?= python3 24 | 25 | OBJECTS := $(top_srcdir)/dfxml/objects.py 26 | 27 | SAMPLES_DIR := $(top_srcdir)/samples 28 | 29 | TOOLS_DIR := $(top_srcdir)/dfxml/bin 30 | 31 | all: \ 32 | check 33 | 34 | .PHONY: \ 35 | check-diff_file_ignore-py3 \ 36 | check-versioned 37 | 38 | check: \ 39 | check-diff_file_ignore-py3 \ 40 | check-libraries-py3 \ 41 | check-versioned 42 | source $(top_srcdir)/tests/venv/bin/activate \ 43 | && $(PYTHON3) "$(TOOLS_DIR)/cat_partitions.py" \ 44 | 12345678:$(SAMPLES_DIR)/difference_test_0.xml \ 45 | 87654321:$(SAMPLES_DIR)/difference_test_1.xml \ 46 | > __cat_patterns_test.sh.dfxml 47 | xmllint \ 48 | --format \ 49 | __cat_patterns_test.sh.dfxml \ 50 | > _cat_patterns_test.sh.dfxml 51 | rm \ 52 | __cat_patterns_test.sh.dfxml 53 | mv \ 54 | _cat_patterns_test.sh.dfxml \ 55 | cat_patterns_test.sh.dfxml 56 | 57 | check-diff_file_ignore-py3: \ 58 | diff_file_ignore_sample-py3.dfxml \ 59 | diff_file_ignore_test.py 60 | source $(top_srcdir)/tests/venv/bin/activate \ 61 | && $(PYTHON3) diff_file_ignore_test.py --debug diff_file_ignore_sample-py3.dfxml 62 | 63 | check-libraries-py3: 64 | source $(top_srcdir)/tests/venv/bin/activate \ 65 | && $(PYTHON3) LibraryObject_write_test.py LibraryObject_py3_test.dfxml 66 | source $(top_srcdir)/tests/venv/bin/activate \ 67 | && $(PYTHON3) LibraryObject_read_test.py LibraryObject_py3_test.dfxml 68 | 69 | check-versioned: 70 | $(PYTHON3) $(OBJECTS) 71 | source $(top_srcdir)/tests/venv/bin/activate \ 72 | && $(PYTHON3) DFXMLObject_program_test.py \ 73 | $(SAMPLES_DIR)/difference_test_0.xml \ 74 | vi \ 75 | 8.0 76 | 77 | clean: 78 | rm -f difference_counts_test.py-d* 79 | rm -f cat_partitions_test.sh.dfxml 80 | rm -f diff_file_ignore_sample-py3.dfxml 81 | rm -f LibraryObject_py3_test.dfxml 82 | rm -f *~ 83 | 84 | 85 | diff_file_ignore_sample-py3.dfxml: \ 86 | $(OBJECTS) \ 87 | diff_file_ignore_sample_dfxml_test.py 88 | rm -f _$@ 89 | source $(top_srcdir)/tests/venv/bin/activate \ 90 | && $(PYTHON3) diff_file_ignore_sample_dfxml_test.py --debug _$@ 91 | mv _$@ $@ 92 | -------------------------------------------------------------------------------- /tests/misc_object_tests/Makefile_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed in whole or in part by employees of the 4 | # Federal Government in the course of their official duties, and with 5 | # other Federal assistance. Pursuant to title 17 Section 105 of the 6 | # United States Code portions of this software authored by Federal 7 | # employees are not subject to copyright protection within the United 8 | # States. For portions not authored by Federal employees, the Federal 9 | # Government has been granted unlimited rights, and no claim to 10 | # copyright is made. The Federal Government assumes no responsibility 11 | # whatsoever for its use by other parties, and makes no guarantees, 12 | # expressed or implied, about its quality, reliability, or any other 13 | # characteristic. 14 | # 15 | # We would appreciate acknowledgement if the software is used. 16 | 17 | # run 'make check' and 'make clean' under py.test 18 | 19 | # TODO Some of the tests in the Makefile are currently known to be redundantly called when using py.test. 
20 | 21 | import os 22 | import subprocess 23 | import sys 24 | 25 | 26 | def test_make_all(): 27 | if sys.platform == "win32": 28 | return # don't run on win32 29 | os.chdir(os.path.dirname(__file__)) 30 | subprocess.call(["make", "check"]) 31 | 32 | 33 | def test_make_clean(): 34 | if sys.platform == "win32": 35 | return # don't run on win32 36 | os.chdir(os.path.dirname(__file__)) 37 | subprocess.call(["make", "clean"]) 38 | -------------------------------------------------------------------------------- /tests/misc_object_tests/PartitionObject_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import libtest 23 | 24 | import dfxml.objects as Objects 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | 29 | def test_empty_object(): 30 | dobj = Objects.DFXMLObject() 31 | pobj = Objects.PartitionObject() 32 | dobj.append(pobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | pobj_reconst = dobj_reconst.partitions[0] 38 | except: 39 | _logger.debug("tmp_filename = %r." % tmp_filename) 40 | raise 41 | os.remove(tmp_filename) 42 | 43 | 44 | def test_cfreds_macwd_properties(): 45 | """ 46 | These were drawn from a CFReDS sample Mac disk image. 47 | """ 48 | dobj = Objects.DFXMLObject() 49 | pobj = Objects.PartitionObject() 50 | dobj.append(pobj) 51 | 52 | pobj.ptype_str = "Apple_Boot" 53 | pobj.partition_index = 8 54 | 55 | # Do file I/O round trip. 56 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 57 | try: 58 | pobj_reconst = dobj_reconst.partitions[0] 59 | assert pobj_reconst.ptype_str == "Apple_Boot" 60 | assert pobj_reconst.partition_index == "8" 61 | except: 62 | _logger.debug("tmp_filename = %r." % tmp_filename) 63 | raise 64 | os.remove(tmp_filename) 65 | 66 | 67 | def test_bsd_disklabel_properties(): 68 | """ 69 | These were drawn from a BSD Disk Label sample image. 70 | """ 71 | dobj = Objects.DFXMLObject() 72 | pobj_a = Objects.PartitionObject() 73 | pobj_c = Objects.PartitionObject() 74 | dobj.append(pobj_a) 75 | dobj.append(pobj_c) 76 | 77 | pobj_a.partition_index = "a" 78 | pobj_c.partition_index = "c" 79 | 80 | # Do file I/O round trip. 81 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 82 | try: 83 | pobj_a_reconst = dobj_reconst.partitions[0] 84 | pobj_c_reconst = dobj_reconst.partitions[1] 85 | assert pobj_a_reconst.partition_index == "a" 86 | assert pobj_c_reconst.partition_index == "c" 87 | except: 88 | _logger.debug("tmp_filename = %r." 
% tmp_filename) 89 | raise 90 | os.remove(tmp_filename) 91 | -------------------------------------------------------------------------------- /tests/misc_object_tests/PartitionSystemObject_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import libtest 23 | 24 | import dfxml.objects as Objects 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | 29 | def test_empty_object(): 30 | dobj = Objects.DFXMLObject() 31 | psobj = Objects.PartitionSystemObject() 32 | dobj.append(psobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | psobj_reconst = dobj_reconst.partition_systems[0] 38 | except: 39 | _logger.debug("tmp_filename = %r." % tmp_filename) 40 | raise 41 | os.remove(tmp_filename) 42 | 43 | 44 | def test_error_element_order(): 45 | dobj = Objects.DFXMLObject() 46 | psobj = Objects.PartitionSystemObject() 47 | fobj = Objects.FileObject() 48 | 49 | psobj.pstype_str = "gpt" 50 | 51 | # The error element should come after the fileobject stream. 52 | psobj.error = "foo" 53 | 54 | # Add a unallocated file object found floating in the partition system. 55 | fobj.alloc_inode = False 56 | fobj.alloc_name = False 57 | 58 | dobj.append(psobj) 59 | psobj.append(fobj) 60 | 61 | el = dobj.to_Element() 62 | 63 | # Confirm error comes after file stream. 64 | assert el[-1][0].tag.endswith("pstype_str") 65 | assert el[-1][-2].tag.endswith("fileobject") 66 | assert el[-1][-1].tag.endswith("error") 67 | 68 | # Do file I/O round trip. 69 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 70 | psobj_reconst = dobj_reconst.partition_systems[0] 71 | try: 72 | assert psobj_reconst.pstype_str == "gpt" 73 | assert psobj_reconst.error == "foo" 74 | except: 75 | _logger.debug("tmp_filename = %r." % tmp_filename) 76 | raise 77 | os.remove(tmp_filename) 78 | -------------------------------------------------------------------------------- /tests/misc_object_tests/README.md: -------------------------------------------------------------------------------- 1 | The tests in this directory needed to be moved to address a new behavior in a deployed static type checker. The intent is to empty this directory, moving its tests to appropriate locations under `/tests`. 
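Several of the object tests in this directory round-trip a `DFXMLObject` through a temporary file with `libtest.file_round_trip_dfxmlobject`, whose source is not reproduced in this listing. The sketch below shows what such a helper could look like using only interfaces demonstrated by the tests themselves (`DFXMLObject.print_dfxml` writing to a file handle, and `Objects.parse` reading a path); the actual implementation in `libtest.py` may differ.

```python
import os
import tempfile

import dfxml.objects as Objects


def file_round_trip_dfxmlobject(dobj):
    """Serialize dobj to a temporary file and parse it back (sketch).

    Returns (tmp_filename, reconstituted_object).  The caller removes
    tmp_filename once its assertions pass, as the tests in this
    directory do.
    """
    (fd, tmp_filename) = tempfile.mkstemp(suffix=".dfxml")
    with os.fdopen(fd, "w") as fh:
        dobj.print_dfxml(fh)
    dobj_reconst = Objects.parse(tmp_filename)
    return (tmp_filename, dobj_reconst)
```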
2 | -------------------------------------------------------------------------------- /tests/misc_object_tests/RegXMLObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import os 17 | import sys 18 | 19 | import diffing_CellObject_test 20 | import diffing_HiveObject_test 21 | 22 | import dfxml.objects as Objects 23 | 24 | 25 | def test_all(): 26 | ro = Objects.RegXMLObject(version="0.2") 27 | ho = Objects.HiveObject() 28 | ho.append(diffing_CellObject_test.get_co()) 29 | ho.append(diffing_CellObject_test.get_nco()) 30 | ro.append(diffing_HiveObject_test.get_ho()) 31 | ro.print_regxml() 32 | -------------------------------------------------------------------------------- /tests/misc_object_tests/VolumeObject_hash_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 
15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import dfxml.objects as Objects 23 | 24 | 25 | def test_all(): 26 | logging.basicConfig(level=logging.DEBUG) 27 | _logger = logging.getLogger(os.path.basename(__file__)) 28 | 29 | s0 = set() 30 | 31 | v0 = Objects.VolumeObject() 32 | v1 = Objects.VolumeObject() 33 | 34 | s0.add(v0) 35 | s0.add(v1) 36 | 37 | _logger.debug("len(s0) = %r" % len(s0)) 38 | assert len(s0) == 2 39 | 40 | f0 = Objects.FileObject() 41 | f1 = Objects.FileObject() 42 | f0.volume_object = v0 43 | f1.volume_object = v0 44 | 45 | s1 = set() 46 | s1.add(f0.volume_object) 47 | s1.add(f1.volume_object) 48 | _logger.debug("len(s1) = %r" % len(s1)) 49 | assert len(s1) == 1 50 | -------------------------------------------------------------------------------- /tests/misc_object_tests/VolumeObject_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.1.1" 17 | 18 | import logging 19 | import os 20 | import sys 21 | 22 | import libtest 23 | 24 | import dfxml.objects as Objects 25 | 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | 29 | def test_empty_object(): 30 | dobj = Objects.DFXMLObject() 31 | vobj = Objects.VolumeObject() 32 | dobj.append(vobj) 33 | 34 | # Do file I/O round trip. 35 | (tmp_filename, dobj_reconst) = libtest.file_round_trip_dfxmlobject(dobj) 36 | try: 37 | vobj_reconst = dobj_reconst.volumes[0] 38 | except: 39 | _logger.debug("tmp_filename = %r." % tmp_filename) 40 | raise 41 | os.remove(tmp_filename) 42 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml.objects as Objects 21 | 22 | 23 | def main(): 24 | dobj = Objects.DFXMLObject() 25 | dobj.diff_file_ignores.add("atime") 26 | dobj.diff_file_ignores.add("crtime") 27 | with open(args.out_dfxml, "w") as fh: 28 | dobj.print_dfxml(fh) 29 | 30 | 31 | if __name__ == "__main__": 32 | import argparse 33 | 34 | parser = argparse.ArgumentParser() 35 | parser.add_argument("-d", "--debug", action="store_true") 36 | parser.add_argument("out_dfxml") 37 | args = parser.parse_args() 38 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 39 | main() 40 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diff_file_ignore_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml.objects as Objects 21 | 22 | 23 | def main(): 24 | dobj = Objects.parse(args.in_dfxml) 25 | assert not dobj is None 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | _logger.debug("dobj.diff_file_ignores = %r." % dobj.diff_file_ignores) 28 | assert "atime" in dobj.diff_file_ignores 29 | assert "crtime" in dobj.diff_file_ignores 30 | 31 | 32 | if __name__ == "__main__": 33 | import argparse 34 | 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument("-d", "--debug", action="store_true") 37 | parser.add_argument("in_dfxml") 38 | args = parser.parse_args() 39 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 40 | main() 41 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_ByteRuns_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
13 | 14 | __version__ = "0.1.1" 15 | 16 | import copy 17 | import logging 18 | import os 19 | import sys 20 | 21 | import dfxml.objects as Objects 22 | 23 | 24 | def get_brs(): 25 | logging.basicConfig(level=logging.DEBUG) 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | br = Objects.ByteRun() 29 | br.file_offset = 4128 30 | br.len = 133 31 | brs = Objects.ByteRuns() 32 | brs.append(br) 33 | return brs 34 | 35 | 36 | def test_all(): 37 | logging.basicConfig(level=logging.DEBUG) 38 | _logger = logging.getLogger(os.path.basename(__file__)) 39 | brs = get_brs() 40 | cbrs1 = copy.deepcopy(brs) 41 | 42 | _logger.debug("brs = %r." % brs) 43 | _logger.debug("cbrs1 = %r." % cbrs1) 44 | assert cbrs1 == brs 45 | 46 | cbrs1[0].file_offset += 133 47 | _logger.debug("cbrs1 = %r." % cbrs1) 48 | assert cbrs1 != brs 49 | 50 | cbrs2 = copy.deepcopy(brs) 51 | cbrs2[0].type = "unknown" 52 | assert cbrs2 != brs 53 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_CellObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import diffing_ByteRuns_test 21 | 22 | import dfxml.objects as Objects 23 | 24 | 25 | def get_co(): 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | co = Objects.CellObject() 28 | _logger.debug("co = %r" % co) 29 | _logger.debug("co.to_regxml() = %r" % co.to_regxml()) 30 | 31 | co.root = 1 32 | co.cellpath = "\\Deleted_root" 33 | co.basename = "Deleted_root" 34 | co.name_type = "k" 35 | co.alloc = 1 36 | co.mtime = "2009-01-23T01:23:45Z" 37 | co.mtime.prec = "100ns" 38 | co.byte_runs = diffing_ByteRuns_test.get_brs() 39 | _logger.debug("co = %r" % co) 40 | _logger.debug("co.to_regxml() = %r" % co.to_regxml()) 41 | return co 42 | 43 | 44 | def get_nco(): 45 | _logger = logging.getLogger(os.path.basename(__file__)) 46 | co = get_co() 47 | coe = co.to_Element() 48 | nco = Objects.CellObject() 49 | nco.populate_from_Element(coe) 50 | diffs = co.compare_to_other(nco) 51 | _logger.debug("nco.to_regxml() = %r" % nco.to_regxml()) 52 | _logger.debug("diffs = %r" % diffs) 53 | assert co == nco 54 | 55 | # Modify 56 | nco.basename = "(Doubled)" 57 | nco.root = False 58 | nco.original_cellobject = co 59 | nco.compare_to_original() 60 | assert nco.diffs == set(["basename", "root"]) 61 | _logger.debug("nco.to_regxml() = %r" % nco.to_regxml()) 62 | _logger.debug("nco.diffs = %r" % nco.diffs) 63 | return nco 64 | 65 | 66 | def test_all(): 67 | logging.basicConfig(level=logging.DEBUG) 68 | 69 | get_nco() 70 | 71 | 72 | if __name__ == "__main__": 73 | test_all() 74 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_FileObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 
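# Orientation note (illustrative sketch): the diffing tests in this directory
# exercise two related interfaces.  compare_to_other() returns the set of
# differing property names directly, while compare_to_original() records them
# on the newer object's .diffs property, e.g.:
#
#     f_new.original_fileobject = f_old
#     f_new.compare_to_original()
#     assert "mtime" in f_new.diffs
#
# Here f_old and f_new are hypothetical FileObjects that differ only in mtime.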
13 | 14 | __version__ = "0.1.1" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml.objects as Objects 21 | 22 | 23 | def test_all(): 24 | logging.basicConfig(level=logging.DEBUG) 25 | _logger = logging.getLogger(os.path.basename(__file__)) 26 | 27 | f0 = Objects.FileObject() 28 | 29 | fo = Objects.FileObject() 30 | pfo = Objects.FileObject() 31 | pfo.inode = 234 32 | f0.parent_object = pfo 33 | f0.filename = "test file" 34 | f0.error = "Neither a real file, nor real error" 35 | f0.partition = 2 36 | f0.id = 235 37 | f0.name_type = "r" 38 | f0.filesize = 1234 39 | f0.unalloc = 0 40 | f0.unused = 0 41 | f0.orphan = 0 42 | f0.compressed = 1 43 | f0.inode = 6543 44 | f0.libmagic = "data" 45 | f0.meta_type = 8 46 | f0.mode = 755 47 | f0.nlink = 1 48 | f0.uid = "S-1-234-etc" 49 | f0.gid = "S-2-234-etc" 50 | f0.mtime = "1999-12-31T12:34:56Z" 51 | f0.ctime = "1998-12-31T12:34:56Z" 52 | f0.atime = "1997-12-31T12:34:56Z" 53 | f0.crtime = "1996-12-31T12:34:56Z" 54 | f0.seq = 3 55 | f0.dtime = "1995-12-31T12:34:56Z" 56 | f0.bkup_time = "1994-12-31T12:34:56Z" 57 | f0.link_target = "Nonexistent file" 58 | f0.libmagic = "Some kind of compressed" 59 | f0.md5 = "db72d20e83d0ae39771403bc4cdde040" 60 | f0.sha1 = "866e1f426b2380aaf74a091aa0f39f62ae8a2de7" 61 | f0.sha256 = "4bc5996997ab9196b2d998b05ef302ed1dc167d74ec881533ee35008b5168630" 62 | f0.sha384 = "2ec378692eeae4b855f58832664f95bb85411caac8dcebe7cd3916e915559d3f0ccda688a1fad1e3f47801fe15298ac0" 63 | # fo.brs = brs #TODO 64 | _logger.debug("f0 = %r" % f0) 65 | _logger.debug("f0.to_dfxml() = %r" % f0.to_dfxml()) 66 | 67 | e0 = f0.to_Element() 68 | _logger.debug("e0 = %r" % e0) 69 | 70 | # f1 = eval(repr(f0)) #TODO The recursive evals cause namespace confusion (Objects.foo); replace the next two lines when that's settled. 71 | f1 = Objects.FileObject() 72 | f1.populate_from_Element(e0) 73 | 74 | f2 = Objects.FileObject() 75 | f2.populate_from_Element(e0) 76 | 77 | # The id property should not be included in the comparisons 78 | f1.id = 111 79 | f1.alloc = False 80 | 81 | f2.mtime = "2999-12-31T12:34:56Z" 82 | f2.md5 = "593c8fe4a2236f3eeba7f4577b663876" 83 | f2.sha1 = "0c0c20c03bdb8913da8ea120bd59ba5f596deceb" 84 | f2.sha256 = "4f6dcb46e0f7b0ad748d083f6e92d7df586d0298a94acc3795287ff156614540" 85 | f2.sha384 = "2af87ca47d01989009caf3927a84be215528a53629dd935a828921ac0a4b22202bcba20d38fdd16d719b8c4241fcdacb" 86 | 87 | _logger.debug("f1 = %r" % f1) 88 | d01 = f0.compare_to_other(f1) 89 | _logger.debug("d01 = %r" % d01) 90 | assert d01 == set(["alloc"]) or d01 == set(["alloc", "unalloc"]) 91 | 92 | d02 = f0.compare_to_other(f2) 93 | 94 | _logger.debug("d02 = %r" % d02) 95 | assert d02 == set(["mtime", "md5", "sha1", "sha256", "sha384"]) 96 | 97 | f2.original_fileobject = f0 98 | f2.compare_to_original() 99 | _logger.debug("f2.diffs = %r" % f2.diffs) 100 | assert f2.diffs == d02 101 | 102 | # TODO include byte_runs 103 | 104 | 105 | if __name__ == "__main__": 106 | test_all() 107 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_HiveObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. 
Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import copy 17 | import logging 18 | import os 19 | import sys 20 | 21 | import dfxml.objects as Objects 22 | 23 | 24 | def get_ho(): 25 | ho = Objects.HiveObject() 26 | ho.mtime = "2010-01-02T03:45:00Z" 27 | return ho 28 | 29 | 30 | def test_all(): 31 | _logger = logging.getLogger(os.path.basename(__file__)) 32 | logging.basicConfig(level=logging.DEBUG) 33 | 34 | ho = get_ho() 35 | 36 | hoc = copy.deepcopy(ho) 37 | 38 | diffs = hoc.compare_to_other(ho) 39 | _logger.debug(repr(diffs)) 40 | assert len(diffs) == 0 41 | 42 | hoc.mtime = "2011-01-02T03:45:00Z" 43 | 44 | diffs = hoc.compare_to_other(ho) 45 | _logger.debug(repr(diffs)) 46 | assert len(diffs) == 1 47 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_TimestampObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.1" 15 | 16 | import copy 17 | import logging 18 | import os 19 | import sys 20 | 21 | import dfxml.objects as Objects 22 | 23 | 24 | def test_all(): 25 | t0 = Objects.TimestampObject() 26 | t0.name = "mtime" 27 | t0.prec = "2s" 28 | 29 | t1 = copy.deepcopy(t0) 30 | 31 | assert t0 == t1 32 | 33 | t0e = t0.to_Element() 34 | t2 = Objects.TimestampObject() 35 | t2.populate_from_Element(t0e) 36 | 37 | assert t0 == t2 38 | 39 | t2.prec = "100" 40 | 41 | assert t0 != t2 42 | -------------------------------------------------------------------------------- /tests/misc_object_tests/diffing_VolumeObject_test.py: -------------------------------------------------------------------------------- 1 | # This software was developed at the National Institute of Standards 2 | # and Technology in whole or in part by employees of the Federal 3 | # Government in the course of their official duties. Pursuant to 4 | # title 17 Section 105 of the United States Code portions of this 5 | # software authored by NIST employees are not subject to copyright 6 | # protection and are in the public domain. For portions not authored 7 | # by NIST employees, NIST has been granted unlimited rights. 
NIST 8 | # assumes no responsibility whatsoever for its use by other parties, 9 | # and makes no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | __version__ = "0.1.0" 15 | 16 | import logging 17 | import os 18 | import sys 19 | 20 | import dfxml 21 | import dfxml.objects as Objects 22 | 23 | 24 | def test_all(): 25 | logging.basicConfig(level=logging.DEBUG) 26 | _logger = logging.getLogger(os.path.basename(__file__)) 27 | 28 | v0 = Objects.VolumeObject() 29 | 30 | v0.sector_size = 512 31 | v0.block_size = 4096 32 | v0.partition_offset = 32256 33 | v0.ftype = -1 34 | assert v0.ftype == -1 35 | v0.ftype_str = 1 36 | v0.block_count = 100000 37 | v0.allocated_only = False 38 | v0.first_block = 0 39 | v0.last_block = v0.block_count 40 | 41 | _logger.debug(repr(v0)) 42 | v1 = eval("Objects." + repr(v0)) 43 | 44 | e0 = v0.to_Element() 45 | _logger.debug("e0 = %r" % e0) 46 | 47 | v2 = Objects.VolumeObject() 48 | v2.populate_from_Element(e0) 49 | 50 | v1.block_size = 512 51 | v2.partition_offset = v0.partition_offset + v0.block_count * v0.block_size 52 | 53 | d01 = v0.compare_to_other(v1) 54 | d02 = v0.compare_to_other(v2) 55 | 56 | _logger.debug("d01 = %r" % d01) 57 | assert d01 == set(["block_size"]) 58 | 59 | _logger.debug("d02 = %r" % d02) 60 | assert d02 == set(["partition_offset"]) 61 | -------------------------------------------------------------------------------- /tests/misc_object_tests/objects_test.py: -------------------------------------------------------------------------------- 1 | # Unit tests for objects 2 | 3 | 4 | __version__ = "0.1.1" 5 | 6 | import os 7 | import sys 8 | 9 | from dfxml.objects import * 10 | from dfxml.objects import _intcast, _logger, _qsplit 11 | 12 | 13 | def test_all(): 14 | assert _intcast(-1) == -1 15 | assert _intcast("-1") == -1 16 | assert _qsplit("{http://www.w3.org/2001/XMLSchema}all") == ( 17 | "http://www.w3.org/2001/XMLSchema", 18 | "all", 19 | ) 20 | assert _qsplit("http://www.w3.org/2001/XMLSchema}all") == ( 21 | None, 22 | "http://www.w3.org/2001/XMLSchema}all", 23 | ) 24 | 25 | fi = FileObject() 26 | 27 | # Check property setting 28 | fi.mtime = "1999-12-31T23:59:59Z" 29 | _logger.debug("fi = %r" % fi) 30 | 31 | # Check bad property setting 32 | failed = None 33 | try: 34 | fi.mtime = "Not a timestamp" 35 | failed = False 36 | except: 37 | failed = True 38 | _logger.debug("fi = %r" % fi) 39 | _logger.debug("failed = %r" % failed) 40 | assert failed == True 41 | 42 | t0 = TimestampObject(prec="100ns", name="mtime") 43 | _logger.debug("t0 = %r" % t0) 44 | assert t0.prec[0] == 100 45 | assert t0.prec[1] == "ns" 46 | t1 = TimestampObject("2009-01-23T01:23:45Z", prec="2", name="atime") 47 | _logger.debug("t1 = %r" % t1) 48 | assert t1.prec[0] == 2 49 | assert t1.prec[1] == "s" 50 | -------------------------------------------------------------------------------- /tests/misc_object_tests/test_TCPFlowObjects.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. 
NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | import logging 15 | import os.path 16 | import pathlib 17 | import sys 18 | 19 | import pytest 20 | 21 | # TODO - It seems TCPFlowObjects might be better served from /dfxml instead of /dfxml/bin. 22 | import dfxml.bin.TCPFlowObjects 23 | import dfxml.objects as Objects 24 | 25 | 26 | @pytest.fixture 27 | def top_srcdir() -> pathlib.Path: 28 | srcdir = pathlib.Path(__file__).parent 29 | return srcdir / ".." / ".." 30 | 31 | 32 | def test_TCPFlowObjects(top_srcdir: pathlib.Path) -> None: 33 | path_to_sample = top_srcdir / "samples" / "tcpflow_zip_generic_header.xml" 34 | assert ( 35 | path_to_sample.exists() 36 | ), "Hard-coded path from test to sample is no longer valid." 37 | 38 | for event, obj in Objects.iterparse(str(path_to_sample)): 39 | if not isinstance(obj, Objects.FileObject): 40 | continue 41 | results = dfxml.bin.TCPFlowObjects.scanner_results_from_FileObject(obj) 42 | assert len(results) == 1 43 | # TODO - This could do with a better presentation in relation to the pytest framework. 44 | print("Flow name: %r." % obj.filename) 45 | for result in results: 46 | result.print_report() 47 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | mypy 2 | pytest 3 | -------------------------------------------------------------------------------- /tests/test_reads.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | import os 15 | 16 | import pytest 17 | 18 | import dfxml 19 | import dfxml.objects 20 | 21 | 22 | def nop(x: object) -> None: 23 | pass 24 | 25 | 26 | @pytest.fixture 27 | def top_srcdir() -> str: 28 | srcdir = os.path.dirname(__file__) 29 | retval = os.path.join(srcdir, "..") 30 | assert os.path.isdir( 31 | os.path.join(retval, "samples") 32 | ), "Hard-coded expected path not found, '${top_srcdir}/samples/'." 33 | return retval 34 | 35 | 36 | @pytest.fixture 37 | def difference_test_0_filepath(top_srcdir: str) -> str: 38 | retval = os.path.join(top_srcdir, "samples", "difference_test_0.xml") 39 | assert os.path.exists( 40 | retval 41 | ), "Hard-coded path to file did not find expected file, '${top_srcdir}/samples/difference_test_0.xml'." 42 | return retval 43 | 44 | 45 | def test_read_dfxml(difference_test_0_filepath: str) -> None: 46 | """ 47 | This test confirms that the DFXML pip-managed packaging exposes the dfxml package and the objects.py module. 
48 | """ 49 | with open(difference_test_0_filepath, "rb") as fh: 50 | dfxml.read_dfxml(fh, callback=nop) 51 | 52 | 53 | def test_objects_iterparse(difference_test_0_filepath: str) -> None: 54 | """ 55 | This test confirms that the DFXML pip-managed packaging exposes the dfxml package's objects.py module. 56 | """ 57 | for event, obj in dfxml.objects.iterparse(difference_test_0_filepath): 58 | pass 59 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | import dfxml 15 | 16 | 17 | def test_version() -> None: 18 | assert not dfxml.__version__ is None 19 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/.gitignore: -------------------------------------------------------------------------------- 1 | *.dfxml 2 | walk_ignore_test/ 3 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology by employees of the Federal Government in the course 5 | # of their official duties. Pursuant to title 17 Section 105 of the 6 | # United States Code this software is not subject to copyright 7 | # protection and is in the public domain. NIST assumes no 8 | # responsibility whatsoever for its use by other parties, and makes 9 | # no guarantees, expressed or implied, about its quality, 10 | # reliability, or any other characteristic. 11 | # 12 | # We would appreciate acknowledgement if the software is used. 13 | 14 | # Bash selection is described in the top-level Makefile. 15 | ifeq ($(shell basename $(SHELL)),sh) 16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash) 17 | endif 18 | 19 | top_srcdir := $(shell cd ../.. 
; pwd) 20 | 21 | tests_srcdir := $(top_srcdir)/tests 22 | 23 | all: \ 24 | walk_ignore_genprops.dfxml \ 25 | walk_ignore_hashes.dfxml 26 | 27 | .scaffolding.done.log: 28 | rm -rf walk_ignore_test 29 | mkdir -p walk_ignore_test/foo/bar/baz 30 | echo 'contents c' > walk_ignore_test/foo/bar/baz/c 31 | echo 'contents b' > walk_ignore_test/foo/bar/b 32 | echo 'contents a' > walk_ignore_test/foo/a 33 | touch $@ 34 | 35 | check: \ 36 | walk_ignore_genprops.dfxml \ 37 | walk_ignore_hashes.dfxml 38 | source $(tests_srcdir)/venv/bin/activate \ 39 | && pytest \ 40 | --log-level=DEBUG 41 | 42 | clean: 43 | @rm -f \ 44 | .scaffolding.done.log \ 45 | *.dfxml 46 | @rm -rf \ 47 | walk_ignore_test/ 48 | 49 | walk_ignore_genprops.dfxml: \ 50 | $(tests_srcdir)/.venv.done.log \ 51 | $(top_srcdir)/dfxml/bin/walk_to_dfxml.py \ 52 | .scaffolding.done.log 53 | rm -f \ 54 | __$@ \ 55 | _$@ 56 | source $(tests_srcdir)/venv/bin/activate \ 57 | && cd walk_ignore_test \ 58 | && walk_to_dfxml \ 59 | -i atime \ 60 | -i ctime \ 61 | -i crtime \ 62 | -i gid \ 63 | -i inode \ 64 | -i mtime@d \ 65 | -i uid \ 66 | > ../__$@ 67 | xmllint \ 68 | --format \ 69 | __$@ \ 70 | > _$@ 71 | rm __$@ 72 | mv _$@ $@ 73 | 74 | walk_ignore_hashes.dfxml: \ 75 | $(tests_srcdir)/.venv.done.log \ 76 | $(top_srcdir)/dfxml/bin/walk_to_dfxml.py \ 77 | .scaffolding.done.log 78 | rm -f \ 79 | __$@ \ 80 | _$@ 81 | source $(tests_srcdir)/venv/bin/activate \ 82 | && cd walk_ignore_test \ 83 | && walk_to_dfxml \ 84 | --ignore-hashes \ 85 | > ../__$@ 86 | xmllint \ 87 | --format \ 88 | __$@ \ 89 | > _$@ 90 | rm __$@ 91 | mv _$@ $@ 92 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/README.md: -------------------------------------------------------------------------------- 1 | # `walk_to_dfxml` 2 | 3 | *Source*: [`../../dfxml/bin/walk_to_dfxml.py`](../../dfxml/bin/walk_to_dfxml.py) 4 | 5 | This command walks a directory, producing a `<fileobject>` for each encountered file and directory, and then recurses into each directory. Output is sent to `stdout`. 6 | 7 | File characteristics are drawn from: 8 | * the path 9 | * hashes of the file contents for regular files (i.e., not directories, not device files; also, not soft links) 10 | * the `stat` structure for the file 11 | * the referenced path (for soft links) 12 | 13 | Any directory that can be navigated to can be characterized with this script. This has been tested from the root directory of an (offline) Linux system's root-filesystem partition. The tool can handle the `/dev` directory without issue. 14 | 15 | This tool can be used to walk a network file system, such as a share. However, be aware that if hashing is enabled, the tool will read the file contents over the network. 16 | 17 | 18 | ## Usage 19 | 20 | ```bash 21 | cd .../my_directory 22 | walk_to_dfxml > /tmp/my_directory.dfxml 23 | ``` 24 | 25 | This will record all characteristics available for each file in and below `.../my_directory`. 26 | 27 | Output should be captured outside of the present working directory, such as in the parent directory. Note that this command will include the hash of `output.dfxml` itself, which is an empty file at the moment it is hashed: 28 | 29 | ```bash 30 | cd .../my_directory 31 | walk_to_dfxml > output.dfxml 32 | ``` 33 | 34 | The `-i` (`--ignore`) flag causes the named file characteristic to be omitted from the output. E.g.
this command will not collect access time: 35 | 36 | ```bash 37 | walk_to_dfxml -i atime > /tmp/walk.dfxml 38 | ``` 39 | 40 | (Testing: See the [`Makefile`](Makefile) recipe for `walk_ignore_genprops.dfxml`, which is tested in [`test_walk_to_dfxml.py`](test_walk_to_dfxml.py)'s function `test_walk_ignore_genprops`.) 41 | 42 | The program can run without gathering any file hashes, by using the `--ignore-hashes` flag: 43 | 44 | ```bash 45 | walk_to_dfxml --ignore-hashes > /tmp/walk.dfxml 46 | ``` 47 | 48 | (Testing: See the [`Makefile`](Makefile) recipe for `walk_ignore_hashes.dfxml`, which is tested in [`test_walk_to_dfxml.py`](test_walk_to_dfxml.py)'s function `test_walk_ignore_hashes`.) 49 | -------------------------------------------------------------------------------- /tests/walk_to_dfxml/test_walk_to_dfxml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # This software was developed at the National Institute of Standards 4 | # and Technology in whole or in part by employees of the Federal 5 | # Government in the course of their official duties. Pursuant to 6 | # title 17 Section 105 of the United States Code portions of this 7 | # software authored by NIST employees are not subject to copyright 8 | # protection and are in the public domain. For portions not authored 9 | # by NIST employees, NIST has been granted unlimited rights. NIST 10 | # assumes no responsibility whatsoever for its use by other parties, 11 | # and makes no guarantees, expressed or implied, about its quality, 12 | # reliability, or any other characteristic. 13 | # 14 | # We would appreciate acknowledgement if the software is used. 15 | 16 | __version__ = "0.2.0" 17 | 18 | import logging 19 | import os 20 | 21 | import pytest 22 | 23 | import dfxml.objects as Objects 24 | 25 | _logger = logging.getLogger(os.path.basename(__file__)) 26 | 27 | 28 | @pytest.fixture 29 | def srcdir() -> str: 30 | retval = os.path.dirname(__file__) 31 | return retval 32 | 33 | 34 | def test_walk_ignore_genprops(srcdir: str) -> None: 35 | files_encountered = 0 36 | for event, obj in Objects.iterparse( 37 | os.path.join(srcdir, "walk_ignore_genprops.dfxml") 38 | ): 39 | if not isinstance(obj, Objects.FileObject): 40 | continue 41 | files_encountered += 1 42 | for propname in ["atime", "ctime", "crtime", "gid", "inode", "mtime", "uid"]: 43 | try: 44 | assert ( 45 | getattr(obj, propname) is None 46 | ), "Found property that should have been ignored." 47 | except: 48 | if propname == "mtime" and obj.name_type != "d": 49 | continue 50 | _logger.error("obj.filename = %r.", obj.filename) 51 | _logger.error("propname = %r.", propname) 52 | raise 53 | assert files_encountered > 0, "Encountered no files in walk_ignore_genprops.dfxml." 54 | 55 | 56 | def test_walk_ignore_hashes(srcdir: str) -> None: 57 | files_encountered = 0 58 | for event, obj in Objects.iterparse( 59 | os.path.join(srcdir, "walk_ignore_hashes.dfxml") 60 | ): 61 | if not isinstance(obj, Objects.FileObject): 62 | continue 63 | files_encountered += 1 64 | for propname in Objects.FileObject._hash_properties: 65 | try: 66 | assert ( 67 | getattr(obj, propname) is None 68 | ), "Found hash property when none was expected." 69 | except: 70 | _logger.error("obj.filename = %r.", obj.filename) 71 | _logger.error("propname = %r.", propname) 72 | raise 73 | assert files_encountered > 0, "Encountered no files in walk_ignore_hashes.dfxml." 74 | --------------------------------------------------------------------------------
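
The test module above shows the project's own pattern for reading `walk_to_dfxml` output back in with `dfxml.objects.iterparse`. As a supplementary illustration, the following minimal sketch applies the same iteration pattern outside the test harness; the input path `walk.dfxml` is an assumed placeholder for any output captured as shown in the README, not a file tracked in this repository.

```python
#!/usr/bin/env python3
"""
Minimal consumption sketch: summarize a DFXML document produced by
walk_to_dfxml, mirroring the iteration pattern used in test_walk_to_dfxml.py.
The path "walk.dfxml" is an assumed placeholder.
"""

import dfxml.objects as Objects


def summarize(dfxml_path: str) -> None:
    fileobject_count = 0
    regular_file_bytes = 0
    for event, obj in Objects.iterparse(dfxml_path):
        # walk_to_dfxml emits a FileObject per encountered file, directory, or link.
        if not isinstance(obj, Objects.FileObject):
            continue
        fileobject_count += 1
        # name_type "r" marks regular files; filesize may be absent if ignored.
        if obj.name_type == "r" and obj.filesize is not None:
            regular_file_bytes += obj.filesize
    print(
        "Encountered %d fileobjects; %d bytes in regular files."
        % (fileobject_count, regular_file_bytes)
    )


if __name__ == "__main__":
    summarize("walk.dfxml")
```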