├── dfxml
├── bin
│ ├── __init__.py
│ ├── .gitignore
│ ├── conftest.py
│ ├── iredact-config.txt
│ ├── igrep.py
│ ├── xdiff.py
│ ├── validate_dfxml.py
│ ├── iblkfind.py
│ ├── mem_info.py
│ ├── nsrl_rds.py
│ ├── iextract.py
│ ├── iverify.py
│ ├── exp_slack.py
│ ├── corpus_sync.py
│ ├── Makefile
│ ├── xmirror.py
│ ├── break_out_diffs_by_anno.py
│ ├── imap.py
│ ├── dedup.py
│ ├── iexport.py
│ ├── allocation_counter.py
│ ├── cat_fileobjects.py
│ ├── tcpdiff.py
│ ├── report_silent_changes.py
│ ├── ihistogram.py
│ ├── README.md
│ ├── deidentify_xml.py
│ └── filesdb.py
├── conftest.py
├── py.typed
└── dfxml_html.py
├── tests
├── .gitignore
├── requirements.txt
├── make_differential_dfxml
│ ├── .gitignore
│ ├── README.md
│ ├── differential_dfxml_test_by_path_01.txt
│ ├── differential_dfxml_test_by_times_01.txt
│ ├── differential_dfxml_test_by_path_23.txt
│ ├── differential_dfxml_test_by_times_23.txt
│ └── Makefile
├── walk_to_dfxml
│ ├── .gitignore
│ ├── README.md
│ ├── Makefile
│ └── test_walk_to_dfxml.py
├── misc_bin_tests
│ ├── paths.sh
│ ├── README.md
│ ├── test_regxml.sh
│ ├── iexport_test.py
│ ├── _sane_defaults.sh
│ ├── test_hfsj.sh
│ ├── test_redact.sh
│ ├── _pick_pythons.sh
│ ├── test_dfxml_tool.sh
│ ├── test_idifference.py
│ ├── test_cat_fileobjects.sh
│ ├── test_idifference_to_dfxml.sh
│ └── test_mac_timelines.sh
├── misc_object_tests
│ ├── .gitignore
│ ├── README.md
│ ├── FileObject_from_stat_test.py
│ ├── RegXMLObject_test.py
│ ├── diffing_TimestampObject_test.py
│ ├── diff_file_ignore_sample_dfxml_test.py
│ ├── objects_test.py
│ ├── DFXMLObject_program_test.py
│ ├── diff_file_ignore_test.py
│ ├── VolumeObject_test.py
│ ├── Makefile_test.py
│ ├── LibraryObject_read_test.py
│ ├── diffing_HiveObject_test.py
│ ├── CellObject_test.py
│ ├── VolumeObject_hash_test.py
│ ├── LibraryObject_write_test.py
│ ├── diffing_ByteRuns_test.py
│ ├── test_TCPFlowObjects.py
│ ├── diffing_VolumeObject_test.py
│ ├── FileObject_test.py
│ ├── FileObject_byte_run_facets_test.py
│ ├── ByteRuns_test.py
│ ├── diffing_CellObject_test.py
│ ├── FileObject_allocation_test.py
│ ├── PartitionSystemObject_test.py
│ ├── PartitionObject_test.py
│ ├── Makefile
│ ├── FileObject_externals_test.py
│ └── diffing_FileObject_test.py
├── README.md
├── test_version.py
├── test_reads.py
└── Makefile
├── demos
├── .gitignore
├── vmstats
│ ├── Makefile
│ ├── vmstats_decode.html
│ └── vmstats_json.html
├── demo_registry_timeline.py
├── demo_mac_timeline.py
├── demo_sizes.py
├── demo_plot_times.py
├── demo_fiwalk_diskimage.py
├── demo_spark.py
├── spark
│ └── demo_spark.py
├── demo_piecewise.py
├── demo_mac_timeline_iter.py
├── demo_mac_timeline_objects.py
└── demo_readtimes.py
├── samples
├── .gitignore
├── README.md
├── fileobjectexample.xml
├── tcpflow_zip_generic_header.xml
├── Makefile
├── difference_test_1.xml
├── difference_test_0.xml
└── simple.xml
├── .pre-commit-config.yaml
├── .gitmodules
├── .gitignore
├── .gitattributes
├── setup.cfg
├── setup.py
├── .github
└── workflows
│ ├── supply-chain.yml
│ └── continuous-integration.yml
├── Makefile
└── CONTRIBUTE.md
/dfxml/bin/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | venv
2 |
--------------------------------------------------------------------------------
/demos/.gitignore:
--------------------------------------------------------------------------------
1 | *.dfxml
2 | *.xml
3 |
--------------------------------------------------------------------------------
/dfxml/bin/.gitignore:
--------------------------------------------------------------------------------
1 | .pytest_cache
2 |
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | mypy
2 | pytest
3 |
--------------------------------------------------------------------------------
/samples/.gitignore:
--------------------------------------------------------------------------------
1 | *.err.log
2 | *.validates.log
3 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/.gitignore:
--------------------------------------------------------------------------------
1 | *.dfxml
2 |
--------------------------------------------------------------------------------
/tests/walk_to_dfxml/.gitignore:
--------------------------------------------------------------------------------
1 | *.dfxml
2 | walk_ignore_test/
3 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/paths.sh:
--------------------------------------------------------------------------------
1 | source tests/_pick_pythons.sh
2 |
3 | #DEMO_DIR=../demos
4 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/psf/black
3 | rev: 25.1.0
4 | hooks:
5 | - id: black
6 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/.gitignore:
--------------------------------------------------------------------------------
1 | *.dfxml
2 | *.xml
3 | walk_ignore_test/
4 | differential_dfxml_test_??.txt
5 | graph.png
6 | graph_data.json
7 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "dependencies/dfxml_schema"]
2 | path = dependencies/dfxml_schema
3 | url = https://github.com/dfxml-working-group/dfxml_schema.git
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *~
3 | __pycache__
4 | _deps
5 | python/demo.dfxml
6 |
7 | .DS_Store
8 | build
9 | python/demo.dfxml
10 | .cache
11 | .pytest_cache
12 | *.egg-info
13 | *.log
14 | .venv-pre-commit
15 |
--------------------------------------------------------------------------------
/dfxml/conftest.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is empty, but it permits test discovery of the subdirectory.
3 | # See:
4 | # https://stackoverflow.com/questions/10253826/path-issue-with-pytest-importerror-no-module-named-yadayadayada
5 |
--------------------------------------------------------------------------------
/dfxml/bin/conftest.py:
--------------------------------------------------------------------------------
1 | #
2 | # This file is empty, but it permits test discovery of the subdirectory.
3 | # See:
4 | # https://stackoverflow.com/questions/10253826/path-issue-with-pytest-importerror-no-module-named-yadayadayada
5 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/README.md:
--------------------------------------------------------------------------------
1 | The tests in this directory needed to be moved to address a new behavior in a deployed static type checker. The intent is to empty this directory, moving its tests to appropriate locations under `/tests`.
2 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/README.md:
--------------------------------------------------------------------------------
1 | The tests in this directory needed to be moved to address a new behavior in a deployed static type checker. The intent is to empty this directory, moving its tests to appropriate locations under `/tests`.
2 |
--------------------------------------------------------------------------------
/samples/README.md:
--------------------------------------------------------------------------------
1 | # Sample DFXML
2 | This directory contains sample DFXML files. The Makefile here runs tests for conformance against the DFXML Schema with `make check`.
3 |
4 | Not all of these files are currently conformant; these can be seen with `make --keep-going check-TODO`.
5 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_regxml.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Run the registry timeline demo against the sample RegXML file once per
# interpreter named by PYTHON2 and PYTHON3.
# NOTE(review): PYTHON2/PYTHON3 are presumably set by _pick_pythons.sh, and
# TEST_DIR/DEMO_DIR by the caller - confirm.

# `source` is a bash-ism; `.` is the POSIX spelling and works under any /bin/sh.
. "${TEST_DIR}/_pick_pythons.sh"
"$PYTHON2" "${DEMO_DIR}/demo_registry_timeline.py" ../tests/m57-charlie-2009-11-20-charlie-ntuser.dat.regxml
"$PYTHON3" "${DEMO_DIR}/demo_registry_timeline.py" ../tests/m57-charlie-2009-11-20-charlie-ntuser.dat.regxml
5 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | Contents of this directory test the functionality of `dfxml` as an importable Python module.
2 |
3 | Running `make check` in this directory will build a Python virtual environment, install the top source directory into that virtual environment as a module, and then run further tests with `pytest`.
4 |
--------------------------------------------------------------------------------
/demos/vmstats/Makefile:
--------------------------------------------------------------------------------
1 | all:vmstats_pretty.dfxml vmstatsN
2 |
3 | vmstats_pretty.dfxml: vmstats.py
4 | python3 vmstats.py --prettyprint vmstats_pretty.dfxml
5 |
6 | vmstatsN: vmstats.py
7 | python3 vmstats.py --repeat 24 --interval 10 vmstatsN-new.dfxml
8 | /bin/mv -f vmstatsN-new.dfxml vmstatsN.dfxml
9 |
10 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | # NOTE: At the time this rule was written, all files tracked in this repository were known to be text files. From documentation on this file at git-scm.com, it seems possible this might trip up committing a binary file in the future.
3 | * text=auto
4 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/iexport_test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from dfxml.bin.iexport import *
5 |
6 |
def test_iexport():
    """Smoke-test Run intersection in both directions and RunDB removal."""
    outer, inner = Run(0, 1000), Run(50, 60)
    assert outer.intersects_run(inner)
    assert inner.intersects_run(outer)

    disk = RunDB(0, 1000)
    print(disk)
    # Remove three runs in a fixed order, then show what is left.
    for run_args in ((50, 60), (0, 10), (40, 20)):
        disk.remove(Run(*run_args))
    print(disk)
19 |
--------------------------------------------------------------------------------
/samples/fileobjectexample.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/_sane_defaults.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# Provide fallback values for TEST_DIR, TOOL_DIR, SAMPLE_DIR and PYTHONPATH,
# derived from the script directory passed as the first argument.  Any of
# these that the caller already set to a non-empty value is left untouched.
#
# Every expansion is quoted so that paths containing whitespace neither break
# the `[ -z ... ]` tests nor get split before reaching dirname.

SCRIPT_DIR="$1"

# Guarantee sane defaults
if [ -z "${TEST_DIR:-}" ];
then
  TEST_DIR="${SCRIPT_DIR}"
fi

if [ -z "${TOOL_DIR:-}" ];
then
  TOOL_DIR="$(dirname "${SCRIPT_DIR}")"
fi

if [ -z "${SAMPLE_DIR:-}" ];
then
  SAMPLE_DIR="$(dirname "$(dirname "${SCRIPT_DIR}")")/samples"
fi

# PYTHONPATH is exported only when this script had to supply it.
if [ -z "${PYTHONPATH:-}" ];
then
  PYTHONPATH="$(dirname "$(dirname "${SCRIPT_DIR}")")"
  export PYTHONPATH;
fi
26 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = dfxml
3 | version = attr: dfxml.__version__
4 | url = https://github.com/dfxml-working-group/dfxml_python
5 | classifiers =
6 | License :: Public Domain
7 | Programming Language :: Python :: 3
8 |
9 | [options]
10 | include_package_data = true
11 | packages = find:
12 | python_requires = >=3.10
13 |
14 | # See CONTRIBUTE.md before adding a console script line.
15 | [options.entry_points]
16 | console_scripts =
17 | make_differential_dfxml = dfxml.bin.make_differential_dfxml:main
18 | walk_to_dfxml = dfxml.bin.walk_to_dfxml:main
19 |
20 | [options.package_data]
21 | dfxml = py.typed
22 |
--------------------------------------------------------------------------------
/dfxml/bin/iredact-config.txt:
--------------------------------------------------------------------------------
1 | #
2 | # Paths to the disk image and fiwalk XML output
3 | #
4 | IMAGEFILE /home/bcadmin/Desktop/jowork.raw.raw
5 | XMLFILE /home/bcadmin/Desktop/jofiwalk.xml
6 |
7 | #
8 | # Redaction patterns
9 | #
10 | #FILEPAT *.dll FUZZ
11 | #FILEPAT *.com FUZZ
12 | FILEPAT *.exe FUZZ
13 |
14 | #
15 | # Other examples
16 | #
17 | #KEY 100200300400
18 | #MD5 db06069ef1c9f40986ffa06db4fe8fd7 FILL 0x44
19 | #FILENAME file3.txt ENCRYPT
20 | #FILEPAT file*.txt ENCRYPT
21 | #CONTAINS This FILL 0x44
22 | #FILEPAT *Spotlight* FILL 0x44
23 |
24 | #
25 | # Uncomment this line to actually commit the redaction:
26 | #
27 | COMMIT
28 |
29 |
30 |
--------------------------------------------------------------------------------
/dfxml/py.typed:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology by employees of the Federal Government in the course
3 | # of their official duties. Pursuant to title 17 Section 105 of the
4 | # United States Code this software is not subject to copyright
5 | # protection and is in the public domain. NIST assumes no
6 | # responsibility whatsoever for its use by other parties, and makes
7 | # no guarantees, expressed or implied, about its quality,
8 | # reliability, or any other characteristic.
9 | #
10 | # We would appreciate acknowledgement if the software is used.
11 |
12 | # This file is defined to support PEP 561:
13 | # https://www.python.org/dev/peps/pep-0561/
14 |
--------------------------------------------------------------------------------
/demos/demo_registry_timeline.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
5 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
6 | import dfxml
7 |
# Records collected by process(); each entry is [mtime, full path, label].
timeline = []


def process(co):
    """Record the modification time of one registry cell object.

    Appends ``[mtime, full_path, " modified"]`` to the module-level
    ``timeline`` when ``co.mtime()`` is not None; otherwise does nothing.

    Args:
        co: object providing ``mtime()`` and ``full_path()``.
    """
    # Fetch once so the tested value and the stored value are the same
    # object, and compare by identity as PEP 8 prescribes for None.
    mtime = co.mtime()
    if mtime is not None:
        timeline.append([mtime, co.full_path(), " modified"])
15 |
16 |
def main():
    """Print a tab-separated, sorted timeline for the RegXML file in argv[1].

    Exits with status 1 after printing a usage line when no file argument
    is given.
    """
    if len(sys.argv) < 2:
        print("Usage: {} REGXML_FILE".format(sys.argv[0]))
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
    # Close the input deterministically once parsing has finished.
    with open(sys.argv[1], "rb") as xmlfile:
        dfxml.read_regxml(xmlfile=xmlfile, callback=process)
    timeline.sort()
    for record in timeline:
        print("\t".join(map(str, record)))
25 |
26 |
27 | if __name__ == "__main__":
28 | main()
29 |
--------------------------------------------------------------------------------
/tests/test_version.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | import dfxml
15 |
16 |
def test_version() -> None:
    """The dfxml package must expose a __version__ that is not None."""
    version = dfxml.__version__
    assert version is not None
19 |
--------------------------------------------------------------------------------
/dfxml/bin/igrep.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """Usage: igrep imagefile.iso string ...
3 |
4 | Reports the files that contain the string.
5 | """
6 | import fiwalk
7 |
8 | import dfxml
9 |
if __name__ == "__main__":
    import sys
    from optparse import OptionParser

    # Command line: exactly one image file and one search string.
    parser = OptionParser()
    parser.usage = "%prog [options] image.iso s1"
    parser.add_option("-d", "--debug", help="debug", action="store_true")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    (imagefn, data) = args

    def process(fi):
        """Print the file's name and match offset when its contents hold `data`."""
        offset = fi.contents().find(data)
        # find() signals "absent" with -1, so offset 0 (a match at the very
        # start of the contents) is a genuine hit and must be reported.
        # NOTE(review): assumes contents() returns a str/bytes-like object.
        if offset >= 0:
            print("%s (offset=%d)" % (fi.filename(), offset))

    # A disk image is binary data; open it as such and close it when done.
    with open(imagefn, "rb") as imagefile:
        fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=process)
31 |
--------------------------------------------------------------------------------
/dfxml/bin/xdiff.py:
--------------------------------------------------------------------------------
1 | #
2 | # Report the difference between two dfxml files
3 | #
4 | import sys
5 |
6 | from filesdb import filesdb
7 |
8 | import dfxml
9 |
10 | #
11 | # test program. Reads a database and dumps it.
12 | #
if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command line: one or more DFXML files, handled in the order given.
    cli = ArgumentParser(
        description="Test the files database with one or more DFXML files"
    )
    cli.add_argument("xmlfiles", nargs="+", help="XML files to process")
    args = cli.parse_args()

    previous_db = None
    for xml_path in args.xmlfiles:
        # Load this file into a fresh database and dump its statistics.
        current_db = filesdb()
        current_db.fname = xml_path
        current_db.read(xml_path)
        print("{} stats:".format(xml_path))
        current_db.print_stats(sys.stdout)

        # NOTE(review): only a header naming the previous file is printed
        # here; no comparison between the two databases is performed.
        if previous_db:
            print("")
            print("Difference from {}".format(previous_db.fname))
        previous_db = current_db
33 |
--------------------------------------------------------------------------------
/dfxml/bin/validate_dfxml.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import sys
3 | from optparse import OptionParser
4 | from sys import stdout
5 |
6 | import dfxml.fiwalk as fiwalk
7 |
8 |
def demo_dfxml_time_bug(filename):
    """Print the mtime of every file object found in a DFXML file.

    For each file object the type and value of ``mtime()`` are printed.

    Args:
        filename: path of the DFXML file to read.

    Raises:
        RuntimeError: if obtaining or printing a file object's mtime fails;
            the message carries that file's size and the original exception
            is chained as the cause.
    """
    parser = OptionParser()
    # optparse expands "%prog"; the former "%prog%" left a stray "%" behind.
    parser.usage = "%prog [options] xmlfile"
    # Still parse sys.argv so that standard options such as --help work.
    parser.parse_args()
    with open(filename, "rb") as xmlfile:
        for fi in fiwalk.fileobjects_using_sax(xmlfile=xmlfile):
            fsize = fi.filesize()
            try:
                mt = fi.mtime()
                print("Type of mt:", type(mt))
                print("Normal mtime:")
                print(mt)
            except Exception as err:
                # KeyboardInterrupt and SystemExit are not Exception
                # subclasses, so they propagate untouched.  RuntimeError is
                # the real built-in; "RuntimeException" does not exist and
                # raised NameError instead of the intended error.
                raise RuntimeError(
                    "Abnormal mtime for file with size {}".format(fsize)
                ) from err
24 |
25 |
if __name__ == "__main__":
    # Fail with a usage line instead of an IndexError traceback when the
    # DFXML file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} xmlfile".format(sys.argv[0]))
    filename = sys.argv[1]
    demo_dfxml_time_bug(filename)
29 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_hfsj.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | hdiutil create -size 10m -fs HFS+J -nospotlight -attach -volname image -ov -layout NONE \
4 | -imagekey diskimage-class=CRawDiskImage image.dmg
5 | echo "This is file 1 - snarf" > /Volumes/image/file1.txt
6 | echo "This is file 2 - snarf" > /Volumes/image/file2.txt
7 | sync
8 | hdiutil detach /Volumes/image
9 | cp image.dmg image.gen0.dmg
10 | echo "look for file1 and file2:"
11 | strings -o image.dmg | grep snarf
12 | echo "mount the disk and overwrite the contents of file2"
13 | hdiutil attach image.dmg
14 | echo "New file 1 contents - snarf" | dd of=/Volumes/image/file1.txt
15 | echo ""
16 | echo "===file1.txt==="
17 | cat /Volumes/image/file1.txt
18 | echo ""
19 | echo "===file2.txt==="
20 | cat /Volumes/image/file2.txt
21 | echo ""
22 | hdiutil detach /Volumes/image
23 | cp image.dmg image.gen1.dmg
24 | strings -o image.dmg | grep snarf
25 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed in whole or in part by employees of the
4 | # Federal Government in the course of their official duties, and with
5 | # other Federal assistance. Pursuant to title 17 Section 105 of the
6 | # United States Code portions of this software authored by Federal
7 | # employees are not subject to copyright protection within the United
8 | # States. For portions not authored by Federal employees, the Federal
9 | # Government has been granted unlimited rights, and no claim to
10 | # copyright is made. The Federal Government assumes no responsibility
11 | # whatsoever for its use by other parties, and makes no guarantees,
12 | # expressed or implied, about its quality, reliability, or any other
13 | # characteristic.
14 | #
15 | # We would appreciate acknowledgement if the software is used.
16 |
17 | import setuptools
18 |
19 | setuptools.setup()
20 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/README.md:
--------------------------------------------------------------------------------
1 | # `make_differential_dfxml`
2 |
3 | *Source*: [`../../dfxml/bin/make_differential_dfxml.py`](../../dfxml/bin/make_differential_dfxml.py)
4 |
5 | This command takes as input two DFXML files, and outputs a DFXML document showing differential annotations. Output is sent to `stdout`.
6 |
7 | This tool was introduced in [Nelson et al., DFRWS 2014](https://doi.org/10.1016/j.diin.2014.05.004).
8 |
9 |
10 | ## Usage
11 |
12 | ```bash
13 | make_differential_dfxml input_1.dfxml input_2.dfxml > deltas.dfxml
14 | ```
15 |
16 | If one is using the [DFXML Objects module](../../dfxml/objects.py), the differentially-annotated DFXML can be analyzed by referring to each encountered `FileObject`'s property `.annos`. See e.g. [`summarize_differential_dfxml.py`](../../dfxml/bin/summarize_differential_dfxml.py)'s output for [changes scoped to single file systems](differential_dfxml_test_by_path_01.txt), or [changes that cross file systems](differential_dfxml_test_by_times_23.txt).
17 |
--------------------------------------------------------------------------------
/demos/demo_mac_timeline.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # produce a MAC-times timeline.
3 | # works under either Python2 or Python3
4 | import os
5 | import sys
6 |
7 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
8 | import dfxml
9 |
# Records collected by process(); each entry is [time, filename, label].
timeline = []


def process(fi):
    """Record every MAC time that is set on one file object.

    For each of ``mtime``, ``crtime``, ``ctime`` and ``atime`` (in that
    order) whose value is not None, appends ``[time, filename, label]`` to
    the module-level ``timeline``.

    Args:
        fi: object providing ``mtime()``, ``crtime()``, ``ctime()``,
            ``atime()`` and ``filename()``.
    """
    events = (
        (fi.mtime, " modified"),
        (fi.crtime, " created"),
        (fi.ctime, " changed"),
        (fi.atime, " accessed"),
    )
    for accessor, label in events:
        # Call each accessor once and compare by identity, so the value
        # that was tested is exactly the value that is stored.
        when = accessor()
        if when is not None:
            timeline.append([when, fi.filename(), label])
22 |
23 |
def main():
    """Print a tab-separated, sorted MAC timeline for the DFXML file in argv[1].

    Exits with status 1 after printing a usage line when no file argument
    is given.
    """
    if len(sys.argv) < 2:
        print("Usage: {} DFXML_FILE".format(sys.argv[0]))
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
    # Close the input deterministically once parsing has finished.
    with open(sys.argv[1], "rb") as xmlfile:
        dfxml.read_dfxml(xmlfile=xmlfile, callback=process)
    timeline.sort()
    for record in timeline:
        print("\t".join(map(str, record)))
32 |
33 |
34 | if __name__ == "__main__":
35 | main()
36 |
--------------------------------------------------------------------------------
/demos/demo_sizes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.2
2 |
3 | #
4 | # Demo program that shows how to calculate the average size of file objects in a DFXML file
5 | #
6 |
7 | import collections
8 | import math
9 | import os
10 | import sys
11 |
12 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
13 | import dfxml
14 |
# Per-extension accumulators filled in by func().
sums = collections.Counter()  # total of file sizes
sum_of_squares = collections.Counter()  # total of squared file sizes
count = collections.Counter()  # number of files seen


def func(fi):
    """Fold one file object's size into the per-extension accumulators.

    Args:
        fi: object providing ``ext()`` and ``filesize()``.
    """
    ext = fi.ext()
    size = fi.filesize()
    count[ext] += 1
    sums[ext] += size
    # Accumulate, do not overwrite: the standard deviation needs the sum of
    # squares over every file with this extension, not just the last one.
    sum_of_squares[ext] += size**2
25 |
26 |
# Feed every file object of the DFXML file named in argv[1] through func(),
# closing the file as soon as parsing is done.
with open(sys.argv[1], "rb") as xmlfile:
    dfxml.read_dfxml(xmlfile=xmlfile, callback=func)

# One row per extension: count, total size, mean size, standard deviation.
fmt = "{:8} {:8} {:8} {:8} {:8}"
print(fmt.format("Ext", "Count", "Total", "Average", "StdDev"))
for ext in sums:
    mean = sums[ext] / count[ext]
    # E[x^2] - E[x]^2 can come out marginally negative; clamp it so that
    # math.sqrt() cannot raise ValueError.
    variance = max(0.0, sum_of_squares[ext] / count[ext] - mean**2)
    print(fmt.format(ext, count[ext], sums[ext], mean, math.sqrt(variance)))
40 |
--------------------------------------------------------------------------------
/demos/demo_plot_times.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import os
3 | import sys
4 | import time
5 |
6 | import fiwalk
7 |
8 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
9 | import dfxml
10 |
if __name__ == "__main__":
    import sys
    from optparse import OptionParser
    from sys import stdout

    # Command line: a single DFXML file or disk image.
    parser = OptionParser()
    parser.usage = "%prog [options] (xmlfile or imagefile)"
    (options, args) = parser.parse_args()

    if not args:
        parser.print_usage()
        sys.exit(1)

    sizes = []
    dates = {}  # datetime -> number of timestamps carrying that value

    def callback(fi):
        """Record the file's size and tally each of its timestamps by datetime."""
        sizes.append(fi.filesize())
        # dict.iteritems() no longer exists on Python 3, and only the values
        # are needed here.
        for val in fi.times().values():
            date = val.datetime()
            dates[date] = dates.get(date, 0) + 1

    fn = args[0]
    # Open in binary mode and close the handle when processing is finished.
    with open(fn, "rb") as infile:
        if fn.endswith(".xml"):
            fiwalk.fiwalk_using_sax(xmlfile=infile, callback=callback)
        else:
            fiwalk.fiwalk_using_sax(imagefile=infile, callback=callback)

    print("Here is the dates array:")
    for d in sorted(dates.keys()):
        print("{} {}".format(d, dates[d]))
42 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_redact.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | /bin/rm -f testdisk.dmg redact.cfg
3 | hdiutil create -size 1m -fs MS-DOS -nospotlight -attach -volname testdisk testdisk.dmg
4 | echo "This is the zero file. FILE0001." > /Volumes/TESTDISK/file0.txt
5 | echo "This is the first file. FILE0001." > /Volumes/TESTDISK/file1.txt
6 | echo "This is the second file. FILE0002." > /Volumes/TESTDISK/file2.txt
7 | echo "This is the third file. FILE0003." > /Volumes/TESTDISK/file3.txt
8 | echo "This is the fourth file. FILE0004." > /Volumes/TESTDISK/file4.txt
9 | echo "This is the fifth file. FILE0005." > /Volumes/TESTDISK/file5.txt
10 | echo "This is the dixth file. FILE0006." > /Volumes/TESTDISK/file6.txt
11 | hdiutil detach /Volumes/TESTDISK
12 | cat > redact.cfg < None:
16 | """Generate filesystem metadata for disk image and write resulting
17 | dfxml to file"""
18 | # Analyse image file
19 | with open(imageFile, "rb") as ifs:
20 | fwOutBuffer = fiwalk.fiwalk_xml_stream(imagefile=ifs)
21 | fwOut = fwOutBuffer.read()
22 |
23 | # Write dfxml to output file
24 | with io.open(outFile, "wb") as fOut:
25 | fOut.write(fwOut)
26 |
27 |
def main() -> None:
    """Write DFXML for the disk image in argv[1] to the file named in argv[2].

    Exits with status 1 after printing a usage line when fewer than two
    arguments are given.
    """
    if len(sys.argv) < 3:
        print("Usage: {} IMAGEFILE OUTFILE".format(sys.argv[0]))
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)
    imageFile = sys.argv[1]
    outFile = sys.argv[2]
    writeDfxml(imageFile, outFile)
35 |
36 |
37 | if __name__ == "__main__":
38 | main()
39 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/FileObject_from_stat_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 |
20 | import dfxml.objects as Objects
21 |
22 |
def test_all():
    """Populate a FileObject from this file's own stat record and log its DFXML."""
    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger(os.path.basename(__file__))

    own_stat = os.stat(__file__)
    file_object = Objects.FileObject()
    file_object.populate_from_stat(own_stat)

    # Format eagerly so to_dfxml() and its repr are always exercised,
    # whatever the effective log level is.
    message = "f0.to_dfxml() = %r" % file_object.to_dfxml()
    log.debug(message)
30 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/RegXMLObject_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import os
17 | import sys
18 |
19 | import diffing_CellObject_test
20 | import diffing_HiveObject_test
21 |
22 | import dfxml.objects as Objects
23 |
24 |
def test_all():
    """Assemble a RegXMLObject from the sibling diffing tests' fixtures and print it."""
    regxml_doc = Objects.RegXMLObject(version="0.2")

    # NOTE(review): this hive is filled but never attached to the document;
    # the hive appended below comes from diffing_HiveObject_test.get_ho().
    hive = Objects.HiveObject()
    hive.append(diffing_CellObject_test.get_co())
    hive.append(diffing_CellObject_test.get_nco())

    fixture_hive = diffing_HiveObject_test.get_ho()
    regxml_doc.append(fixture_hive)
    regxml_doc.print_regxml()
32 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diffing_TimestampObject_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import copy
17 | import logging
18 | import os
19 | import sys
20 |
21 | import dfxml.objects as Objects
22 |
23 |
def test_all():
    """Check TimestampObject equality across a deep copy, an Element round trip, and a changed prec."""
    original = Objects.TimestampObject()
    original.name = "mtime"
    original.prec = "2s"

    # A deep copy compares equal to its source.
    duplicate = copy.deepcopy(original)
    assert original == duplicate

    # Serialising to an Element and reading it back preserves equality.
    element = original.to_Element()
    round_tripped = Objects.TimestampObject()
    round_tripped.populate_from_Element(element)
    assert original == round_tripped

    # Changing the precision breaks equality.
    round_tripped.prec = "100"
    assert original != round_tripped
42 |
--------------------------------------------------------------------------------
/dfxml/bin/iblkfind.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """Usage: iblkfind imagefile.iso s1 [s2 s3 ...] ...
3 |
4 | Reports the files in which sectors s1, s2, s3... are located.
5 | """
6 | import sys
7 |
8 | import dfxml
9 |
if __name__ == "__main__":
    # Command-line entry point: report which files in a DFXML document
    # contain each of the requested sectors (or byte offsets with --offset).
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = "%prog [options] imagefile-or-xmlfile s1 [s2 s3 s3 ...]"
    parser.add_option(
        "--offset", help="values are byte offsets, not sectors", action="store_true"
    )
    # type="int" so that a user-supplied block size can be used as a divisor;
    # without it optparse would hand back a string.
    parser.add_option(
        "--blocksize", help="specify sector blockszie", type="int", default=512
    )
    (options, args) = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        sys.exit(1)
    fn = args[0]

    print(args)
    print("Processing %s" % fn)
    print("Searching for %s" % ", ".join(args[1:]))

    # With --offset the inputs are byte offsets and are divided by the block
    # size; otherwise they are used as given.
    divisor = options.blocksize if options.offset else 1

    # Floor division keeps the values integral on Python 3, where "/" would
    # produce floats.
    sectors = {int(s) // divisor for s in args[1:]}

    def process(fi):
        """Print each requested sector that the file object *fi* contains."""
        for s in sectors:
            if fi.has_sector(s):
                print("%d\t%s" % (s, fi.filename()))

    if not fn.endswith(".xml"):
        print("iblkfind requires an XML file")
        sys.exit(1)
    # Close the XML handle deterministically once parsing is finished.
    with open(fn, "rb") as xmlfile:
        dfxml.read_dfxml(xmlfile=xmlfile, callback=process)
45 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diff_file_ignore_sample_dfxml_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 |
20 | import dfxml.objects as Objects
21 |
22 |
def main():
    """Write a DFXML document whose diff_file_ignores contains "atime" and "crtime".

    The output path is read from the module-level ``args.out_dfxml``.
    """
    document = Objects.DFXMLObject()
    for ignored_property in ("atime", "crtime"):
        document.diff_file_ignores.add(ignored_property)
    with open(args.out_dfxml, "w") as out_fh:
        document.print_dfxml(out_fh)
29 |
30 |
if __name__ == "__main__":
    import argparse

    # ``args`` stays a module-level name because main() reads it.
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true")
    parser.add_argument("out_dfxml")
    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)
    main()
40 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/objects_test.py:
--------------------------------------------------------------------------------
1 | # Unit tests for objects
2 |
3 |
4 | __version__ = "0.1.1"
5 |
6 | import os
7 | import sys
8 |
9 | from dfxml.objects import *
10 | from dfxml.objects import _intcast, _logger, _qsplit
11 |
12 |
def test_all():
    """Exercise _intcast, _qsplit, FileObject.mtime assignment and TimestampObject.prec."""
    assert _intcast(-1) == -1
    assert _intcast("-1") == -1
    assert _qsplit("{http://www.w3.org/2001/XMLSchema}all") == (
        "http://www.w3.org/2001/XMLSchema",
        "all",
    )
    # Without the opening brace the whole string comes back as the local part.
    assert _qsplit("http://www.w3.org/2001/XMLSchema}all") == (
        None,
        "http://www.w3.org/2001/XMLSchema}all",
    )

    fi = FileObject()

    # Check property setting
    fi.mtime = "1999-12-31T23:59:59Z"
    _logger.debug("fi = %r" % fi)

    # Check bad property setting.  Catch Exception rather than using a bare
    # except so that KeyboardInterrupt/SystemExit are not mistaken for the
    # expected rejection.
    try:
        fi.mtime = "Not a timestamp"
    except Exception:
        failed = True
    else:
        failed = False
    _logger.debug("fi = %r" % fi)
    _logger.debug("failed = %r" % failed)
    assert failed

    # A precision string with a unit suffix is split into (number, unit).
    t0 = TimestampObject(prec="100ns", name="mtime")
    _logger.debug("t0 = %r" % t0)
    assert t0.prec[0] == 100
    assert t0.prec[1] == "ns"
    # A bare number is expected to come back with the unit "s".
    t1 = TimestampObject("2009-01-23T01:23:45Z", prec="2", name="atime")
    _logger.debug("t1 = %r" % t1)
    assert t1.prec[0] == 2
    assert t1.prec[1] == "s"
50 |
--------------------------------------------------------------------------------
/.github/workflows/supply-chain.yml:
--------------------------------------------------------------------------------
1 | # Portions of this file contributed by NIST are governed by the
2 | # following statement:
3 | #
4 | # This software was developed at the National Institute of Standards
5 | # and Technology by employees of the Federal Government in the course
6 | # of their official duties. Pursuant to title 17 Section 105 of the
7 | # United States Code this software is not subject to copyright
8 | # protection and is in the public domain. NIST assumes no
9 | # responsibility whatsoever for its use by other parties, and makes
10 | # no guarantees, expressed or implied, about its quality,
11 | # reliability, or any other characteristic.
12 | #
13 | # We would appreciate acknowledgement if the software is used.
14 |
15 | # This workflow uses Make to review direct dependencies of this
16 | # repository.
17 |
18 | name: Supply Chain
19 |
20 | on:
21 | schedule:
22 | - cron: '15 5 * * 1,2,3,4,5'
23 |
24 | jobs:
25 | build:
26 |
27 | runs-on: ubuntu-latest
28 | strategy:
29 | matrix:
30 | python-version:
31 | - '3.9'
32 | - '3.13'
33 |
34 | steps:
35 | - uses: actions/checkout@v4
36 | with:
37 | fetch-depth: 0
38 | - name: Set up Python ${{ matrix.python-version }}
39 | uses: actions/setup-python@v5
40 | with:
41 | python-version: ${{ matrix.python-version }}
42 | - name: Review dependencies
43 | run: make check-supply-chain
44 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/DFXMLObject_program_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.1.1"
17 |
18 | import os
19 | import sys
20 |
21 | import dfxml.objects as Objects
22 |
23 |
def main():
    """Assert that the parsed DFXML reports the expected program name and version.

    The input path and both expected values are read from the module-level
    ``args``.
    """
    parsed = Objects.parse(args.in_dfxml)

    actual_program = parsed.program
    assert actual_program == args.expected_program

    actual_version = parsed.program_version
    assert actual_version == args.expected_program_version
28 |
29 |
if __name__ == "__main__":
    import argparse

    # Three required positionals, in this order; main() reads them from the
    # module-level ``args``.
    parser = argparse.ArgumentParser()
    for positional in ("in_dfxml", "expected_program", "expected_program_version"):
        parser.add_argument(positional)
    args = parser.parse_args()

    main()
40 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diff_file_ignore_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 |
20 | import dfxml.objects as Objects
21 |
22 |
def main():
    """Assert that the parsed DFXML lists "atime" and "crtime" in diff_file_ignores.

    The input path is read from the module-level ``args.in_dfxml``.
    """
    log = logging.getLogger(os.path.basename(__file__))

    parsed = Objects.parse(args.in_dfxml)
    assert parsed is not None

    log.debug("dobj.diff_file_ignores = %r." % parsed.diff_file_ignores)
    for ignored_property in ("atime", "crtime"):
        assert ignored_property in parsed.diff_file_ignores
30 |
31 |
if __name__ == "__main__":
    import argparse

    # ``args`` stays a module-level name because main() reads it.
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true")
    parser.add_argument("in_dfxml")
    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)
    main()
41 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/VolumeObject_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.1.1"
17 |
18 | import logging
19 | import os
20 | import sys
21 |
22 | import libtest
23 |
24 | import dfxml.objects as Objects
25 |
26 | _logger = logging.getLogger(os.path.basename(__file__))
27 |
28 |
def test_empty_object():
    """Round-trip a DFXMLObject holding one empty VolumeObject through a file."""
    document = Objects.DFXMLObject()
    document.append(Objects.VolumeObject())

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    tmp_filename, document_reconst = round_trip
    try:
        volume_reconst = document_reconst.volumes[0]
    except:
        # On failure the temporary file is not removed; log where it is and
        # let the error propagate.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    os.remove(tmp_filename)
42 |
--------------------------------------------------------------------------------
/demos/vmstats/vmstats_decode.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | History for {{host}}
5 |
6 |
7 |
14 |
15 |
16 |
21 |
22 |
Stats
23 |
24 | | CPU Utilization: | {{cpu_percent}} % |
25 | | Mem Utilization: | {{mem_percent}} % |
26 |
27 |
28 |
29 |
30 |
Processes
31 |
32 | | | CPU Time |
33 | | PID | NAME | User | System |
34 |
35 | {% for ps in ps_list %}
36 | | {{ps.pid}} | {{ps.name}} | {{ps.user}} | {{ps.system}} |
37 | {% endfor %}
38 |
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/Makefile_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed in whole or in part by employees of the
4 | # Federal Government in the course of their official duties, and with
5 | # other Federal assistance. Pursuant to title 17 Section 105 of the
6 | # United States Code portions of this software authored by Federal
7 | # employees are not subject to copyright protection within the United
8 | # States. For portions not authored by Federal employees, the Federal
9 | # Government has been granted unlimited rights, and no claim to
10 | # copyright is made. The Federal Government assumes no responsibility
11 | # whatsoever for its use by other parties, and makes no guarantees,
12 | # expressed or implied, about its quality, reliability, or any other
13 | # characteristic.
14 | #
15 | # We would appreciate acknowledgement if the software is used.
16 |
17 | # run 'make check' and 'make clean' under py.test
18 |
19 | # TODO Some of the tests in the Makefile are currently known to be redundantly called when using py.test.
20 |
21 | import os
22 | import subprocess
23 | import sys
24 |
25 |
def test_make_all():
    """Run ``make check`` in this file's directory and fail if it exits non-zero.

    Skipped (returns immediately) on win32.
    """
    if sys.platform == "win32":
        return  # don't run on win32
    os.chdir(os.path.dirname(__file__))
    # check_call raises CalledProcessError on a non-zero exit status, so a
    # failing Makefile target fails this test instead of passing silently.
    subprocess.check_call(["make", "check"])
31 |
32 |
def test_make_clean():
    """Run ``make clean`` in this file's directory and fail if it exits non-zero.

    Skipped (returns immediately) on win32.
    """
    if sys.platform == "win32":
        return  # don't run on win32
    os.chdir(os.path.dirname(__file__))
    # check_call raises CalledProcessError on a non-zero exit status, so a
    # failing Makefile target fails this test instead of passing silently.
    subprocess.check_call(["make", "clean"])
38 |
--------------------------------------------------------------------------------
/samples/tcpflow_zip_generic_header.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 | Sample
9 |
10 |
11 | vi
12 | 8.0
13 |
14 | vi tcpflow_zip_generic_header.xml
15 |
16 |
17 |
18 | ../../tcpflow/tests/airsnort-linux-browser_page_load.pcap
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 | 205.134.188.162.00080-008.030.072.112.38568
29 | 4135
30 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/LibraryObject_read_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology by employees of the Federal Government in the course
3 | # of their official duties. Pursuant to title 17 Section 105 of the
4 | # United States Code this software is not subject to copyright
5 | # protection and is in the public domain. NIST assumes no
6 | # responsibility whatsoever for its use by other parties, and makes
7 | # no guarantees, expressed or implied, about its quality,
8 | # reliability, or any other characteristic.
9 | #
10 | # We would appreciate acknowledgement if the software is used.
11 |
12 | """
13 | Run test against DFXML file generated by the _write counterpart script.
14 | """
15 |
16 | __version__ = "0.1.1"
17 |
18 | import logging
19 | import os
20 | import sys
21 |
22 | import dfxml
23 | import dfxml.objects as Objects
24 |
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    # The DFXML file to check is the first command-line argument.
    dobj = Objects.parse(sys.argv[1])

    _logger.debug("dobj.creator_libraries = %r." % dobj.creator_libraries)

    # Membership checks with both constructor arguments supplied.
    assert Objects.LibraryObject("libfoo", "1.2.3") in dobj.creator_libraries
    assert Objects.LibraryObject("libbaz", "4.5") in dobj.build_libraries

    # At least one creator library must satisfy relaxed_eq against a
    # LibraryObject built from the name alone.
    assert any(
        library.relaxed_eq(Objects.LibraryObject("libfoo"))
        for library in dobj.creator_libraries
    )
42 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diffing_HiveObject_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import copy
17 | import logging
18 | import os
19 | import sys
20 |
21 | import dfxml.objects as Objects
22 |
23 |
def get_ho():
    """Return a new HiveObject with mtime set to "2010-01-02T03:45:00Z"."""
    hive = Objects.HiveObject()
    hive.mtime = "2010-01-02T03:45:00Z"
    return hive
28 |
29 |
def test_all():
    """Check compare_to_other: no diffs for a deep copy, one diff after an mtime change."""
    log = logging.getLogger(os.path.basename(__file__))
    logging.basicConfig(level=logging.DEBUG)

    reference = get_ho()
    candidate = copy.deepcopy(reference)

    # An untouched deep copy must show no differences.
    diffs_when_equal = candidate.compare_to_other(reference)
    log.debug(repr(diffs_when_equal))
    assert len(diffs_when_equal) == 0

    candidate.mtime = "2011-01-02T03:45:00Z"

    # After changing only mtime, exactly one difference is expected.
    diffs_after_change = candidate.compare_to_other(reference)
    log.debug(repr(diffs_after_change))
    assert len(diffs_after_change) == 1
47 |
--------------------------------------------------------------------------------
/dfxml/bin/mem_info.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | mem_info.py: report the memory used by a program that wrote results to a dfxml file
4 | """
5 |
6 |
7 | import sys
8 | import xml.etree.ElementTree as ET
9 |
10 |
def fmt(n):
    """Format *n* for display, optionally with a power-of-1000 suffix.

    When the module-level ``args.h`` flag is set and *n* is at least 1000,
    return a string made of *n* divided by the largest fitting power of 1000
    followed by the matching letter (K, M, G, T or P), e.g. ``"1.5M"``.
    Otherwise return *n* unchanged.
    """
    if args.h:
        # reversed() takes exactly one sequence, so the unit table is passed
        # as a single tuple.  Walking it largest-first selects the biggest
        # unit that fits; ">=" makes exact powers (e.g. 10**6) use their own
        # unit rather than the next smaller one.
        units = ((3, "K"), (6, "M"), (9, "G"), (12, "T"), (15, "P"))
        for p, let in reversed(units):
            if n >= 10**p:
                return f"{n/10**p}{let}"
    return n
17 |
18 |
def process_dfxml(dfxml):
    """Print the start time, summed maxrss and command line found in *dfxml*.

    *dfxml* is passed straight to ``ET.parse``.  The first 19 characters of
    the ``start_time`` text are printed with "T" replaced by a space, the
    ``command_line`` text is printed without its first whitespace-separated
    word, and every ``rusage/maxrss`` value is added up and passed through
    fmt() before printing.
    """
    tree = ET.parse(dfxml)
    start_time = tree.find(".//start_time").text[0:19].replace("T", " ")
    command_words = tree.find(".//command_line").text.split()
    command_line = " ".join(command_words[1:])
    maxrss = sum(int(node.text) for node in tree.findall(".//rusage/maxrss"))
    print(start_time, fmt(maxrss), command_line)
27 |
28 |
if __name__ == "__main__":
    from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

    # ``args`` stays a module-level name because fmt() reads ``args.h``.
    parser = ArgumentParser(
        description="report memory utilization from DFXML file",
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--h", help="human format", action="store_true")
    parser.add_argument("dfxml", nargs="*")
    args = parser.parse_args()

    # Files that fail XML parsing are collected and reported together at the
    # end instead of aborting the run.
    bad_files = []
    for fname in args.dfxml:
        try:
            process_dfxml(fname)
        except ET.ParseError:
            bad_files.append(fname)
    if bad_files:
        print("Could not read:", *bad_files, sep="\n", file=sys.stderr)
48 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/CellObject_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 |
20 | import dfxml.objects as Objects
21 |
22 |
def test_all():
    """Check that CellObject.data_type accepts None, an int and a str, and rejects a float."""
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    co = Objects.CellObject()

    _logger.debug("co = %r" % co)
    _logger.debug("co.to_regxml() = %r" % co.to_regxml())

    co.name_type = "v"

    # Test value-type tolerance of data_type: should be null, strs and ints.
    co.data_type = None
    co.data_type = 0
    co.data_type = "REG_NONE"

    # A float must be rejected.  Catch Exception rather than using a bare
    # except so that KeyboardInterrupt/SystemExit are not mistaken for the
    # expected rejection.
    try:
        co.data_type = 0.1
    except Exception:
        failed = True
    else:
        failed = False
    assert failed


if __name__ == "__main__":
    test_all()
49 |
--------------------------------------------------------------------------------
/demos/demo_spark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Shows how DFXML works with spark.
4 | # This program runs spark if it is not already running
5 |
6 |
7 | import os
8 | import sys
9 |
10 | sys.path.append("../python")
11 | from dfxml_writer import DFXMLWriter
12 |
13 |
def spark_demo():
    """A small Spark program. Must be run under Spark.

    Adds up the integers 0 through 1000000 with a parallelized reduce,
    prints the total and asserts that it equals 500000500000.
    """
    from operator import add

    from pyspark import SparkConf, SparkContext

    upper = 1000000
    context = SparkContext(conf=SparkConf())
    total = context.parallelize(range(0, upper + 1)).reduce(add)
    print(f"The sum of the numbers 0 to {upper} is {total}")
    assert total == 500000500000
26 |
27 |
def run_spark():
    """Make sure the script is running under Spark.

    Returns immediately when the SPARK_ENV_LOADED environment variable is
    present.  Otherwise re-runs this script with the same command-line
    arguments through ``spark-submit``, asserts that the child returned 0,
    and exits with status 0.
    """
    import os
    import subprocess

    already_under_spark = "SPARK_ENV_LOADED" in os.environ
    if not already_under_spark:
        # Re-run this script under Spark, and then exit.
        submit_command = ["spark-submit", __file__] + sys.argv[1:]
        completed = subprocess.run(submit_command)
        assert completed.returncode == 0
        exit(0)
44 |
45 |
if __name__ == "__main__":
    import argparse
    import time

    # No options are defined; the parser is still run over the command line.
    parser = argparse.ArgumentParser()
    args = parser.parse_args()

    run_spark()

    output_name = f"demo_spark_{int(time.time())}.dfxml"
    dfxml = DFXMLWriter(filename=output_name, prettyprint=True)
    spark_demo()
    # DFXML file gets written automatically when program exits.
    exit(0)
61 |
--------------------------------------------------------------------------------
/dfxml/bin/nsrl_rds.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Demonstrates how to communicate with NPS NSRL RDS
4 | #
5 |
6 | RDS_SERVER = "https://domex.nps.edu/www-noauth/nsrl_rds.cgi"
7 |
8 | import xmlrpclib
9 |
if __name__ == "__main__":
    # Demonstration: list the available RDS sets, then query the first one by
    # a single MD5 and by a list of MD5s.

    def show_rows(reply):
        """Print every row of *reply* as ``field = value`` lines plus a blank line."""
        names = reply["fields"]
        for row in reply["result"]:
            for name, value in zip(names, row):
                print(name, "=", value)
            print("")

    print("Demonstration of NSRL RDS service at %s\n" % RDS_SERVER)
    print("")
    proxy = xmlrpclib.ServerProxy(RDS_SERVER)
    try:
        avail = proxy.available()
    except xmlrpclib.ProtocolError as e:
        print("Cannot access " + RDS_SERVER)
        print(e)
        raise RuntimeError

    print("Available RDS sets: %s " % avail)

    md5_val = "EB714443AA2FC1A3D16E39EB8007A0B2"

    # Build a search term against the first available set.
    single_query = {"db": avail[0], "md5": md5_val}

    print("Here are the files with a md5 of " + md5_val)
    show_rows(proxy.search(single_query))

    print(
        "Now we will do a query for multiple MD5 values. You can do this by specifying\n"
        + "a value as an array."
    )
    multi_query = {
        "db": avail[0],
        "md5": ["EB714443AA2FC1A3D16E39EB8007A0B2", "9B3702B0E788C6D62996392FE3C9786A"],
    }
    print("sending:", multi_query)
    multi_reply = proxy.search(multi_query)
    print("got:", multi_reply)
    show_rows(multi_reply)
52 |
--------------------------------------------------------------------------------
/demos/spark/demo_spark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # Shows how DFXML works with Spark.
4 | # This program runs Spark if it is not already running
5 |
6 |
7 | import os
8 | import sys
9 |
10 | sys.path.append("../python")
11 | from dfxml_writer import DFXMLWriter
12 |
13 |
def spark_demo():
    """A small Spark program. Must be run under Spark.

    Adds up the integers 0 through 1000000 with a parallelized reduce,
    prints the total and asserts that it equals 500000500000.
    """
    from operator import add

    from pyspark import SparkConf, SparkContext

    upper = 1000000
    context = SparkContext(conf=SparkConf())
    total = context.parallelize(range(0, upper + 1)).reduce(add)
    print(f"The sum of the numbers 0 to {upper} is {total}")
    assert total == 500000500000
26 |
27 |
def run_spark():
    """Make sure the script is running under Spark.

    Returns immediately when the SPARK_ENV_LOADED environment variable is
    present.  Otherwise re-runs this script with the same command-line
    arguments through ``spark-submit``, asserts that the child returned 0,
    and exits with status 0.
    """
    import os
    import subprocess

    already_under_spark = "SPARK_ENV_LOADED" in os.environ
    if not already_under_spark:
        # Re-run this script under Spark, and then exit.
        submit_command = ["spark-submit", __file__] + sys.argv[1:]
        completed = subprocess.run(submit_command)
        assert completed.returncode == 0
        exit(0)
44 |
45 |
if __name__ == "__main__":
    import argparse
    import time

    # No options are defined; the parser is still run over the command line.
    parser = argparse.ArgumentParser()
    args = parser.parse_args()

    run_spark()

    output_name = f"demo_spark_{int(time.time())}.dfxml"
    dfxml = DFXMLWriter(filename=output_name, prettyprint=True)
    spark_demo()
    # DFXML file gets written automatically when program exits.
    exit(0)
61 |
--------------------------------------------------------------------------------
/dfxml/bin/iextract.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import datetime
4 | import os
5 | import os.path
6 | import sys
7 | import zipfile
8 |
9 | import dfxml
10 | import dfxml.fiwalk as fiwalk
11 |
if __name__ == "__main__":
    # Command-line entry point: copy the named files out of a disk image into
    # a new zip archive, matching on lower-cased base name.
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        "-x",
        "--xml",
        dest="xmlfilename",
        help="Already-created DFXML file for imagefile",
    )
    parser.usage = "%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... in imagefile and write to zipfile"
    (options, args) = parser.parse_args()

    if len(args) < 3:
        parser.print_help()
        sys.exit(1)

    imagefilename = args[0]
    xmlfilename = options.xmlfilename
    zipfilename = args[1]
    targets = {fn.lower() for fn in args[2:]}

    # Open the optional pre-built DFXML first, so that a bad -x path fails
    # before the output archive is created.
    xmlfh = None
    if xmlfilename is not None:
        xmlfh = open(xmlfilename, "r")
    try:
        # The context managers close the archive and the image handle even if
        # the walk raises, instead of leaving that to interpreter shutdown.
        with zipfile.ZipFile(zipfilename, "w", allowZip64=True) as zfile:

            def proc(fi):
                """Add *fi* to the archive when its base name is a requested target."""
                basename = os.path.basename(fi.filename()).lower()
                if basename in targets:
                    info = zipfile.ZipInfo(
                        fi.filename(),
                        datetime.datetime.fromtimestamp(
                            fi.mtime().timestamp()
                        ).utctimetuple(),
                    )
                    info.internal_attr = 1
                    info.external_attr = 2175008768  # specifies mode 0644
                    zfile.writestr(info, fi.contents())

            with open(imagefilename) as imagefile:
                fiwalk.fiwalk_using_sax(
                    imagefile=imagefile, xmlfile=xmlfh, callback=proc
                )
    finally:
        if xmlfh is not None:
            xmlfh.close()
50 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/VolumeObject_hash_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.1.1"
17 |
18 | import logging
19 | import os
20 | import sys
21 |
22 | import dfxml.objects as Objects
23 |
24 |
def test_all():
    """Check VolumeObject set membership: two instances count twice, one shared instance once."""
    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger(os.path.basename(__file__))

    first_volume = Objects.VolumeObject()
    second_volume = Objects.VolumeObject()

    # Two separately constructed volumes must both be kept by a set.
    distinct_volumes = {first_volume, second_volume}
    log.debug("len(s0) = %r" % len(distinct_volumes))
    assert len(distinct_volumes) == 2

    # Two files that point at the same volume object.
    first_file = Objects.FileObject()
    second_file = Objects.FileObject()
    first_file.volume_object = first_volume
    second_file.volume_object = first_volume

    # Reading the volume back through either file must give one set member.
    shared_volumes = {first_file.volume_object, second_file.volume_object}
    log.debug("len(s1) = %r" % len(shared_volumes))
    assert len(shared_volumes) == 1
50 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/differential_dfxml_test_by_path_01.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | New files:
4 | ==========
5 | 2013-05-16T21:01:00Z i_am_new.txt 40
6 |
7 |
8 | Deleted files:
9 | ==============
10 | 2013-01-01T00:00:00Z i_will_be_deleted.txt 20
11 |
12 |
13 | Renamed files:
14 | ==============
15 |
16 |
17 | Files with modified contents:
18 | =============================
19 |
20 | i_will_be_modified.txt mtime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z
21 | i_will_be_modified.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z
22 | i_will_be_modified.txt ctime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z
23 | i_will_be_modified.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=22)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=23)])
24 | i_will_be_modified.txt filesize changed, 22 23
25 | i_will_be_modified.txt md5 changed, e91577092351461d7800ef7b870a2bcf a6d9ebd95bcd3602b757ea63f9dd02ab
26 | i_will_be_modified.txt sha1 changed, 44e426344f15bd7621ca2f9ffea70d29752dccda 1e087807678a33ebbde2624341184c14303675a3
27 | i_will_be_modified.txt sha256 changed, 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281
28 |
29 |
30 | Files with changed properties:
31 | ==============================
32 |
33 | i_will_be_accessed.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T21:00:00Z
34 | i_will_be_accessed.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=34512, file_offset=0, len=12)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=12)])
35 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/differential_dfxml_test_by_times_01.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | New files:
4 | ==========
5 | 2013-05-16T21:01:00Z i_am_new.txt 40
6 |
7 |
8 | Deleted files:
9 | ==============
10 | 2013-01-01T00:00:00Z i_will_be_deleted.txt 20
11 |
12 |
13 | Renamed files:
14 | ==============
15 |
16 |
17 | Files with modified contents:
18 | =============================
19 |
20 | i_will_be_modified.txt mtime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z
21 | i_will_be_modified.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z
22 | i_will_be_modified.txt ctime changed, 2013-01-01T00:00:00Z -> 2013-05-16T20:59:00Z
23 | i_will_be_modified.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=22)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=23)])
24 | i_will_be_modified.txt filesize changed, 22 23
25 | i_will_be_modified.txt md5 changed, e91577092351461d7800ef7b870a2bcf a6d9ebd95bcd3602b757ea63f9dd02ab
26 | i_will_be_modified.txt sha1 changed, 44e426344f15bd7621ca2f9ffea70d29752dccda 1e087807678a33ebbde2624341184c14303675a3
27 | i_will_be_modified.txt sha256 changed, 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357 e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281
28 |
29 |
30 | Files with changed properties:
31 | ==============================
32 |
33 | i_will_be_accessed.txt atime changed, 2013-01-01T00:00:00Z -> 2013-05-16T21:00:00Z
34 | i_will_be_accessed.txt data_brs changed, ByteRuns(run_list=[ByteRun(img_offset=34512, file_offset=0, len=12)]) ByteRuns(run_list=[ByteRun(img_offset=234512, file_offset=0, len=12)])
35 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/_pick_pythons.sh:
--------------------------------------------------------------------------------
1 |
2 | # This software was developed at the National Institute of Standards
3 | # and Technology in whole or in part by employees of the Federal
4 | # Government in the course of their official duties. Pursuant to
5 | # title 17 Section 105 of the United States Code portions of this
6 | # software authored by NIST employees are not subject to copyright
7 | # protection and are in the public domain. For portions not authored
8 | # by NIST employees, NIST has been granted unlimited rights. NIST
9 | # assumes no responsibility whatsoever for its use by other parties,
10 | # and makes no guarantees, expressed or implied, about its quality,
11 | # reliability, or any other characteristic.
12 | #
13 | # We would appreciate acknowledgement if the software is used.
14 |
15 | # This script is meant to be included in Bash scripts that need a Python v2 and v3.
16 | # An autotool configure script would also suffice.
17 | # The 'or echo' statements keep the subshell from returning an error exit status on missing a Python version.
18 | #
19 | # This script defines two variables, PYTHON2 and PYTHON3, providing the highest-available Python binary for each major version.
20 | #
21 |
# python2 is optional.  Guard the lookup with 'or echo' (as documented in the
# header) so that a missing interpreter leaves PYTHON2 empty instead of
# returning a failing status to an including script that runs under `set -e`.
PYTHON2=`which python2 2>/dev/null || echo`

# Probe the python3 candidates in preference order; the first hit wins.
PYTHON3=
for _pick_pythons_candidate in python3.6 python3.5 python3.4 python3; do
  PYTHON3=`which "$_pick_pythons_candidate" 2>/dev/null || echo`
  if [ -n "$PYTHON3" ]; then
    break
  fi
done
unset _pick_pythons_candidate

if [ -z "$PYTHON3" ]; then
  echo "Error: Could not find a python3 executable." >&2
  # This file is sourced, so this exit terminates the including script.
  exit 1
fi
38 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/LibraryObject_write_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology by employees of the Federal Government in the course
3 | # of their official duties. Pursuant to title 17 Section 105 of the
4 | # United States Code this software is not subject to copyright
5 | # protection and is in the public domain. NIST assumes no
6 | # responsibility whatsoever for its use by other parties, and makes
7 | # no guarantees, expressed or implied, about its quality,
8 | # reliability, or any other characteristic.
9 | #
10 | # We would appreciate acknowledgement if the software is used.
11 |
12 | __version__ = "0.1.1"
13 |
14 | import logging
15 | import os
16 | import sys
17 |
18 | import dfxml
19 | import dfxml.objects as Objects
20 |
if __name__ == "__main__":
    # Script entry point: build a DFXMLObject carrying several creator/build
    # library records and serialize it to the path given as argv[1].
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    lobj = Objects.LibraryObject()

    _logger.debug("lobj = {!r}".format(lobj))
    _logger.debug("lobj.to_Element() = {!r}".format(lobj.to_Element()))

    dobj = Objects.DFXMLObject()

    # A library can be registered either as an object or as name + version.
    dobj.add_creator_library(lobj)
    dobj.add_creator_library("libfoo", "1.2.3")

    # A bit of a bend, but gets the major version information out.
    python_version = ".".join(str(part) for part in sys.version_info[0:3])
    dobj.add_creator_library("Python", python_version)

    # Passing None as the version is deliberate; a ValueError is tolerated.
    try:
        dobj.add_creator_library("libbar", None)
    except ValueError:
        _logger.info("Caught expected value error from passing in incorrect types.")

    dobj.add_build_library("libbaz", "4.5")

    with open(sys.argv[1], "w") as out_fh:
        dobj.print_dfxml(out_fh)
45 |
--------------------------------------------------------------------------------
/demos/demo_piecewise.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.2
2 |
3 | #
4 | # Demo program that prints piecewise hashes and reports on co-occurrence of hashes.
5 | #
6 | # Multimap from http://stackoverflow.com/questions/1731971/is-there-multimap-implementation-in-python
7 |
8 | import os
9 | import sys
10 |
11 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
12 | import collections
13 | import math
14 | import sys
15 |
16 | import dfxml
17 |
18 |
class SectorCorrelator:
    """Collect per-byte-run MD5 digests and report digests seen more than once.

    Attributes:
        hashdb: maps an MD5 digest to a list of (filename, file_offset) tuples.
        files: number of file objects passed to process().
        sectors: number of byte runs seen across all processed files.
    """

    def __init__(self):
        # key is the MD5 code, value is a list of matches
        self.hashdb = collections.defaultdict(list)
        self.files = 0
        self.sectors = 0

    def process(self, fi):
        """Process the objects as they are read from the XML file.

        ``fi`` must provide ``filename()`` and ``byte_runs()``; every byte run
        must expose ``hashdigest["md5"]`` and ``file_offset``.  The file name
        is printed as each object is processed.
        """
        self.files += 1
        name = fi.filename()
        print(name)
        for br in fi.byte_runs():
            self.sectors += 1
            self.hashdb[br.hashdigest["md5"]].append((name, br.file_offset))

    def print_report(self):
        """Print the counters, then every digest recorded more than once."""
        print("Files processed: {}".format(self.files))
        print("Sectors processed: {}".format(self.sectors))
        print("")
        print("The following duplicates were found:")
        print("Hash Filename Offset in file")
        # `digest` rather than `hash`, to avoid shadowing the builtin.
        for digest, ents in self.hashdb.items():
            if len(ents) > 1:
                print("{} -- {} copies found".format(digest, len(ents)))
                for e in sorted(ents):
                    print(" {} {:8,}".format(e[0], e[1]))
                print("")
47 |
48 |
# Feed every file object of the DFXML file named in argv[1] to the correlator,
# then print the duplicate report.
sc = SectorCorrelator()
# Use a context manager so the input handle is closed once parsing finishes.
with open(sys.argv[1], "rb") as xmlfile:
    dfxml.read_dfxml(xmlfile=xmlfile, callback=sc.process)
sc.print_report()
52 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diffing_ByteRuns_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import copy
17 | import logging
18 | import os
19 | import sys
20 |
21 | import dfxml.objects as Objects
22 |
23 |
def get_brs():
    """Return a ByteRuns holding one ByteRun (file_offset=4128, len=133).

    Logging is configured at DEBUG level as a side effect.
    """
    logging.basicConfig(level=logging.DEBUG)

    br = Objects.ByteRun()
    br.file_offset = 4128
    br.len = 133
    brs = Objects.ByteRuns()
    brs.append(br)
    return brs
34 |
35 |
def test_all():
    """A deep copy of a ByteRuns equals its source until a run is altered."""
    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger(os.path.basename(__file__))

    reference = get_brs()
    shifted = copy.deepcopy(reference)

    # An untouched deep copy must compare equal.
    log.debug("brs = {!r}.".format(reference))
    log.debug("cbrs1 = {!r}.".format(shifted))
    assert shifted == reference

    # Moving the run's file offset must break equality.
    shifted[0].file_offset = shifted[0].file_offset + 133
    log.debug("cbrs1 = {!r}.".format(shifted))
    assert shifted != reference

    # Setting a type on the run must break equality as well.
    retyped = copy.deepcopy(reference)
    retyped[0].type = "unknown"
    assert retyped != reference
53 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/test_TCPFlowObjects.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | import logging
15 | import os.path
16 | import pathlib
17 | import sys
18 |
19 | import pytest
20 |
21 | # TODO - It seems TCPFlowObjects might be better served from /dfxml instead of /dfxml/bin.
22 | import dfxml.bin.TCPFlowObjects
23 | import dfxml.objects as Objects
24 |
25 |
@pytest.fixture
def top_srcdir() -> pathlib.Path:
    """Return the path two directory levels above this test file (unresolved)."""
    here = pathlib.Path(__file__).parent
    return here.joinpath("..", "..")
30 |
31 |
def test_TCPFlowObjects(top_srcdir: pathlib.Path) -> None:
    """Each FileObject in the tcpflow sample must yield exactly one scanner result."""
    path_to_sample = top_srcdir.joinpath("samples", "tcpflow_zip_generic_header.xml")
    sample_found = path_to_sample.exists()
    assert sample_found, "Hard-coded path from test to sample is no longer valid."

    for _event, parsed in Objects.iterparse(str(path_to_sample)):
        # Only FileObjects carry flow data; everything else is skipped.
        if isinstance(parsed, Objects.FileObject):
            results = dfxml.bin.TCPFlowObjects.scanner_results_from_FileObject(parsed)
            assert len(results) == 1
            # TODO - This could do with a better presentation in relation to the pytest framework.
            print("Flow name: %r." % parsed.filename)
            for result in results:
                result.print_report()
47 |
--------------------------------------------------------------------------------
/demos/demo_mac_timeline_iter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | # produce a MAC-times timeline using the iterative DFXML interface.
17 | # works under either Python2 or Python3
18 |
19 | import os
20 | import sys
21 |
22 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
23 | import dfxml
24 |
25 |
def main():
    """Print a MAC-times timeline for the DFXML file named in sys.argv[1].

    One tab-separated line (timestamp, filename, event label) is printed for
    every non-None mtime/crtime/ctime/atime of each file object, in sorted
    order.  With no file argument, a usage line is printed and the process
    exits with status 1.
    """
    if len(sys.argv) < 2:
        print("Usage: {} <dfxml_file>".format(sys.argv[0]))
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(1)

    timeline = []

    # Close the input deterministically once iteration is done.
    with open(sys.argv[1], "rb") as xmlfile:
        for fi in dfxml.iter_dfxml(xmlfile=xmlfile):
            stamps = (
                (fi.mtime(), " modified"),
                (fi.crtime(), " created"),
                (fi.ctime(), " changed"),
                (fi.atime(), " accessed"),
            )
            for when, label in stamps:
                # Identity test: absent timestamps are None.
                if when is not None:
                    timeline.append([when, fi.filename(), label])

    timeline.sort()

    for record in timeline:
        print("\t".join(map(str, record)))


if __name__ == "__main__":
    main()
51 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_dfxml_tool.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 |
# Determine script location (quoted so paths containing spaces survive)
SCRIPT="$(realpath "$0")"
SCRIPT_DIR="$(dirname "${SCRIPT}")"

# Guarantee sane defaults
# NOTE(review): TEST_DIR and TOOL_DIR used below are presumably defined by
# _sane_defaults.sh - confirm.
. "${SCRIPT_DIR}/_sane_defaults.sh" "${SCRIPT_DIR}"

# Choose python interpreter
. "${TEST_DIR}/_pick_pythons.sh"

# Halt on error
set -e
# Display all executed commands
set -x

# Flags listed here in alphabetical order
DT_OPTIONS[0]=
DT_OPTIONS[1]=--allprovenance
DT_OPTIONS[2]=--commandline
DT_OPTIONS[3]=--includedirs
DT_OPTIONS[4]=--iso-8601
DT_OPTIONS[5]=--md5
DT_OPTIONS[6]=--nofilenames
DT_OPTIONS[7]=--nometadata
DT_OPTIONS[8]=--pythonversion
DT_OPTIONS[9]=--sha1
DT_OPTIONS[10]=--sha256
DT_OPTIONS[11]="--stripleaddirs 1"
DT_OPTIONS[12]="--stripprefix .."

iter=0
for x in "${DT_OPTIONS[@]}"; do
  echo "Iteration $iter: Testing $x" >&2
  # "$PYTHON2" ../dfxml_tool.py $x ../src > dfxml_tool_p2_${iter}.dfxml
  # $x is intentionally unquoted: entries such as "--stripleaddirs 1" must be
  # split into flag and value, and the empty entry must expand to nothing.
  "$PYTHON3" "${TOOL_DIR}/dfxml_tool.py" $x -- ../samples > "dfxml_tool_p3_${iter}.dfxml"
  iter=$((iter + 1))
done
54 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diffing_VolumeObject_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.0"
15 |
16 | import logging
17 | import os
18 | import sys
19 |
20 | import dfxml
21 | import dfxml.objects as Objects
22 |
23 |
def test_all():
    """compare_to_other() must name exactly the VolumeObject property that differs.

    Two copies of a populated volume are made, one through repr()/eval() and
    one through an Element round trip; one property is changed on each.
    """
    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger(os.path.basename(__file__))

    v0 = Objects.VolumeObject()

    v0.sector_size = 512
    v0.block_size = 4096
    v0.partition_offset = 32256
    v0.ftype = -1
    assert v0.ftype == -1
    v0.ftype_str = 1
    v0.block_count = 100000
    v0.allocated_only = False
    v0.first_block = 0
    v0.last_block = v0.block_count

    # Copy 1: rebuild from the repr() text.
    v0_repr = repr(v0)
    log.debug(v0_repr)
    v1 = eval("Objects." + repr(v0))

    # Copy 2: rebuild from the Element serialization.
    e0 = v0.to_Element()
    log.debug("e0 = {!r}".format(e0))
    v2 = Objects.VolumeObject()
    v2.populate_from_Element(e0)

    # Introduce exactly one difference in each copy.
    v1.block_size = 512
    volume_span = v0.block_count * v0.block_size
    v2.partition_offset = v0.partition_offset + volume_span

    d01 = v0.compare_to_other(v1)
    d02 = v0.compare_to_other(v2)

    log.debug("d01 = {!r}".format(d01))
    assert d01 == {"block_size"}

    log.debug("d02 = {!r}".format(d02))
    assert d02 == {"partition_offset"}
61 |
--------------------------------------------------------------------------------
/demos/demo_mac_timeline_objects.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | # produce a MAC-times timeline using the DFXML Objects interface.
17 | # works under either Python2 or Python3
18 |
19 | import os
20 | import sys
21 |
22 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
23 | import dfxml
24 | import dfxml.objects as Objects
25 |
26 |
def main():
    """Print a MAC-times timeline for the DFXML file named in sys.argv[1].

    Only FileObjects are considered.  One tab-separated line (timestamp,
    filename, event label) is printed for every non-None
    mtime/crtime/ctime/atime, in sorted order.  With no file argument, a usage
    line is printed and the process exits with status 1.
    """
    if len(sys.argv) < 2:
        print("Usage: {} <dfxml_file>".format(sys.argv[0]))
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(1)

    timeline = []

    for _event, obj in Objects.iterparse(sys.argv[1]):
        # Only work on FileObjects
        if not isinstance(obj, Objects.FileObject):
            continue
        stamps = (
            (obj.mtime, " modified"),
            (obj.crtime, " created"),
            (obj.ctime, " changed"),
            (obj.atime, " accessed"),
        )
        for when, label in stamps:
            if when is not None:
                timeline.append([when, obj.filename, label])

    timeline.sort()

    for record in timeline:
        print("\t".join(map(str, record)))


if __name__ == "__main__":
    main()
55 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/FileObject_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.1.1"
17 |
18 | import logging
19 | import os
20 | import sys
21 |
22 | import libtest
23 |
24 | import dfxml.objects as Objects
25 |
26 | _logger = logging.getLogger(os.path.basename(__file__))
27 |
28 |
def test_empty_file_object() -> None:
    """A FileObject with no properties set must survive a file round trip."""
    document = Objects.DFXMLObject()
    empty_file = Objects.FileObject()
    document.append(empty_file)

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    tmp_filename, document_reconst = round_trip
    try:
        file_reconst = document_reconst.files[0]
        assert empty_file == file_reconst
    except BaseException:
        # Log where the temporary file is; it is left on disk on failure.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
43 |
44 |
def test_blank_file_object_filename() -> None:
    """A FileObject whose filename is the empty string must survive a file round trip."""
    document = Objects.DFXMLObject()
    blank_named = Objects.FileObject()
    document.append(blank_named)

    # The filename is assigned after the object was appended to the document.
    blank_named.filename = ""

    # Do file I/O round trip.
    round_trip = libtest.file_round_trip_dfxmlobject(document)
    tmp_filename, document_reconst = round_trip
    try:
        file_reconst = document_reconst.files[0]
        assert blank_named == file_reconst
    except BaseException:
        # Log where the temporary file is; it is left on disk on failure.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
61 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_idifference.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Test script. Evaluates idifference.py on a sequence of disk images.
4 | """
5 |
6 | import os
7 | import subprocess
8 | import sys
9 |
if __name__ == "__main__":
    # Command-line driver: read a list of image paths (one per line) and run
    # the difference program over the whole sequence, writing a zip file.
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = "%prog [options] dfxml_sequence_list.txt output_zip"
    parser.add_option(
        "-p", "--prefix", help="prepend prefix to every test image path", dest="prefix"
    )
    parser.add_option(
        "-v",
        "--verbose",
        help="verbose output: print call to difference program",
        dest="verbose",
        action="store_true",
    )
    parser.add_option(
        "-d",
        "--diff-program",
        help="use this path to the diff program",
        dest="diff_program",
    )
    # parser.add_option("-z", "--zap", help="Zap output directory (erases if present)" dest="zap")

    (options, args) = parser.parse_args()
    if len(args) < 2:
        parser.print_help()
        sys.exit(1)

    prefix = options.prefix or ""

    # Convert file contents to list; close the list file once it is read.
    with open(args[0], "r") as list_fh:
        files = [prefix + line.strip() for line in list_fh]

    # Verify we'll run at least one difference; stop instead of invoking the
    # difference program on an input that cannot be differenced.
    if len(files) < 2:
        sys.stderr.write("Differencing requires 2 or more files.\n")
        sys.exit(1)

    # Check that the list lines actually point to files
    for f in files:
        assert os.path.isfile(f), "Not a file: %r" % f

    # Run differences
    if options.diff_program:
        diff_program = options.diff_program
    else:
        # Default to the idifference.py next to this script.
        diff_program = os.path.dirname(sys.argv[0]) + "/idifference.py"

    diff_command = [
        "python",
        diff_program,
        "--zipfile=" + args[1],
        "--imagefile",
    ] + files
    if options.verbose:
        print(" ".join(diff_command))
    subprocess.call(diff_command)
67 |
--------------------------------------------------------------------------------
/tests/test_reads.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | import os
15 |
16 | import pytest
17 |
18 | import dfxml
19 | import dfxml.objects
20 |
21 |
def nop(x: object) -> None:
    """Accept any object and do nothing with it."""
    return None
24 |
25 |
@pytest.fixture
def top_srcdir() -> str:
    """Return the parent of this file's directory, checked to contain ``samples``."""
    candidate = os.path.join(os.path.dirname(__file__), "..")
    samples_dir = os.path.join(candidate, "samples")
    # Fail loudly if the repository layout no longer matches this assumption.
    assert os.path.isdir(
        samples_dir
    ), "Hard-coded expected path not found, '${top_srcdir}/samples/'."
    return candidate
34 |
35 |
@pytest.fixture
def difference_test_0_filepath(top_srcdir: str) -> str:
    """Return the path of samples/difference_test_0.xml, checked to exist."""
    sample_path = os.path.join(top_srcdir, "samples", "difference_test_0.xml")
    sample_found = os.path.exists(sample_path)
    assert (
        sample_found
    ), "Hard-coded path to file did not find expected file, '${top_srcdir}/samples/difference_test_0.xml'."
    return sample_path
43 |
44 |
def test_read_dfxml(difference_test_0_filepath: str) -> None:
    """
    Confirm that dfxml.read_dfxml is importable and can process the sample file with a no-op callback.
    """
    with open(difference_test_0_filepath, "rb") as sample_fh:
        dfxml.read_dfxml(sample_fh, callback=nop)
51 |
52 |
def test_objects_iterparse(difference_test_0_filepath: str) -> None:
    """
    Confirm that dfxml.objects.iterparse is importable and can iterate the whole sample file.
    """
    parsed_stream = dfxml.objects.iterparse(difference_test_0_filepath)
    # Drain the iterator; each item must unpack as an (event, object) pair.
    for _event, _obj in parsed_stream:
        pass
59 |
--------------------------------------------------------------------------------
/dfxml/bin/iverify.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """Reads an fiwalk XML file and reports how many of the files are still in the image..."""
3 |
4 | import hashlib
5 | import os.path
6 | import sys
7 |
8 | from dfxml import fiwalk
9 |
# Module-level accumulators: filled by process_fi(), reported by main().
present = []
not_present = []


def process_fi(fi):
    """Sort one file object into ``present`` or ``not_present``.

    Prints the file name, skips zero-length files, and otherwise appends
    ``fi`` to ``present`` when ``fi.file_present()`` is true and to
    ``not_present`` when it is false.  A ValueError raised by
    ``file_present()`` is written to stderr and the file is left in neither
    list.
    """
    print("process file", fi.filename())
    if fi.filesize() == 0:
        return
    try:
        found = fi.file_present()
    except ValueError as e:
        # `except ValueError(e):` evaluated an undefined name and raised
        # NameError instead of handling the error.
        sys.stderr.write(str(e) + "\n")
        return
    if found:
        present.append(fi)
    else:
        not_present.append(fi)
27 |
28 |
def main():
    """Report which files of a ground-truth XML file are present in an image.

    Command line: ``-g/--ground GROUND.xml image.iso``.  The XML file is
    streamed through process_fi(), which fills the module-level ``present``
    and ``not_present`` lists; a summary is then printed.  If the ground-truth
    file or the image argument is missing, the help text is printed and the
    process exits with status 1.
    """
    import sys
    from optparse import OptionParser

    global options

    parser = OptionParser()
    parser.add_option("-d", "--debug", help="prints debugging info", dest="debug")
    parser.add_option("-g", "--ground", help="ground truth XML file", dest="ground")
    parser.usage = "%prog [options] image.iso"
    (options, args) = parser.parse_args()

    # Both inputs are required; checking args here avoids an IndexError on
    # args[0] when the image file is omitted.
    if not options.ground or not args:
        parser.print_help()
        sys.exit(1)

    # Read the XML file
    reader = fiwalk.fileobject_reader()
    reader.set_imagefilename(args[0])
    # Close the ground-truth file once the stream has been processed.
    with open(options.ground, "r") as ground_fh:
        reader.process_xml_stream(ground_fh, process_fi)

    if not present:
        print("None of the files are present in the image")
        sys.exit(0)

    if not not_present:
        print("All of the files are present in the image")
        sys.exit(0)

    print("\n\n")
    print("Present in image:")
    print("=================")
    print("\n".join(fi.filename() for fi in present))

    print("\n")
    print("Not Present or altered in image:")
    print("=====================")
    for fi in not_present:
        print(fi.filename())


################################################################
if __name__ == "__main__":
    main()
74 |
--------------------------------------------------------------------------------
/.github/workflows/continuous-integration.yml:
--------------------------------------------------------------------------------
1 | # This file based on https://gist.github.com/mwouts/9842452d020c08faf9e84a3bba38a66f
2 | # See: https://help.github.com/en/actions/reference/software-installed-on-github-hosted-runners
3 | # 2020-06-22 - slg - customized
4 | # 2020-06-27 - slg - expanded to G++ for MacOS
5 | #
6 | name: CI (python)
7 | on: [push, pull_request]
8 |
env:
  COVERAGE_OS: ubuntu-latest
  # Quoted so YAML keeps the version a string, matching the quoted matrix
  # entries; an unquoted value is parsed as a float (3.10 would become 3.1).
  COVERAGE_PYTHON_VERSION: '3.14'
12 |
13 | jobs:
14 | build:
15 | runs-on: ${{ matrix.os }}
16 | strategy:
17 | matrix:
18 | os: ['ubuntu-latest', 'macos-latest']
19 | python-version: ['3.10','3.14']
20 |
21 | steps:
22 | - name: Checkout
23 | uses: actions/checkout@v4
24 |
25 | - name: Set up Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v5
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 |
30 | - name: Install Python dependencies
31 | run: |
32 | python -m pip install --upgrade pip
33 | pip install pytest pytest-cov
34 | if [ -r requirements.txt ]; then pip install -r requirements.txt ; fi
35 | if [ -r requirements-dev.txt ]; then pip install -r requirements-dev.txt ; fi
36 |
37 | - name: Install xmllint on ubuntu
38 | if: runner.os == 'Linux'
39 | run: |
40 | sudo apt update
41 | sudo apt install --yes libxml2-utils
42 |
43 | - name: Pre-commit Checks
44 | run: |
45 | pip -q install pre-commit
46 | pre-commit run --all-files
47 |
48 | - name: Make check
49 | run: make check
50 |
51 | - name: Make check-tools
52 | run: make check-tools
53 |
54 | - name: Test with pytest
55 | run: pytest --cov=dfxml --cov-report=xml .
56 |
57 | - name: Upload to codecov.io
58 | if: matrix.os == env.COVERAGE_OS && matrix.python-version == env.COVERAGE_PYTHON_VERSION
59 | uses: codecov/codecov-action@v4
60 | with:
61 | token: ${{ secrets.CODECOV_TOKEN }}
62 | verbose: true
63 | files: ./coverage.xml
64 |
--------------------------------------------------------------------------------
/samples/Makefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | SHELL ?= /bin/bash
15 |
16 | XMLLINT ?= $(shell which xmllint)
17 | ifeq ($(XMLLINT),)
18 | $(error XMLLINT not found)
19 | endif
20 |
21 | SAMPLE_FILES__PASS := \
22 | difference_test_0.xml \
23 | difference_test_1.xml \
24 | difference_test_2.xml \
25 | difference_test_3.xml \
26 | fileobjectexample.xml
27 |
28 | # TODO Any remaining issues with the upstream tool should be resolved.
29 | SAMPLE_FILES__SKIP := \
30 | piecewise.xml \
31 | simple.xml
32 |
# Every sample file: those expected to validate plus those currently skipped.
# (SAMPLE_FILES__FAIL was never defined, so the passing samples were dropped.)
SAMPLE_FILES := \
  $(SAMPLE_FILES__PASS) \
  $(SAMPLE_FILES__SKIP)
36 |
37 | VALIDATES_LOG_FILES__PASS := $(foreach sample_file,$(SAMPLE_FILES__PASS),$(sample_file).validates.log)
38 |
39 | VALIDATES_LOG_FILES__SKIP := $(foreach sample_file,$(SAMPLE_FILES__SKIP),$(sample_file).validates.log)
40 |
41 | VALIDATES_LOG_FILES := \
42 | $(VALIDATES_LOG_FILES__PASS) \
43 | $(VALIDATES_LOG_FILES__SKIP)
44 |
45 | all:
46 |
# None of these targets produce a file of the same name, so declare them all
# phony; otherwise a stray file named e.g. "check" or "clean" disables them.
.PHONY: \
  all \
  check \
  check-TODO \
  clean
49 |
50 | %.validates.log: \
51 | % \
52 | ../schema/dfxml.xsd
53 | $(XMLLINT) \
54 | --noout \
55 | --schema ../schema/dfxml.xsd \
56 | $< \
57 | 2> $<.err.log
58 | touch $@
59 |
60 | ../schema/dfxml.xsd:
61 | @echo "dfxml.xsd not found. To check out the DFXML schema (necessary to run unit tests in the /samples directory), please run 'make schema-init' in the repository root." >&2
62 | exit 2
63 |
64 | check: \
65 | $(VALIDATES_LOG_FILES__PASS)
66 |
67 | check-TODO: \
68 | $(VALIDATES_LOG_FILES__SKIP)
69 |
70 | clean:
71 | @rm -f *.err.log *.validates.log
72 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/FileObject_byte_run_facets_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 | import xml.etree.ElementTree as ET
20 |
21 | import dfxml.objects as Objects
22 |
23 |
def test_all():
    """Data, inode and name byte runs must survive a FileObject Element round trip."""
    _logger = logging.getLogger(os.path.basename(__file__))
    logging.basicConfig(level=logging.DEBUG)

    # One single-run ByteRuns per facet, each with a distinct image offset.
    runs_by_facet = {}
    for facet, img_offset, length in (("data", 1, 1), ("inode", 2, 2), ("name", 4, 3)):
        runs = Objects.ByteRuns()
        runs.append(Objects.ByteRun(img_offset=img_offset, len=length))
        runs.facet = facet
        runs_by_facet[facet] = runs

    source = Objects.FileObject()
    source.data_brs = runs_by_facet["data"]
    source.inode_brs = runs_by_facet["inode"]
    source.name_brs = runs_by_facet["name"]

    assert source.data_brs[0].img_offset == 1
    assert source.inode_brs[0].img_offset == 2
    assert source.name_brs[0].img_offset == 4

    # Serialize, then rebuild a second FileObject from the Element.
    element = source.to_Element()
    # _logger.debug(f1)
    # _logger.debug(ET.tostring(e1))

    rebuilt = Objects.FileObject()
    rebuilt.populate_from_Element(element)
    # _logger.debug(f2)

    assert rebuilt.data_brs[0].img_offset == 1
    assert rebuilt.inode_brs[0].img_offset == 2
    assert rebuilt.name_brs[0].img_offset == 4
65 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/differential_dfxml_test_by_path_23.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | New files:
4 | ==========
5 | 2007-08-09T12:34:58Z CHANGE___content_and_mtime 4097
6 | 2007-08-09T12:35:00Z CHANGE___erased___replaced_by_other_partition_file 4097
7 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4098
8 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097
9 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097
10 | 2007-08-09T12:34:57Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097
11 | 2007-08-09T12:34:59Z CHANGE___new_file 4097
12 | 2007-08-09T05:34:56-07:00 CHANGE___timestamp_changes_format_only 4097
13 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097
14 | 2007-08-09T12:34:56Z NO_CHANGE 4097
15 | 2007-08-09T12:34:56Z _CHANGE___move_from_P1M_to_P3G___change_name 4097
16 | 2007-08-09T12:34:56Z _CHANGE___renamed 4097
17 |
18 |
19 | Deleted files:
20 | ==============
21 | 2007-08-09T12:34:56Z CHANGE___content_and_mtime 4097
22 | 2007-08-09T12:34:56Z CHANGE___erased 4097
23 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_other_partition_file 4097
24 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4097
25 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097
26 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097
27 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097
28 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_name 4097
29 | 2007-08-09T12:34:56Z CHANGE___moved_to_erased_P1G_file 4097
30 | 2007-08-09T12:34:56Z CHANGE___renamed 4097
31 | 2007-08-09T12:34:56Z CHANGE___renamed_to_erased_sibling___change_checksum_and_mtime 4097
32 | 2007-08-09T12:34:56Z CHANGE___timestamp_changes_format_only 4097
33 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097
34 | 2007-08-09T12:34:56Z NO_CHANGE 4097
35 |
36 |
37 | Renamed files:
38 | ==============
39 |
40 |
41 | Files with modified contents:
42 | =============================
43 |
44 |
45 | Files with changed properties:
46 | ==============================
47 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/differential_dfxml_test_by_times_23.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | New files:
4 | ==========
5 | 2007-08-09T05:34:56-07:00 CHANGE___timestamp_changes_format_only 4097
6 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4098
7 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097
8 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097
9 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097
10 | 2007-08-09T12:34:56Z NO_CHANGE 4097
11 | 2007-08-09T12:34:56Z _CHANGE___move_from_P1M_to_P3G___change_name 4097
12 | 2007-08-09T12:34:56Z _CHANGE___renamed 4097
13 | 2007-08-09T12:34:57Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097
14 | 2007-08-09T12:34:58Z CHANGE___content_and_mtime 4097
15 | 2007-08-09T12:34:59Z CHANGE___new_file 4097
16 | 2007-08-09T12:35:00Z CHANGE___erased___replaced_by_other_partition_file 4097
17 |
18 |
19 | Deleted files:
20 | ==============
21 | 2007-08-09T12:34:56Z CHANGE___content_and_mtime 4097
22 | 2007-08-09T12:34:56Z CHANGE___erased 4097
23 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_other_partition_file 4097
24 | 2007-08-09T12:34:56Z CHANGE___erased___replaced_by_sibling 4097
25 | 2007-08-09T12:34:56Z CHANGE___move_from_P1G_to_P2G 4097
26 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G 4097
27 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_content___change_mtime 4097
28 | 2007-08-09T12:34:56Z CHANGE___move_from_P1M_to_P3G___change_name 4097
29 | 2007-08-09T12:34:56Z CHANGE___moved_to_erased_P1G_file 4097
30 | 2007-08-09T12:34:56Z CHANGE___renamed 4097
31 | 2007-08-09T12:34:56Z CHANGE___renamed_to_erased_sibling___change_checksum_and_mtime 4097
32 | 2007-08-09T12:34:56Z CHANGE___timestamp_changes_format_only 4097
33 | 2007-08-09T12:34:56Z CHANGE___unallocated 4097
34 | 2007-08-09T12:34:56Z NO_CHANGE 4097
35 |
36 |
37 | Renamed files:
38 | ==============
39 |
40 |
41 | Files with modified contents:
42 | =============================
43 |
44 |
45 | Files with changed properties:
46 | ==============================
47 |
--------------------------------------------------------------------------------
/dfxml/bin/exp_slack.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.2
2 | #
3 | # exp_slack.py: experiment on the slack space
4 | # quantify slack space
5 | #
6 | # (c) Martin Mulazzani, 2012
7 | # Additions by Simson Garfinkel
8 |
9 | import os
10 | import re
11 | import sys
12 |
13 | import dfxml.fiwalk as fiwalk
14 |
15 |
def proc(fi):
    """Write one tab-separated record describing *fi* to ``f_out``.

    The record holds the partition, filename, file size, and timestamps of
    the file object, terminated by a newline.  Nothing is written for
    entries whose name starts with "$", or for entries that are not regular
    files with contents.  ``f_out`` is a module-level stream that must be
    bound before this callback runs.
    """
    # Skip the virtual files?
    if fi.filename()[0:1] == "$":
        return
    if not (fi.has_contents() and fi.is_file()):
        return

    columns = (
        str(fi.partition()),
        fi.filename(),
        str(fi.filesize()),
        str(fi.times()),
    )
    f_out.write("\t".join(columns) + "\n")
32 |
33 |
if __name__ == "__main__":
    # Script entry point: print a partition summary scraped from the DFXML
    # text, then one record per regular file via the proc() callback.
    if len(sys.argv) != 2:
        print("usage: ./exp_slack.py <dfxml-file>")
        sys.exit(1)

    # input
    file_name = sys.argv[1]

    # Output is to stdout.  proc() writes through the module-level name
    # f_out, so that name has to be bound before any file is processed.
    outfile = sys.stdout
    f_out = outfile

    # find partition information, blocksize and filesystem
    # 1st partition has no. 1, to correspond to fiwalk output
    partitioncounter = 0
    f_out.write(
        "********************************** PARTITIONS **********************************"
    )
    f_out.write("\nNo\tBlocksize\tFilesystem\n")

    # The scan below matches str patterns, so the file is read as text here.
    # Undecodable bytes (e.g. in odd filenames) are replaced rather than
    # aborting the scan; only the block_size / ftype_str lines are used.
    with open(file_name, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            if re.search("block_size", line):
                partitioncounter += 1
                f_out.write(str(partitioncounter))
                f_out.write("\t")
                # Element text is the third piece when splitting on < and >.
                f_out.write(re.split(">|<", line)[2])
            if re.search("ftype_str", line):
                f_out.write("\t\t")
                f_out.write(re.split(">|<", line)[2])
                f_out.write("\n")

    f_out.write(
        "\n\n************************************* DATA *************************************\n"
    )
    f_out.write("Partition\tFilename\tSize\tTimestamps\n")

    # re-open file for binary reading
    # file processing
    with open(file_name, "rb") as f:
        fiwalk.fiwalk_using_sax(xmlfile=f, callback=proc)
75 |
--------------------------------------------------------------------------------
/dfxml/bin/corpus_sync.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3.2
2 | #
3 | # sync corpus based on DFXML files
4 |
5 | from collections import defaultdict
6 |
7 | import dfxml
8 | import dfxml.fiwalk as fiwalk
9 |
10 |
class CorpusDB:
    """In-memory index of the file objects read from one DFXML file.

    Every file object is kept in three views: ``all`` (insertion order),
    ``md5db`` (MD5 digest -> list of file objects with that digest), and
    ``pathdb`` (filename -> file object; a later object with the same
    filename replaces the earlier entry).
    """

    def __init__(self):
        self.all = []
        self.md5db = defaultdict(list)  # maps from MD5 to the file objects having it
        self.pathdb = dict()  # maps from filename to file object

    def process_fi(self, fi):
        """Record one file object in all three views."""
        self.all.append(fi)
        self.md5db[fi.md5()].append(fi)
        self.pathdb[fi.filename()] = fi

    def ingest_dfxml(self, fname):
        """Read the DFXML file *fname* and record its allocated file objects."""
        # The context manager closes the handle once parsing has finished.
        with open(fname, "rb") as xmlfile:
            fiwalk.fiwalk_using_sax(
                xmlfile=xmlfile, flags=fiwalk.ALLOC_ONLY, callback=self.process_fi
            )

    def __iter__(self):
        """Iterate over the recorded file objects in insertion order."""
        return iter(self.all)

    def __delitem__(self, fi):
        """Remove the file object *fi* from all three views.

        Raises ValueError if *fi* was never recorded.
        """
        self.all.remove(fi)
        digest = fi.md5()
        same_digest = self.md5db[digest]
        same_digest.remove(fi)
        # Drop the key once its list is empty, so that "digest in md5db"
        # keeps meaning "at least one file with this digest is present".
        if not same_digest:
            del self.md5db[digest]
        del self.pathdb[fi.filename()]
34 |
35 |
if __name__ == "__main__":
    # Script entry point: compare two DFXML files by MD5 and filename, and
    # print the rm / get / ln / mv commands that would make system A hold
    # the same content, under the same names, as master B.
    from optparse import OptionParser

    parser = OptionParser(usage="%prog master.dfxml current.dfxml")
    (options, args) = parser.parse_args()
    if len(args) < 2:
        # Exit with a usage message instead of failing on the unpacking below.
        parser.error("two DFXML files are required: the master and the current system")

    (fn1, fn2) = args[0:2]
    print("# Reading B - the master {}".format(fn1))
    b = CorpusDB()
    b.ingest_dfxml(fn1)

    print("# Reading A - the current system {}".format(fn2))
    a = CorpusDB()
    a.ingest_dfxml(fn2)

    print("# Files in A that should not be in B:")
    # Collect first, delete afterwards: a must not change while it is iterated.
    rmlist = [afi for afi in a if (afi.md5() not in b.md5db)]
    for afi in rmlist:
        print("rm {}".format(afi.filename()))
        del a[afi]

    fixups = []
    for bfi in b:
        # Same name and same content already present in A: nothing to do.
        if bfi.filename() in a.pathdb and bfi.md5() == a.pathdb[bfi.filename()].md5():
            continue
        # Content not present in A at all: it has to be fetched.
        if bfi.md5() not in a.md5db:
            print("get {}".format(bfi.filename()))
            continue

        # Content is present in A under another name: link it to a temporary
        # ".new" name now and move it into place after all links are made.
        afi = a.md5db[bfi.md5()][0]
        nfn = bfi.filename() + ".new"
        print("ln {} {}".format(afi.filename(), nfn))
        fixups.append((nfn, bfi.filename()))

    for nfn, bfi_filename in fixups:
        print("mv {} {}".format(nfn, bfi_filename))
73 |
--------------------------------------------------------------------------------
/tests/walk_to_dfxml/README.md:
--------------------------------------------------------------------------------
1 | # `walk_to_dfxml`
2 |
3 | *Source*: [`../../dfxml/bin/walk_to_dfxml.py`](../../dfxml/bin/walk_to_dfxml.py)
4 |
5 | This command walks a directory, producing a `<fileobject>` for each encountered file and directory, and then recurses into each directory. Output is sent to `stdout`.
6 |
7 | File characteristics are drawn from:
8 | * the path
9 | * hashes of the file contents for regular files (i.e., not directories, not device files; also, not soft links)
10 | * the `stat` structure for the file
11 | * the referenced path (for soft links)
12 |
13 | Any directory that can be navigated to can be characterized with this script. This has been tested from the root directory of an (offline) Linux system's root-filesystem partition. The tool can handle the `/dev` directory without issue.
14 |
15 | This tool can be used to walk a network file system, such as a share. However, be aware that if it is hashing, that would mean the tool is reading the file contents over the network.
16 |
17 |
18 | ## Usage
19 |
20 | ```bash
21 | cd .../my_directory
22 | walk_to_dfxml > /tmp/my_directory.dfxml
23 | ```
24 |
25 | This will record all characteristics available for each file in and below `.../my_directory`.
26 |
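27 | Output should be captured outside of the present working directory, such as in the parent directory. Note that the following command writes its output inside the directory being walked, so the resulting DFXML will include an entry for `output.dfxml` itself, with the hash of an empty file: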
28 |
29 | ```bash
30 | cd .../my_directory
31 | walk_to_dfxml > output.dfxml
32 | ```
33 |
34 | The `-i` (`--ignore`) flag will cause the named file characteristic to not be gathered into the output. E.g. this command will not collect access time:
35 |
36 | ```bash
37 | walk_to_dfxml -i atime > /tmp/walk.dfxml
38 | ```
39 |
40 | (Testing: See the [`Makefile`](Makefile) recipe for `walk_ignore_genprops.dfxml`, which is tested in [`test_walk_to_dfxml.py`](test_walk_to_dfxml.py)'s function `test_walk_ignore_genprops`.)
41 |
42 | The program can run without gathering any file hashes, by using the `--ignore-hashes` flag:
43 |
44 | ```bash
45 | walk_to_dfxml --ignore-hashes > /tmp/walk.dfxml
46 | ```
47 |
48 | (Testing: See the [`Makefile`](Makefile) recipe for `walk_ignore_hashes.dfxml`, which is tested in [`test_walk_to_dfxml.py`](test_walk_to_dfxml.py)'s function `test_walk_ignore_hashes`.)
49 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/ByteRuns_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import copy
17 | import os
18 | import sys
19 |
20 | import dfxml.objects as Objects
21 |
22 |
def test_all():
    """Compare append() with glom() on ByteRuns, then check facet equality.

    Three byte runs are laid out back to back (offsets 0, 20, and 50).
    append() always adds a list entry, while glom() is expected to merge a
    run into the previous entry only when it directly follows it.  The last
    part checks that a ByteRuns with facet "data" equals one built with no
    facet, and that facet "name" equals neither.
    """

    def _make_run(offset, length):
        run = Objects.ByteRun()
        run.img_offset = offset
        run.len = length
        return run

    first = _make_run(0, 20)
    second = _make_run(20, 30)
    third = _make_run(50, 20)

    appended = Objects.ByteRuns()
    for run in (first, second, third):
        appended.append(run)

    glommed = Objects.ByteRuns()
    for run in (first, second, third):
        glommed.glom(run)

    # A gap between the runs: nothing to merge.
    gapped = Objects.ByteRuns()
    for run in (first, third):
        gapped.glom(run)

    # Adjacent runs offered in reverse order.
    reversed_order = Objects.ByteRuns()
    for run in (second, first):
        reversed_order.glom(run)

    assert len(appended) == 3
    assert len(glommed) == 1
    assert len(gapped) == 2
    assert len(reversed_order) == 2

    assert glommed[0].len == 70
    assert reversed_order[0].len == 30
    assert reversed_order[1].len == 20

    with_data_facet = Objects.ByteRuns(facet="data")
    with_name_facet = Objects.ByteRuns(facet="name")
    without_facet = Objects.ByteRuns()
    assert with_data_facet == without_facet
    assert with_name_facet != with_data_facet
    assert with_name_facet != without_facet
69 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_cat_fileobjects.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | # Determine script location
17 | SCRIPT="$(realpath $0)"
18 | SCRIPT_DIR="$(dirname ${SCRIPT})"
19 |
20 | # Guarantee sane defaults
21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR}
22 |
23 | # Choose python interpreter
24 | . ${TEST_DIR}/_pick_pythons.sh
25 |
26 | XMLLINT=`which xmllint`
27 |
28 | # Halt on error
29 | set -e
30 | # Display all executed commands
31 | set -x
32 |
33 | #NOTE: Python2's ETree does not understand the "unicode" output encoding.
34 | #"$PYTHON2" cat_fileobjects.py ../${SAMPLE_DIR}/simple.xml
35 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py --debug ${SAMPLE_DIR}/simple.xml >cat_test_nocache.dfxml
36 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py --debug --cache ${SAMPLE_DIR}/simple.xml >cat_test_cache.dfxml
37 |
38 | #This checks that the XML structure wasn't changed by cache cleaning. Only the tail is hashed because the head contains metadata.
39 | subj0="x$(tail -n 10 cat_test_nocache.dfxml | openssl dgst -sha1)"
40 | subj1="x$(tail -n 10 cat_test_cache.dfxml | openssl dgst -sha1)"
41 | test "$subj0" != "x"
42 | test "$subj1" != "x"
43 | test "$subj0" == "$subj1"
44 |
45 | if [ -x "$XMLLINT" ]; then
46 | "$PYTHON3" ${TOOL_DIR}/cat_fileobjects.py ${SAMPLE_DIR}/simple.xml | "$XMLLINT" -
47 | else
48 | echo "Warning: xmllint not found. Skipped check for if generated DFXML is valid XML." >&2
49 | fi
50 |
51 | test $(grep ' idifference_test.txt
35 |
36 | #Generate XML output.
37 | "$PYTHON3" ${TOOL_DIR}/idifference.py --xml idifference_test.dfxml ${SAMPLE_DIR}/difference_test_[01].xml
38 | if [ ! -x "$XMLLINT" ]; then
39 | echo "Error: xmllint not found. Can't check for whether generated DFXML is valid XML. Install libxml2 (or possibly xmlutils) to complete these unit tests." >&2
40 | exit 1
41 | fi
42 |
43 | "$XMLLINT" --format idifference_test.dfxml >idifference_test_formatted.dfxml
44 |
45 | _check_counts() {
46 | #Check expected number of fileobjects appears
47 | test 4 == $(grep ' idifference_test_cat.dfxml
59 | "$XMLLINT" --format idifference_test_cat.dfxml >idifference_test_cat_formatted.dfxml
60 | _check_counts idifference_test_cat_formatted.dfxml
61 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/FileObject_allocation_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import os
17 | import sys
18 |
19 | import dfxml.objects as Objects
20 |
21 |
def test_all():
    """Check FileObject.is_allocated() for every allocation-flag combination.

    The first part sets only the combined ``alloc`` flag, or nothing at all.
    The second part walks all nine combinations of ``alloc_inode`` and
    ``alloc_name`` over True, False, and None.
    """
    for flag in (True, False):
        fobj = Objects.FileObject()
        fobj.alloc = flag
        assert fobj.is_allocated() == flag

    untouched = Objects.FileObject()
    # Equality (not identity) comparisons are kept on purpose throughout, so
    # the checks stay exactly as strict as they have always been.
    assert untouched.is_allocated() == None

    # (alloc_inode, alloc_name, expected is_allocated() result)
    combinations = [
        (True, True, True),
        (True, False, False),
        (True, None, False),
        (False, True, False),
        (False, False, False),
        (False, None, False),
        (None, True, False),
        (None, False, False),
        (None, None, None),
    ]
    for alloc_inode, alloc_name, expected in combinations:
        fobj = Objects.FileObject()
        fobj.alloc_inode = alloc_inode
        fobj.alloc_name = alloc_name
        assert fobj.is_allocated() == expected, (alloc_inode, alloc_name)
78 |
--------------------------------------------------------------------------------
/dfxml/bin/break_out_diffs_by_anno.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | """
17 | This program reads a DFXML file with differential annotations and produces a table.
18 |
19 | Columns: FileObject annotation (is it a new file? renamed? etc.).
20 | Rows: Counts of instances of a property being changed per FileObject annotation. One row per FileObject direct-child element.
21 | """
22 |
23 | __version__ = "0.1.0"
24 |
25 | import collections
26 | import sys
27 |
28 | import dfxml.objects as Objects
29 |
30 |
def main():
    """Print an HTML table of changed-property counts per annotation.

    Reads the DFXML file named by the first command-line argument.  For
    every FileObject, each (annotation, changed property) pair drawn from
    ``obj.annos`` and ``obj.diffs`` is counted.  The table printed to stdout
    has one column per key of ``FileObject._diff_attr_names`` and one row
    per property in ``FileObject._all_properties``, leaving out the
    properties listed in ``FileObject._incomparable_properties``.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s annotated.dfxml" % sys.argv[0])

    # Key: (annotation, property name).  Value: number of occurrences.
    hist = collections.defaultdict(int)
    for event, obj in Objects.iterparse(sys.argv[1]):
        if event != "end" or not isinstance(obj, Objects.FileObject):
            continue
        # Loop through annotations
        for anno in obj.annos:
            # Loop through diffs
            for diff in obj.diffs:
                hist[(anno, diff)] += 1

    annos = list(Objects.FileObject._diff_attr_names.keys())
    print(
        """
<table>
  <thead>
    <tr>
      <th>Property</th>
"""
    )
    for anno in annos:
        print("      <th>%s</th>" % anno)
    print(
        """
    </tr>
  </thead>
  <tfoot></tfoot>
  <tbody>
"""
    )
    for diff in sorted(Objects.FileObject._all_properties):
        # Decide whether to skip before any markup is written, so a skipped
        # property never leaves an opened but unclosed row behind.
        if diff in Objects.FileObject._incomparable_properties:
            continue
        print("    <tr>")
        print("      <th>%s</th>" % diff)
        for anno in annos:
            print("      <td>%d</td>" % hist[(anno, diff)])
        print("    </tr>")
    print(
        """
  </tbody>
</table>
"""
    )


if __name__ == "__main__":
    main()
80 |
--------------------------------------------------------------------------------
/dfxml/bin/imap.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """Usage: imap imagefile0.iso imagefile1.iso imagefile2.iso ...
3 |
4 | Produces a map of imagefile0.iso, using the other image files as "hints" for missing
5 | data. Only reports files that have been allocated; deleted files are reported only if
6 | they can be found allocated in another file.
7 | """
8 | import dfxml.fiwalk as fiwalk
9 |
10 | ################################################################
if __name__ == "__main__":
    # Script entry point: print a sector map of the first image named on
    # the command line, built from the byte runs of its file objects.
    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = "%prog [options] image.iso "
    parser.add_option("-d", "--debug", help="debug", action="store_true")
    (options, args) = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        sys.exit(1)

    annotated_runs = []
    # TODO - This debug statement needs to moved to somewhere appropriate after an image read.
    # if options.debug: print("Read %d file objects from %s" % (len(fileobjects),imagefile.name))

    def cb(fi):
        """Collect every byte run of *fi* with its 1-based fragment number."""
        if options.debug:
            print("Read " + str(fi))
        for fragment_num, run in enumerate(fi.byte_runs(), 1):
            annotated_runs.append((run.img_offset, run, fragment_num, fi))

    # A disk image is binary data; the handle is closed once the walk is done.
    with open(args[0], "rb") as imagefile:
        fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=cb)

    next_sector = 0

    # Sort on the image offset only, so that two runs with the same offset
    # never fall back to comparing run or file objects with each other.
    for ip, run, fragment_num, fi in sorted(annotated_runs, key=lambda entry: entry[0]):
        extra = ""
        fragment = ""
        # Integer division keeps sector numbers exact integers.
        start_sector = run.img_offset // 512
        sector_count = run.bytes // 512
        partial = run.bytes % 512

        if not fi.allocated():
            print("***")

        if not fi.file_present():  # it's not here!
            continue

        if partial > 0:
            # The last sector is only partly used; report the unused rest.
            sector_count += 1
            extra = "(%3d bytes slack)" % (512 - partial)

        if fi.fragments() > 2:
            fragment = "fragment %d" % fragment_num

        if next_sector != start_sector:
            print(
                " <-- %5d unallocated sectors @ sector %5d -->"
                % (start_sector - next_sector, next_sector)
            )

        print(
            "[ %6d -> %6d sectors %18s ] %s %s "
            % (start_sector, sector_count, extra, fi.filename(), fragment)
        )

        next_sector = start_sector + sector_count
74 |
--------------------------------------------------------------------------------
/tests/walk_to_dfxml/Makefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | # Bash selection is described in the top-level Makefile.
15 | ifeq ($(shell basename $(SHELL)),sh)
16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash)
17 | endif
18 |
19 | top_srcdir := $(shell cd ../.. ; pwd)
20 |
21 | tests_srcdir := $(top_srcdir)/tests
22 |
23 | all: \
24 | walk_ignore_genprops.dfxml \
25 | walk_ignore_hashes.dfxml
26 |
27 | .scaffolding.done.log:
28 | rm -rf walk_ignore_test
29 | mkdir -p walk_ignore_test/foo/bar/baz
30 | echo 'contents c' > walk_ignore_test/foo/bar/baz/c
31 | echo 'contents b' > walk_ignore_test/foo/bar/b
32 | echo 'contents a' > walk_ignore_test/foo/a
33 | touch $@
34 |
35 | check: \
36 | walk_ignore_genprops.dfxml \
37 | walk_ignore_hashes.dfxml
38 | source $(tests_srcdir)/venv/bin/activate \
39 | && pytest \
40 | --log-level=DEBUG
41 |
42 | clean:
43 | @rm -f \
44 | .scaffolding.done.log \
45 | *.dfxml
46 | @rm -rf \
47 | walk_ignore_test/
48 |
49 | walk_ignore_genprops.dfxml: \
50 | $(tests_srcdir)/.venv.done.log \
51 | $(top_srcdir)/dfxml/bin/walk_to_dfxml.py \
52 | .scaffolding.done.log
53 | rm -f \
54 | __$@ \
55 | _$@
56 | source $(tests_srcdir)/venv/bin/activate \
57 | && cd walk_ignore_test \
58 | && walk_to_dfxml \
59 | -i atime \
60 | -i ctime \
61 | -i crtime \
62 | -i gid \
63 | -i inode \
64 | -i mtime@d \
65 | -i uid \
66 | > ../__$@
67 | xmllint \
68 | --format \
69 | __$@ \
70 | > _$@
71 | rm __$@
72 | mv _$@ $@
73 |
74 | walk_ignore_hashes.dfxml: \
75 | $(tests_srcdir)/.venv.done.log \
76 | $(top_srcdir)/dfxml/bin/walk_to_dfxml.py \
77 | .scaffolding.done.log
78 | rm -f \
79 | __$@ \
80 | _$@
81 | source $(tests_srcdir)/venv/bin/activate \
82 | && cd walk_ignore_test \
83 | && walk_to_dfxml \
84 | --ignore-hashes \
85 | > ../__$@
86 | xmllint \
87 | --format \
88 | __$@ \
89 | > _$@
90 | rm __$@
91 | mv _$@ $@
92 |
--------------------------------------------------------------------------------
/dfxml/bin/dedup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | #
3 | # dedup - detect and optionally remove duplicates based on a DFXML file
4 |
5 | import os
6 | import xml
7 |
8 | import dfxml
9 |
10 |
class dedup:
    """Group filenames by MD5 so that duplicated content can be reported.

    ``seen`` maps each MD5 to the list of filenames that carry it,
    ``files`` counts every processed file object, and ``md5s`` counts only
    those that had an MD5.
    """

    def __init__(self):
        from collections import defaultdict

        self.files = 0
        self.md5s = 0
        self.seen = defaultdict(list)

    def process(self, fi):
        """Count *fi*, and record its filename under its MD5 if it has one."""
        self.files += 1
        digest = fi.md5()
        if not digest:
            return
        self.seen[digest].append(fi.filename())
        self.md5s += 1

    def find_dups(self, cb=None):
        """Call *cb* with each list of filenames that share one MD5."""
        if not cb:
            return
        for same_content in self.seen.values():
            if len(same_content) > 1:
                cb(same_content)

    def report(self, func, cb):
        """Call *cb* with every filename list for which *func* is true."""
        for same_content in self.seen.values():
            if func(same_content):
                cb(same_content)
34 |
35 |
def process_dups(names):
    """Print *names*, a list of filenames, after the label "dups: "."""
    print("dups: ", names)
38 |
39 |
if __name__ == "__main__":
    # Script entry point: read a DFXML file, print MD5 totals, and
    # optionally list the distinct and/or duplicated files.
    import sys
    import xml.parsers.expat  # a bare "import xml" does not load this submodule
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("dfxml", type=str)
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument(
        "--prefix", type=str, help="Only output files with the given prefix"
    )
    parser.add_argument(
        "--distinct", action="store_true", help="Report the distinct files"
    )
    parser.add_argument(
        "--dups",
        action="store_true",
        help="Report the files that are dups, and give dup count",
    )
    args = parser.parse_args()

    dobj = dedup()

    try:
        with open(args.dfxml, "rb") as dfxml_file:
            dfxml.read_dfxml(dfxml_file, callback=dobj.process)
    except xml.parsers.expat.ExpatError as e:
        # Best effort: keep reporting on whatever was read before the parse
        # error, but say so instead of hiding the problem.
        print(
            "warning: {}: XML parse error ({}); results cover only the files read so far".format(
                args.dfxml, e
            ),
            file=sys.stderr,
        )

    print(
        "Total files: {:,} total MD5s processed: {:,} Unique MD5s: {:,}".format(
            dobj.files, dobj.md5s, len(dobj.seen)
        )
    )

    if args.distinct:

        def report_distinct(names):
            """Print the single filename of a content that occurs only once."""
            if args.prefix and not names[0].startswith(args.prefix):
                return
            print("distinct: ", names[0])

        dobj.report(lambda names: len(names) == 1, report_distinct)

    if args.dups:

        def report_dups(names):
            """Print each duplicated filename with the size of its group."""
            for name in names:
                if not args.prefix or name.startswith(args.prefix):
                    print("dups: {} {}".format(name, len(names)))

        dobj.report(lambda names: len(names) > 1, report_dups)
91 |
--------------------------------------------------------------------------------
/samples/difference_test_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 | Sample
9 |
10 |
11 | vi
12 | 8.0
13 |
14 | vi post.xml
15 |
16 |
17 |
18 |
19 | i_am_new.txt
20 | r
21 | 40
22 | 123459
23 | 2013-05-16T21:01:00Z
24 | 2013-05-16T21:01:00Z
25 | 2013-05-16T21:01:00Z
26 |
27 |
28 |
29 | 55b228770d96e4dbd1b218f4f07d8aae
30 | 8632a06e80eefbaf702ac6a44e633937e2be7186
31 | 77f380ce33609d55f8b874833c4495282fdf54869912822cde05c68090a60a18
32 |
33 |
34 | i_will_be_modified.txt
35 | r
36 | 23
37 | 123457
38 | 2013-05-16T20:59:00Z
39 | 2013-05-16T20:59:00Z
40 | 2013-05-16T20:59:00Z
41 |
42 |
43 |
44 | a6d9ebd95bcd3602b757ea63f9dd02ab
45 | 1e087807678a33ebbde2624341184c14303675a3
46 | e49ff8fc09127f458830d7328b0aaabed46cab5bbeb1a22e4c93d762025be281
47 |
48 |
49 | i_will_be_accessed.txt
50 | r
51 | 12
52 | 123458
53 | 2013-01-01T00:00:00Z
54 | 2013-01-01T00:00:00Z
55 | 2013-05-16T21:00:00Z
56 |
57 |
58 |
59 | f3a8f17b47f1fe899805c25b8f5a26b0
60 | b439e832cb243e18f6bfc21ca0150de3ef4c6f27
61 | 3c4ace963a2a069a92d8abaa7c77d88e118758eff65c5180fed6534e75889bf3
62 |
63 |
64 |
--------------------------------------------------------------------------------
/dfxml/bin/iexport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """iexport.py: export the unallocated spaces."""
3 |
4 |
class Run:
    """Keeps track of a single run.

    A run covers the positions ``start`` through ``end`` inclusive, where
    ``end == start + len - 1``.
    """

    def __init__(self, start, len):
        self.start = start
        self.len = len
        self.end = start + len - 1

    def __str__(self):
        return "Run<%d--%d> (len %d)" % (self.start, self.end, self.len)

    def contains(self, b):
        """Returns true if b is inside self (both ends inclusive)."""
        return self.start <= b <= self.end

    def intersects_run(self, r):
        """Return true if self intersects r. This may be because r.start is
        inside the run, r.end is inside the run, or self is inside the run."""
        return self.contains(r.start) or self.contains(r.end) or r.contains(self.start)

    def contains_run(self, r):
        """Returns true if self completely contains r"""
        return self.contains(r.start) and self.contains(r.end)


class RunDB:
    """The RunDB maintains a list of all the runs in a disk image. The
    RunDB is created with a single run that represents all of the sectors
    in the disk image. Runs can then be removed, which causes existing
    runs to be split. Finally all of the remaining runs can be removed."""

    def __init__(self, start, len):
        self.runs = [Run(start, len)]

    def __str__(self):
        return "RunDB\n" + "\n".join([str(p) for p in self.runs])

    def intersecting_runs(self, r):
        """Return a list of all the Runs that intersect with r.
        This may be because r.start is inside the run, r.end is inside
        the run, because the run completely encloses r, or because r completely
        encloses the run."""
        # A real list (not a lazy filter) so the result is a snapshot that
        # stays valid while self.runs is modified.
        return [p for p in self.runs if p.intersects_run(r)]

    def remove(self, r):
        """Remove run r, splitting any stored run that only partly overlaps it."""
        for p in self.intersecting_runs(r):
            self.runs.remove(p)

            # if P is completely inside r, just remove it
            if r.contains_run(p):
                continue

            # Split p into before and after r; add the non-zero pieces
            before_len = r.start - p.start
            if before_len > 0:
                self.runs.append(Run(p.start, before_len))
            after_len = p.end - r.end
            if after_len > 0:
                # r.end is the last position of r, so what is left of p
                # starts one position after it.
                self.runs.append(Run(r.end + 1, after_len))
69 |
--------------------------------------------------------------------------------
/samples/difference_test_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 | Sample
9 |
10 |
11 | vi
12 | 8.0
13 |
14 | vi pre.xml
15 |
16 |
17 |
18 |
19 | i_will_be_deleted.txt
20 | r
21 | 20
22 | 123456
23 | 2013-01-01T00:00:00Z
24 | 2013-01-01T00:00:00Z
25 | 2013-01-01T00:00:00Z
26 |
27 |
28 |
29 | e834b5c2f64759832fb33ec53c8b5028
30 | 9125cb87b8f0035c22d3efad2b0473367cc456ca
31 | c75d73927a6ca221ccc71c4f4dee9286fce2b5cf7122950c73157cbf821af07f
32 |
33 |
34 | i_will_be_modified.txt
35 | r
36 | 22
37 | 123457
38 | 2013-01-01T00:00:00Z
39 | 2013-01-01T00:00:00Z
40 | 2013-01-01T00:00:00Z
41 |
42 |
43 |
44 | e91577092351461d7800ef7b870a2bcf
45 | 44e426344f15bd7621ca2f9ffea70d29752dccda
46 | 1a13a4bb62ab8549fa4836cc5ae37803217ab10c3fba4c1204b216485dcf1357
47 |
48 |
49 | i_will_be_accessed.txt
50 | r
51 | 12
52 | 123458
53 | 2013-01-01T00:00:00Z
54 | 2013-01-01T00:00:00Z
55 | 2013-01-01T00:00:00Z
56 |
57 |
58 |
59 | f3a8f17b47f1fe899805c25b8f5a26b0
60 | b439e832cb243e18f6bfc21ca0150de3ef4c6f27
61 | 3c4ace963a2a069a92d8abaa7c77d88e118758eff65c5180fed6534e75889bf3
62 |
63 |
64 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/PartitionSystemObject_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.1.1"
17 |
18 | import logging
19 | import os
20 | import sys
21 |
22 | import libtest
23 |
24 | import dfxml.objects as Objects
25 |
26 | _logger = logging.getLogger(os.path.basename(__file__))
27 |
28 |
def test_empty_object():
    """Round-trip a DFXMLObject that holds one empty PartitionSystemObject."""
    document = Objects.DFXMLObject()
    partition_system = Objects.PartitionSystemObject()
    document.append(partition_system)

    # Write the document to a temporary file and parse it back.
    tmp_filename, reconstructed = libtest.file_round_trip_dfxmlobject(document)
    try:
        reconstructed.partition_systems[0]
    except:
        # The temporary file is left in place on failure; log where it is.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
42 |
43 |
def test_error_element_order():
    """Check that a partition system's error element follows its file objects.

    The ordering is checked on the in-memory Element tree, then the
    pstype_str and error values are checked again after a file round trip.
    """
    document = Objects.DFXMLObject()
    partition_system = Objects.PartitionSystemObject()
    floating_file = Objects.FileObject()

    partition_system.pstype_str = "gpt"

    # The error element should come after the fileobject stream.
    partition_system.error = "foo"

    # An unallocated file object found floating in the partition system.
    floating_file.alloc_inode = False
    floating_file.alloc_name = False

    document.append(partition_system)
    partition_system.append(floating_file)

    root = document.to_Element()

    # Expected child order: pstype_str first, fileobject second-to-last,
    # error last.
    for position, suffix in ((0, "pstype_str"), (-2, "fileobject"), (-1, "error")):
        assert root[-1][position].tag.endswith(suffix)

    # Write the document to a temporary file and parse it back.
    tmp_filename, reconstructed = libtest.file_round_trip_dfxmlobject(document)
    partition_system_reconst = reconstructed.partition_systems[0]
    try:
        assert partition_system_reconst.pstype_str == "gpt"
        assert partition_system_reconst.error == "foo"
    except:
        # The temporary file is left in place on failure; log where it is.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
78 |
--------------------------------------------------------------------------------
/tests/misc_bin_tests/test_mac_timelines.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | # Determine script location
17 | SCRIPT="$(realpath $0)"
18 | SCRIPT_DIR="$(dirname ${SCRIPT})"
19 |
20 | # Guarantee sane defaults
21 | . ${SCRIPT_DIR}/_sane_defaults.sh ${SCRIPT_DIR}
22 |
23 | # Choose python interpreter
24 | source ${TEST_DIR}/_pick_pythons.sh
25 |
26 | # Halt on error
27 | set -e
28 | # Display all executed commands
29 | set -x
30 |
31 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p2.txt
32 | test 12 == $(cat demo_mac_timeline_simple_p2.txt | wc -l)
33 |
34 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline.py ../samples/simple.xml >demo_mac_timeline_simple_p3.txt
35 | test 12 == $(cat demo_mac_timeline_simple_p3.txt | wc -l)
36 |
37 | "$PYTHON2" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p2.txt
38 | test 12 == $(cat demo_mac_timeline_iter_simple_p2.txt | wc -l)
39 |
40 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/simple.xml >demo_mac_timeline_iter_simple_p3.txt
41 | test 12 == $(cat demo_mac_timeline_iter_simple_p3.txt | wc -l)
42 |
# Count the lines of the file each objects-demo run just wrote; the earlier
# version re-checked the *_iter_* outputs, so these two runs were never verified.
"$PYTHON2" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/simple.xml >demo_mac_timeline_objects_simple_p2.txt
test 12 == $(cat demo_mac_timeline_objects_simple_p2.txt | wc -l)

"$PYTHON3" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/simple.xml >demo_mac_timeline_objects_simple_p3.txt
test 12 == $(cat demo_mac_timeline_objects_simple_p3.txt | wc -l)
48 |
49 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline.py ../samples/difference_test_1.xml >demo_mac_timeline_dt1.txt
50 | test 9 == $(cat demo_mac_timeline_dt1.txt | wc -l)
51 |
52 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_iter.py ../samples/difference_test_1.xml >demo_mac_timeline_iter_dt1.txt
53 | test 9 == $(cat demo_mac_timeline_iter_dt1.txt | wc -l)
54 |
55 | "$PYTHON3" $DEMO_DIR/demo_mac_timeline_objects.py ../samples/difference_test_1.xml >demo_mac_timeline_objects_dt1.txt
56 | test 9 == $(cat demo_mac_timeline_objects_dt1.txt | wc -l)
57 |
--------------------------------------------------------------------------------
/tests/walk_to_dfxml/test_walk_to_dfxml.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.2.0"
17 |
18 | import logging
19 | import os
20 |
21 | import pytest
22 |
23 | import dfxml.objects as Objects
24 |
25 | _logger = logging.getLogger(os.path.basename(__file__))
26 |
27 |
@pytest.fixture
def srcdir() -> str:
    """Return the directory that contains this test module."""
    return os.path.dirname(__file__)
32 |
33 |
def test_walk_ignore_genprops(srcdir: str) -> None:
    """Check that ignored general properties are absent from every file.

    Each FileObject parsed from walk_ignore_genprops.dfxml must have None
    for the listed properties. A failure on ``mtime`` is tolerated for any
    object whose name_type is not "d".
    """
    dfxml_path = os.path.join(srcdir, "walk_ignore_genprops.dfxml")
    ignored_propnames = ("atime", "ctime", "crtime", "gid", "inode", "mtime", "uid")

    files_encountered = 0
    for _event, obj in Objects.iterparse(dfxml_path):
        if isinstance(obj, Objects.FileObject):
            files_encountered += 1
            for propname in ignored_propnames:
                try:
                    assert (
                        getattr(obj, propname) is None
                    ), "Found property that should have been ignored."
                except:
                    tolerated = propname == "mtime" and obj.name_type != "d"
                    if not tolerated:
                        _logger.error("obj.filename = %r.", obj.filename)
                        _logger.error("propname = %r.", propname)
                        raise
    assert files_encountered > 0, "Encountered no files in walk_ignore_genprops.dfxml."
54 |
55 |
def test_walk_ignore_hashes(srcdir: str) -> None:
    """Check that no hash property is set on any file in the sample.

    Each FileObject parsed from walk_ignore_hashes.dfxml must have None for
    every name listed in ``Objects.FileObject._hash_properties``.
    """
    dfxml_path = os.path.join(srcdir, "walk_ignore_hashes.dfxml")

    files_encountered = 0
    for _event, obj in Objects.iterparse(dfxml_path):
        if isinstance(obj, Objects.FileObject):
            files_encountered += 1
            for propname in Objects.FileObject._hash_properties:
                try:
                    assert (
                        getattr(obj, propname) is None
                    ), "Found hash property when none was expected."
                except:
                    # Log which object and property failed before re-raising.
                    _logger.error("obj.filename = %r.", obj.filename)
                    _logger.error("propname = %r.", propname)
                    raise
    assert files_encountered > 0, "Encountered no files in walk_ignore_hashes.dfxml."
74 |
--------------------------------------------------------------------------------
/dfxml/bin/allocation_counter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | """
17 | For a disk image or DFXML file, this program produces a cross-tabulation of the allocation state of each file's inode and name.
18 | """
19 |
20 | __version__ = "0.1.1"
21 | # Version 0.2.0:
22 | # * Tabular output in HTML
23 | # * Tabular output in LaTeX
24 |
25 | import collections
26 | import logging
27 | import os
28 | import sys
29 | import xml.etree.ElementTree as ET
30 |
31 | import dfxml.bin.make_differential_dfxml
32 | import dfxml.objects as Objects
33 |
34 | _logger = logging.getLogger(os.path.basename(__file__))
35 |
36 |
def main():
    """Count FileObjects by their (alloc_inode, alloc_name) pair and print the tally.

    Reads the module-level ``args`` set by the command-line guard:
    ``input_image`` is parsed with Objects.iterparse, ``ignore_virtual_files``
    skips names that make_differential_dfxml.ignorable_name accepts, and
    ``debug`` logs details for files whose two allocation flags are both None.
    """
    counter = collections.defaultdict(lambda: 0)
    prev_obj = None
    for event, obj in Objects.iterparse(args.input_image):
        if isinstance(obj, Objects.FileObject):
            if (
                args.ignore_virtual_files
                and dfxml.bin.make_differential_dfxml.ignorable_name(obj.filename)
            ):
                continue
            counter[(obj.alloc_inode, obj.alloc_name)] += 1

            # Inspect weird data
            if args.debug and obj.alloc_inode is None and obj.alloc_name is None:
                _logger.debug("Encountered a file with all-null allocation.")
                _logger.debug("Event: %r." % event)
                if prev_obj is None:
                    # No earlier object exists yet, so there is nothing to
                    # serialize; calling to_Element() on None would raise.
                    _logger.debug("Previous object: None.")
                else:
                    _logger.debug(
                        "Previous object: %s." % ET.tostring(prev_obj.to_Element())
                    )
                _logger.debug("Current object: %s." % ET.tostring(obj.to_Element()))
        # NOTE(review): placement at loop level (every parsed object, not only
        # FileObjects) is inferred from a listing that lost its indentation.
        prev_obj = obj
    print(repr(counter))
59 |
60 |
if __name__ == "__main__":
    import argparse

    # Build the command line; main() reads the parsed result from the
    # module-level name ``args``.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--ignore-virtual-files",
        action="store_true",
        help="Use the same file-ignoring rules as make_differential_dfxml.py.",
    )
    cli.add_argument(
        "-d", "--debug", action="store_true", help="Enable debug printing."
    )
    cli.add_argument("input_image", help="Disk image, or DFXML file.")
    args = cli.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    main()
79 |
--------------------------------------------------------------------------------
/demos/demo_readtimes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """Reads an fiwalk XML file and reports how many of the files are still in the image..."""
3 |
4 | import os
5 | import sys
6 |
7 | sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
8 | import time
9 |
10 | import dfxml
11 | import dfxml.fiwalk as fiwalk
12 |
13 |
def calc_jumps(fis, title):
    """Print how often successive byte runs start at a lower sector.

    Walks the byte runs of every file object in ``fis`` in order, counts each
    run whose start sector is lower than the previously seen one, and records
    a histogram keyed by (fragment count of the previous file, run index).

    fis   -- iterable of file objects providing byte_runs()
    title -- heading printed before the statistics
    """
    # Materialize first so lazy iterables (such as filter() on Python 3) can
    # be both counted and iterated.
    fis = list(fis)

    print(title)
    print("Count: %d" % (len(fis)))
    from histogram import histogram

    h = histogram()
    pos = 0
    backwards = 0
    prev_frag_count = 0
    for fi in fis:
        # assumes byte_runs() returns the same sequence on every call -- TODO confirm
        runs = fi.byte_runs()
        for i, run in enumerate(runs):
            try:
                sector = run.start_sector()
                if sector < pos:
                    backwards += 1
                    h.add((prev_frag_count, i))
                pos = sector
            except AttributeError:
                # Runs that cannot report a start sector are skipped.
                pass
        # The original assigned to a misspelled name ("pref_frag_count"), so
        # the histogram key always carried 0 as its first element.
        prev_frag_count = len(runs)

    print("Backwards Jumps: %d" % backwards)
    print("Histogram of backwards:")
    h.print_top(10)
39 |
40 |
if __name__ == "__main__":
    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option("-d", "--debug", help="prints debugging info", dest="debug")
    parser.add_option("-x", "--xmlfile", help="XML file (optional)")
    parser.add_option("-i", "--imagefile", help="image file (required)")
    parser.usage = "%prog [options] xmlfile diskimage"
    (options, args) = parser.parse_args()

    # Only the image is mandatory: the help text marks --xmlfile as optional
    # and the code below already passes xmlfile=None when it is absent.
    if not options.imagefile:
        parser.print_help()
        sys.exit(1)

    # Open the inputs in binary mode, as the other tools in this package do;
    # text mode would try to decode the disk image.
    imagefile = open(options.imagefile, "rb")
    xmlfile = open(options.xmlfile, "rb") if options.xmlfile else None

    t0 = time.time()
    fis = fiwalk.fileobjects_using_sax(imagefile=imagefile, xmlfile=xmlfile)
    t1 = time.time()
    print("Time to read file objects: {} seconds".format(t1 - t0))

    # Create a new array with just those that we can read
    def resident_file(fi):
        """Return True for files with one or two plain, image-backed byte runs."""
        if len(fi.byte_runs()) == 0:
            return False
        if len(fi.byte_runs()) > 2:
            return False
        if hasattr(fi.byte_runs()[0], "uncompressed_len"):
            return False
        if not hasattr(fi.byte_runs()[0], "img_offset"):
            return False
        return True

    # A real list is needed: it is counted, iterated twice and sorted in place.
    fis = [fi for fi in fis if resident_file(fi)]

    print("Native order: ")
    calc_jumps(fis, "Native Order")

    # Order by the start sector of each file's first run. list.sort() accepts
    # only a key function on Python 3, not a comparison function.
    fis.sort(key=lambda fi: fi.byte_runs()[0].start_sector())
    calc_jumps(fis, "Sorted Order")
99 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/PartitionObject_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | __version__ = "0.1.1"
17 |
18 | import logging
19 | import os
20 | import sys
21 |
22 | import libtest
23 |
24 | import dfxml.objects as Objects
25 |
26 | _logger = logging.getLogger(os.path.basename(__file__))
27 |
28 |
def test_empty_object():
    """Round-trip a DFXMLObject that holds one empty PartitionObject."""
    document = Objects.DFXMLObject()
    partition = Objects.PartitionObject()
    document.append(partition)

    # Write the document to a temporary file and parse it back.
    tmp_filename, reconstructed = libtest.file_round_trip_dfxmlobject(document)
    try:
        reconstructed.partitions[0]
    except:
        # The temporary file is left in place on failure; log where it is.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
42 |
43 |
def test_cfreds_macwd_properties():
    """
    Round-trip partition properties drawn from a CFReDS sample Mac disk image.
    """
    document = Objects.DFXMLObject()
    partition = Objects.PartitionObject()
    document.append(partition)

    partition.ptype_str = "Apple_Boot"
    # Assigned as an integer; the check below expects the string "8" back.
    partition.partition_index = 8

    # Write the document to a temporary file and parse it back.
    tmp_filename, reconstructed = libtest.file_round_trip_dfxmlobject(document)
    try:
        partition_reconst = reconstructed.partitions[0]
        assert partition_reconst.ptype_str == "Apple_Boot"
        assert partition_reconst.partition_index == "8"
    except:
        # The temporary file is left in place on failure; log where it is.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
65 |
66 |
def test_bsd_disklabel_properties():
    """
    Round-trip letter partition indices drawn from a BSD Disk Label sample image.
    """
    document = Objects.DFXMLObject()
    partition_a = Objects.PartitionObject()
    partition_c = Objects.PartitionObject()
    document.append(partition_a)
    document.append(partition_c)

    partition_a.partition_index = "a"
    partition_c.partition_index = "c"

    # Write the document to a temporary file and parse it back.
    tmp_filename, reconstructed = libtest.file_round_trip_dfxmlobject(document)
    try:
        # Partitions are expected back in the order they were appended.
        for position, letter in enumerate(("a", "c")):
            assert reconstructed.partitions[position].partition_index == letter
    except:
        # The temporary file is left in place on failure; log where it is.
        _logger.debug("tmp_filename = %r." % tmp_filename)
        raise
    else:
        os.remove(tmp_filename)
91 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/Makefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | # Bash selection is described in the top-level Makefile.
17 | ifeq ($(shell basename $(SHELL)),sh)
18 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash)
19 | endif
20 |
21 | top_srcdir := $(shell cd ../.. ; pwd)
22 |
23 | PYTHON3 ?= python3
24 |
25 | OBJECTS := $(top_srcdir)/dfxml/objects.py
26 |
27 | SAMPLES_DIR := $(top_srcdir)/samples
28 |
29 | TOOLS_DIR := $(top_srcdir)/dfxml/bin
30 |
31 | all: \
32 | check
33 |
34 | .PHONY: \
35 | check-diff_file_ignore-py3 \
36 | check-versioned
37 |
38 | check: \
39 | check-diff_file_ignore-py3 \
40 | check-libraries-py3 \
41 | check-versioned
42 | source $(top_srcdir)/tests/venv/bin/activate \
43 | && $(PYTHON3) "$(TOOLS_DIR)/cat_partitions.py" \
44 | 12345678:$(SAMPLES_DIR)/difference_test_0.xml \
45 | 87654321:$(SAMPLES_DIR)/difference_test_1.xml \
46 | > __cat_patterns_test.sh.dfxml
47 | xmllint \
48 | --format \
49 | __cat_patterns_test.sh.dfxml \
50 | > _cat_patterns_test.sh.dfxml
51 | rm \
52 | __cat_patterns_test.sh.dfxml
53 | mv \
54 | _cat_patterns_test.sh.dfxml \
55 | cat_patterns_test.sh.dfxml
56 |
57 | check-diff_file_ignore-py3: \
58 | diff_file_ignore_sample-py3.dfxml \
59 | diff_file_ignore_test.py
60 | source $(top_srcdir)/tests/venv/bin/activate \
61 | && $(PYTHON3) diff_file_ignore_test.py --debug diff_file_ignore_sample-py3.dfxml
62 |
63 | check-libraries-py3:
64 | source $(top_srcdir)/tests/venv/bin/activate \
65 | && $(PYTHON3) LibraryObject_write_test.py LibraryObject_py3_test.dfxml
66 | source $(top_srcdir)/tests/venv/bin/activate \
67 | && $(PYTHON3) LibraryObject_read_test.py LibraryObject_py3_test.dfxml
68 |
69 | check-versioned:
70 | $(PYTHON3) $(OBJECTS)
71 | source $(top_srcdir)/tests/venv/bin/activate \
72 | && $(PYTHON3) DFXMLObject_program_test.py \
73 | $(SAMPLES_DIR)/difference_test_0.xml \
74 | vi \
75 | 8.0
76 |
# The check target writes cat_patterns_test.sh.dfxml through two intermediate
# files (prefixed __ and _); remove all three along with the older names.
clean:
	rm -f difference_counts_test.py-d*
	rm -f cat_partitions_test.sh.dfxml
	rm -f __cat_patterns_test.sh.dfxml
	rm -f _cat_patterns_test.sh.dfxml
	rm -f cat_patterns_test.sh.dfxml
	rm -f diff_file_ignore_sample-py3.dfxml
	rm -f LibraryObject_py3_test.dfxml
	rm -f *~
83 |
84 |
85 | diff_file_ignore_sample-py3.dfxml: \
86 | $(OBJECTS) \
87 | diff_file_ignore_sample_dfxml_test.py
88 | rm -f _$@
89 | source $(top_srcdir)/tests/venv/bin/activate \
90 | && $(PYTHON3) diff_file_ignore_sample_dfxml_test.py --debug _$@
91 | mv _$@ $@
92 |
--------------------------------------------------------------------------------
/samples/simple.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
7 | Hash List
8 |
9 |
10 | MD5DEEP
11 | 4.0.0_beta2-002
12 |
13 | GCC 4.2
14 |
15 |
16 | Darwin
17 | 11.3.0
18 | Darwin Kernel Version 11.3.0: Thu Jan 12 18:47:41 PST 2012; root:xnu-1699.24.23~1/RELEASE_X86_64
19 | Mucha.local
20 | x86_64
21 | md5deep -dp512 /Users/simsong/uploads/einstein template.jpg /Users/simsong/uploads/image1.jpg /Users/simsong/uploads/image2.jpg /Users/simsong/uploads/image3.jpg
22 | 502
23 | 2012-02-23T16:35:11Z
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 | /Users/simsong/uploads/image2.jpg
37 | 12833
38 | 2012-02-22T03:53:05Z
39 | 2012-02-22T03:53:05Z
40 | 2012-02-23T16:34:27Z
41 | d7ced55e7d7f5b9995fc3cbac7942155
42 |
43 |
44 | /Users/simsong/uploads/image1.jpg
45 | 12551
46 | 2012-02-22T03:53:54Z
47 | 2012-02-22T03:53:54Z
48 | 2012-02-23T16:34:27Z
49 | 3bb144b5abc65312099f79caa69ff94f
50 |
51 |
52 | /Users/simsong/uploads/image3.jpg
53 | 12545
54 | 2012-02-22T03:55:38Z
55 | 2012-02-22T03:55:38Z
56 | 2012-02-23T16:34:27Z
57 | 6377d89ab3165a3fe24b390b513f47d7
58 |
59 |
60 | /Users/simsong/uploads/einstein template.jpg
61 | 43819
62 | 2012-02-22T03:54:19Z
63 | 2012-02-22T03:54:19Z
64 | 2012-02-23T16:34:27Z
65 | 702da00183448a42f5a861c95973f4f3
66 |
67 |
68 | 0.008982
69 | 0.003041
70 | 1069056
71 | 391
72 | 0
73 | 0
74 | 0
75 | 0
76 | 0.006578
77 |
78 |
79 |
--------------------------------------------------------------------------------
/dfxml/bin/cat_fileobjects.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | """
17 | Make a new DFXML file of all fileobjects in an input DFXML file.
18 | """
19 |
20 | __version__ = "0.4.0"
21 |
22 | import logging
23 | import os
24 | import sys
25 | import xml.etree.ElementTree as ET
26 |
27 | import dfxml
28 |
29 | _logger = logging.getLogger(os.path.basename(__file__))
30 |
# Compare the numeric version tuple rather than the version string: string
# comparison orders "10" before "3". sys.exit is used because the bare
# exit() helper is only installed by the site module.
if sys.version_info < (3,):
    _logger.error(
        "Due to Unicode issues with Python 2's ElementTree, Python 3 and up is required.\n"
    )
    sys.exit(1)
36 |
37 |
38 | def main():
39 | print(
40 | """\
41 |
42 |
46 |
47 |
48 | %s
49 | %s
50 |
51 | %s
52 |
53 |
54 |
55 | %s
56 | \
57 | """
58 | % (
59 | dfxml.XMLNS_DFXML,
60 | dfxml.XMLNS_DELTA,
61 | dfxml.DFXML_VERSION,
62 | sys.argv[0],
63 | __version__,
64 | " ".join(sys.argv),
65 | args.filename,
66 | )
67 | )
68 |
69 | ET.register_namespace("delta", dfxml.XMLNS_DELTA)
70 |
71 | xs = []
72 | for fi in dfxml.iter_dfxml(
73 | xmlfile=open(args.filename, "rb"), preserve_elements=True
74 | ):
75 | _logger.debug("Processing: %s" % str(fi))
76 | if args.cache:
77 | xs.append(fi.xml_element)
78 | else:
79 | _logger.debug("Printing without cache: %s" % str(fi))
80 | print(dfxml.ET_tostring(fi.xml_element, encoding="unicode"))
81 | if args.cache:
82 | for x in xs:
83 | _logger.debug("Printing with cache: %s" % str(fi))
84 | print(dfxml.ET_tostring(x, encoding="unicode"))
85 |
86 | print("""""")
87 |
88 |
if __name__ == "__main__":
    import argparse

    # Build the command line; main() reads the parsed result from the
    # module-level name ``args``.
    cli = argparse.ArgumentParser()
    cli.add_argument("filename")
    for flag in ("--cache", "--debug"):
        cli.add_argument(flag, action="store_true")
    args = cli.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    main()
101 |
--------------------------------------------------------------------------------
/dfxml/bin/tcpdiff.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This software was developed in whole or in part by employees of the
4 | # Federal Government in the course of their official duties, and with
5 | # other Federal assistance. Pursuant to title 17 Section 105 of the
6 | # United States Code portions of this software authored by Federal
7 | # employees are not subject to copyright protection within the United
8 | # States. For portions not authored by Federal employees, the Federal
9 | # Government has been granted unlimited rights, and no claim to
10 | # copyright is made. The Federal Government assumes no responsibility
11 | # whatsoever for its use by other parties, and makes no guarantees,
12 | # expressed or implied, about its quality, reliability, or any other
13 | # characteristic.
14 | #
15 | # We would appreciate acknowledgement if the software is used.
16 |
17 | """tcpdiff.py
18 |
19 | Generates a report about what's different between two tcp DFXML files
20 | produced by tcpflow.
21 |
22 | Process:
23 |
24 | """
25 |
26 | import sys
27 | import time
28 |
# Refuse to run on interpreters older than 3.1; the message names this script
# (it previously named rdifference.py).
if sys.version_info < (3, 1):
    raise RuntimeError("tcpdiff.py requires Python 3.1 or above")
31 |
32 | import dfxml
33 | import dfxml.dfxml_html as dfxml_html
34 | import dfxml.fiwalk as fiwalk
35 |
36 |
def ptime(t):
    """Format a dfxml time value t in the requested style.

    Returns None when t is None; otherwise the string form of t.timestamp()
    when options.timestamp is set, or of t.iso8601() when it is not.
    """
    global options
    if t is None:
        return None
    value = t.timestamp() if options.timestamp else t.iso8601()
    return str(value)
46 |
47 |
def dprint(x):
    "Print x, but only when the debug option is enabled."
    global options
    if not options.debug:
        return
    print(x)
53 |
54 |
55 | #
56 | # This program keeps track of the current and previous TCP connections in a single
57 | # object called "FlowState". Another way to do that would have been to have
58 | # the instance built from the XML file and then have another function that compares
59 | # them.
60 | #
61 |
62 |
class FlowState:
    """Collects the connections read from one tcpflow DFXML file."""

    def __init__(self, fname):
        """Read fname immediately and gather its connections."""
        # ``options`` is the module-level value set by the command-line guard.
        self.options = options
        self.connections = set()
        self.process(fname)

    def process(self, fname):
        """Parse the DFXML file fname, passing each object to process_fi."""
        self.fname = fname
        # Close the input once parsing is done instead of leaking the handle.
        with open(fname, "rb") as xmlfile:
            dfxml.read_dfxml(xmlfile=xmlfile, callback=self.process_fi)

    def process_fi(self, fi):
        """Record one object delivered by the DFXML reader."""
        self.connections.add(fi)

    def report(self):
        """Emit an HTML header, the file name, and the total connection count."""
        dfxml_html.header()
        # The file name is stored as ``fname`` by process(); the attribute
        # ``current_fname`` read here before was never assigned.
        dfxml_html.h1("DFXML file:" + self.fname)
        dfxml_html.table(["Total Connections", str(len(self.connections))])
80 |
81 |
if __name__ == "__main__":
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = "%prog [options] file1 file2 (files MUST be tcpflow DFXML files)"
    parser.add_option("-d", "--debug", help="debug", action="store_true")
    # ptime() reads options.timestamp, so the option has to exist; without it
    # any call to ptime() with a real time value raised AttributeError.
    parser.add_option(
        "--timestamp",
        help="print times as numeric timestamps instead of ISO 8601",
        action="store_true",
    )

    # ``options`` becomes a module-level name here; ptime(), dprint() and
    # FlowState read it.
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    a = FlowState(fname=args[0])
    a.report()

    b = FlowState(fname=args[1])
    b.report()

    print("Difference:")
105 |
--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | # Bash selection is described in the top-level Makefile.
15 | ifeq ($(shell basename $(SHELL)),sh)
16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash)
17 | endif
18 |
19 | top_srcdir := $(shell cd .. ; pwd)
20 |
21 | PYTHON3 ?= python3
22 | ifeq ($(PYTHON3),)
23 | $(error python3 not found)
24 | endif
25 |
26 | all: \
27 | all-make_differential_dfxml \
28 | all-walk_to_dfxml
29 |
30 | .PHONY: \
31 | all-make_differential_dfxml \
32 | all-walk_to_dfxml \
33 | check-mypy \
34 | check-mypy-stricter
35 |
36 | all-make_differential_dfxml: \
37 | .venv.done.log
38 | $(MAKE) \
39 | --directory make_differential_dfxml
40 |
41 | all-walk_to_dfxml: \
42 | .venv.done.log
43 | $(MAKE) \
44 | --directory walk_to_dfxml
45 |
46 | .venv.done.log: \
47 | $(top_srcdir)/setup.cfg \
48 | $(top_srcdir)/setup.py \
49 | requirements.txt
50 | rm -rf venv
51 | $(PYTHON3) -m venv \
52 | venv
53 | source venv/bin/activate \
54 | && pip install \
55 | --upgrade \
56 | pip \
57 | setuptools
58 | source venv/bin/activate \
59 | && cd $(top_srcdir) \
60 | && pip install \
61 | --editable \
62 | .
63 | source venv/bin/activate \
64 | && pip install \
65 | --requirement requirements.txt
66 | touch $@
67 |
68 | check: \
69 | all-make_differential_dfxml \
70 | all-walk_to_dfxml \
71 | check-mypy
72 | source venv/bin/activate \
73 | && pytest \
74 | --log-level=DEBUG
75 |
76 | #TODO - Type-checking would best be done against all of ../dfxml, when someone finds some time to do so.
77 | check-mypy: \
78 | check-mypy-stricter
79 | source venv/bin/activate \
80 | && mypy \
81 | ../dfxml/bin/idifference.py \
82 | ../dfxml/bin/summarize_differential_dfxml.py \
83 | ../dfxml/__init__.py \
84 | ../dfxml/fiwalk.py \
85 | ../dfxml/objects.py \
86 | misc_bin_tests \
87 | misc_object_tests
88 | @echo "INFO:tests/Makefile:mypy is currently run against a subset of the dfxml directory." >&2
89 |
90 | #TODO - Strict type-checking is another long-term goal, likewise eventually done against all of ../dfxml.
91 | check-mypy-stricter: \
92 | .venv.done.log
93 | source venv/bin/activate \
94 | && mypy \
95 | ../demos/demo_fiwalk_diskimage.py \
96 | ../dfxml/bin/idifference2.py \
97 | ../dfxml/bin/make_differential_dfxml.py \
98 | ../dfxml/bin/walk_to_dfxml.py \
99 | make_differential_dfxml \
100 | walk_to_dfxml \
101 | *.py
102 |
103 | clean:
104 | @$(MAKE) \
105 | --directory misc_object_tests \
106 | clean
107 | @$(MAKE) \
108 | --directory make_differential_dfxml \
109 | clean
110 | @$(MAKE) \
111 | --directory walk_to_dfxml \
112 | clean
113 | @rm -f \
114 | .venv.done.log
115 | @rm -rf \
116 | venv
117 |
--------------------------------------------------------------------------------
/dfxml/bin/report_silent_changes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology in whole or in part by employees of the Federal
5 | # Government in the course of their official duties. Pursuant to
6 | # title 17 Section 105 of the United States Code portions of this
7 | # software authored by NIST employees are not subject to copyright
8 | # protection and are in the public domain. For portions not authored
9 | # by NIST employees, NIST has been granted unlimited rights. NIST
10 | # assumes no responsibility whatsoever for its use by other parties,
11 | # and makes no guarantees, expressed or implied, about its quality,
12 | # reliability, or any other characteristic.
13 | #
14 | # We would appreciate acknowledgement if the software is used.
15 |
16 | """
17 | This program takes a differentially-annotated DFXML file as input, and outputs a DFXML document that contains 'Silent' changes. For instance, a changed checksum with no changed timestamps would be 'Silent.'
18 | """
19 |
20 | __version__ = "0.2.2"
21 |
22 | import logging
23 | import os
24 | import sys
25 |
26 | _logger = logging.getLogger(os.path.basename(__file__))
27 |
28 | import make_differential_dfxml
29 |
30 | import dfxml.objects as Objects
31 |
32 |
def main():
    """Write to stdout a DFXML document holding the selected file objects.

    Reads ``args.infile`` (the module-level result of the argument parser).
    A file object is kept when its ``diffs`` lacks ``"_changed"`` but contains
    ``"_modified"`` or ``"_renamed"``.  Kept files are appended to the volume
    currently open in the input, or to the document itself when outside any
    volume.  The count of kept files is logged at INFO level.
    """
    report = Objects.DFXMLObject("1.2.0")
    report.program = sys.argv[0]
    report.program_version = __version__
    report.command_line = " ".join(sys.argv)
    report.dc["type"] = "File system silent-change report"

    # A bit of a bend, but gets the major version information out.
    python_version = ".".join(map(str, sys.version_info[0:3]))
    report.add_creator_library("Python", python_version)
    report.add_creator_library("Objects.py", Objects.__version__)
    report.add_creator_library("dfxml.py", Objects.dfxml.__version__)

    appender = report
    found = 0
    for event, obj in Objects.iterparse(args.infile):
        if event == "start":
            if isinstance(obj, Objects.DFXMLObject):
                # Inherit namespaces
                for prefix, url in obj.iter_namespaces():
                    report.add_namespace(prefix, url)
            elif isinstance(obj, Objects.VolumeObject):
                # Group files by volume
                report.append(obj)
                appender = obj
            continue

        if event != "end":
            continue

        if isinstance(obj, Objects.VolumeObject):
            # Leaving the volume: later files attach to the document again.
            appender = report
            continue

        if not isinstance(obj, Objects.FileObject):
            continue

        if "_changed" in obj.diffs:
            continue
        if "_modified" in obj.diffs or "_renamed" in obj.diffs:
            appender.append(obj)
            found += 1

    print(report.to_dfxml())
    _logger.info("Found %d suspiciously-changed files." % found)
67 |
68 |
if __name__ == "__main__":
    # Command-line entry point: parse arguments, configure logging, validate
    # the input path, then build the report.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--debug", action="store_true")
    parser.add_argument("infile")
    args = parser.parse_args()

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # The name check comes before the existence check, so a wrongly named
    # path is reported as such even when it does not exist.
    looks_like_xml = args.infile.endswith("xml")
    if not looks_like_xml:
        message = "Input file should be a DFXML file, and should end with 'xml': %r."
        raise Exception(message % args.infile)

    if not os.path.exists(args.infile):
        raise Exception("Input file does not exist: %r." % args.infile)

    main()
89 |
--------------------------------------------------------------------------------
/dfxml/dfxml_html.py:
--------------------------------------------------------------------------------
1 | # This software was developed in whole or in part by employees of the
2 | # Federal Government in the course of their official duties, and with
3 | # other Federal assistance. Pursuant to title 17 Section 105 of the
4 | # United States Code portions of this software authored by Federal
5 | # employees are not subject to copyright protection within the United
6 | # States. For portions not authored by Federal employees, the Federal
7 | # Government has been granted unlimited rights, and no claim to
8 | # copyright is made. The Federal Government assumes no responsibility
9 | # whatsoever for its use by other parties, and makes no guarantees,
10 | # expressed or implied, about its quality, reliability, or any other
11 | # characteristic.
12 | #
13 | # We would appreciate acknowledgement if the software is used.
14 |
15 | # dfxml_html.py:
16 | # A collection of functions for generating HTML
17 |
18 | html = False
19 |
20 |
def header() -> None:
    """Print the opening boilerplate of a report.

    Output is produced only when the module-level ``html`` flag is true;
    in plain-text mode this function prints nothing.
    """
    # NOTE(review): in this copy of the file the literal below contains only
    # newlines; it presumably held the opening HTML markup -- confirm against
    # the upstream source before relying on it.
    if html:
        print(
            """



"""
        )
34 |
35 |
def h1(title):
    """Print *title* as a top-level heading.

    When the module-level ``html`` flag is true the title is wrapped in an
    ``<h1>`` element; otherwise it is printed as plain text preceded by two
    blank lines and followed by one.
    """
    if html:
        print("<h1>%s</h1>" % title)
        return
    print("\n\n%s\n" % title)
42 |
43 |
def h2(title):
    """Print *title* as a second-level heading.

    When the module-level ``html`` flag is true the title is wrapped in an
    ``<h2>`` element; otherwise it is printed as plain text with one blank
    line before and after it.
    """
    if html:
        print("<h2>%s</h2>" % title)
        return
    print("\n%s\n" % title)
50 |
51 |
def table(rows, styles=None, break_on_change=False):
    """Print *rows* as an HTML table or as tab-separated text.

    rows            -- iterable of rows, each a sequence of cell values.
    styles          -- optional per-column values, used in HTML mode only,
                       paired with the cells of every row.
    break_on_change -- accepted for compatibility; not consulted.  In text
                       mode a blank line is always written whenever the first
                       cell differs from the first cell of the previous row.

    The module-level ``html`` flag selects the output format.  In text mode
    integers and all-digit strings are right-aligned in 12 columns, ``None``
    becomes an empty cell, and anything else is converted with ``str``.
    """
    import sys

    def alldigits(x):
        # Empty strings are not numbers; isdecimal() is False for them and
        # True only for characters that int() accepts.
        return isinstance(x, str) and x.isdecimal()

    def fmt(x):
        if x is None:
            return ""
        if type(x) == int:
            return "%12d" % x
        if alldigits(x):
            return "%12d" % int(x)
        return str(x)

    if html:
        # NOTE(review): the markup was missing from the copy this was
        # reviewed from; the element names follow the obvious table
        # structure, and passing the style as a class attribute is an
        # assumption -- confirm against upstream.
        print("<table>")
        for row in rows:
            print("<tr>")
            # Default styles are sized per row so no column is dropped.
            row_styles = styles if styles else [""] * len(row)
            for col, style in zip(row, row_styles):
                sys.stdout.write("<td class='%s'>%s</td>" % (style, col))
            print("</tr>")
        print("</table>")
        return

    lastRowCol0 = None
    for row in rows:
        first = row[0] if row else None
        if first != lastRowCol0:
            sys.stdout.write("\n")
            lastRowCol0 = first
        try:
            line = "\t".join([fmt(col) for col in row])
            sys.stdout.write(line)
            sys.stdout.write("\n")
        except UnicodeEncodeError:
            # The output encoding rejected the line: write it character by
            # character, replacing each rejected character with "?".
            for col in row:
                for ch in fmt(col):
                    try:
                        sys.stdout.write(ch)
                    except UnicodeEncodeError:
                        sys.stdout.write("?")
                sys.stdout.write("\t")
            print("(UNICODE ERROR)")
106 |
--------------------------------------------------------------------------------
/demos/vmstats/vmstats_json.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | History for my host
5 |
6 |
7 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | | [PREV] |
23 | [NEXT] |
24 |
25 |
26 |
27 |
28 |
Stats
29 |
30 | | Host: | |
31 | | Time: | |
32 | | CPU Utilization: | |
33 | | Mem Utilization: | |
34 | | Page: | 1 |
35 |
36 |
37 |
38 |
Processes
39 |
40 |
41 | | | CPU Time | |
42 |
43 | | PID | NAME | User |
44 | System | RSS |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
101 |
102 |
103 |
--------------------------------------------------------------------------------
/dfxml/bin/ihistogram.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """Draw a quick histogram of the timestamps on the hard drive"""
3 |
4 | import matplotlib
5 |
6 | matplotlib.use("agg.pdf")
7 |
8 |
9 | import datetime
10 | import time
11 |
12 | from matplotlib.dates import (
13 | MONDAY,
14 | SATURDAY,
15 | DateFormatter,
16 | MonthLocator,
17 | WeekdayLocator,
18 | )
19 | from pylab import *
20 |
21 | import dfxml.fiwalk as fiwalk
22 |
23 |
def get_dates_and_counts(times):
    """Count timestamps per UTC calendar day.

    *times* is an iterable of values accepted by ``time.gmtime``.  Returns a
    list of ``(date, count)`` tuples sorted by date.
    """
    from datetime import date

    per_day = {}
    for stamp in times:
        year, month, day = time.gmtime(stamp)[:3]
        key = date(year, month, day)
        if key in per_day:
            per_day[key] += 1
        else:
            per_day[key] = 1

    # dict items are (key, value) tuples; sorting them orders by date.
    return sorted(per_day.items())
37 |
38 |
def version1(times):
    """Draw a 100-bin histogram of *times* with pylab and display it."""
    import pylab

    # Grid is switched on before plotting; show() is the final call.
    pylab.grid()
    pylab.hist(times, 100)
    pylab.show()
45 |
46 |
def version2(times):
    """Plot the number of timestamps per day against the date and show it.

    *times* is passed to ``get_dates_and_counts``; each resulting
    ``(date, count)`` pair becomes one point of the date plot.
    """
    # see http://mail.python.org/pipermail/python-list/2003-November/236559.html
    # http://www.gossamer-threads.com/lists/python/python/665014
    from matplotlib.pylab import plot_date, show, xlabel, ylabel

    dates_and_counts = get_dates_and_counts(times)
    dates, counts = zip(*dates_and_counts)
    plot_date(dates, counts)
    xlabel("Date")
    ylabel("count")
    show()
69 |
70 |
def version3(times):
    """Save a bar chart of timestamps per day to ``hist.pdf``.

    *times* is passed to ``get_dates_and_counts``; one bar is drawn per day.
    The x axis has one major tick per year and spans from 1 January of the
    earliest year to 1 January of the year after the latest one.  The dates
    and their number are printed to stdout afterwards.

    Raises ValueError when *times* yields no timestamps.
    """
    import datetime

    import matplotlib.dates as mdates
    import matplotlib.pyplot as pyplot

    dates_and_counts = get_dates_and_counts(times)
    if not dates_and_counts:
        # Same exception type the tuple unpacking below would raise, but with
        # a message that names the actual problem.
        raise ValueError("no timestamps to plot")
    dates, counts = zip(*dates_and_counts)

    years = mdates.YearLocator()  # every year
    yearsFmt = mdates.DateFormatter("%Y")

    fig = pyplot.figure()
    ax = fig.add_subplot(111)
    ax.bar(dates, counts)

    ax.set_ylabel("file count")
    ax.set_xlabel("file modification time (mtime)")

    # Format the ticks
    ax.xaxis.set_major_locator(years)
    ax.xaxis.set_major_formatter(yearsFmt)

    # The upper limit is the start of the FOLLOWING year, so bars that fall
    # inside the last year are not clipped.
    datemin = datetime.date(min(dates).year, 1, 1)
    datemax = datetime.date(max(dates).year + 1, 1, 1)
    ax.set_xlim(datemin, datemax)
    ax.set_ylim(0, max(counts))

    # Format the coords message box; the y value is a file count.
    def count_label(y):
        return "%d" % y

    ax.format_xdata = mdates.DateFormatter("%Y-%m-%d")
    ax.format_ydata = count_label
    ax.grid(True)

    # rotates and right aligns the x labels, and moves the bottom of the
    # axes up to make room for them
    fig.autofmt_xdate()
    # Save through the figure itself instead of relying on a `plt` name that
    # only exists via the module's star import.
    fig.savefig("hist.pdf", format="pdf")

    print("dates:", dates)
    print("num dates:", len(dates))
121 | print("num dates:", len(dates))
122 |
123 |
if __name__ == "__main__":
    # Command-line entry point: collect the mtime of every file object in
    # the given DFXML file and save a histogram of them.
    import sys
    from optparse import OptionParser

    parser = OptionParser()
    parser.usage = "%prog [options] xmlfile "
    (options, args) = parser.parse_args()

    # An input file is mandatory; show the usage instead of failing on args[0].
    if not args:
        parser.print_help()
        sys.exit(1)

    times = []
    # The loop runs inside the `with`, so the file stays open for as long as
    # file objects are being produced and is closed afterwards.
    with open(args[0]) as xmlfile:
        for fi in fiwalk.fileobjects_using_sax(xmlfile=xmlfile):
            try:
                times.append(fi.mtime())
            except KeyError:
                # A KeyError from mtime() means this entry is skipped.
                pass

    version3(times)
143 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/FileObject_externals_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 | import xml.etree.ElementTree as ET
20 |
21 | import dfxml.objects as Objects
22 |
23 |
def test_all() -> None:
    """Exercise the ``externals`` list of a FileObject.

    Checks that appending a non-Element raises TypeError, that appending an
    element in the DFXML namespace raises ValueError, that elements from
    other namespaces can be appended, and that the three appended elements
    appear in the serialization and are present after de-serializing it.
    """
    _logger = logging.getLogger(os.path.basename(__file__))
    logging.basicConfig(level=logging.DEBUG)

    XMLNS_TEST_CLAMSCAN = "file:///opt/local/bin/clamscan"
    XMLNS_TEST_UNREGGED = "file:///dev/random"

    # Only the clamscan namespace gets a registered prefix; the other one is
    # deliberately left unregistered (see the 'test2' element below).
    ET.register_namespace("clam", XMLNS_TEST_CLAMSCAN)

    fi = Objects.FileObject()
    fi.filename = "clamscanned"

    # Try and fail to add a non-Element to the list.
    failed = None
    _logger.debug("Before: " + repr(fi.externals))
    try:
        fi.externals.append(1)
        failed = False
    except TypeError:
        failed = True
    except:
        # Any other exception is unexpected: record the failure and let it
        # propagate so the test errors out.
        failed = True
        raise
    _logger.debug("After: " + repr(fi.externals))
    assert failed
    failed = None

    # Dummy up a non-DFXML namespace element.  This should be appendable.
    e = ET.Element("{%s}scan_results" % XMLNS_TEST_CLAMSCAN)
    e.text = "Clean"
    fi.externals.append(e)

    # Dummy up a DFXML namespace element.  This should not be appendable (the schema specifies other namespaces).
    e = ET.Element("{%s}filename" % Objects.dfxml.XMLNS_DFXML)
    e.text = "Superfluous name"
    _logger.debug("Before: " + repr(fi.externals))
    try:
        fi.externals.append(e)
        failed = False
    except ValueError:
        failed = True
    except:
        # Unexpected exception type: propagate it.
        failed = True
        raise
    _logger.debug("After: " + repr(fi.externals))
    assert failed
    failed = None

    # Add an element with the colon prefix style
    e = ET.Element("clam:version")
    e.text = "20140101"
    fi.externals.append(e)

    # Add an element that doesn't have an ET-registered namespace prefix.
    e = ET.Element("{%s}test2" % XMLNS_TEST_UNREGGED)
    e.text = "yes"
    fi.externals.append(e)

    # Test serialization
    s = Objects._ET_tostring(
        fi.to_Element()
    )  # TODO Maybe this should be more than an internal function.
    _logger.debug(s)
    if s.find("scan_results") == -1:
        raise ValueError(
            "Serialization did not output other-namespace element 'scan_results'."
        )
    if s.find("clam:version") == -1:
        raise ValueError(
            "Serialization did not output prefixed element 'clam:version'."
        )
    if s.find("test2") == -1:
        raise ValueError(
            "Serialization did not output unregistered-prefix element 'test2'."
        )

    # Test de-serialization
    fir = Objects.FileObject()
    x = ET.XML(s)
    fir.populate_from_Element(x)
    _logger.debug("De-serialized: %r." % fir.externals)
    # The three elements appended successfully above must all be present.
    assert len(fir.externals) == 3
106 |
107 |
108 | if __name__ == "__main__":
109 | test_all()
110 |
--------------------------------------------------------------------------------
/tests/misc_object_tests/diffing_FileObject_test.py:
--------------------------------------------------------------------------------
1 | # This software was developed at the National Institute of Standards
2 | # and Technology in whole or in part by employees of the Federal
3 | # Government in the course of their official duties. Pursuant to
4 | # title 17 Section 105 of the United States Code portions of this
5 | # software authored by NIST employees are not subject to copyright
6 | # protection and are in the public domain. For portions not authored
7 | # by NIST employees, NIST has been granted unlimited rights. NIST
8 | # assumes no responsibility whatsoever for its use by other parties,
9 | # and makes no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | __version__ = "0.1.1"
15 |
16 | import logging
17 | import os
18 | import sys
19 |
20 | import dfxml.objects as Objects
21 |
22 |
def test_all() -> None:
    """Check FileObject comparison against modified copies.

    Builds a fully populated FileObject ``f0``, re-creates it twice from its
    Element form, alters each copy, and asserts that ``compare_to_other`` and
    ``compare_to_original`` report exactly the altered properties.
    """
    logging.basicConfig(level=logging.DEBUG)
    _logger = logging.getLogger(os.path.basename(__file__))

    f0 = Objects.FileObject()

    # NOTE(review): `fo` is created but not used by any live statement below;
    # it appears only in the commented-out byte-runs TODO.
    fo = Objects.FileObject()
    pfo = Objects.FileObject()
    pfo.inode = 234
    f0.parent_object = pfo
    f0.filename = "test file"
    f0.error = "Neither a real file, nor real error"
    f0.partition = 2
    f0.id = 235
    f0.name_type = "r"
    f0.filesize = 1234
    f0.unalloc = 0
    f0.unused = 0
    f0.orphan = 0
    f0.compressed = 1
    f0.inode = 6543
    f0.libmagic = "data"
    f0.meta_type = 8
    f0.mode = 755
    f0.nlink = 1
    f0.uid = "S-1-234-etc"
    f0.gid = "S-2-234-etc"
    f0.mtime = "1999-12-31T12:34:56Z"
    f0.ctime = "1998-12-31T12:34:56Z"
    f0.atime = "1997-12-31T12:34:56Z"
    f0.crtime = "1996-12-31T12:34:56Z"
    f0.seq = 3
    f0.dtime = "1995-12-31T12:34:56Z"
    f0.bkup_time = "1994-12-31T12:34:56Z"
    f0.link_target = "Nonexistent file"
    # This second assignment replaces the "data" value set above.
    f0.libmagic = "Some kind of compressed"
    f0.md5 = "db72d20e83d0ae39771403bc4cdde040"
    f0.sha1 = "866e1f426b2380aaf74a091aa0f39f62ae8a2de7"
    f0.sha256 = "4bc5996997ab9196b2d998b05ef302ed1dc167d74ec881533ee35008b5168630"
    f0.sha384 = "2ec378692eeae4b855f58832664f95bb85411caac8dcebe7cd3916e915559d3f0ccda688a1fad1e3f47801fe15298ac0"
    # fo.brs = brs #TODO
    _logger.debug("f0 = %r" % f0)
    _logger.debug("f0.to_dfxml() = %r" % f0.to_dfxml())

    e0 = f0.to_Element()
    _logger.debug("e0 = %r" % e0)

    # f1 = eval(repr(f0)) #TODO The recursive evals cause namespace confusion (Objects.foo); replace the next two lines when that's settled.
    f1 = Objects.FileObject()
    f1.populate_from_Element(e0)

    f2 = Objects.FileObject()
    f2.populate_from_Element(e0)

    # The id property should not be included in the comparisons
    f1.id = 111
    f1.alloc = False

    # f2 differs from f0 in its modification time and all four hashes.
    f2.mtime = "2999-12-31T12:34:56Z"
    f2.md5 = "593c8fe4a2236f3eeba7f4577b663876"
    f2.sha1 = "0c0c20c03bdb8913da8ea120bd59ba5f596deceb"
    f2.sha256 = "4f6dcb46e0f7b0ad748d083f6e92d7df586d0298a94acc3795287ff156614540"
    f2.sha384 = "2af87ca47d01989009caf3927a84be215528a53629dd935a828921ac0a4b22202bcba20d38fdd16d719b8c4241fcdacb"

    _logger.debug("f1 = %r" % f1)
    d01 = f0.compare_to_other(f1)
    _logger.debug("d01 = %r" % d01)
    # Either result is accepted: "alloc" alone, or "alloc" together with
    # "unalloc".
    assert d01 == set(["alloc"]) or d01 == set(["alloc", "unalloc"])

    d02 = f0.compare_to_other(f2)

    _logger.debug("d02 = %r" % d02)
    assert d02 == set(["mtime", "md5", "sha1", "sha256", "sha384"])

    # compare_to_original() must leave the same set in f2.diffs that
    # compare_to_other() returned.
    f2.original_fileobject = f0
    f2.compare_to_original()
    _logger.debug("f2.diffs = %r" % f2.diffs)
    assert f2.diffs == d02

    # TODO include byte_runs
104 |
105 | if __name__ == "__main__":
106 | test_all()
107 |
--------------------------------------------------------------------------------
/dfxml/bin/README.md:
--------------------------------------------------------------------------------
# Tools for working with DFXML files
2 |
3 | This directory contains scripts that can be run when the `dfxml` package is installed. Some of these tools are added to the shell's `PATH` when the `dfxml` package is installed. Others should be called in-place, e.g. with `python3 $PWD/allocation_counter.py`.
4 |
5 |
6 | ## Installed tools
7 |
8 | Some tools are provided as command-line programs when the `dfxml` module is installed. Their source is in this directory, with the suffix `.py`. The link in this table goes to the tool's documentation and testing directory.
9 |
10 | | Program name | Short description |
11 | |-------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------|
12 | | [`walk_to_dfxml`](../../tests/walk_to_dfxml/#walk_to_dfxml) | Fully walk the current working directory and record all files encountered. |
13 | | [`make_differential_dfxml`](../../tests/make_differential_dfxml/#make_differential_dfxml) | Produce a DFXML file denoting file system changes noted by two input DFXML files. |
14 |
15 | ## In-place scripts
16 |
17 | The following DFXML tools are provided in this directory:
18 |
19 | | Script name | Short description |
20 | |----------------------------|--------------------------------------------------------------------------------------|
21 | | `allocation_counter.py` | Produces a cross-tabulation of the allocation state of each file's inode and name. |
22 | | `cat_fileobjects.py` | Prints a new DFXML of all fileobjects in an input DFXML file to stdout. |
| `cat_partitions.py` | Concatenates DFXML files containing one partition each and prints the result to stdout. |
24 | | `deidentify_xml.py` | Removes PII from filenames in a DFXML file. |
25 | | `dfxinfo.py` | Print a summary of a DFXML file - summary of all files, duplicate files, file types. |
| `dfxml_gen.py` | Generates DFXML. Based on the C generator. |
27 | | `dfxml_html.py` | A collection of functions for generating HTML. |
| `Extractor.py` | Extracts files specified in an XML file (or all) from an image to a target directory. |
29 | | `hash_sectors.py` | Outputs sector hashes for sectors with files matching a predicate. |
| `iblkfind.py` | Outputs files that are located in a given set of sectors. |
31 | | `icarvingtruth.py` | Finds the ground truth in a predefined series of disk images. |
32 | | `idifference.py` | Generates a report about what's different between two disk images. |
| `igrep.py` | Finds files in an image that contain the given string. |
34 | | `ihistogram.py` | Draws a quick histogram of the timestamps in an XML file. |
| `imap.py` | Maps image files and tries to find "missing" data by comparing with the other images. |
36 | | `iredact.py` | Image redaction tool using rules described in the file. |
37 | | `ireport.py` | Generates stats from a DFXML file(s). |
38 | | `iverify.py` | Reads an XML file and image and verifies that the files are present. |
| `rdifference.py` | Finds and reports differences in two Windows registry hive files. |
40 | | `report_silent_changes.py` | Takes a differentially-annotated DFXML file and outputs subtle and 'silent' changes. |
41 |
42 |
43 | ### Work needed
44 |
45 | - `dfxml_tool.py`
46 | - `idifference2.py`
47 | - `iexport.py`
48 | - `exp_slack.py`
49 | - `validate_dfxml.py`
50 | - `nsrl_rds.py`
51 | - `corpus_sync.py`
52 |
53 |
54 | ### Uncategorized
55 |
56 | - `break_out_diffs_by_anno.py`
57 | - `mem_info.py` (no dependencies)
58 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | # While SHELL would typically be set with ":=" assignment, some
15 | # environments do not have Bash at /bin/bash (e.g. FreeBSD stores Bash
16 | # at /usr/local/bin/bash).
17 | ifeq ($(shell basename $(SHELL)),sh)
18 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash)
19 | endif
20 |
21 | PYTHON3 ?= python3
22 | ifeq ($(PYTHON3),)
23 | $(error python3 not found)
24 | endif
25 |
26 | all: \
27 | .venv-pre-commit/var/.pre-commit-built.log
28 |
29 | .PHONY: \
30 | check-mypy \
31 | check-supply-chain \
32 | check-supply-chain-pre-commit
33 |
34 | .git_submodule_init.done.log: .gitmodules
35 | # Confirm dfxml_schema has been checked out at least once.
36 | test -r dependencies/dfxml_schema/dfxml.xsd \
37 | || (git submodule init dependencies/dfxml_schema && git submodule update dependencies/dfxml_schema)
38 | test -r dependencies/dfxml_schema/dfxml.xsd
39 | touch $@
40 |
41 | # This virtual environment is meant to be built once and then persist, even through 'make clean'.
42 | # If a recipe is written to remove this flag file, it should first run `pre-commit uninstall`.
43 | .venv-pre-commit/var/.pre-commit-built.log:
44 | rm -rf .venv-pre-commit
45 | test -r .pre-commit-config.yaml \
46 | || (echo "ERROR:Makefile:pre-commit is expected to install for this repository, but .pre-commit-config.yaml does not seem to exist." >&2 ; exit 1)
47 | $(PYTHON3) -m venv \
48 | .venv-pre-commit
49 | source .venv-pre-commit/bin/activate \
50 | && pip install \
51 | --upgrade \
52 | pip \
53 | setuptools \
54 | wheel
55 | source .venv-pre-commit/bin/activate \
56 | && pip install \
57 | pre-commit
58 | source .venv-pre-commit/bin/activate \
59 | && pre-commit install
60 | mkdir -p \
61 | .venv-pre-commit/var
62 | touch $@
63 |
64 | clean:
65 | find . -name '*~' -exec rm {} \;
66 | $(MAKE) \
67 | --directory tests \
68 | clean
69 |
70 | check: \
71 | .git_submodule_init.done.log \
72 | .venv-pre-commit/var/.pre-commit-built.log
73 | $(MAKE) \
74 | PYTHON3=$(PYTHON3) \
75 | SHELL=$(SHELL) \
76 | --directory tests \
77 | check
78 |
79 | check-mypy: \
80 | .git_submodule_init.done.log
81 | $(MAKE) \
82 | PYTHON3=$(PYTHON3) \
83 | SHELL=$(SHELL) \
84 | --directory tests \
85 | check-mypy
86 |
87 | check-supply-chain: \
88 | check-supply-chain-pre-commit \
89 | check-mypy
90 |
91 | # Update pre-commit configuration and use the updated config file to
92 | # review code. Only have Make exit if 'pre-commit run' modifies files.
# Flow of the second recipe line: if the autoupdate left the configuration
# unchanged, stop there.  Otherwise run the hooks with the updated
# configuration; only when that run fails AND the working tree then differs
# is the warning printed and the target failed.
check-supply-chain-pre-commit: \
  .venv-pre-commit/var/.pre-commit-built.log
	source .venv-pre-commit/bin/activate \
	  && pre-commit autoupdate
	git diff \
	  --exit-code \
	  .pre-commit-config.yaml \
	  || ( \
	      source .venv-pre-commit/bin/activate \
	        && pre-commit run \
	          --all-files \
	          --config .pre-commit-config.yaml \
	    ) \
	    || git diff \
	      --stat \
	      --exit-code \
	      || ( \
	          echo \
	            "WARNING:Makefile:pre-commit configuration can be updated. It appears the update would change file formatting." \
	            >&2 \
	            ; exit 1 \
	        )
	@git diff \
	  --exit-code \
	  .pre-commit-config.yaml \
	  || echo \
	    "INFO:Makefile:pre-commit configuration can be updated. It appears the update would not change file formatting." \
	    >&2
121 |
# Run the object tests' own check target.  $(MAKE) is used, as in the other
# recursive calls in this file, so command-line flags and the jobserver reach
# the sub-make.
check-tools:
	$(MAKE) \
	  --directory tests/misc_object_tests \
	  check
124 |
--------------------------------------------------------------------------------
/dfxml/bin/deidentify_xml.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | #
3 | # deidentify_xml.py:
4 | # Given XML for a disk, remove information that might be personally identifying from filenames.
5 | # remember the mapping so that directory names don't get changed.
6 | #
7 | # 2012-10-27 slg - updated to Python3
8 |
9 | import typing
10 |
11 | private_dirs = ["home/", "usr/home", "Users"]
12 | ok_top_paths_win = ["program files/", "System", "Windows"]
13 | ok_top_paths_mac = [
14 | "bin/",
15 | "usr",
16 | "etc",
17 | "private",
18 | "applications",
19 | "developer",
20 | "bin",
21 | "sbin",
22 | "lib",
23 | "dev",
24 | ]
25 | ok_top_paths = ok_top_paths_win + ok_top_paths_mac + ["$orphanfiles"]
26 | acceptable_extensions = ["exe", "dll", "sys", "com", "hlp"]
27 |
28 | import os
29 | import os.path
30 | import sys
31 |
32 | partdir: typing.Dict[str, str] = dict()
33 |
34 |
def sanitize_part(part):
    """Return the stable replacement token for one path component.

    The first time a component is seen it is assigned ``P`` followed by a
    seven-digit, zero-padded sequence number and recorded in the module-level
    ``partdir``; later calls with the same component return the same token.
    """
    try:
        return partdir[part]
    except KeyError:
        # The sequence number is computed before the new entry is stored.
        token = partdir[part] = "P%07d" % (len(partdir) + 1)
        return token
40 |
41 |
def sanitize_filename(fname):
    """Return *fname* with its components replaced by opaque tokens.

    A path whose text after one optional leading "/" starts with one of
    ``ok_top_paths`` (compared case-insensitively) is returned unchanged.
    Otherwise every directory component is replaced through
    ``sanitize_part``.  The final component keeps its extension; its stem is
    replaced too unless the extension (without the dot, case-insensitive) is
    listed in ``acceptable_extensions``.  A leading "/" is preserved and the
    number of components does not change.  An empty *fname* is returned as is.
    """
    if not fname:
        return fname

    is_absolute = fname[0] == "/"
    relative = fname[1:] if is_absolute else fname

    # Both sides are lowercased: several ok_top_paths entries are capitalised.
    lowered = relative.lower()
    if any(lowered.startswith(top.lower()) for top in ok_top_paths):
        return fname

    # Split the path WITHOUT its leading slash so the empty component before
    # that slash is not turned into a token of its own.
    parts = relative.split("/")
    parts[:-1] = [sanitize_part(s) for s in parts[:-1]]
    (root, ext) = os.path.splitext(parts[-1])
    # splitext() keeps the dot (".exe"); the allow-list holds bare names.
    if ext[1:].lower() not in acceptable_extensions:
        parts[-1] = sanitize_part(root) + ext

    sanitized = "/".join(parts)
    return "/" + sanitized if is_absolute else sanitized
65 |
66 |
class xml_sanitizer:
    """Read and write the XML, but sanitize the filename elements.

    The input is parsed with expat and re-emitted on *out* element by
    element.  The text of every ``filename`` element is passed through
    ``sanitize_filename``.  Text and attribute values are re-escaped on
    output so the result stays well-formed.
    """

    def __init__(self, out):
        """*out* is a text stream with a ``write`` method."""
        self.out = out
        self.cdata = ""

    def _start_element(self, name, attrs):
        """Handles the start of an element for the XPAT scanner"""
        from xml.sax.saxutils import quoteattr

        s = ["<", name]
        if attrs:
            for a, v in attrs.items():
                # quoteattr() picks the quote character and escapes whatever
                # cannot appear literally inside the value.
                s += [" ", a, "=", quoteattr(v)]
        s += [">"]
        self.out.write("".join(s))
        self.cdata = ""  # new element

    def _end_element(self, name):
        """Handles the end of an element for the XPAT scanner"""
        from xml.sax.saxutils import escape

        if name == "filename":
            self.cdata = sanitize_filename(self.cdata)
        if self.cdata == "\n":
            self.cdata = ""
        # expat delivers text unescaped, so it is escaped again here (after
        # sanitizing); the closing tag needs its "</" opener.
        self.out.write("".join([escape(self.cdata), "</", name, ">"]))
        self.cdata = ""

    def _char_data(self, data):
        """Handles XML data"""
        # expat may deliver one text node in several pieces; accumulate them.
        self.cdata += data

    def process_xml_stream(self, xml_stream):
        "Run the reader on a given XML input stream"
        import xml.parsers.expat

        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self._start_element
        p.EndElementHandler = self._end_element
        p.CharacterDataHandler = self._char_data
        p.ParseFile(xml_stream)
109 |
110 |
# Command-line entry point: optionally preview how the files under a
# directory would be renamed (--test DIR), then sanitize the DFXML file
# named by the first positional argument and write it to stdout.
if __name__ == "__main__":
    from optparse import OptionParser

    parser = OptionParser(usage="usage: %prog [options] dfxml_file")
    parser.add_option("-t", "--test", help="Test a specific pathname to sanitize")
    (options, args) = parser.parse_args()

    if options.test:
        if os.path.isdir(options.test):
            for dirpath, dirnames, filenames in os.walk(options.test):
                for filename in filenames:
                    fn = dirpath + "/" + filename
                    print("%s\n %s" % (fn, sanitize_filename(fn)))

    if not args:
        if options.test:
            # A test-only run has nothing further to process.
            sys.exit(0)
        parser.error("no DFXML input file given")

    x = xml_sanitizer(sys.stdout)
    # Binary mode lets expat handle the document's declared encoding.
    with open(args[0], "rb") as xml_stream:
        x.process_xml_stream(xml_stream)
128 |
--------------------------------------------------------------------------------
/dfxml/bin/filesdb.py:
--------------------------------------------------------------------------------
1 | #
2 | # filesdb
3 | # a module that holds a database of DFXML files
4 | #
5 |
6 | import sys
7 | from collections import defaultdict
8 |
9 | import dfxml
10 |
11 |
class filesdb:
    """In-memory index of the file objects read from DFXML files.

    Every file object ("fi") delivered by ``dfxml.read_dfxml`` is kept in
    ``self.fis`` and indexed by SHA-1, MD5, file name and directory name.
    ``prefix`` is prepended to, and ``delfix`` is stripped from the front
    of, each file name before it is indexed.
    """

    def __init__(self, fname=None):
        self.sha1db = defaultdict(list)  # fi's by SHA-1
        self.md5db = defaultdict(list)  # fi's by MD5
        self.fnamedb = defaultdict(list)  # fi's by fname
        self.dirs = defaultdict(list)  # fi's by directory name
        self.fis = []
        self.fname = None  # path of the most recently read file, if any
        self.prefix = None
        self.delfix = None
        if fname:
            self.read(fname)

    def __iter__(self):
        """The iterator for filesdb iterates through all the files"""
        return iter(self.fis)

    def read(self, f):
        """Read a DFXML file, given as a path or as an open binary stream."""
        if isinstance(f, str):
            self.fname = f
            # NOTE(review): assumes read_dfxml finishes parsing before it
            # returns, so the file can be closed here -- confirm.
            with open(f, "rb") as xmlfile:
                dfxml.read_dfxml(xmlfile=xmlfile, callback=self.pass1)
        else:
            dfxml.read_dfxml(xmlfile=f, callback=self.pass1)

    def read_with_prefix(self, fname):
        """Read ``fname``, honoring an optional ``FMT:`` part before the path.

        ``+TEXT:path`` and ``TEXT:path`` set ``self.prefix`` to TEXT;
        ``=TEXT:path`` sets ``self.delfix`` to TEXT.  Only the first colon
        separates the format from the path.
        """
        if ":" in fname:
            (fmt, fname) = fname.split(":", 1)
            if fmt.startswith("+"):
                self.prefix = fmt[1:]
            elif fmt.startswith("="):
                self.delfix = fmt[1:]
            else:
                self.prefix = fmt
        self.read(fname)

    def pass1(self, fi):
        """First pass for reading fi objects"""
        import os

        self.fis.append(fi)
        if fi.sha1():
            self.sha1db[fi.sha1()].append(fi)
        if fi.md5():
            self.md5db[fi.md5()].append(fi)
        if fi.filename():
            fname = fi.filename()
            if self.delfix and fname.startswith(self.delfix):
                fname = fname[len(self.delfix) :]
            if self.prefix:
                fname = self.prefix + fname
            # Index by name in fnamedb (not sha1db) so name searches work
            # and the SHA-1 count is not inflated by file names.
            self.fnamedb[fname].append(fi)
            self.dirs[os.path.dirname(fname)].append(fi)

    def print_stats(self, f=sys.stdout):
        """Print summary statistics for the database to ``f``."""
        ret = [
            ["Total directories", len(self.dirs)],
            ["Total files", len(self.fis)],
            ["Total bytes", sum(int(fi.filesize()) for fi in self.fis)],
            ["Total sha1s", len(self.sha1db)],
            ["Total md5s", len(self.md5db)],
        ]
        print("\n".join(["{:20}: {:14,}".format(a[0], a[1]) for a in ret]), file=f)

    def del_dirs(self, targetdb):
        """Return the directory names present here but absent from targetdb."""
        return set(self.dirs.keys()).difference(set(targetdb.dirs.keys()))

    def del_files(self, targetdb):
        """Return the file names present here but absent from targetdb."""
        return set(self.fnamedb.keys()).difference(set(targetdb.fnamedb.keys()))

    def new_dirs(self, db):
        """Return the directory names present in db but absent from here."""
        return set(db.dirs.keys()).difference(set(self.dirs.keys()))

    def search(self, mfi, hash=False, name=False):
        """Return the list of fis matching ``mfi`` by MD5 and/or file name.

        With neither ``hash`` nor ``name`` set, an empty list is returned.
        Lookups use ``.get`` so a miss does not add an empty entry to the
        index.
        """
        if hash and not name:
            return self.md5db.get(mfi.md5(), [])
        if name and not hash:
            return self.fnamedb.get(mfi.filename(), [])
        if hash and name:
            wanted = mfi.filename()
            return [fi for fi in self.md5db.get(mfi.md5(), []) if fi.filename() == wanted]
        return []
107 |
108 |
109 | #
110 | # test program. Reads a database and dumps it.
111 | #
# Script entry point: load every DFXML file named on the command line into
# a single database, then print its summary statistics.
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(
        description="Test the files database with one or more DFXML files"
    )
    cli.add_argument("xmlfiles", nargs="+", help="XML files to process")
    args = cli.parse_args()

    db = filesdb()
    for xml_path in args.xmlfiles:
        db.read(xml_path)
    db.print_stats()
125 |
--------------------------------------------------------------------------------
/CONTRIBUTE.md:
--------------------------------------------------------------------------------
1 | # Contributing to DFXML's Python code base
2 |
3 |
4 | ## Pre-commit
5 |
6 | This project uses [the `pre-commit` tool](https://pre-commit.com/) for linting.
7 |
8 | `pre-commit` hooks into Git's commit machinery to run a set of linters and static analyzers over each change. To install `pre-commit` into Git's hooks, run one (not both) of the following sets of commands:
9 |
10 | ```bash
11 | pip install pre-commit
12 | pre-commit --version
13 | pre-commit install
14 | ```
15 |
16 | Or:
17 |
18 | ```bash
19 | make
20 | ```
21 |
22 |
23 | ## Installable tools versus in-place scripts
24 |
25 | The [`dfxml/bin/`](dfxml/bin/) directory contains scripts for interacting with DFXML. Some of the tools are installed in the command-line `$PATH` when the `dfxml` package is installed.
26 |
27 | If there is a request to add a tool to the package's installed-tools list, the tool should have these implemented:
28 | 1. A unit test suite that exercises the tool's command line features, such as flags, and `pytest` tests to confirm expected output.
29 | 2. A documentation page, preferably a README alongside the unit test suite. The documentation should include command-line usage.
30 | 3. A row in [`dfxml/bin/README.md`](dfxml/bin/README.md)'s table of installed tools, linking to the documentation.
31 | 4. The tool should be analyzed with a static type checker. See e.g. the target `check-mypy` in the [tests Makefile](tests/Makefile) that is run as part of CI. (Note this would be started by adding type signatures to the tool's functions.)
32 |
33 |
34 | ## Version management
35 |
36 | **Note that DFXML 1.0.2 DOES NOT YET follow SEMVER practices.**
37 |
38 | This project plans to adopt [SEMVER](https://semver.org/) to denote expected stability of its offered resources. The project *has not yet* adopted SEMVER; when it does, a note will be added to the README.
39 |
40 | Once a SEMVER-adherent major version is declared, backwards-incompatible commits will be merged into the `release-x.0.0` branch (where `x` is the next major version) instead of `develop`.
41 |
42 | Following SEMVER's `major.minor.patch` version designation:
43 | * The `major` version will increment on deploying changes that are backwards-incompatible with the prior major release.
44 | * The `minor` version will increment on new functionality being added.
45 | * The `patch` version will increment on new tests for existing functionality being added, or a bug being fixed, with some discretion to be used for any needed interface corrections.
46 |
47 | The following are this repository's policies on backwards compatibility for this project's resources.
48 |
49 |
50 | ### Version of the DFXML Python code base
51 |
52 | The overall package version of `dfxml` is stored in one location, the `__version__` variable of `dfxml/__init__.py`.
53 |
54 | Other resources may track their own version independently.
55 |
56 |
57 | ### Package resources
58 |
59 | The set of command-line tools offered in the package (defined in `setup.cfg`) is considered in-scope for backwards compatibility.
60 |
61 |
62 | ### Command-line functionality
63 |
64 | Tests that illustrate expected command-line behavior are available under the [`tests/`](tests/) directory. See the `Makefile`s under the directories named after the provided tools. Recipes that include activating a virtual environment (e.g. `source $(tests_srcdir)/venv/bin/activate`) show command line execution patterns.
65 |
66 | The command-line functionality demonstrated by the `Makefile`s under `tests/` is considered in-scope for backwards compatibility.
67 |
68 |
69 | ### Module functionality
70 |
71 | This project uses the [`pytest`](https://docs.pytest.org) framework to run unit tests. These tests encode the expected behaviors of command-line results, and of module functions. Tests generally follow a "Ground-truth comparison" model, where an expected set of results is compared to a computed set of results (generally, as `expected_X == computed_X`).
72 |
73 | The module functionality exercised by the `pytest` unit tests is considered in-scope for backwards compatibility.
74 |
75 |
76 | ## Merge model
77 |
78 | On adoption of SEMVER, this project will follow the `git-flow` merge model. In short:
79 | * The `main` branch will contain tagged release commits only.
80 | * The `develop` branch will be the target of Pull Requests for new features.
81 | * `release-x.y.z` branches will be made off of `develop` when a new release is to be tagged, and merged into `main` and back into `develop`.
82 |
83 | The above practice can be seen illustrated in the first figure on [this page](https://nvie.com/posts/a-successful-git-branching-model/).
84 |
--------------------------------------------------------------------------------
/tests/make_differential_dfxml/Makefile:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # This software was developed at the National Institute of Standards
4 | # and Technology by employees of the Federal Government in the course
5 | # of their official duties. Pursuant to title 17 Section 105 of the
6 | # United States Code this software is not subject to copyright
7 | # protection and is in the public domain. NIST assumes no
8 | # responsibility whatsoever for its use by other parties, and makes
9 | # no guarantees, expressed or implied, about its quality,
10 | # reliability, or any other characteristic.
11 | #
12 | # We would appreciate acknowledgement if the software is used.
13 |
14 | # Bash selection is described in the top-level Makefile.
15 | ifeq ($(shell basename $(SHELL)),sh)
16 | SHELL := $(shell which /bin/bash 2>/dev/null || which /usr/local/bin/bash)
17 | endif
18 |
19 | top_srcdir := $(shell cd ../.. ; pwd)
20 |
21 | tests_srcdir := $(top_srcdir)/tests
22 |
# Default target: build the four summary text files.
# Declared phony so a stray file named "all" cannot mask the target.
.PHONY: all
all: \
  differential_dfxml_test_by_path_01.txt \
  differential_dfxml_test_by_path_23.txt \
  differential_dfxml_test_by_times_01.txt \
  differential_dfxml_test_by_times_23.txt
28 |
# Build the four summary text files, then run pytest (debug-level logging)
# inside the tests virtual environment.
# Declared phony so a stray file named "check" cannot mask the target.
.PHONY: check
check: \
  differential_dfxml_test_by_path_01.txt \
  differential_dfxml_test_by_path_23.txt \
  differential_dfxml_test_by_times_01.txt \
  differential_dfxml_test_by_times_23.txt
	source $(tests_srcdir)/venv/bin/activate \
	  && pytest \
	    --log-level=DEBUG
37 |
# Remove generated .dfxml and .txt files, then restore the Git-tracked
# .txt files.  "|| true" keeps clean from failing if the checkout fails.
# Declared phony so a stray file named "clean" cannot mask the target.
.PHONY: clean
clean:
	@rm -f \
	  *.dfxml \
	  *.txt
	@#Restore Git-tracked version of these files, so deletions aren't accidentally committed.
	@git checkout \
	  -- \
	  differential_dfxml_test_by_path_01.txt \
	  differential_dfxml_test_by_path_23.txt \
	  differential_dfxml_test_by_times_01.txt \
	  differential_dfxml_test_by_times_23.txt \
	  || true
50 |
# Differential DFXML between samples difference_test_0.xml and
# difference_test_1.xml, pretty-printed with xmllint.
# Output is staged in __$@ (raw) and _$@ (formatted) and only renamed to $@
# as the last step, so a failed step never leaves a partial $@ behind.
differential_dfxml_test_01.dfxml: \
  $(tests_srcdir)/.venv.done.log \
  $(top_srcdir)/dfxml/bin/make_differential_dfxml.py \
  $(top_srcdir)/samples/difference_test_0.xml \
  $(top_srcdir)/samples/difference_test_1.xml
	rm -f \
	  __$@ \
	  _$@
	source $(tests_srcdir)/venv/bin/activate \
	  && make_differential_dfxml \
	    $(top_srcdir)/samples/difference_test_0.xml \
	    $(top_srcdir)/samples/difference_test_1.xml \
	    > __$@
	xmllint \
	  --format \
	  __$@ \
	  > _$@
	rm __$@
	mv _$@ $@
70 |
# Summary of differential_dfxml_test_01.dfxml, sorted by path.
# Written to _$@ first and renamed, so $@ only appears on success.
differential_dfxml_test_by_path_01.txt: \
  $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
  differential_dfxml_test_01.dfxml
	source $(tests_srcdir)/venv/bin/activate \
	  && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
	    --debug \
	    --sort-by path \
	    differential_dfxml_test_01.dfxml \
	    > _$@
	mv _$@ $@
81 |
# Summary of differential_dfxml_test_01.dfxml, sorted by times.
# Written to _$@ first and renamed, so $@ only appears on success.
differential_dfxml_test_by_times_01.txt: \
  $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
  differential_dfxml_test_01.dfxml
	source $(tests_srcdir)/venv/bin/activate \
	  && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
	    --debug \
	    --sort-by times \
	    differential_dfxml_test_01.dfxml \
	    > _$@
	mv _$@ $@
92 |
# Differential DFXML between samples difference_test_2.xml and
# difference_test_3.xml, pretty-printed with xmllint.
# Output is staged in __$@ (raw) and _$@ (formatted) and only renamed to $@
# as the last step, so a failed step never leaves a partial $@ behind.
differential_dfxml_test_23.dfxml: \
  $(tests_srcdir)/.venv.done.log \
  $(top_srcdir)/dfxml/bin/make_differential_dfxml.py \
  $(top_srcdir)/samples/difference_test_2.xml \
  $(top_srcdir)/samples/difference_test_3.xml
	rm -f \
	  __$@ \
	  _$@
	source $(tests_srcdir)/venv/bin/activate \
	  && make_differential_dfxml \
	    $(top_srcdir)/samples/difference_test_2.xml \
	    $(top_srcdir)/samples/difference_test_3.xml \
	    > __$@
	xmllint \
	  --format \
	  __$@ \
	  > _$@
	rm __$@
	mv _$@ $@
112 |
# Summary of differential_dfxml_test_23.dfxml, sorted by path.
# Written to _$@ first and renamed, so $@ only appears on success.
differential_dfxml_test_by_path_23.txt: \
  $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
  differential_dfxml_test_23.dfxml
	source $(tests_srcdir)/venv/bin/activate \
	  && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
	    --debug \
	    --sort-by path \
	    differential_dfxml_test_23.dfxml \
	    > _$@
	mv _$@ $@
123 |
# Summary of differential_dfxml_test_23.dfxml, sorted by times.
# Written to _$@ first and renamed, so $@ only appears on success.
differential_dfxml_test_by_times_23.txt: \
  $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
  differential_dfxml_test_23.dfxml
	source $(tests_srcdir)/venv/bin/activate \
	  && python $(top_srcdir)/dfxml/bin/summarize_differential_dfxml.py \
	    --debug \
	    --sort-by times \
	    differential_dfxml_test_23.dfxml \
	    > _$@
	mv _$@ $@
134 |
--------------------------------------------------------------------------------