├── .editorconfig
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── docs
    ├── 0_Intro.md
    ├── 1_Tutorial.md
    ├── 2_Installation_and_requirements.md
    ├── 3_Architecture.md
    ├── 4_Configuration.md
    ├── 5_Existing_plugins.md
    ├── 6_Develop_your_own_plugin.md
    ├── 7_Licenses.md
    └── 8_FAQ.md
├── pyproject.toml
├── src
    └── orc2timeline
    │   ├── __init__.py
    │   ├── __main__.py
    │   ├── cli.py
    │   ├── conf
    │       └── Orc2Timeline.yaml
    │   ├── config.py
    │   ├── core.py
    │   ├── info.py
    │   ├── plugins
    │       ├── EventLogsToTimeline-eventmap.txt
    │       ├── EventLogsToTimeline.py
    │       ├── GenericToTimeline.py
    │       ├── I30InfoToTimeline.py
    │       ├── NTFSInfoToTimeline.py
    │       ├── RegistryToTimeline-important-keys.txt
    │       ├── RegistryToTimeline.py
    │       ├── USNInfoToTimeline.py
    │       └── __init__.py
    │   └── py.typed
└── tests
    ├── __init__.py
    ├── conftest.py
    ├── data
        ├── conf_7_archives
        │   ├── ORC_Server_FAKEMACHINE_Detail.7z
        │   ├── ORC_Server_FAKEMACHINE_General.7z
        │   ├── ORC_Server_FAKEMACHINE_Little.7z
        │   ├── ORC_Server_FAKEMACHINE_Memory.7z
        │   └── ORC_Server_FAKEMACHINE_SAM.7z
        └── null_csv
        │   ├── ORC_Server_FAKEMACHINE_Detail.7z
        │   ├── ORC_Server_FAKEMACHINE_General.7z
        │   ├── ORC_Server_FAKEMACHINE_Little.7z
        │   ├── ORC_Server_FAKEMACHINE_Memory.7z
        │   └── ORC_Server_FAKEMACHINE_SAM.7z
    ├── output
        └── .gitignore
    ├── test_cli.py
    ├── test_config.py
    └── test_core.py


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # https://editorconfig.org/
 2 | root = true
 3 | 
 4 | [*]
 5 | charset = utf-8
 6 | end_of_line = lf
 7 | indent_size = 2
 8 | indent_style = space
 9 | 
10 | [*.py]
11 | indent_size = 4
12 | max_line_length = 120
13 | insert_final_newline = true
14 | trim_trailing_whitespace = true
15 | 
16 | [Dockerfile]
17 | indent_size = 4
18 | 
19 | [*.{yml,yaml,toml,json,jsonc,jsonl,js,ts}]
20 | indent_size = 2
21 | insert_final_newline = true
22 | trim_trailing_whitespace = true
23 | 
24 | [*.{bat,cmd,ps1}]
25 | end_of_line = crlf
26 | insert_final_newline = true
27 | trim_trailing_whitespace = true
28 | 
29 | [*.{md,txt,rst}]
30 | insert_final_newline = true
31 | trim_trailing_whitespace = false
32 | 
33 | [*.tsv]
34 | indent_style = tab
35 | 
36 | [Makefile]
37 | indent_style = tab
38 | insert_final_newline = true
39 | trim_trailing_whitespace = true
40 | 
41 | [LICENSE]
42 | insert_final_newline = false
43 | trim_trailing_whitespace = true
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # cached files
 2 | __pycache__/
 3 | *.py[cod]
 4 | .cache
 5 | 
 6 | # installation package
 7 | *.egg-info/
 8 | dist/
 9 | build/
10 | 
11 | # environments
12 | .env
13 | .venv
14 | env/
15 | venv/
16 | ENV/
17 | env.bak/
18 | venv.bak/
19 | 
20 | # pycharm
21 | .idea/
22 | 
23 | # vscode
24 | .vscode/
25 | *.code-workspace
26 | 
27 | # mypy
28 | .mypy_cache/
29 | .dmypy.json
30 | dmypy.json
31 | mypy.ini
32 | 
33 | # test caches
34 | .tox/
35 | .pytest_cache/
36 | .coverage
37 | htmlcov
38 | report.xml
39 | coverage.xml
40 | 
41 | # Docs
42 | public/
43 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 		   GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include .editorconfig
 2 | include .gitignore
 3 | include LICENSE
 4 | include Makefile
 5 | include MANIFEST.in
 6 | include pyproject.toml
 7 | include README.rst
 8 | include src/orc2timeline/py.typed
 9 | recursive-include tests *
10 | recursive-include docs *
11 | recursive-exclude * __pycache__
12 | recursive-exclude * *.py[co]
13 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------
  2 | # OS dependent configuration
  3 | # ----------------------------------------------------------------------
  4 | 
  5 | VENV=venv/bin/
  6 | LIB=venv/Lib/site-packages/
  7 | MARKER=venv/marker
  8 | EXE=
  9 | ifeq ($(OS),Windows_NT)
 10 | VENV=venv/Scripts/
 11 | LIB=venv/Lib/site-packages/
 12 | MARKER=venv/marker
 13 | EXE=.exe
 14 | endif
 15 | 
 16 | 
 17 | # ----------------------------------------------------------------------
 18 | # Python interpreter detection
 19 | # ----------------------------------------------------------------------
 20 | 
 21 | ARG_COMMAND="import sys;print(sys.version_info[:2]>=(3, 8))"
 22 | 
 23 | ifeq (ok,$(shell test -e /dev/null 2>&1 && echo ok))
 24 | NULL_STDERR=2>/dev/null
 25 | else
 26 | NULL_STDERR=2>NUL
 27 | endif
 28 | 
 29 | ifndef PY
 30 | 
 31 | ifndef _PY
 32 | ifeq (True,$(shell py -3 -c $(ARG_COMMAND) $(NULL_STDERR)))
 33 | _PY=py -3
 34 | endif
 35 | endif
 36 | 
 37 | ifndef _PY
 38 | ifeq (True,$(shell python3 -c $(ARG_COMMAND) $(NULL_STDERR)))
 39 | _PY=python3
 40 | endif
 41 | endif
 42 | 
 43 | ifndef _PY
 44 | ifeq (True,$(shell python -c $(ARG_COMMAND) $(NULL_STDERR)))
 45 | PY=python
 46 | endif
 47 | 
 48 | endif
 49 | 
 50 | ifndef _PY
 51 | $(error Could not detect Python 3.8 or greather interpreter automatically, please use PY environment variable.)
 52 | endif
 53 | 
 54 | PY=$(shell $(_PY) -c "import os,sys;print(sys.base_prefix.replace(os.sep,'/') + ('/python.exe' if os.name == 'nt' else '/bin/python3'))")
 55 | 
 56 | endif
 57 | 
 58 | ifneq (True,$(shell $(PY) -c $(ARG_COMMAND) $(NULL_STDERR)))
 59 | $(error $(PY) is not a valid Python 3.8 or greather interpreter)
 60 | endif
 61 | 
 62 | # ----------------------------------------------------------------------
 63 | # Configuration
 64 | # ----------------------------------------------------------------------
 65 | 
 66 | GIT=git
 67 | PIP=$(PY) -m pip
 68 | VENV_PY=$(VENV)python$(EXE)
 69 | VENV_PIP=$(VENV)pip$(EXE)
 70 | 
 71 | RM_GLOB := $(PY) -c "import shutil,sys,pathlib;[shutil.rmtree(sp, ignore_errors=False) if sp.is_dir() else sp.unlink() for p in sys.argv[1:]for sp in pathlib.Path().resolve().glob(p)]"
 72 | BROWSER := $(PY) -c "import os,webbrowser,sys;from urllib.request import pathname2url;webbrowser.open('file:'+pathname2url(os.path.abspath(sys.argv[1])))"
 73 | EXTRACT_HELP := $(PY) -c "import re,sys;m=[re.match(r'^([a-zA-Z_-]+):.*?\#\# (.*)$$',line)for line in sys.stdin];print('\n'.join('{:14} {}'.format(*g.groups())for g in m if g))"
 74 | LS := $(PY) -c "import sys,os;print('\n'.join(os.listdir(os.path.abspath(sys.argv[1]))))"
 75 | TOUCH := $(PY) -c "import sys;open(sys.argv[1],'ab')"
 76 | 
 77 | TOX=$(VENV)tox$(EXE)
 78 | SPHINX=$(VENV)sphinx-build$(EXE)
 79 | COVERAGE=$(VENV)coverage$(EXE)
 80 | TWINE=$(VENV)twine$(EXE)
 81 | 
 82 | 
 83 | # ----------------------------------------------------------------------
 84 | # Automatic installation
 85 | # ----------------------------------------------------------------------
 86 | 
 87 | .git:
 88 | 	$(GIT) init
 89 | 	$(GIT) add *
 90 | 	$(GIT) commit -m "Initial commit"
 91 | 	$(GIT) branch -M main
 92 | 
 93 | $(MARKER):
 94 | 	$(MAKE) clean
 95 | 	$(MAKE) .git
 96 | 	$(PIP) install virtualenv
 97 | 	$(PY) -m virtualenv venv
 98 | 	$(VENV_PIP) install 'setuptools>=62.0.0' 'pip>=21.3'
 99 | 	$(VENV_PIP) install -e .[lint]
100 | 
101 | 	$(TOUCH) $(MARKER)
102 | 
103 | $(VENV): $(MARKER)
104 | 
105 | $(VENV_PY): $(MARKER)
106 | 
107 | $(VENV_PIP): $(MARKER)
108 | 
109 | $(TOX): $(VENV_PIP)
110 | 	$(VENV_PIP) install -e .[tox]
111 | 
112 | $(PRECOMMIT): $(VENV_PIP)
113 | 
114 | $(COVERAGE): $(VENV_PIP)
115 | 	$(VENV_PIP) install -e .[cov]
116 | 
117 | $(TWINE): $(VENV_PIP)
118 | 	$(VENV_PIP) install -e .[deploy]
119 | 
120 | $(LIB)build: $(VENV_PIP)
121 | 	$(VENV_PIP) install -e .[build]
122 | 
123 | 
124 | # ----------------------------------------------------------------------
125 | # Commands
126 | # ----------------------------------------------------------------------
127 | 
128 | .DEFAULT_GOAL := help
129 | 
130 | .PHONY: clean
131 | clean:  ## Remove all build, test, coverage, venv and Python artifacts.
132 | 	$(RM_GLOB) 'venv/*/python.?e?x?e?' 'venv' 'build/' 'dist/' 'public/' '.eggs/' '.tox/' '.coverage' 'htmlcov/' '.pytest_cache' '.mypy_cache' '.ruff_cache'  '**/*.egg-info' '**/*.egg' '**/__pycache__' '**/*~' '**/*.pyc' '**/*.pyo'
133 | 
134 | .PHONY: cov
135 | cov: $(TOX)  ## Check code coverage.
136 | 	tox -e cov
137 | 
138 | .PHONY: dist
139 | dist: clean $(LIB)build  ## Builds source and wheel package.
140 | 	$(VENV_PY) -m build
141 | 	$(LS) dist/
142 | 
143 | .PHONY: format
144 | format: $(TOX) ## Format style with tox, ruff, black.
145 | 	$(TOX) -e format
146 | 
147 | .PHONY: help
148 | help:  ## Show current message.
149 | 	@$(EXTRACT_HELP) < $(MAKEFILE_LIST)
150 | 
151 | .PHONY: install
152 | install:  ## Install the package to the active Python's site-packages.
153 | 	$(PIP) install .
154 | 
155 | .PHONY: lint
156 | lint: $(TOX)  ## Check style with tox, ruff, black and mypy.
157 | 	$(TOX) -e lint
158 | 
159 | .PHONY: open-cov
160 | open-cov: cov  ## Open coverage report.
161 | 	$(BROWSER) htmlcov/index.html
162 | 
163 | .PHONY: setup
164 | setup: clean $(VENV_PY)  ## Create virtual environment.
165 | 
166 | .PHONY: tests
167 | tests: $(TOX)  ## Run unit and functional tests.
168 | 	$(TOX) -e tests
169 | 
170 | .PHONY: tests-all
171 | tests-all: $(TOX)  ## Run all tests in parallel (lint and tests).
172 | 	$(TOX) -p
173 | 
174 | .PHONY: uninstall
175 | uninstall:  ## Install the package to the active Python's site-packages.
176 | 	$(PIP) uninstall orc2timeline
177 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # orc2timeline
 2 | 
 3 | **orc2timeline** stands for "ORC to timeline", ORC refers to [DFIR-ORC](https://github.com/DFIR-ORC/dfir-orc) which is a tool used to parse and collect critical **artefacts of a Windows system** during an **incident response**.
 4 | 
 5 | orc2timeline can take one or several ORC as input and **generate one timeline per host**.
 6 | 
 7 | ## Installation
 8 | 
 9 | ```
10 | git clone https://github.com/ANSSI-FR/orc2timeline.git
11 | cd orc2timeline
12 | pip install .
13 | ```
14 | 
15 | ## Examples
16 | 
17 | Let us consider the following file tree:
18 | ```
19 | $ tree ~
20 | ~
21 | └── Documents
22 |     ├── ORC
23 |     │   ├── DFIR-ORC_Server_ServerName.domain_Browsers.7z
24 |     │   ├── DFIR-ORC_Server_ServerName.domain_Detail.7z
25 |     │   ├── DFIR-ORC_Server_ServerName.domain_General.7z
26 |     │   ├── DFIR-ORC_Server_ServerName.domain_Little.7z
27 |     │   ├── DFIR-ORC_Server_ServerName.domain_Powershell.7z
28 |     │   ├── DFIR-ORC_Server_ServerName.domain_SAM.7z
29 |     │   └── DFIR-ORC_Workstation_MachineName.domain_Offline.7z
30 |     └── output_directory
31 | 
32 | 3 directories, 7 files
33 | ```
34 | 
35 | Process all the ORC contained in a directory (orc2timeline will infer hostname from file names and group files by host to process them):
36 | ```
37 | $ orc2timeline --tmp-dir=/tmp process_dir -j 4  ~/Documents/ORC ~/Documents/output_directory
38 | ```
39 | 
40 | This command will create the following files:
41 | ```
42 | ~
43 | └── Documents
44 |     └── output_directory
45 |         ├── MachineName.domain.csv.gz
46 |         └── ServerName.domain.csv.gz
47 | ```
48 | 
49 | ## Documentation
50 | 
51 | A more detailed documentation is provided if needed :
52 | 
53 | ### [Introduction](docs/0_Intro.md)
54 | ### [Tutorial](docs/1_Tutorial.md)
55 | ### [Installation and requirements](docs/2_Installation_and_requirements.md)
56 | ### [Architecture](docs/3_Architecture.md)
57 | ### [Configuration](docs/4_Configuration.md)
58 | ### [Existing plugins](docs/5_Existing_plugins.md)
59 | ### [Develop your own plugin](docs/6_Develop_your_own_plugin.md)
60 | ### [Licenses](docs/7_Licenses.md)
61 | ### [Frequently Asked Questions](docs/8_FAQ.md)
62 | 
63 | 


--------------------------------------------------------------------------------
/docs/0_Intro.md:
--------------------------------------------------------------------------------
 1 | # Introduction
 2 | 
 3 | **orc2timeline** stands for "ORC to timeline", ORC refers to DFIR-ORC which is a tool used to parse and collect critical **artefacts of a Windows system** during an **incident response**.
 4 | 
 5 | While DFIR-ORC allows to gather all the data needed to operate a successful incident response, no opensource tool was released to **help analyst to dissect archives that result of DFIR-ORC.exe** execution.
 6 | 
 7 | As a reminder, in the following we will use the term ORC to refer to a set of archives that is the output of DFIR-ORC.exe for a single host.
 8 | 
 9 | orc2timeline can take one or several ORC as input and generate one timeline per host.
10 | 
11 | This means that **orc2timeline decompresses targeted files** contained in ORC archives, **parses them** to extract interesting information and creates one or many event for a given artefact. One event must contain a timestamp. A **timeline** will then be created, **sorted by date** and **compressed in gzip** format to allow forensics analysis.
12 | 
13 | The **output timeline** is a **csv file** with the four following columns:
14 |   - `Timestamp` (Time when the event occurred);
15 |   - `Hostname` (Name of the host, this can be useful when merging two or more timelines);
16 |   - `SourceType` (Type of event) ;
17 |   - `Description` (Description and details about the event);
18 |   - `SourceFile` (Original path of the artefact if it exists, path in ORC archive otherwise).
19 | 
20 | orc2timeline can be run with a **list of file as input** and a **path to result file as output**. Files mentioned in input list must belong to the **same ORC run** (for a single host).
21 | 
22 | To process multiple ORC, it is also possible to specify an **input directory**, it is then necessary to specify an **output directory**. The **list of hosts** to process will be **inferred** from the **recursive list of files** in the given directory. For now orc2timeline can not process a directory if two ORC of the same host are in different subdirectories. The **subtree** of the input directory will be **reproduced** in the output directory. One output file per host will be created in the given output directory.
23 | 
24 | Since artefact processing can be **time and resource consuming**, orc2timeline was designed to run on **multiple threads**. The usage of orc2timeline **could cause disk space or RAM exhaustion**, therefore testing its impact in your own environment is necessary and it **should not be run in a critical production environment**.
25 | 
26 | The goal of orc2timeline is the provide a framework that **knows how to extract specific pieces of data from an ORC collection** and create at least one event from it. **Plugins rely on external dependencies, that are deliberately not redevelop.**
27 | 


--------------------------------------------------------------------------------
/docs/1_Tutorial.md:
--------------------------------------------------------------------------------
 1 | # Tutorials
 2 | 
 3 | Let us consider the following file tree:
 4 | ```
 5 | $ tree ~
 6 | ~
 7 | └── Documents
 8 |     ├── ORC
 9 |     │   ├── DFIR-ORC_Server_ServerName.domain_Browsers.7z
10 |     │   ├── DFIR-ORC_Server_ServerName.domain_Detail.7z
11 |     │   ├── DFIR-ORC_Server_ServerName.domain_General.7z
12 |     │   ├── DFIR-ORC_Server_ServerName.domain_Little.7z
13 |     │   ├── DFIR-ORC_Server_ServerName.domain_Powershell.7z
14 |     │   ├── DFIR-ORC_Server_ServerName.domain_SAM.7z
15 |     │   └── DFIR-ORC_Workstation_MachineName.domain_Offline.7z
16 |     └── output_directory
17 | 
18 | 3 directories, 7 files
19 | ```
20 | 
21 | ## Process a single ORC
22 | 
23 | #### Process one ORC (input files must belong to the same execution of DFIR-ORC.exe for a single host):
24 | ```
25 | $ orc2timeline process Documents/ORC/DFIR-ORC_Server_ServerName.domain_Powershell.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_Little.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_Browsers.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_General.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_Detail.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_SAM.7z Documents/output_directory/ServerName.domain.csv.gz
26 | ```
27 | 
28 | or
29 | 
30 | ```
31 | $ orc2timeline process ~/Documents/ORC/DFIR-ORC_Server_ServerName.domain_*.7z ~/Documents/output_directory/ServerName.domain.csv.gz
32 | ```
33 | 
34 | If you try to process archives that do not belong to the same host, an exception will be raised and program will exit:
35 | ```
36 | $ orc2timeline process --overwrite Documents/ORC/DFIR-ORC_* Documents/output_directory/ServerName.domain.csv.gz
37 | [2012-12-21 23:59:59,999] WARNING  - --jobs option was not given, thus only one thread will be used. Therefore processing could take a while. 
38 | Usage: orc2timeline process [OPTIONS] [FILE_LIST]... OUTPUT_PATH
39 | Try 'orc2timeline process --help' for help.
40 | 
41 | Error: Invalid value: Bad file list, all files must belong to the same host. Parsed hosts : {'ServerName.domain', 'MachineName.domain'}
42 | ```
43 | 
44 | #### Use multiple threads
45 | 
46 | Use 4 threads to process one ORC, overwrite output file if it already exists, use ~/temp as temporary directory:
47 | ```
48 | $ TMPDIR=~/temp orc2timeline process -j 4 --overwrite Documents/ORC/DFIR-ORC_Server_ServerName.domain_* Documents/output_directory/ServerName.domain.csv.gz
49 | ```
50 | 
51 | ## Process many ORC with a single command
52 | 
53 | Process all the ORC contained in a directory (orc2timeline will infer hostname from file names and group files by host to process them):
54 | ```
55 | $ orc2timeline --tmp-dir=/tmp/data process_dir -j 4  ~/Documents/ORC ~/Documents/output_directory
56 | ```
57 | 
58 | This previous command will create the following files:
59 | ```
60 | ~
61 | └── Documents
62 |     └── output_directory
63 |         ├── MachineName.domain.csv.gz
64 |         └── ServerName.domain.csv.gz
65 | ```
66 | 
67 | ## Show configuration
68 | 
69 | Command that show the path to configuration file :
70 | ```
71 | orc2timeline show_conf_file
72 | ```
73 | 
74 | Command that shows the configuration (content of configuration file) that will be used :
75 | ```
76 | orc2timeline show_conf
77 | ```
78 | 
79 | **NB** : if you want to run orc2timeline with a custom configuration, you **must** modify the configuration file inplace, there is no way to give a custom path to another configuration file.
80 | 
81 | ## Command that combine a lot of options
82 | 
83 | This command will process all the ORC contained in `./Documents/ORC` and write the timelines in `./Documents/`. Four threads will be used, if a timeline already exists it will be overwritten. `/tmp/data/` will be used as temporary directory and output log level is DEBUG (maximum value).
84 | ```
85 | $ orc2timeline --log-level=DEBUG --tmp-dir=/tmp/data process_dir --overwrite -j 4  ./Documents/ORC ./Documents/output_directory
86 | ```
87 | 


--------------------------------------------------------------------------------
/docs/2_Installation_and_requirements.md:
--------------------------------------------------------------------------------
 1 | # Installation and requirements
 2 | 
 3 | orc2timeline requires **python3** and **python3-pip**. On Debian based distribution, these packages can be installed like this:
 4 | ```
 5 | apt update && apt install python3 python3-pip
 6 | ```
 7 | 
 8 | Make sure that the **latest version of pip** is installed, if not, it can be upgraded with the following command:
 9 | ```
10 | pip install --upgrade pip
11 | ```
12 | 
13 | orc2timeline can be installed **system-wide** or in a **virtual environment** (to avoid dependency issues) like any other python project. After cloning the repository with git, just run the `pip install .` command. This should download and install dependencies described in `pyproject.toml` file, after that the command `orc2timeline` should be in your path.
14 | 
15 | Supported and tested Operating Systems are:
16 |   - Debian 11
17 |   - Debian 12
18 |   - Ubuntu 20.04
19 |   - Ubuntu 22.04
20 |   - Ubuntu 24.04
21 | 
22 | If an error occurs while using orc2timeline with one of these OS, feel free to **create an issue or a pull-request**.
23 | 
24 | If your favorite OS is not in the list, do not give up, it just means that it has not been tested **yet**.
25 | 
26 | ## Installation without a virtual environment:
27 | 
28 | ```
29 | git clone https://github.com/ANSSI-FR/orc2timeline.git
30 | cd orc2timeline
31 | pip install .
32 | ```
33 | 
34 | ## Installation with a virtual environment generated with virtualenv tool:
35 | 
36 | ```
37 | git clone https://github.com/ANSSI-FR/orc2timeline.git
38 | cd orc2timeline
39 | virtualenv -p python3 venv
40 | source venv/bin/activate
41 | pip install .
42 | ```
43 | 
44 | ## View and edit dependencies for debugging or developing purposes
45 | 
46 | If you want to know or edit the dependencies, they can be found in `pyproject.toml` file, in the "dependencies" section.
47 | ```
48 | [...]
49 | 
50 | dependencies = [  # Duplicate in pre-commit-config.yaml
51 |   "click>=8.1.0",
52 |   "dateparser==1.2.0",
53 |   "py7zr==0.21.0",
54 |   "libevtx-python==20240204",
55 |   "libesedb-python==20240420",
56 |   "dfwinreg==20240229",
57 |   "six==1.16.0",
58 |   "python-registry==1.3.1",
59 |   "pytz==2024.1",
60 |   "pyyaml==6.0.1",
61 | ]
62 | 
63 | [...]
64 | ```
65 | 


--------------------------------------------------------------------------------
/docs/3_Architecture.md:
--------------------------------------------------------------------------------
 1 | # Architecture
 2 | 
 3 | ## Language
 4 | 
 5 | orc2timeline is written in **python (version 3)**. Since the goal of this tool is **to rely on external dependencies** to parse artefacts, it seemed relevant to choose a **widely adopted language** to take advantage of the **large amount of libraries** available.
 6 | 
 7 | Moreover, considering the adoption of python it seems perfect to **ease maintenance and evolutions** of the project.
 8 | 
 9 | ## Plugin
10 | 
11 | orc2timeline works with plugins. This means that when launched, orc2timeline will **read configuration** to know the **list of plugins** (and the configuration for every plugin) to run. After that list is built, every plugin instance (there **can be several plugin instances for one plugin**) will be run using **all the available threads** given.
12 | 
13 | Each plugin writes a temporary intermediate file that contains an extract of the final timeline (csv file ordered by date). Once all the plugins are executed, all the **plugin timelines for a given host are consolidated into a final host timeline**. All lines are **deduplicated and sorted by date**. During this consolidation, **one thread can be used per host** treated.
14 | 


--------------------------------------------------------------------------------
/docs/4_Configuration.md:
--------------------------------------------------------------------------------
  1 | # Configuration file
  2 | 
  3 | ### Introduction
  4 | 
  5 | Depending on the **Orc configuration** you use, you may have to **customize the configuration**. The given configuration works with the configuration of DFIR-ORC that is published on [GitHub](https://github.com/DFIR-ORC/dfir-orc-config).
  6 | 
  7 | The configuration file is a **yaml file** that is read every time orc2timeline is run during the preliminary phase of the execution. The file can be modified, but **must stay inplace**. To know the path where to find this file the following command can be used : `orc2timeline show_conf_file`. To validate the modifications, `orc2timeline show_conf` command can be used to view the configuration that will be used.
  8 | 
  9 | ### Explanations
 10 | 
 11 | The following snippet of the configuration file will be explained:
 12 | ```
 13 | Plugins:
 14 |   - EventLogsToTimeline:
 15 |       archives: ["General", "Little"]
 16 |       sub_archives: ["Event.7z", "Event_Little.7z"]
 17 |       match_pattern: ".*evtx.*"
 18 |       sourcetype: "Event"
 19 | 
 20 |   - NewPlugin:
 21 | [...]
 22 | ```
 23 | 
 24 | The file begins with the keyword `Plugins`, it contains a list of plugin. In this example `EventLogsToTimeline` is configured, it means that `src/orc2timeline/plugins/EventLogsToTimeline.py` file will be loaded (a complete guide to write a plugin exists [here](6_Develop_your_own_plugin.md)).
 25 | 
 26 | The plugin has **four attributes**:
 27 |   - `archives`: list of archive types to dissect (example: from `General`, the `DFIR-ORC_Server_MACHINENAME_General.7z` will be used);
 28 |   - `sub_archives`: list of archives to decompress from the primary archive (the final artefacts are inside these sub\_archives), if the files are directly contained in the primary archive this attribute **can be omitted**;
 29 |   - `match_pattern`: regex pattern used to filter which files must be processed;
 30 |   - `sourcetype`: string that will be used for the column SourceType for this plugin.
 31 | 
 32 | All the combinations between `archives` and `sub_archives` will be used to create plugin instances. With the previous example, the following instances will be created:
 33 |   - `EventLogsToTimeline(archives="General", sub_archives="Event.7z", ...)`;
 34 |   - `EventLogsToTimeline(archives="General", sub_archives="Event_Little.7z", ...)`;
 35 |   - `EventLogsToTimeline(archives="Little", sub_archives="Event.7z", ...)`;
 36 |   - `EventLogsToTimeline(archives="Little", sub_archives="Event_Little.7z", ...)`.
 37 | 
 38 | Considering the following layout:
 39 | 
 40 | ```
 41 | DFIR-ORC_Server_MACHINENAME_General.7z
 42 | ├── Event.7z
 43 | │   └── file1.evtx
 44 | └── Other.7z
 45 |     └── file5.evtx
 46 | DFIR-ORC_Server_MACHINENAME_Little.7z
 47 | ├── Event.7z
 48 | │   └── file2.evtx
 49 | └── Event_Little.7z
 50 |     ├── file3.evtx
 51 |     └── file4.evt
 52 | ```
 53 | 
 54 | The files `file1.evtx`, `file2.evtx`, `file3.evtx` will be processed. `file4.evt` will not be processed because it does not match the `match_pattern`, `file5.evtx` will not be processed because `Other.7z` is not mentioned in the `sub_archives` list.
 55 | 
 56 | The **same plugin can be described many times** in the configuration file. The following snippet or configuration is equivalent to the previous one:
 57 | ```
 58 | Plugins:
 59 |   - EventLogsToTimeline:
 60 |       archives: ["General"]
 61 |       sub_archives: ["Event.7z", "Event_Little.7z"]
 62 |       match_pattern: ".*evtx.*"
 63 |       sourcetype: "Event"
 64 | 
 65 |   - EventLogsToTimeline:
 66 |       archives: ["Little"]
 67 |       sub_archives: ["Event.7z", "Event_Little.7z"]
 68 |       match_pattern: ".*evtx.*"
 69 |       sourcetype: "Event"
 70 | 
 71 |   - NewPlugin:
 72 | [...]
 73 | ```
 74 | 
 75 | **Warning !** The following snippet is **NOT** equivalent to the first one:
 76 | ```
 77 | Plugins:
 78 |   - EventLogsToTimeline:
 79 |       archives: ["General"]
 80 |       sub_archives: ["Event.7z", "Event_Little.7z"]
 81 |       match_pattern: ".*evtx.*"
 82 |       sourcetype: "Event"
 83 | 
 84 |   - EventLogsToTimeline:
 85 |       archives: ["Little"]
 86 |       sub_archives: ["Event_Little.7z"]
 87 |       match_pattern: ".*evtx.*"
 88 |       sourcetype: "Event"
 89 | 
 90 |   - NewPlugin:
 91 | [...]
 92 | ```
 93 | 
 94 | Because `file2.evtx` from precedent example **would not be parsed anymore**.
 95 | 
 96 | ### Syntactic sugar
 97 | 
 98 | For **readability** purpose, it may be useful to split the configuration of a plugin in two **distinct** configuration specifications. For example, the Offline configuration could be detached from the "live" configuration. The two following snippets are equivalent :
 99 | 
100 | **All in one** configuration:
101 | ```
102 | [...]
103 |   - EventLogsToTimeline:
104 |       archives: ["General", "Little", "Offline"]
105 |       sub_archives: ["Event.7z"]
106 |       match_pattern: ".*evtx.*"
107 |       sourcetype: "Event"
108 | [...]
109 | ```
110 | 
111 | **Two-piece** configuration:
112 | ```
113 | [...]
114 |   - EventLogsToTimeline:
115 |       archives: ["General", "Little"]
116 |       sub_archives: ["Event.7z"]
117 |       match_pattern: ".*evtx.*"
118 |       sourcetype: "Event"
119 | 
120 |   - EventLogsToTimeline:
121 |       archives: ["Offline"]
122 |       sub_archives: ["Event.7z"]
123 |       match_pattern: ".*evtx.*"
124 |       sourcetype: "Event"
125 | [...]
126 | ```
127 | 
128 | ### One configuration to rule them all.
129 | 
130 | orc2timeline's configuration allows the user to **set multiple DFIR-ORC configurations in the same file**. As long as parameters are **narrow enough** and the two configurations **do not conflict** with each other, they can live in the same file.
131 | 
132 | Of course this will result in multiple plugin instances that will not match any artefact, but this should not deteriorate performance and the final result will remain valid.
133 | 


--------------------------------------------------------------------------------
/docs/5_Existing_plugins.md:
--------------------------------------------------------------------------------
  1 | # Existing plugins
  2 | 
  3 | orc2timeline is designed to work with plugins. Plugin files are located in `src/orc2timeline/plugins/` directory, one file per plugin.
  4 | 
  5 | **One plugin** is meant to process **one type of artefact** collected by DFIR-ORC. The location of these artefacts **must be predictable**, so that the plugin can efficiently extract it from the archives.
  6 | 
  7 | Plugins may be divided in two categories: DFIR-ORC-artefact plugins and Windows-artefact plugins.
  8 | 
  9 | ## DFIR-ORC-artefact plugins
 10 | 
 11 | These plugins are meant to process files that are **generated during DFIR-ORC execution**. Those files are not actual artefacts but **the result of DFIR-ORC parsers**, they gather information that are very relevant for forensics analysis.
 12 | 
 13 | ### NTFSInfoToTimeline plugin
 14 | 
 15 | This plugin processes files located in:
 16 |   - the `Little` archive, inside `NTFSInfo_detail.7z`;
 17 |   - the `General` archive, inside `NTFSInfo_quick.7z`;
 18 |   - the `Detail` archive, inside `NTFSInfo_detail.7z`;
 19 |   - the `Offline` archive, inside `NTFSInfo_detail.7z`.
 20 | 
 21 | The treated csv file should be the result of DFIR-ORC's NTFSInfo command.
 22 | 
 23 | Configuration snippet:
 24 | ```
 25 | [...]
 26 |   - NTFSInfoToTimeline:
 27 |       archives: ["Detail", "General", "Little", "Offline"]
 28 |       sub_archives: ["NTFSInfo_detail.7z", "NTFSInfo_quick.7z"]
 29 |       match_pattern: "^.*NTFSInfo[^/]*\\.csv$"
 30 |       sourcetype: "MFT"
 31 | [...]
 32 | ```
 33 | 
 34 | For each entry in this csv file, one event is created per file and per different timestamp. This means that events with the same file\_path and timestamp will be grouped in a single event.
 35 | 
 36 | Output example:
 37 | ```
 38 | 2021-01-05 10:35:26.012,FAKEMACHINE,MFT,$SI: .A.B - $FN: MACB - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Bits-Client%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv
 39 | 2021-01-05 10:35:26.996,FAKEMACHINE,MFT,$SI: .A.B - $FN: MACB - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Diagnosis-DPS%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv
 40 | 2022-10-24 01:48:19.929,FAKEMACHINE,MFT,$SI: M.C. - $FN: .... - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Diagnosis-DPS%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv
 41 | 2022-10-24 14:12:54.482,FAKEMACHINE,MFT,$SI: M.C. - $FN: .... - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Bits-Client%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv
 42 | ```
 43 | 
 44 | ### I30InfoToTimeline plugin
 45 | 
 46 | This plugin processes files located in:
 47 |   - the `Detail` archive, inside `NTFSInfo_i30Info.7z`;
 48 |   - the `Offline` archive, inside `NTFSInfo_i30Info.7z`.
 49 | 
 50 | The treated csv file should be the result of DFIR-ORC's NTFSInfo with `/i30info` argument.
 51 | 
 52 | Configuration snippet:
 53 | ```
 54 | [...]
 55 |   - I30InfoToTimeline:
 56 |       archives: ["Detail", "Offline"]
 57 |       sub_archives: ["NTFSInfo_i30Info.7z"]
 58 |       match_pattern: "^I30Info.*\\.csv$"
 59 |       sourcetype: "I30"
 60 | [...]
 61 | ```
 62 | 
 63 | For each entry in this csv file, one event is created per file and per different timestamp. This means that events with the same file\_path and timestamp will be grouped in a single event.
 64 | 
 65 | Output example:
 66 | ```
 67 | 2009-07-14 03:20:08.961,FAKEMACHINE,I30,Entry in slackspace - $FN: ...B - Name: Windows - MFT segment num: 379 - Parent FRN: 0x0005000000000005 ,I30Info_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv
 68 | 2021-01-05 19:24:19.796,FAKEMACHINE,I30,Entry in slackspace - $FN: MACB - Name: WinPEpge.sys - MFT segment num: 54 - Parent FRN: 0x0005000000000005 ,I30Info_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv
 69 | 2021-01-05 19:24:33.593,FAKEMACHINE,I30,Entry in slackspace - $FN: MAC. - Name: Windows
 70 | ```
 71 | 
 72 | ### USNInfoToTimeline plugin
 73 | 
 74 | This plugin processes files located in:
 75 |   - the `Little` archive, inside `USNInfo.7z`;
 76 |   - the `Detail` archive, inside `USNInfo.7z`;
 77 |   - the `Offline` archive inside `USNInfo.7z`.
 78 | 
 79 | The treated csv file should be the result of DFIR-ORC's USNInfo command.
 80 | 
 81 | Configuration snippet:
 82 | ```
 83 | [...]
 84 |   - USNInfoToTimeline:
 85 |       archives: ["Detail", "Little", "Offline"]
 86 |       sub_archives: ["USNInfo.7z"]
 87 |       match_pattern: "^USNInfo.*\\.csv$"
 88 |       sourcetype: "USN journal"
 89 | [...]
 90 | ```
 91 | 
 92 | For each entry in this csv file, one event is created per file and per different timestamp. This means that events with the same file\_path and timestamp will be grouped in a single event.
 93 | 
 94 | Output example:
 95 | ```
 96 | 2023-11-30 16:12:58.609,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - CLOSE|DATA_EXTEND|DATA_OVERWRITE|DATA_TRUNCATION|FILE_CREATE|SECURITY_CHANGE - MFT segment num : 77487,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv
 97 | 2023-11-30 16:12:58.609,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - CLOSE|FILE_DELETE - MFT segment num : 77487,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv
 98 | 2023-11-30 16:17:52.133,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - FILE_CREATE - MFT segment num : 2259,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv
 99 | 2023-11-30 16:17:52.242,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - DATA_EXTEND|FILE_CREATE - MFT segment num : 2259,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv
100 | ```
101 | 
102 | ## Windows-artefact plugins
103 | 
104 | DFIR-ORC collects files that may help DFIR analysis. Extracting the relevant pieces of data out of those files can be tricky since they are not meant to be parsed, and can be in proprietary format. orc2timeline **relies on opensource parsers**, the choice was made not to redevelop all the parsers and **take advantage of existing libraries**.
105 | 
106 | The plugins to parse Registry Hives and Event Logs are released. Many more could be developed for processing other types of artefacts such as LNK files, Jumplists... Developing these plugins is left as an exercise to the reader (contributions are welcome).
107 | 
108 | 
109 | ### RegistryToTimeline plugin
110 | 
111 | This plugin processes registry hives, it creates one event per registry key, the last modification date of the key is used as a timestamp.
112 | 
113 | The file named `RegistryToTimeline-important-keys.txt` allows to specify keys for which an event will be printed in the final timeline for each key value. The **key path must be exact**, regex are not supported.
114 | 
115 | For more sophisticated treatments on key paths or key values, a new plugin must be developed. This new plugin could inherit `RegistryToTimeline` to benefit from existing functions.
116 | 
117 | This plugin processes files located in:
118 |   - the `Little` archive, inside `SystemHives_little.7z`;
119 |   - the `Detail` archive, inside `SystemHives.7z` and `UserHives.7z`;
120 |   - the `SAM` archive, inside `SAM.7z`;
121 |   - the `Offline` archive inside `SystemHives.7z`, `UserHives.7z`, `SAM.7z`.
122 | 
123 | Configuration snippet:
124 | ```
125 | [...]
126 |   - RegistryToTimeline:
127 |       archives: ["SAM", "Little", "Detail", "Offline"]
128 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
129 |       match_pattern: ".*data$"
130 |       sourcetype: "Registry"
131 | [...]
132 | ```
133 | 
134 | Output example:
135 | ```
136 | 2009-07-14 04:49:35.659,FAKEMACHINE,Registry,HKEY_CURRENT_USER\Environment,\Windows\ServiceProfiles\LocalService\NTUSER.DAT
137 | 2009-07-14 04:49:35.659,FAKEMACHINE,Registry,KeyPath: HKEY_CURRENT_USER\Environment - KeyName: TEMP - KeyType: RegExpandSZ - KeyValue: %USERPROFILE%\AppData\Local\Temp,\Windows\ServiceProfiles\LocalService\NTUSER.DAT
138 | 2009-07-14 04:49:35.659,FAKEMACHINE,Registry,KeyPath: HKEY_CURRENT_USER\Environment - KeyName: TMP - KeyType: RegExpandSZ - KeyValue: %USERPROFILE%\AppData\Local\Temp,\Windows\ServiceProfiles\LocalService\NTUSER.DAT
139 | 2009-07-14 04:49:35.674,FAKEMACHINE,Registry,HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Winlogon,\Windows\ServiceProfiles\LocalService\NTUSER.DAT
140 | 2009-07-14 04:49:35.674,FAKEMACHINE,Registry,KeyPath: HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Winlogon - KeyName: ExcludeProfileDirs - KeyType: RegSZ - KeyValue: AppData\Local;AppData\LocalLow;$Recycle.Bin,\Windows\ServiceProfiles\LocalService\NTUSER.DAT
141 | ```
142 | 
143 | 
144 | ### EventLogsToTimeline plugin
145 | 
146 | This plugin processes Windows log events, for each `evtx` file, this plugin parses all the events to create one line per event in the final timeline.
147 | 
148 | The file `EventLogsToTimeline-eventmap.txt` allows the analyst to specify tuples (Channel/Event ID) for which events description will be prefixed with a custom string.
149 | 
150 | This plugin processes files located in:
151 |   - the `General` archive, inside Event.7z;
152 |   - the `Little` archive, inside Event.7z;
153 |   - the `Offline` archive, inside Event.7z.
154 | 
155 | Configuration snippet:
156 | ```
157 | [...]
158 |   - EventLogsToTimeline:
159 |       archives: ["General", "Little", "Offline"]
160 |       sub_archives: ["Event.7z"]
161 |       match_pattern: ".*evtx.*"
162 |       sourcetype: "Event"
163 | [...]
164 | ```
165 | 
166 | Output example:
167 | ```
168 | 2021-02-12 15:56:30.372,FAKEMACHINE,Event,Microsoft-Windows-Servicing:1 S-1-5-18 (KBWUClient-SelfUpdate-Aux Staged Installed WindowsUpdateAgent),\Windows\System32\winevt\Logs\Setup.evtx
169 | 2021-02-12 15:56:32.512,FAKEMACHINE,Event,Microsoft-Windows-Servicing:4 S-1-5-18 (KBWUClient-SelfUpdate-Aux Installed 0x0 WindowsUpdateAgent),\Windows\System32\winevt\Logs\Setup.evtx
170 | 2022-10-24 01:46:29.681,FAKEMACHINE,Event,Microsoft-Windows-Servicing:2 S-1-5-18 (KBWUClient-SelfUpdate-Aux Installed 0x0 WindowsUpdateAgent),\Windows\System32\winevt\Logs\Setup.evtx
171 | ```
172 | 


--------------------------------------------------------------------------------
/docs/6_Develop_your_own_plugin.md:
--------------------------------------------------------------------------------
  1 | # Develop your own plugin
  2 | 
  3 | orc2timeline works with plugins in order to ease features integration. Therefore, adding the parsing of an artefact can be done by modifying only two files. First the plugin file must be created, then the plugin configuration must be appended to the configuration file.
  4 | 
  5 | ## MyPlugin.py
  6 | 
  7 | ### File path and file name
  8 | 
  9 | The first file **must** be named by the plugin name: if your plugin will process LNK files, it could be named `LNKToTimeline`, therefore the file will be named `LNKToTimeline.py`.
 10 | 
 11 | The location of this file must be `<orc2timeline_directory>/src/orc2timeline/plugins/LNKToTimeline.py`.
 12 | 
 13 | In the following example, we assume that we have a very convenient library named `magic_lnk_library` that contains all the functions and class we need to parse lnk files.
 14 | 
 15 | ## One plugin equals one class
 16 | 
 17 | ### GenericToTimeline
 18 | 
 19 | This file, is a python module that can contain multiple classes. This module **must contain a class that is named after the file name**, this class **must inherit** from `GenericToTimeline`.
 20 | 
 21 | `GenericToTimeline` is a module that contains two classes:
 22 |   - `Event` (describes an event that represents one line in the final timeline);
 23 |   - `GenericToTimeline` (implements a collection of functions that will be useful during the plugin development).
 24 | 
 25 | Example:
 26 | ```
 27 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
 28 | 
 29 | class LNKToTimeline(GenericToTimeline):
 30 |     def __init__(
 31 |         self,
 32 |         config: PluginConfig,
 33 |         orclist: list[str],
 34 |         output_file_path: str,
 35 |         hostname: str,
 36 |         tmp_dir: str,
 37 |         lock: Lock,
 38 |     ) -> None:
 39 |         """Construct."""
 40 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
 41 | 
 42 | ```
 43 | 
 44 | ### Event
 45 | 
 46 | As stated above, `Event` class **must** be used to add an event to the final timeline. It is a very simple class but all the attributes of this class must be completed.
 47 | 
 48 | The event object **must** be added with the function `_add_event` of the class `GenericToTimeline`.
 49 | 
 50 | How to add an event :
 51 | ```
 52 | event = Event()
 53 | event.description = "Good description"
 54 | event.timestamp = datetime.now()
 55 | # the following line could replace the previous line
 56 | # event.timestamp_str = "2012-12-21 23:59:59.999"
 57 | event.source = "/path/to/artefact"
 58 | self._add_event(event)
 59 | ```
 60 | 
 61 | ## Helpful and mandatory functions
 62 | 
 63 | One function of your class that is **absolutely mandatory to override** is `_parse_artefact`, because the original one does **nothing**.
 64 | 
 65 | Another function that **must** be called is `_add_event`, it take an Event as argument, and **adds it to the final timeline**.
 66 | 
 67 | Based on the configuration, the artefact files will be extracted accordingly to GenericToTimeline's mechanisms. These files will then be passed one by one as argument to the function `_parse_artefact`.
 68 | 
 69 | `self._get_original_path` can be used to retrieve the path of the artefact as it was on the original filesystem. If an error occurs, this function returns the path inside the archive instead.
 70 | 
 71 | Example:
 72 | ```
 73 | def _parse_artefact(self, artefact: Path) -> None:
 74 |     timestamp = magic_lnk_library.get_relevant_timestamp_from_file(artefact)
 75 |     source=self._get_original_path(artefact)
 76 |     description = magic_lnk_library.get_relevant_description_from_file(artefact)
 77 | 
 78 |     event = Event(
 79 |         timestamp=timestamp,
 80 |         source=source,
 81 |         description=description,
 82 |     )
 83 | 
 84 |     self._add_event(event)
 85 | ```
 86 | 
 87 | ## File header filter
 88 | 
 89 | In your plugin class (LNKToTimeline in our example), it is possible to add an optional attribute called `file_header`. It is a byte array that is an additional filter on files that should be processed.
 90 | 
 91 | If the file header matches the byte array, it will be processed, otherwise the file will be ignored.
 92 | 
 93 | For our example, LNK files begin with the length of the header (0x4c) followed by the GUID {00021401-0000-0000-c000-000000000046}. Therefore, the header of the LNK files is `4c00 0000 0114 0200 0000 0000 c000 0000 0000 0046`.
 94 | 
 95 | Example:
 96 | ```
 97 | self.file_header = bytes([0x4c, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])
 98 | ```
 99 | 
100 | ## Plugin configuration
101 | 
102 | DFIR-ORC configuration analysis shows that lnk files are collected:
103 |   - in `General` archive in `Artefacts.7z`;
104 |   - in `Offline` archive in `Artefacts.7z`.
105 | 
106 | All the collected files contain `lnk` is their names.
107 | 
108 | We could add the following snippet to orc2timeline's configuration:
109 | ```
110 |   - LNKToTimeline:
111 |       archives: ["General", "Offline"]
112 |       sub_archives: ["Artefacts.7z"]
113 |       match_pattern: "^.*lnk.*$"
114 |       sourcetype: "LNK"
115 | ```
116 | 
117 | ## Final example
118 | 
119 | Considering all the above, here is the final result of our example plugin.
120 | 
121 | ### LNKToTimeline.py
122 | ```
123 | #######################################
124 | # Following lines are only for typing #
125 | #######################################
126 | """Plugin to parse LNK files."""
127 | from __future__ import annotations
128 | 
129 | from typing import TYPE_CHECKING
130 | 
131 | if TYPE_CHECKING:
132 |     from pathlib import Path
133 |     from threading import Lock
134 | 
135 |     from orc2timeline.config import PluginConfig
136 | #######################################
137 | 
138 | 
139 | import magic_lnk_library
140 | 
141 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
142 | 
143 | 
144 | class LNKToTimeline(GenericToTimeline):
145 |     def __init__(
146 |         self,
147 |         config: PluginConfig,
148 |         orclist: list[str],
149 |         output_file_path: str,
150 |         hostname: str,
151 |         tmp_dir: str,
152 |         lock: Lock,
153 |     ) -> None:
154 |         """Construct."""
155 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
156 | 
157 |     def _parse_artefact(self, artefact: Path) -> None:
158 |         timestamp = magic_lnk_library.get_relevant_timestamp_from_file(artefact)
159 |         source = self._get_original_path(artefact)
160 |         description = magic_lnk_library.get_relevant_description_from_file(artefact)
161 | 
162 |         event = Event(
163 |             timestamp=timestamp,
164 |             source=source,
165 |             description=description,
166 |         )
167 | 
168 |         self._add_event(event)
169 | 
170 | ```
171 | 
172 | 


--------------------------------------------------------------------------------
/docs/7_Licenses.md:
--------------------------------------------------------------------------------
  1 | 		   GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 
167 | 


--------------------------------------------------------------------------------
/docs/8_FAQ.md:
--------------------------------------------------------------------------------
 1 | # Frequently asked questions
 2 | 
 3 | ### Processing one ORC takes a long time, is it normal ?
 4 | 
 5 | Yes ! Parsing a large amount of data takes time.
 6 | 
 7 | Processing an Offline ORC of 1,7G on a laptop with i5 CPU (1.60GHz) with a single thread takes 12 minutes. When using 4 threads on the same laptop, it takes less than 8 minutes.
 8 | 
 9 | Processing an ORC of 500M can take 20 minutes when using 4 threads.
10 | 
11 | ### Why does doubling the number of threads not halve the time of processing by two ?
12 | 
13 | The processing can be divided in two parts. The first part is plugin execution, it ends only when the last plugin instance reaches its end. Only after that, orc2timeline begins to merge plugin timelines into final timelines.
14 | 
15 | If a plugin instance takes significantly longer, it will have an impact on orc2timeline execution time.
16 | 
17 | Nevertheless, it is worth mentioning that the more ORC are processed in parallel, the more effective orc2timeline will be.
18 | 
19 | Do not hesitate to run orc2timeline against a directory with a large number of ORC in it.
20 | 
21 | ### My laptop freezes while running orc2timeline ?
22 | 
23 | orc2timeline can slow down your laptop. There can be two reasons that may explain this behavior. First orc2timeline may cause RAM exhaustion, second orc2timeline may use too much CPU.
24 | 
25 | You can choose to use less threads, orc2timeline should then use less CPU and memory.
26 | 
27 | Concerning memory consumption, another adjustment may be useful: you could decrease the size of chunks (line 198 of file GenericToTimeline.py), orc2timeline will use less memory, but will be less effective.
28 | 
29 | ### My disk is out of space ?
30 | 
31 | orc2timeline writes a lot of things on the disk, it may require a large amount of space. `TMP_DIR` global variable can be used to specify a directory to write temporary files to. The option `--tmp-directory` has the same effect.
32 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------------------
  2 | # Packaging
  3 | # https://packaging.python.org/en/latest/tutorials/packaging-projects
  4 | # ----------------------------------------------------------------------
  5 | 
  6 | [build-system]
  7 | requires = ["setuptools>=67.0.0", "wheel"]
  8 | build-backend = "setuptools.build_meta"
  9 | 
 10 | [project]
 11 | name = "orc2timeline"
 12 | description = "Generate a timeline from list of Orc files"
 13 | authors = [
 14 |   {name = "Berenger Foucher", email = "berenger.foucher@ssi.gouv.fr" }
 15 | ]
 16 | maintainers = [
 17 |   {name = "Berenger Foucher", email = "berenger.foucher@ssi.gouv.fr" }
 18 | ]
 19 | dependencies = [
 20 |   "click>=8.1.0",
 21 |   "dateparser==1.2.1",
 22 |   "py7zr==0.22.0",
 23 |   "libevtx-python==20240504",
 24 |   "libesedb-python==20240420",
 25 |   "dfwinreg==20240229",
 26 |   "six==1.17.0",
 27 |   "pytz==2025.2",
 28 | ]
 29 | 
 30 | readme = "README.rst"
 31 | requires-python = ">=3.8"
 32 | keywords = ["python"]
 33 | license = {file = "LICENSE"}
 34 | 
 35 | # See https://pypi.org/classifiers/
 36 | classifiers = [
 37 | 
 38 |   #"Development Status :: 1 - Planning",
 39 |   #"Development Status :: 2 - Pre-Alpha",
 40 |   "Development Status :: 3 - Alpha",
 41 |   #"Development Status :: 4 - Beta",
 42 |   #"Development Status :: 5 - Production/Stable",
 43 |   #"Development Status :: 6 - Mature",
 44 |   #"Development Status :: 7 - Inactive",
 45 | 
 46 |   # Default usage
 47 |   "Environment :: Console",
 48 | 
 49 |   # Framework used
 50 |   "Framework :: Pytest",
 51 |   "Framework :: Sphinx",
 52 |   "Framework :: tox",
 53 | 
 54 |   # Indicate who your project is intended for
 55 |   "Intended Audience :: Developers",
 56 |   "Intended Audience :: System Administrators",
 57 | 
 58 |   # Target OS
 59 |   "Operating System :: OS Independent",
 60 | 
 61 |   # Version available for this project
 62 |   "Programming Language :: Python :: 3",
 63 |   "Programming Language :: Python :: 3.8",
 64 |   "Programming Language :: Python :: 3.9",
 65 |   "Programming Language :: Python :: 3.10",
 66 |   "Programming Language :: Python :: 3.11",
 67 |   "Programming Language :: Python :: 3.12",
 68 |   "Programming Language :: Python :: 3 :: Only",
 69 | 
 70 |   # What is the language used in the project
 71 |   "Natural Language :: English",
 72 | 
 73 |   "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
 74 | 
 75 |   # The project is typed
 76 |   "Typing :: Typed"
 77 | ]
 78 | dynamic = ["version"]
 79 | 
 80 | [project.urls]
 81 | Homepage = "https://github.com/ANSSI-FR/orc2timeline"
 82 | Issues = "https://github.com/ANSSI-FR/orc2timeline/issues"
 83 | Documentation = "https://github.com/ANSSI-FR/DECODE/tree/main/docs"
 84 | Source = "https://github.com/ANSSI-FR/orc2timeline"
 85 | 
 86 | [project.scripts]
 87 | orc2timeline = "orc2timeline.cli:entrypoint"
 88 | 
 89 | [project.optional-dependencies]
 90 | tests = [
 91 |   "pytest>=7.3.0",
 92 |   "pytest-mock>=3.10.0",
 93 | ]
 94 | cov = [
 95 |   "orc2timeline[tests]",
 96 |   "coverage[toml]>=6.5.0",
 97 |   "pytest-cov>=4.0.0",
 98 | ]
 99 | lint = [
100 |   "orc2timeline[tests]",
101 |   "mypy>=1.2.0",
102 |   "black>=23.0.0",
103 |   "ruff>=v0.0.275",
104 |   "types-setuptools>=57.0",
105 | ]
106 | tox = [
107 |   "tox>=4.0.0",
108 | ]
109 | build = [
110 |   "build>=0.10.0",
111 | ]
112 | deploy = [
113 |   "twine>=4.0.0",
114 | ]
115 | dev = [
116 |   "orc2timeline[tests,cov,lint,tox,build,deploy]",
117 | ]
118 | # For add optional dependencies, uncomment the next section
119 | #[project.optional-dependencies]
120 | 
121 | 
122 | # ----------------------------------------------------------------------
123 | # Setuptools
124 | # https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html
125 | # ----------------------------------------------------------------------
126 | 
127 | [tool.setuptools]
128 | include-package-data = true
129 | 
130 | [tool.setuptools.dynamic]
131 | version = {attr = "orc2timeline.info.__version__"}
132 | 
133 | [tool.setuptools.packages.find]
134 | where = ["src"]
135 | 
136 | [tool.setuptools.package-data]
137 | orc2timeline = ["plugins/**/*.txt", "conf/*.yaml"]
138 | 
139 | # ----------------------------------------------------------------------
140 | # Tox
141 | # https://pypi.org/project/tox
142 | # ----------------------------------------------------------------------
143 | 
144 | [tool.tox]
145 | legacy_tox_ini = """
146 | [tox]
147 | min_version = 4.0
148 | envlist = lint,tests
149 | 
150 | [testenv]
151 | deps = .[tests]
152 | commands =
153 |   pytest
154 | 
155 | [testenv:lint]
156 | deps = .[lint]
157 | commands =
158 |   ruff check .
159 |   black --diff .
160 |   mypy .
161 | 
162 | [testenv:format]
163 | deps = .[lint]
164 | commands =
165 |   black .
166 |   ruff check --fix .
167 | 
168 | [testenv:cov]
169 | deps = .[cov]
170 | commands =
171 | 	pytest -s --cov {envsitepackagesdir}/orc2timeline --cov-report html --cov-report term --cov-append
172 | """
173 | 
174 | 
175 | # ----------------------------------------------------------------------
176 | # Pytest
177 | # https://docs.pytest.org/en/7.3.x/
178 | # ----------------------------------------------------------------------
179 | 
180 | [tool.pytest.ini_options]
181 | log_cli = true
182 | log_cli_level = "DEBUG"
183 | #asyncio_mode = "auto"
184 | 
185 | 
186 | # ----------------------------------------------------------------------
187 | # Black
188 | # https://pypi.org/project/black
189 | # ----------------------------------------------------------------------
190 | 
191 | [tool.black]
192 | line-length = 120
193 | target-version  = ["py38", "py39", "py310", "py311"]
194 | 
195 | # Enable linting on pyi files
196 | include = "\\.pyi?$"
197 | 
198 | 
199 | # ----------------------------------------------------------------------
200 | # Mypy
201 | # https://pypi.org/project/mypy
202 | # ----------------------------------------------------------------------
203 | 
204 | [tool.mypy]
205 | python_version = 3.8
206 | exclude = [
207 |   ".bzr",
208 |   ".direnv",
209 |   ".eggs",
210 |   ".git",
211 |   ".hg",
212 |   ".mypy_cache",
213 |   ".nox",
214 |   ".pants.d",
215 |   ".pytype",
216 |   ".ruff_cache",
217 |   ".svn",
218 |   ".tox",
219 |   ".venv",
220 |   "__pypackages__",
221 |   "_build",
222 |   "buck-out",
223 |   "build",
224 |   "dist",
225 |   "node_modules",
226 |   "venv",
227 | ]
228 | enable_error_code = ["ignore-without-code", "truthy-bool", "redundant-expr"]
229 | 
230 | # Disallow dynamic typing
231 | disallow_any_unimported = false
232 | disallow_any_expr = false  # All attribut of argparse.Namespace are Any
233 | disallow_any_decorated = false  # Too many package doesn't have typed decorator
234 | disallow_any_generics = true
235 | disallow_subclassing_any = true
236 | 
237 | # Disallow untyped definitions and calls
238 | disallow_untyped_calls = true
239 | disallow_untyped_defs = true
240 | disallow_incomplete_defs = true
241 | check_untyped_defs = true
242 | disallow_untyped_decorators = false  # Too many decorator are untyped
243 | 
244 | # None and optional handling
245 | no_implicit_optional = true
246 | 
247 | # Configuring warnings
248 | warn_unused_ignores = true
249 | warn_no_return = true
250 | warn_return_any = true
251 | warn_redundant_casts = true
252 | 
253 | # Misc things
254 | strict_equality = true
255 | 
256 | # Config file
257 | warn_unused_configs = true
258 | 
259 | # Exemple for missing types
260 | [[tool.mypy.overrides]]
261 | module = ["py7zr", "dfwinreg", "pyevtx", "pytz", "pyesedb", "dateparser", "yaml"]
262 | ignore_missing_imports = true
263 | 
264 | # ----------------------------------------------------------------------
265 | # Ruff
266 | # https://pypi.org/project/ruff
267 | # ----------------------------------------------------------------------
268 | 
269 | [tool.ruff]
270 | exclude = [
271 |   ".bzr",
272 |   ".direnv",
273 |   ".eggs",
274 |   ".git",
275 |   ".hg",
276 |   ".mypy_cache",
277 |   ".nox",
278 |   ".pants.d",
279 |   ".pytype",
280 |   ".ruff_cache",
281 |   ".svn",
282 |   ".tox",
283 |   ".venv",
284 |   "__pypackages__",
285 |   "_build",
286 |   "buck-out",
287 |   "build",
288 |   "dist",
289 |   "node_modules",
290 |   "venv",
291 | ]
292 | line-length = 120
293 | target-version = "py38"
294 | 
295 | [tool.ruff.lint]
296 | select = ["ALL"]
297 | # D203 and D211 are incompatible
298 | # D212 and D213 are incompatible
299 | # D400 [*] First line should end with a period
300 | # D101 Missing docstring in public class
301 | ignore = ["D203", "D213", "D400", "D101", "PERF203", "N999"]
302 | fixable = ["ALL"]
303 | unfixable = []
304 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
305 | isort.known-first-party = ["orc2timeline"]
306 | mccabe.max-complexity = 12
307 | 
308 | [tool.ruff.lint.per-file-ignores]
309 | # E402 Module level import not at top of file
310 | # INP001 File `docs\conf.py` is part of an implicit namespace package. Add an `__init__.py`.
311 | # A001 Variable `copyright` is shadowing a python builtin
312 | # PTH100 `os.path.abspath()` should be replaced by `Path.resolve()`
313 | "docs/conf.py" = ["E402", "INP001", "A001", "PTH100"]
314 | # S101 Use of `assert` detected
315 | # S603 `subprocess` call: check for execution of untrusted input
316 | "tests/*.py" = ["S101", "S603"]
317 | # Q003 [*] Change outer quotes to avoid escaping inner quotes
318 | # E501 Line too long
319 | "*/info.py" = ["Q003", "E501"]
320 | # E501 Line too long
321 | "*/__main__.py" = ["E501"]
322 | 
323 | # ----------------------------------------------------------------------
324 | # Pylint
325 | # https://pylint.pycqa.org/en/latest/index.html
326 | # ----------------------------------------------------------------------
327 | 
328 | # We dont use pylint, so we disabled it
329 | [tool.pylint.main]
330 | ignore-patterns = ["*"]
331 | 
332 | [tool.ruff.lint.pylint]
333 | max-args = 7
334 | 


--------------------------------------------------------------------------------
/src/orc2timeline/__init__.py:
--------------------------------------------------------------------------------
 1 | """Main module."""
 2 | 
 3 | from .cli import entrypoint
 4 | from .core import process
 5 | from .info import __author__, __copyright__, __description__, __email__, __version__
 6 | 
 7 | __all__ = [
 8 |     "__author__",
 9 |     "__copyright__",
10 |     "__description__",
11 |     "__email__",
12 |     "__version__",
13 |     "entrypoint",
14 |     "process",
15 | ]
16 | 


--------------------------------------------------------------------------------
/src/orc2timeline/__main__.py:
--------------------------------------------------------------------------------
 1 | """Entrypoint with `python -m cookiepython`."""
 2 | 
 3 | import sys
 4 | 
 5 | from .cli import entrypoint
 6 | 
 7 | if __name__ == "__main__":
 8 |     # Patch pickle for anyio.to_process.run_sync
 9 |     # ImportError: attempted relative import with no known parent package
10 |     sys.modules["__main__"] = entrypoint  # type: ignore[assignment]  # pragma: no cover
11 |     entrypoint()  # pragma: no cover
12 | 


--------------------------------------------------------------------------------
/src/orc2timeline/cli.py:
--------------------------------------------------------------------------------
  1 | """Module for command line interface."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import logging
  6 | import os
  7 | import re
  8 | from pathlib import Path
  9 | 
 10 | import click
 11 | 
 12 | from .config import Config
 13 | from .core import OrcArgument, process, process_dir
 14 | from .info import __copyright__, __description__, __version__
 15 | 
 16 | LOG_LEVELS = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
 17 | ORC_REGEX = r"^(?:DFIR\-)?ORC_[^_]*_(.*)_[^_]*\.7z$"
 18 | RESULT_EXTENSION = ".csv.gz"
 19 | 
 20 | LOGGER = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | @click.group(name="orc2timeline", help=__description__, epilog=f"{__version__} - {__copyright__}")
 24 | @click.version_option(__version__)
 25 | @click.option(
 26 |     "tmp_dir",
 27 |     "--tmp-dir",
 28 |     envvar="TMPDIR",
 29 |     type=click.Path(dir_okay=True, file_okay=False, exists=True, writable=True, readable=True),
 30 |     help="Directory where to write temporary files into. TMPDIR global variable can also be used.",
 31 | )
 32 | @click.option(
 33 |     "--log-level",
 34 |     metavar="level",
 35 |     type=click.Choice(LOG_LEVELS),
 36 |     default="INFO",
 37 |     show_default=True,
 38 |     help="Print log messages of this level and higher",
 39 | )
 40 | @click.option("--log-file", help="Log file to store DEBUG level messages", metavar="file")
 41 | def entrypoint(tmp_dir: str, log_level: str, log_file: str | None) -> None:
 42 |     """Cli function."""
 43 |     # Setup logging
 44 |     if log_file:
 45 |         # Send everything (DEBUG included) in the log file and keep only log_level messages on the console
 46 |         logging.basicConfig(
 47 |             level=logging.DEBUG,
 48 |             format="[%(asctime)s] %(levelname)-8s - %(name)s - %(message)s",
 49 |             filename=log_file,
 50 |             filemode="w",
 51 |         )
 52 |         # define a Handler which writes messages of log_level or higher to the sys.stderr
 53 |         console = logging.StreamHandler()
 54 |         console.setLevel(log_level)
 55 |         # set a format which is simpler for console use
 56 |         formatter = logging.Formatter("[%(asctime)s] %(levelname)-8s - %(message)s")
 57 |         # tell the handler to use this format
 58 |         console.setFormatter(formatter)
 59 |         # add the handler to the root logger
 60 |         logging.root.addHandler(console)
 61 |     else:
 62 |         logging.basicConfig(
 63 |             level=log_level,
 64 |             format="[%(asctime)s] %(levelname)-8s - %(message)s",
 65 |         )
 66 | 
 67 |     if tmp_dir is not None:
 68 |         os.environ["TMPDIR"] = tmp_dir
 69 | 
 70 | 
 71 | @entrypoint.command("show_conf_file")
 72 | def cmd_show_conf_file() -> None:
 73 |     """Show path to configuration file."""
 74 |     click.echo("Configuration file is located at the following path:")
 75 |     click.echo(Config().config_file)
 76 | 
 77 | 
 78 | @entrypoint.command("show_conf")
 79 | def cmd_show_conf() -> None:
 80 |     """Show the configuration file content."""
 81 |     conf_path = Config().config_file
 82 |     click.echo("Configuration file content:")
 83 |     click.echo("=======================================================================")
 84 |     with conf_path.open("r") as f:
 85 |         data = f.read()
 86 |         click.echo(data)
 87 |     click.echo("=======================================================================")
 88 | 
 89 | 
 90 | @entrypoint.command("process")
 91 | @click.option("-j", "--jobs", type=int, default=-1, help="Number of threads to use")
 92 | @click.argument("file_list", type=click.Path(dir_okay=False, exists=True), nargs=-1)
 93 | @click.argument("output_path", type=click.Path(dir_okay=False, exists=False), nargs=1)
 94 | @click.option(
 95 |     "--overwrite",
 96 |     is_flag=True,
 97 |     show_default=True,
 98 |     default=False,
 99 |     help="Overwrite destination file if it already exists",
100 | )
101 | def cmd_process(jobs: int, file_list: str, output_path: str, *, overwrite: bool) -> None:
102 |     """Command to process a list of files."""
103 |     if (not Path(output_path).parent.exists()) or (not Path(output_path).parent.is_dir()):
104 |         msg = (
105 |             f"'OUTPUT_PATH': Directory '{click.format_filename(Path(output_path).parent.as_posix())}'"
106 |             " does not exist or is not a directory."
107 |         )
108 |         raise click.BadParameter(msg)
109 |     if not overwrite and Path(output_path).exists():
110 |         msg = (
111 |             f"'OUTPUT_PATH': File '{click.format_filename(output_path)}' already exists,"
112 |             " use '--overwrite' if you know what you are doing."
113 |         )
114 |         raise click.BadParameter(msg)
115 |     if jobs == -1:
116 |         LOGGER.warning(
117 |             "--jobs option was not given, thus only one thread will be used. Therefore processing could take a while.",
118 |         )
119 | 
120 |     hostname_set = set()
121 |     clean_file_list = []
122 |     for file in file_list:
123 |         hostname = ""
124 |         try:
125 |             re_extract = re.search(ORC_REGEX, Path(file).name)
126 |             if re_extract is not None:
127 |                 hostname = re_extract.group(1)
128 |                 clean_file_list.append(Path(file))
129 |             else:
130 |                 msg = (
131 |                     rf"Impossible to extract hostname from filename '{file}', file will be ignored."
132 |                     rf" Tip: filename must match regex '{ORC_REGEX}'"
133 |                 )
134 |                 LOGGER.info(msg)
135 | 
136 |         except AttributeError:
137 |             msg = rf"Impossible to extract hostname from filename '{file}', filename must match regex '{ORC_REGEX}'"
138 |             LOGGER.info(msg)
139 | 
140 |         if hostname != "":
141 |             hostname_set.add(hostname)
142 | 
143 |     if len(hostname_set) != 1:
144 |         msg = f"Bad file list, all files must belong to the same host. Parsed hosts: {hostname_set}"
145 |         raise click.BadParameter(msg)
146 | 
147 |     process(clean_file_list, output_path, hostname_set.pop(), jobs)
148 | 
149 | 
150 | @entrypoint.command("process_dir")
151 | @click.option("-j", "--jobs", type=int, default=-1, help="Number of threads to use")
152 | @click.argument("input_dir", type=click.Path(dir_okay=True, file_okay=False, exists=True), nargs=1)
153 | @click.argument("output_dir", type=click.Path(dir_okay=True, file_okay=False, exists=True), nargs=1)
154 | @click.option(
155 |     "--overwrite",
156 |     is_flag=True,
157 |     show_default=True,
158 |     default=False,
159 |     help="Overwrite destination file if it already exists",
160 | )
161 | def cmd_process_dir(jobs: int, input_dir: str, output_dir: str, *, overwrite: bool) -> None:
162 |     """Process all ORCs in INPUT_DIRECTORY, writes output files in OUTPUT_DIR."""
163 |     if jobs == -1:
164 |         LOGGER.warning(
165 |             "--jobs option was not given, thus only one thread will be used. Therefore processing could take a while.",
166 |         )
167 | 
168 |     orc_arguments = _crawl_input_dir_and_return_megastruct(input_dir, output_dir)
169 | 
170 |     final_orc_arguments = []
171 |     for orc_argument in orc_arguments:
172 |         if orc_argument.output_path.exists() and not overwrite:
173 |             # verify if destination output already exists
174 |             # create output directory if it does not exist
175 |             LOGGER.warning(
176 |                 "Output file '%s' already exists, processing will be ignored for host %s"
177 |                 " use '--overwrite' if you know what you are doing.",
178 |                 orc_argument.output_path.as_posix(),
179 |                 orc_argument.hostname,
180 |             )
181 |             continue
182 |         if not orc_argument.output_path.parent.exists():
183 |             orc_argument.output_path.parent.mkdir(parents=True, exist_ok=True)
184 |         final_orc_arguments.append(orc_argument)
185 | 
186 |     process_dir(final_orc_arguments, jobs)
187 | 
188 | 
189 | def _crawl_input_dir_and_return_megastruct(input_dir: str, output_dir: str) -> list[OrcArgument]:
190 |     orc_arguments: dict[str, OrcArgument] = {}
191 |     for file_in_sub_dir in Path(input_dir).glob("**/*"):
192 |         re_extract = re.search(ORC_REGEX, Path(file_in_sub_dir).name)
193 |         hostname = ""
194 |         if re_extract is not None:
195 |             hostname = re_extract.group(1)
196 |         if hostname != "":
197 |             output_sub_path = Path(file_in_sub_dir.parent).relative_to(input_dir) / (hostname + RESULT_EXTENSION)
198 |             output_total_path = str(Path(output_dir) / output_sub_path)
199 |             if orc_arguments.get(output_total_path) is None:
200 |                 new_orc_argument = OrcArgument(hostname=hostname, output_path=Path(output_total_path))
201 |                 orc_arguments[output_total_path] = new_orc_argument
202 |             orc_arguments[output_total_path].orc_paths.append(Path(file_in_sub_dir))
203 | 
204 |     return list(orc_arguments.values())
205 | 


--------------------------------------------------------------------------------
/src/orc2timeline/conf/Orc2Timeline.yaml:
--------------------------------------------------------------------------------
 1 | Plugins:
 2 |   - RegistryToTimeline:
 3 |       archives: ["SAM", "Little", "Detail", "Offline"]
 4 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
 5 |       match_pattern: ".*data$"
 6 |       sourcetype: "Registry"
 7 | 
 8 |   - EventLogsToTimeline:
 9 |       archives: ["General", "Little", "Offline"]
10 |       sub_archives: ["Event.7z"]
11 |       match_pattern: ".*evtx.*data$"
12 |       sourcetype: "Event"
13 | 
14 |   - NTFSInfoToTimeline:
15 |       archives: ["Detail", "General", "Little", "Offline"]
16 |       sub_archives: ["NTFSInfo_detail.7z", "NTFSInfo_quick.7z"]
17 |       match_pattern: "^.*NTFSInfo[^/]*\\.csv$"
18 |       sourcetype: "MFT"
19 |  
20 |   - USNInfoToTimeline:
21 |       archives: ["Detail", "Little", "Offline"]
22 |       sub_archives: ["USNInfo.7z"]
23 |       match_pattern: "^USNInfo.*\\.csv$"
24 |       sourcetype: "USN journal"
25 |  
26 |   - I30InfoToTimeline:
27 |       archives: ["Detail", "Offline"]
28 |       sub_archives: ["NTFSInfo_i30Info.7z"]
29 |       match_pattern: "^I30Info.*\\.csv$"
30 |       sourcetype: "I30"
31 | 


--------------------------------------------------------------------------------
/src/orc2timeline/config.py:
--------------------------------------------------------------------------------
  1 | """Module for configuration."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import logging
  6 | import sys
  7 | from pathlib import Path
  8 | 
  9 | import yaml
 10 | 
 11 | DEFAULT_CONFIG_FILE = "Orc2Timeline.yaml"
 12 | ROOT_DIR = Path(__file__).resolve().parent
 13 | LOGGER = logger = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | class Orc2TimelineConfigError(Exception):
 17 |     pass
 18 | 
 19 | 
 20 | class Config:
 21 |     def __init__(self) -> None:
 22 |         """Create Config object."""
 23 |         self.plugin_conf_list: list[PluginConfig] = []
 24 |         config_file = ROOT_DIR / "conf" / DEFAULT_CONFIG_FILE
 25 | 
 26 |         if not config_file.exists():
 27 |             LOGGER.error('Cannot read configuration file "%s" (file does not exist)', config_file)
 28 |             error_str = f'Cannot read configuration file "{config_file}" (file does not exist)'
 29 |             raise Orc2TimelineConfigError(error_str)
 30 | 
 31 |         if not config_file.is_file():
 32 |             LOGGER.error('Cannot read configuration file "%s" (is not a file)', config_file)
 33 |             error_str = f'Cannot read configuration file "{config_file}" (is not a file)'
 34 |             raise Orc2TimelineConfigError(error_str)
 35 | 
 36 |         try:
 37 |             with config_file.open("r") as conf_file:
 38 |                 self.global_config = yaml.safe_load(conf_file)
 39 |                 self._parse_global_config()
 40 |         except yaml.error.MarkedYAMLError:
 41 |             LOGGER.critical("An error occured while parsing configuration (file: %s)", str(config_file))
 42 |             raise
 43 | 
 44 |         self.config_file = config_file
 45 | 
 46 |     def _parse_global_config(self) -> None:
 47 |         for plugin_conf_text in self.global_config["Plugins"]:
 48 |             for plug in plugin_conf_text:
 49 |                 if plugin_conf_text[plug]["archives"] is None or len(plugin_conf_text[plug]["archives"]) == 0:
 50 |                     msg = f"Plugin {plug}: configuration describes plugin without any archive."
 51 |                     raise Orc2TimelineConfigError(msg)
 52 |                 for archive in plugin_conf_text[plug]["archives"]:
 53 |                     if (
 54 |                         plugin_conf_text[plug].get("sub_archives") is None
 55 |                         or len(plugin_conf_text[plug].get("sub_archives")) == 0
 56 |                     ):
 57 |                         plugin_conf = PluginConfig(
 58 |                             plug,
 59 |                             [archive],
 60 |                             plugin_conf_text[plug]["match_pattern"],
 61 |                             plugin_conf_text[plug]["sourcetype"],
 62 |                             [],
 63 |                         )
 64 |                         self.plugin_conf_list.append(plugin_conf)
 65 |                     else:
 66 |                         if not isinstance(plugin_conf_text[plug].get("sub_archives", []), list):
 67 |                             msg = f"Plugin {plug}: sub_archives is not a list."
 68 |                             raise Orc2TimelineConfigError(msg)
 69 | 
 70 |                         for sub_archive in plugin_conf_text[plug].get("sub_archives", []):
 71 |                             plugin_conf = PluginConfig(
 72 |                                 plug,
 73 |                                 [archive],
 74 |                                 plugin_conf_text[plug]["match_pattern"],
 75 |                                 plugin_conf_text[plug]["sourcetype"],
 76 |                                 [sub_archive],
 77 |                             )
 78 |                             self.plugin_conf_list.append(plugin_conf)
 79 |         if len(self.plugin_conf_list) == 0:
 80 |             LOGGER.critical("Plugin list seems empty, exiting.")
 81 |             sys.exit(1)
 82 | 
 83 | 
 84 | class PluginConfig:
 85 |     def __init__(
 86 |         self,
 87 |         plugin_name: str,
 88 |         archives: list[str],
 89 |         match_pattern: str,
 90 |         sourcetype: str,
 91 |         sub_archives: list[str],
 92 |     ) -> None:
 93 |         """Create PluginConfig object."""
 94 |         self.plugin_name = plugin_name
 95 |         self.archives = archives
 96 |         self.sub_archives = sub_archives
 97 |         self.match_pattern = match_pattern
 98 |         self.sourcetype = sourcetype
 99 | 
100 |         if self.sub_archives is None:
101 |             self.sub_archives = []
102 | 
103 |         if self.plugin_name == "":
104 |             msg = "Empty plugin name in configuration is not allowed."
105 |             raise Orc2TimelineConfigError(msg)
106 |         if not Path(ROOT_DIR, "plugins", self.plugin_name + ".py").is_file():
107 |             msg = (
108 |                 f"Plugin {self.plugin_name}: {Path(ROOT_DIR, 'plugins', self.plugin_name + '.py').as_posix()}"
109 |                 f" does not exist."
110 |             )
111 |             raise Orc2TimelineConfigError(msg)
112 |         if len(self.archives) == 0:
113 |             msg = f"Plugin {self.plugin_name}: archives should not be empty."
114 |             raise Orc2TimelineConfigError(msg)
115 |         if self.sourcetype == "":
116 |             msg = f"Plugin {self.plugin_name}: empty sourcetype is not allowed."
117 |             raise Orc2TimelineConfigError(msg)
118 |         if self.match_pattern == "":
119 |             msg = (
120 |                 f"Plugin {self.plugin_name}: empty match_pattern is not allowed. "
121 |                 'Hint: ".*" can be used to match all the files.'
122 |             )
123 |             raise Orc2TimelineConfigError(msg)
124 | 


--------------------------------------------------------------------------------
/src/orc2timeline/core.py:
--------------------------------------------------------------------------------
  1 | """Core module."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import concurrent.futures
  6 | import csv
  7 | import gzip
  8 | import heapq
  9 | import logging
 10 | import multiprocessing
 11 | import os
 12 | import shutil
 13 | import sys
 14 | import tempfile
 15 | from importlib import import_module
 16 | from pathlib import Path
 17 | from tempfile import TemporaryDirectory
 18 | from typing import TYPE_CHECKING, Any, TextIO
 19 | 
 20 | if TYPE_CHECKING:
 21 |     from threading import Lock as LockType
 22 | 
 23 | from .config import Config
 24 | 
 25 | ROOT_DIR = Path(__file__).resolve().parent
 26 | TEMP_DIRECTORY: Any = None
 27 | LOGGER = logging.getLogger(__name__)
 28 | 
 29 | 
 30 | def _add_header_to_csv_file(output_path: str) -> None:
 31 |     """Add header at the beginning of csv file."""
 32 |     header = ["Timestamp", "Hostname", "SourceType", "Description", "SourceFile"]
 33 |     with gzip.open(output_path, "wt", newline="") as f:
 34 |         csv_dict_writer = csv.DictWriter(f, delimiter=",", quotechar='"', fieldnames=header)
 35 |         csv_dict_writer.writeheader()
 36 | 
 37 | 
 38 | def _map_open(input_file: Path) -> TextIO:
 39 |     return input_file.open(encoding="utf-8")
 40 | 
 41 | 
 42 | def _merge_sorted_files(paths: list[Path], output_path: str, temp_dir: str) -> int:
 43 |     """Merge sorted files contained in paths list to output_path file and return number of unique lines."""
 44 |     events_count = 0
 45 |     intermediate_file = tempfile.NamedTemporaryFile(  # noqa: SIM115
 46 |         dir=temp_dir,
 47 |         encoding="utf-8",
 48 |         mode="w+",
 49 |         delete=False,
 50 |     )
 51 |     old_intermediate_file_name = ""
 52 |     while len(paths) != 0:
 53 |         sub_paths = []
 54 |         # We merge files by batch so that we do not reach the limitation of files opened at the same time
 55 |         # arbitrary value is 300 because 512 is the maximum value on Windows
 56 |         sub_paths = [paths.pop() for _ in range(min(300, len(paths)))]
 57 |         if old_intermediate_file_name != "":
 58 |             sub_paths.append(Path(old_intermediate_file_name))
 59 | 
 60 |         files = map(_map_open, sub_paths)
 61 |         previous_comparable = ""
 62 |         for line in heapq.merge(*files):
 63 |             comparable = line
 64 |             if previous_comparable != comparable:
 65 |                 intermediate_file.write(line)
 66 |                 previous_comparable = comparable
 67 |                 if len(paths) == 0:
 68 |                     events_count += 1
 69 |         old_intermediate_file_name = intermediate_file.name
 70 |         intermediate_file.close()
 71 |         for f in files:
 72 |             f.close()
 73 |         intermediate_file = tempfile.NamedTemporaryFile(  # noqa: SIM115
 74 |             dir=temp_dir,
 75 |             encoding="utf-8",
 76 |             mode="w+",
 77 |             delete=False,
 78 |         )
 79 | 
 80 |     _add_header_to_csv_file(output_path)
 81 |     with Path(old_intermediate_file_name).open(encoding="utf-8") as infile, gzip.open(
 82 |         output_path,
 83 |         "at",
 84 |         encoding="utf-8",
 85 |         newline="",
 86 |     ) as outfile:
 87 |         shutil.copyfileobj(infile, outfile)
 88 | 
 89 |     return events_count
 90 | 
 91 | 
 92 | def _merge_timelines_for_host(hostname: str, output_path: str, tmp_dir: tempfile.TemporaryDirectory[str]) -> int:
 93 |     """Merge subtimelines for a given host.
 94 | 
 95 |     Merge all files that match 'timeline_{hostname}_*' regex
 96 |     for hostname in temporary directory to output_path file.
 97 |     """
 98 |     files_to_merge = list(Path(tmp_dir.name).glob(f"**/timeline_{hostname}_*"))
 99 |     LOGGER.info("Merging all timelines generated per artefact for host %s", hostname)
100 | 
101 |     result = _merge_sorted_files(
102 |         files_to_merge,
103 |         output_path,
104 |         tmp_dir.name,
105 |     )
106 | 
107 |     for file in files_to_merge:
108 |         file.unlink()
109 | 
110 |     return result
111 | 
112 | 
113 | def _is_list_uniq(host_list: list[str]) -> bool:
114 |     """Return True if all elements are different in host_lists."""
115 |     return len(host_list) == len(set(host_list))
116 | 
117 | 
118 | def _get_duplicate_values_from_list(input_list: list[str]) -> set[str]:
119 |     """Return a sublist of input_list containing duplicate values of this list."""
120 |     seen = set()
121 |     dupes = set()
122 |     for x in input_list:
123 |         if x in seen:
124 |             dupes.add(x)
125 |         else:
126 |             seen.add(x)
127 |     return dupes
128 | 
129 | 
130 | def _load_plugins(
131 |     config: Config,
132 |     orc_arguments: list[OrcArgument],
133 |     tmp_dir: TemporaryDirectory[str],
134 |     lock: LockType | None,
135 | ) -> list[Any]:
136 |     plugin_classes_list = []
137 |     for orc_argument in orc_arguments:
138 |         hostname = orc_argument.hostname
139 |         for plugin_config in config.plugin_conf_list:
140 |             mod = import_module(f"orc2timeline.plugins.{plugin_config.plugin_name}")
141 |             plugin_class = getattr(mod, plugin_config.plugin_name, None)
142 |             if plugin_class is not None:
143 |                 plugin_timeline_path = Path(tmp_dir.name) / f"timeline_{hostname}_{plugin_class.__name__}"
144 |                 plugin_classes_list.append(
145 |                     plugin_class(
146 |                         plugin_config,
147 |                         orc_argument.orc_paths,
148 |                         plugin_timeline_path,
149 |                         hostname,
150 |                         tmp_dir.name,
151 |                         lock,
152 |                     ),
153 |                 )
154 | 
155 |     return plugin_classes_list
156 | 
157 | 
158 | def _run_plugin(
159 |     plugin: Any,  # noqa: ANN401
160 | ) -> Any:  # noqa: ANN401
161 |     return plugin.add_to_timeline()
162 | 
163 | 
164 | class OrcArgument:
165 |     """Define all the needed parameters to process ORC and create timeline."""
166 | 
167 |     def __init__(self, hostname: str = "", output_path: Path = Path(), orc_paths: list[Path] | None = None) -> None:
168 |         """Construct."""
169 |         self.hostname = hostname
170 |         self.output_path = output_path
171 |         if orc_paths is None:
172 |             self.orc_paths = []
173 |         else:
174 |             self.orc_paths = orc_paths
175 | 
176 | 
177 | def process(file_list: list[Path], output_path: str, hostname: str, jobs: int) -> int:
178 |     """Create a timeline for one host.
179 | 
180 |     Create timeline in output_path file from Orc given in file_list
181 |     for a specific host (hostname), jobs variable is used to indicate
182 |     how many threads can be used.
183 |     """
184 |     orc_argument = OrcArgument(orc_paths=file_list, hostname=hostname, output_path=Path(output_path))
185 |     return _process_inner(orc_argument, jobs)
186 | 
187 | 
188 | def _process_inner(orc_argument: OrcArgument, jobs: int) -> int:
189 |     """Create timeline from OrcArgument object with "jobs" threads."""
190 |     LOGGER.info("Processing files for host: %s", orc_argument.hostname)
191 |     lock = None
192 |     if jobs > 1:
193 |         lock = multiprocessing.Manager().Lock()
194 |     temp_directory_parent = os.environ.get("TMPDIR")
195 |     tmp_dir = tempfile.TemporaryDirectory(dir=temp_directory_parent, prefix="Orc2TimelineTempDir_")
196 |     orc_arguments = [orc_argument]
197 |     plugin_classes_list = _load_plugins(Config(), orc_arguments, tmp_dir, lock)
198 | 
199 |     # all_results is a list of tuple(host, plugin_name, number_of_events) that is later used to print final summary
200 |     all_results = []
201 |     if jobs <= 1:
202 |         all_results.extend(
203 |             [
204 |                 (
205 |                     orc_argument.hostname,
206 |                     plugin.__class__.__name__,
207 |                     _run_plugin(plugin),
208 |                 )
209 |                 for plugin in plugin_classes_list
210 |             ],
211 |         )
212 |     else:
213 |         with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as pool:
214 |             # store parallel plugin execution results
215 |             futures = [pool.submit(_run_plugin, plugin) for plugin in plugin_classes_list]
216 |             concurrent.futures.wait(futures)
217 |             futures_results = [future.result() for future in futures]
218 |             # loop to match plugin results to initial parameters
219 |             for plugin, res in zip(plugin_classes_list, futures_results):
220 |                 all_results.append((orc_argument.hostname, plugin.__class__.__name__, res))
221 | 
222 |     total_result = _merge_timelines_for_host(orc_argument.hostname, str(orc_argument.output_path), tmp_dir)
223 |     total_results_per_host = {orc_argument.hostname: total_result}
224 | 
225 |     _print_summaries(total_results_per_host, all_results)
226 | 
227 |     return total_result
228 | 
229 | 
230 | def process_dir(orc_arguments: list[OrcArgument], jobs: int) -> int:
231 |     """Process all plugins for all hosts."""
232 |     lock = None
233 |     if jobs > 1:
234 |         lock = multiprocessing.Manager().Lock()
235 | 
236 |     temp_directory_parent = os.environ.get("TMPDIR")
237 |     tmp_dir = tempfile.TemporaryDirectory(dir=temp_directory_parent, prefix="Orc2TimelineTempDir_")
238 |     plugin_classes_list = _load_plugins(Config(), orc_arguments, tmp_dir, lock)
239 | 
240 |     _check_orc_list_and_print_intro(orc_arguments)
241 | 
242 |     # all_results is a list of tuple(host, plugin_name, number_of_events) that is later used to print final summary
243 |     all_results = []
244 |     if jobs <= 1:
245 |         all_results.extend(
246 |             [
247 |                 (
248 |                     plugin.hostname,
249 |                     plugin.__class__.__name__,
250 |                     _run_plugin(plugin),
251 |                 )
252 |                 for plugin in plugin_classes_list
253 |             ],
254 |         )
255 |     else:
256 |         with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as pool:
257 |             # store parallel plugin execution results
258 |             futures = []
259 |             index_list = []
260 |             for plugin in plugin_classes_list:
261 |                 # need to keep trace of argument to make it match with results later
262 |                 index_list.append((plugin.hostname, plugin.__class__.__name__))
263 |                 futures.append(
264 |                     pool.submit(_run_plugin, plugin),
265 |                 )
266 | 
267 |             concurrent.futures.wait(futures)
268 |             future_results = [future.result() for future in futures]
269 |             # building all_results by using simultanueously index_list and plugin results (futures)
270 |             for index_num, index_tup in enumerate(index_list):
271 |                 hostname = index_tup[0]
272 |                 plugin = index_tup[1]
273 |                 all_results.append((hostname, plugin, future_results[index_num]))
274 | 
275 |     total_results = 0
276 |     # dictionnary total_results_per_host[hostname] = total_number_of_events_for_this_host
277 |     total_results_per_host = _merge_timelines_with_jobs(orc_arguments, jobs, tmp_dir)
278 |     total_results = sum(total_results_per_host.values())
279 | 
280 |     _print_summaries(total_results_per_host, all_results)
281 | 
282 |     return total_results
283 | 
284 | 
285 | def _get_all_results_filtered_by_host(all_results: list[tuple[str, str, int]], host: str) -> list[tuple[str, str, int]]:
286 |     """Return sublist of all_results where first element of tuple (hostname) match given host."""
287 |     return [result for result in all_results if host == result[0]]
288 | 
289 | 
290 | def _get_all_results_filtered_by_plugin(
291 |     all_results: list[tuple[str, str, int]],
292 |     plugin: str,
293 | ) -> list[tuple[str, str, int]]:
294 |     """Return sublist of all_results where second element of tuple (hostname) match given host."""
295 |     return [result for result in all_results if plugin == result[1]]
296 | 
297 | 
298 | def _check_orc_list_and_print_intro(orc_arguments: list[OrcArgument]) -> None:
299 |     """Verify that there is no duplicates in given orc_arguments (stops the program if there is) and print intro."""
300 |     host_list = [orc_argument.hostname for orc_argument in orc_arguments]
301 |     if not _is_list_uniq(host_list):
302 |         dupes = _get_duplicate_values_from_list(host_list)
303 |         LOGGER.critical("Unable to process directory if the same host is used many times.")
304 |         LOGGER.critical("Hint, these hosts seem to be the source of the problem : %s", dupes)
305 |         sys.exit(2)
306 | 
307 |     _print_intro(orc_arguments)
308 | 
309 | 
310 | def _print_intro(orc_arguments: list[OrcArgument]) -> None:
311 |     """Print simple intro that sums up the files that will be used to generate timelines."""
312 |     for orc_argument in orc_arguments:
313 |         LOGGER.info("==============================================")
314 |         LOGGER.info("Host: %s", orc_argument.hostname)
315 |         LOGGER.info("Files used: [%s]", ", ".join(str(path) for path in orc_argument.orc_paths))
316 |         LOGGER.info("Result file: %s", orc_argument.output_path)
317 | 
318 | 
319 | def _print_summaries(total_results_per_host: dict[str, int], all_results: list[tuple[str, str, int]]) -> None:
320 |     """Print summaries for every treated Orc at the end of the program execution.
321 | 
322 |     Parameters
323 |     ----------
324 |     total_results_per_host: dict[str, int]
325 |         Dictionary with hostname as key and total_events_for_this_host (after deduplication) as value
326 |     all_results: list[tuple[str, str, int]]
327 |         List of tuple (hostname, plugin_name, events_number)
328 | 
329 |     """
330 |     LOGGER.info("== Printing final summary of generated timelines:")
331 |     host_list = sorted(set(total_results_per_host.keys()))
332 |     for host in host_list:
333 |         LOGGER.info(
334 |             "=======================================================================",
335 |         )
336 |         LOGGER.info("====== Hostname: %s - %s events", host, total_results_per_host[host])
337 |         results_filtered_by_host = _get_all_results_filtered_by_host(all_results, host)
338 |         plugin_list = sorted({plugin[1] for plugin in results_filtered_by_host})
339 |         for plugin in plugin_list:
340 |             results_filtered_by_plugin = _get_all_results_filtered_by_plugin(results_filtered_by_host, plugin)
341 |             sum_for_plugin = sum([int(plugin[2]) for plugin in results_filtered_by_plugin])
342 |             # for plugin in results_filtered_by_host:
343 |             LOGGER.info("========== %s %s %s", host, plugin, sum_for_plugin)
344 |         LOGGER.info("====== Total for %s: %s", host, total_results_per_host[host])
345 | 
346 |     LOGGER.info(
347 |         "=======================================================================",
348 |     )
349 |     LOGGER.info("====== Total: %s events processed", sum(total_results_per_host.values()))
350 |     LOGGER.info(
351 |         "=======================================================================",
352 |     )
353 | 
354 | 
355 | def _merge_timelines_with_jobs(
356 |     orc_arguments: list[OrcArgument],
357 |     jobs: int,
358 |     tmp_dir: tempfile.TemporaryDirectory[str],
359 | ) -> dict[str, int]:
360 |     """Create final timeline for every host by merging subtimelines.
361 | 
362 |     For a list of OrcArgument, for every host, this function will merge all
363 |     the subtimelines that were generated by the execution of the plugins
364 |     and create the final timeline.
365 |     """
366 |     result_list = []
367 | 
368 |     if jobs <= 1:
369 |         result_list = [
370 |             _merge_timelines_for_host(orc_argument.hostname, str(orc_argument.output_path), tmp_dir)
371 |             for orc_argument in orc_arguments
372 |         ]
373 |     else:
374 |         with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as pool:
375 |             futures = [
376 |                 pool.submit(_merge_timelines_for_host, orc_argument.hostname, str(orc_argument.output_path), tmp_dir)
377 |                 for orc_argument in orc_arguments
378 |             ]
379 |             concurrent.futures.wait(futures)
380 |             result_list = [future.result() for future in futures]
381 | 
382 |     # return value is a dictionnary with hostname as key
383 |     # and number of events for this host as value dict_res : dict{str, int}
384 |     return dict(zip([orc_argument.hostname for orc_argument in orc_arguments], result_list))
385 | 


--------------------------------------------------------------------------------
/src/orc2timeline/info.py:
--------------------------------------------------------------------------------
 1 | """Metadata for orc2timeline."""
 2 | 
 3 | # fmt: off
 4 | __project__ = "orc2timeline"
 5 | __author__ = "Berenger Foucher"
 6 | __maintainer__ = "Berenger Foucher"
 7 | __description__ = "Generate a timeline from list of Orc files"
 8 | __email__ = "berenger.foucher@ssi.gouv.fr"
 9 | __version__ = "1.0.0"
10 | __copyright__ = "Copyright 2023, ANSSI"
11 | __license__ = "LGPL-3.0-or-later"
12 | # fmt: on
13 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/EventLogsToTimeline-eventmap.txt:
--------------------------------------------------------------------------------
1 | Microsoft-Windows-Security-Auditing/4624:[Logon]
2 | Microsoft-Windows-Security-Auditing/4625:[Failed Login]
3 | Microsoft-Windows-Security-Auditing/1102:[Log Cleared]
4 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/EventLogsToTimeline.py:
--------------------------------------------------------------------------------
  1 | """Plugin to parse windows event logs."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import logging
  6 | from pathlib import Path
  7 | from typing import TYPE_CHECKING
  8 | 
  9 | if TYPE_CHECKING:
 10 |     from threading import Lock
 11 | 
 12 |     from orc2timeline.config import PluginConfig
 13 | 
 14 | from typing import Any, Iterator
 15 | 
 16 | import pyevtx
 17 | 
 18 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
 19 | 
 20 | LOGGER = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | def _get_event_id(event: Any) -> int | None:  # noqa: ANN401
 24 |     try:
 25 |         raw_event_id = event.get_event_identifier()
 26 |     except OSError:
 27 |         LOGGER.debug("Error while trying to recover event identifier")
 28 |         return None
 29 |     # Mask the facility code, reserved, customer and severity bits. Only keeps the status code.
 30 |     return int(0xFFFF & raw_event_id)
 31 | 
 32 | 
 33 | def _get_args(event: Any) -> list[str]:  # noqa: ANN401
 34 |     args = []
 35 |     args_number = 0
 36 |     try:
 37 |         args_number = event.get_number_of_strings()
 38 |     except OSError as e:
 39 |         if "unable to retrieve number of strings" in str(e):
 40 |             LOGGER.debug(
 41 |                 "Unable to retrieve args_number for event. Error: %s",
 42 |                 e,
 43 |             )
 44 |             return []
 45 |         raise
 46 | 
 47 |     for i in range(args_number):
 48 |         argi = None
 49 |         try:
 50 |             argi = event.get_string(i)
 51 |         except OSError as err:
 52 |             if "pyevtx_record_get_string_by_index: unable to determine size of string:" in str(err):
 53 |                 LOGGER.debug("Unable to get string argument from event. Error: %s", err)
 54 |             else:
 55 |                 raise
 56 | 
 57 |         if argi:
 58 |             argi = argi.replace("\r\n", "\\r\\n")
 59 |             argi = argi.replace("\n", "\\n")
 60 |             argi = argi.replace("\r", "\\r")
 61 |             args.append(argi)
 62 |         else:
 63 |             args.append("")
 64 | 
 65 |     return args
 66 | 
 67 | 
 68 | class EventLogsToTimeline(GenericToTimeline):
 69 |     def __init__(
 70 |         self,
 71 |         config: PluginConfig,
 72 |         orclist: list[str],
 73 |         output_file_path: str,
 74 |         hostname: str,
 75 |         tmp_dir: str,
 76 |         lock: Lock,
 77 |     ) -> None:
 78 |         """Construct."""
 79 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
 80 | 
 81 |         self.event_tags_file = Path(__file__).parent / "EventLogsToTimeline-eventmap.txt"
 82 |         self.event_tags = self._parse_event_tags_file(self.event_tags_file)
 83 | 
 84 |     def _build_description_field(self, event_provider: str, event_id: int, user_id: str, args: list[str]) -> str:
 85 |         description = f"{event_provider}:{event_id}"
 86 | 
 87 |         if (event_provider in self.event_tags) and (event_id in self.event_tags[event_provider]):
 88 |             description += f" {self.event_tags[event_provider][event_id]}"
 89 | 
 90 |         description += f" {user_id}"
 91 | 
 92 |         if len(args) != 0:
 93 |             args_string = "|".join(args)
 94 |             description += f" ({args_string})"
 95 | 
 96 |         return description
 97 | 
 98 |     def _parse_artefact(self, artefact: Path) -> None:
 99 |         for event in self._evtx_events(artefact):
100 |             evt_dict = self._evtx_get_event_object(
101 |                 event,
102 |                 artefact,
103 |                 recovered=False,
104 |             )
105 | 
106 |             if evt_dict and evt_dict.description and evt_dict.description != "":
107 |                 self._add_event(evt_dict)
108 | 
109 |         for event in self._evtx_recovered_events(artefact):
110 |             evt_dict = self._evtx_get_event_object(
111 |                 event,
112 |                 artefact,
113 |                 recovered=True,
114 |             )
115 |             if evt_dict and evt_dict.description and evt_dict.description != "":
116 |                 self._add_event(evt_dict)
117 | 
118 |     def _evtx_recovered_events(self, evtx_file_path: Path) -> Iterator[Any]:
119 |         with Path(evtx_file_path).open("rb") as f:
120 |             evtx_file = pyevtx.file()
121 |             try:
122 |                 evtx_file.open_file_object(f)
123 |             except OSError:
124 |                 self.logger.critical(
125 |                     "Error while opening the event log file %s",
126 |                     evtx_file_path,
127 |                 )
128 |             else:
129 |                 for i in range(evtx_file.number_of_recovered_records):
130 |                     try:
131 |                         evtx = evtx_file.get_recovered_record(i)
132 |                     except OSError as e:
133 |                         self.logger.debug(
134 |                             "Error while parsing a recovered event record in %s. Error: %s",
135 |                             evtx_file_path,
136 |                             e,
137 |                         )
138 |                         continue
139 |                     yield evtx
140 |                 evtx_file.close()
141 | 
142 |     def _evtx_events(self, evtx_file_path: Path) -> Iterator[Any]:
143 |         with Path(evtx_file_path).open("rb") as f:
144 |             evtx_file = pyevtx.file()
145 |             try:
146 |                 evtx_file.open_file_object(f)
147 |             except OSError:
148 |                 self.logger.critical(
149 |                     "Error while opening the event log file %s",
150 |                     evtx_file_path,
151 |                 )
152 |             else:
153 |                 for i in range(evtx_file.number_of_records):
154 |                     try:
155 |                         evtx = evtx_file.get_record(i)
156 |                     except OSError as e:
157 |                         self.logger.debug(
158 |                             "Error while parsing an event record in %s. Error: %s",
159 |                             evtx_file_path,
160 |                             e,
161 |                         )
162 |                         continue
163 |                     yield evtx
164 |                 evtx_file.close()
165 | 
166 |     def _evtx_get_event_object(
167 |         self,
168 |         event_input: Any,  # noqa: ANN401
169 |         event_file: Path,
170 |         *,
171 |         recovered: bool,
172 |     ) -> Event | None:
173 |         event_result = Event(source=self._get_original_path(event_file))
174 | 
175 |         try:
176 |             event_result.timestamp = event_input.get_written_time()
177 |         except ValueError:
178 |             self.logger.critical("Unable to get written time from event in %s", event_file)
179 |             return None
180 | 
181 |         # Event ID
182 |         event_id = _get_event_id(event_input)
183 |         if event_id is None:
184 |             return None
185 | 
186 |         # Get the non formatted arguments
187 |         args = []
188 |         args = _get_args(event_input)
189 | 
190 |         event_provider = "Unknown"
191 |         try:
192 |             event_provider = event_input.get_source_name()
193 |         except OSError as err:
194 |             if "pyevtx_record_get_source_name: unable to determine size of source name as UTF-8 string." in str(err):
195 |                 self.logger.debug("Unable to get source name from event")
196 |             else:
197 |                 raise
198 |         user_id = event_input.get_user_security_identifier()
199 | 
200 |         event_result.description = self._build_description_field(event_provider, event_id, user_id, args)
201 |         if recovered:
202 |             event_result.description += " (Recovered)"
203 | 
204 |         return event_result
205 | 
206 |     def _parse_event_tags_file(self, event_tags_file: Path) -> dict[str, dict[int, str]]:
207 |         """Parse a file containing information to add tags to some event."""
208 |         event_tags = {}
209 |         if event_tags_file.exists():
210 |             with event_tags_file.open() as f:
211 |                 for line in f.readlines():
212 |                     my_line = line.strip()
213 | 
214 |                     # commented-out line
215 |                     if my_line.startswith("#") or len(my_line) == 0:
216 |                         continue
217 | 
218 |                     splitted_line = my_line.split(":")
219 |                     if len(splitted_line) != 2:  # noqa: PLR2004
220 |                         self.logger.warning(
221 |                             'Wrong format for a line in %s: "%s"',
222 |                             event_tags_file,
223 |                             my_line,
224 |                         )
225 |                         continue
226 | 
227 |                     event, tag = splitted_line
228 | 
229 |                     splitted_event = event.split("/")
230 |                     if len(splitted_event) != 2:  # noqa: PLR2004
231 |                         self.logger.warning(
232 |                             'Wrong format for a line in %s: "%s"',
233 |                             event_tags_file,
234 |                             my_line,
235 |                         )
236 |                         continue
237 | 
238 |                     event_provider, event_id = splitted_event[0], int(splitted_event[1])
239 | 
240 |                     if event_provider not in event_tags:
241 |                         event_tags[event_provider] = {event_id: tag}
242 |                     else:
243 |                         event_tags[event_provider][event_id] = tag
244 | 
245 |         return event_tags
246 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/GenericToTimeline.py:
--------------------------------------------------------------------------------
  1 | """Generic plugin, all real plugin will inherit from this plugin."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import bisect
  6 | import csv
  7 | import logging
  8 | import os
  9 | import random
 10 | import re
 11 | import string
 12 | import tempfile
 13 | import time
 14 | from datetime import datetime
 15 | from pathlib import Path
 16 | from typing import TYPE_CHECKING, Callable, Iterator
 17 | 
 18 | import py7zr
 19 | import pytz
 20 | 
 21 | if TYPE_CHECKING:
 22 |     from threading import Lock
 23 | 
 24 |     from orc2timeline.config import PluginConfig
 25 | 
 26 | MAX_FILE_NAME_LENGTH = 255
 27 | 
 28 | 
 29 | def _delete_everything_in_dir(path: Path) -> None:
 30 |     """Mimic the command rm -r path."""
 31 |     for subpath in path.iterdir():
 32 |         if subpath.is_dir():
 33 |             _delete_everything_in_dir(subpath)
 34 |         else:
 35 |             subpath.unlink()
 36 |     path.rmdir()
 37 | 
 38 | 
 39 | def _get_relevant_archives(orc_list: list[str], archive_list: list[str]) -> Iterator[tuple[str, str]]:
 40 |     """Return Iterator that is a tuple of str.
 41 | 
 42 |     Return:
 43 |     ------
 44 |     Iterator[tuple[str, str]]
 45 |         first element of tuple: path to orc archive
 46 |         second element of tuple: archive type (Details, Memory, Little, General...)
 47 | 
 48 |     """
 49 |     for orc in orc_list:
 50 |         for archive in archive_list:
 51 |             if archive.casefold() in Path(orc).name.casefold():
 52 |                 yield orc, archive
 53 | 
 54 | 
 55 | def _extract_sub_archives_from_archive(archive_path: str, extraction_path: Path, sub_archive: str) -> None:
 56 |     def _sub_archive_filter(f: str) -> bool:
 57 |         return f.casefold() == sub_archive.casefold()
 58 | 
 59 |     _extract_filtered_files_from_archive(archive_path, extraction_path, _sub_archive_filter)
 60 | 
 61 | 
 62 | def _extract_matching_files_from_archive(archive_path: str, extraction_path: Path, match_pattern: str) -> None:
 63 |     filter_pattern = re.compile(match_pattern, re.IGNORECASE)
 64 | 
 65 |     def _re_filter(input_str: str) -> bool:
 66 |         return bool(filter_pattern.match(input_str))
 67 | 
 68 |     _extract_filtered_files_from_archive(archive_path, extraction_path, _re_filter)
 69 | 
 70 | 
 71 | def _extract_getthis_file_from_archive(archive_path: str, extraction_path: Path) -> None:
 72 |     def _get_this_filter(f: str) -> bool:
 73 |         return f.casefold() == "GetThis.csv".casefold()
 74 | 
 75 |     _extract_filtered_files_from_archive(archive_path, extraction_path, _get_this_filter)
 76 | 
 77 | 
 78 | def _extract_filtered_files_from_archive(
 79 |     archive_path: str,
 80 |     extraction_path: Path,
 81 |     filter_function: Callable[[str], bool],
 82 | ) -> None:
 83 |     try:
 84 |         with py7zr.SevenZipFile(archive_path, mode="r") as z:
 85 |             allfiles = z.getnames()
 86 |             targets = [f for f in allfiles if filter_function(f)]
 87 |             z.extract(
 88 |                 targets=targets,
 89 |                 path=extraction_path,
 90 |             )
 91 |     except OSError as e:
 92 |         if "File name too long:" in str(e) or (os.name == "nt" and "Invalid argument" in str(e)):
 93 |             _extract_safe(archive_path, extraction_path, filter_function)
 94 |         else:
 95 |             raise
 96 | 
 97 | 
 98 | def _extract_safe(archive_name: str, output_dir: Path, filter_function: Callable[[str], bool]) -> None:
 99 |     """Extract files from archive in a safe way.
100 | 
101 |     This function extracts files from the archive that is located in archive_name.
102 |     All files that match filter_function (this function should return True) are extracted in
103 |     a safe way. output_dir is the directory that will be used to write uncompressed files.
104 | 
105 |     To extract files in a safer way, the files that name does not exceed MAX_FILE_NAME_LENGTH
106 |     are extracted in the simplest way.
107 |     Matching files that name is too long are extracted using read function, and name will be truncated
108 |     from the beginning until length is less than MAX_FILE_NAME_LENGTH.
109 |     """
110 |     with py7zr.SevenZipFile(archive_name, "r") as z:
111 |         allfiles = z.getnames()
112 |     files_to_extract = []
113 |     exception_file = []
114 |     targets = [f for f in allfiles if filter_function(f)]
115 |     for i in targets:
116 |         if len(Path(i).name) < MAX_FILE_NAME_LENGTH:
117 |             files_to_extract.append(i)
118 |         else:
119 |             exception_file.append(i)
120 | 
121 |     with py7zr.SevenZipFile(archive_name, "r") as z:
122 |         z.extract(targets=files_to_extract, path=output_dir)
123 |         z.reset()
124 |         res = z.read(targets=exception_file)
125 |         for data in res:
126 |             new_path = output_dir / Path(data).parent
127 |             new_path.mkdir(parents=True, exist_ok=True)
128 |             new_filename = Path(data).name[(len(Path(data).name) - MAX_FILE_NAME_LENGTH) :]
129 |             new_filepath = new_path / new_filename
130 |             with new_filepath.open("wb") as result_file:
131 |                 result_file.write(res[data].read())
132 | 
133 | 
134 | class Event:
135 |     def __init__(
136 |         self,
137 |         timestamp: datetime | None = None,
138 |         timestamp_str: str = "",
139 |         sourcetype: str = "",
140 |         description: str = "",
141 |         source: str = "",
142 |     ) -> None:
143 |         """Construct."""
144 |         self.timestamp = timestamp
145 |         self.timestamp_str = timestamp_str
146 |         self.sourcetype = sourcetype
147 |         self.description = description
148 |         self.source = source
149 | 
150 | 
151 | class SortedChunk:
152 |     """Store events temporary in a sorted way.
153 | 
154 |     This class describes an object that is used to store the events temporary in a sorted way.
155 |     When the number of events reaches the limit (10 000 be default), the content of the chunk
156 |     is written an disk.
157 |     """
158 | 
159 |     def __init__(self, max_size: int) -> None:
160 |         """Construct."""
161 |         self.raw_lines: list[str] = []
162 |         self.max_size: int = max_size
163 | 
164 |     def write(self, s: str) -> None:
165 |         """Write in sorted chink."""
166 |         bisect.insort(self.raw_lines, s)
167 | 
168 |     def new_chunk(self) -> None:
169 |         """Create new chunk."""
170 |         self.raw_lines = []
171 | 
172 |     def is_full(self) -> bool:
173 |         """Check if chunk is full."""
174 |         return len(self.raw_lines) > self.max_size
175 | 
176 | 
177 | class GenericToTimeline:
178 |     def __init__(
179 |         self,
180 |         config: PluginConfig,
181 |         orclist: list[str],
182 |         output_file_path: str,
183 |         hostname: str,
184 |         tmp_dir: str,
185 |         lock: Lock | None,
186 |     ) -> None:
187 |         """Construct."""
188 |         self.orclist = orclist
189 |         self.hostname = hostname
190 |         self.lock = lock
191 |         self.written_rows_count = 0
192 |         self.current_chunk = SortedChunk(10000)  # Default 10,000 lines at once
193 |         self.output_file_nb = 0
194 |         self.output_files_list: list[Path] = []
195 |         self.nonce = "".join(random.choices(string.ascii_uppercase + string.digits, k=5))  # noqa: S311
196 |         self.output_file_path = f"{output_file_path}_{self.nonce}_nb{self.output_file_nb}"
197 |         self.output_file_prefix = self.output_file_path
198 | 
199 |         self.archives: list[str] = []
200 |         self.sub_archives: list[str] = []
201 |         self.match_pattern = ""
202 |         self.file_header = bytes([])
203 | 
204 |         self.sourcetype = ""
205 | 
206 |         self.tmpDirectory = tempfile.TemporaryDirectory(
207 |             dir=tmp_dir,
208 |             prefix=f"orc2timeline_{self.__class__.__name__}_",
209 |         )
210 | 
211 |         self.logger = logging.getLogger()
212 |         self.eventList: set[dict[str, str]] = set()
213 |         self.originalPath: dict[str, str] = {}
214 | 
215 |         self._load_config(config)
216 | 
217 |     def _setup_next_output_file(self) -> None:
218 |         """Switch output file to new one.
219 | 
220 |         When writing lines during the plugin execution, lines are not written straight ahead.
221 |         Instead they are stored in a Chunk object (which hold sorted lines in memory), when this Chunk
222 |         is full (10 000 events by default) all the events are written to disk and a new Chunk
223 |         will be used (with a new output file).
224 | 
225 |         It is compulsary that a new file is used at every new chunk because the functions written
226 |         in core.py consider that every subtimeline is already sorted when create the final timeline.
227 | 
228 |         File names follow this rule: timeline_{hostname}_{plugin_name}_nb{file_number}
229 |         """
230 |         self.output_file_nb += 1
231 |         self.output_file_path = f"{self.output_file_prefix}_{self.nonce}_nb{self.output_file_nb}"
232 |         self.fd_plugin_file = Path(self.output_file_path).open("w", encoding="utf-8", newline="")  # noqa: SIM115
233 |         self.output_files_list.append(Path(self.output_file_path))
234 | 
235 |     def _delete_all_result_files(self) -> None:
236 |         """Flush current chunk and delete all result files.
237 | 
238 |         This is can be necessary when an unpredictable error occurs during plugin execution.
239 |         After calling this function, processing can be re-run from the beginning without worrying
240 |         of previous execution.
241 |         """
242 |         self._flush_chunk()
243 |         for output_file in self.output_files_list:
244 |             self.logger.critical("Delete %s", self.output_files_list)
245 |             output_file.unlink()
246 |         self.logger.critical("Reinitialization of chunks")
247 | 
248 |         self.current_chunk = SortedChunk(10000)
249 |         self.csvWriter = csv.writer(self.current_chunk, delimiter=",", quotechar='"')
250 |         self.output_files_list = []
251 |         self._setup_next_output_file()
252 | 
253 |     def _deflate_archives(self) -> None:
254 |         """Deflate files from Orc.
255 | 
256 |         For all Orcs contained in self.orclist:
257 |             Select archive that match self.archives.
258 |                 Deflate sub_archive from archive
259 |                     Deflate files that match self.match_pattern from sub_archive in extraction_path
260 | 
261 |         extraction_path is built ad it follows:
262 |             {tmp_dir}/{orc2timeline_tmp_dir}/{plugin_tmp_dir}/all_extraction
263 |         """
264 |         for orc, archive in _get_relevant_archives(self.orclist, self.archives):
265 |             path_to_create = Path(self.tmpDirectory.name) / archive
266 |             if not path_to_create.exists():
267 |                 path_to_create.mkdir(parents=True)
268 |             extraction_path = path_to_create / "all_extraction"
269 |             if len(self.sub_archives) == 0:
270 |                 # we look for matching files without subarchive
271 |                 try:
272 |                     _extract_matching_files_from_archive(orc, extraction_path, self.match_pattern)
273 |                 except Exception as e:  # noqa: BLE001
274 |                     self.logger.critical(
275 |                         "Unable to open %s archive. Error: %s",
276 |                         orc,
277 |                         e,
278 |                     )
279 |             else:
280 |                 for sub_archive in self.sub_archives:
281 |                     try:
282 |                         sub_extraction_path = (
283 |                             Path(self.tmpDirectory.name) / archive / (sub_archive + "_" + str(time.time()))
284 |                         )
285 | 
286 |                         _extract_sub_archives_from_archive(orc, sub_extraction_path, sub_archive)
287 |                         for f2 in Path(sub_extraction_path).glob("*"):
288 |                             if f2.name.casefold() == sub_archive.casefold():
289 |                                 _extract_matching_files_from_archive(str(f2), extraction_path, self.match_pattern)
290 |                                 _extract_getthis_file_from_archive(str(f2), extraction_path)
291 |                                 self._parse_then_delete_getthis_file(
292 |                                     extraction_path / "GetThis.csv",
293 |                                 )
294 |                         _delete_everything_in_dir(sub_extraction_path)
295 |                     except Exception as e:  # noqa: BLE001
296 |                         err_msg = f"Unable to deflate {sub_archive} from {orc}. Error: {e}"
297 |                         if "Invalid argument" in str(e):
298 |                             err_msg += " (this may happen when compressed file is empty)"
299 |                         self.logger.critical(err_msg)
300 | 
301 |     def _parse_artefact(self, artefact: Path) -> None:
302 |         """Artefact specific function.
303 | 
304 |         The content of this function is specific to every plugin. Events will not be parsed
305 |         the same way LNK files are. Therefore this function should not be implemented in
306 |         the Generic plugin.
307 | 
308 |         When writing a specific plugin, this function is the only one that should be overwritten.
309 |         """
310 | 
311 |     def _get_original_path(self, path: Path) -> str:
312 |         original_formatted_path = str(path.relative_to(Path(self.tmpDirectory.name)).as_posix())
313 |         return str(self.originalPath.get(path.name, original_formatted_path))
314 | 
315 |     def _parse_then_delete_getthis_file(self, path_to_file: Path) -> None:
316 |         try:
317 |             with Path(path_to_file).open(encoding="utf-8") as infile:
318 |                 for line in csv.reader(infile):
319 |                     self.originalPath[Path(line[5].replace("\\", "/")).name] = line[4]
320 |             path_to_file.unlink()
321 |         except Exception as e:  # noqa: BLE001
322 |             self.logger.debug(str(e))
323 | 
324 |     def _parse_all_artefacts(self) -> None:
325 |         for art in Path(self.tmpDirectory.name).glob("**/all_extraction/**/*"):
326 |             if not art.is_file():
327 |                 continue
328 |             file_path_split = Path(art).parts
329 |             try:
330 |                 file_name = file_path_split[-1]
331 |                 archive_name = "unknown"
332 |                 # Get archive name from artefact path (for logging purposes only)
333 |                 for i in range(len(file_path_split)):
334 |                     if file_path_split[i] == "all_extraction":
335 |                         archive_name = file_path_split[i - 1]
336 |             except Exception:  # noqa: BLE001
337 |                 archive_name = "unknown"
338 |             self.logger.debug(
339 |                 "[%s] [%s] parsing : %s",
340 |                 self.hostname,
341 |                 archive_name,
342 |                 file_name,
343 |             )
344 |             self._parse_artefact(art)
345 | 
346 |     def _add_event(self, event: Event) -> None:
347 |         timestamp = ""
348 | 
349 |         if event.timestamp is None and event.timestamp_str == "":
350 |             self.logger.critical("None Timestamp given for event %s", event)
351 |             timestamp = datetime.fromtimestamp(0, tz=pytz.UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
352 | 
353 |         if event.timestamp_str != "":
354 |             timestamp = event.timestamp_str
355 |         elif event.timestamp is not None:
356 |             try:
357 |                 timestamp = event.timestamp.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
358 |             except ValueError as e:
359 |                 self.logger.critical(e)
360 |                 timestamp = datetime.fromtimestamp(0, tz=pytz.UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
361 | 
362 |         self._write_line(
363 |             timestamp,
364 |             self.sourcetype,
365 |             event.description,
366 |             event.source,
367 |         )
368 | 
369 |     def _write_line(self, date: str, sourcetype: str, description: str, sourcefile: str) -> None:
370 |         """Write event to timeline.
371 | 
372 |         The events are not written to disk along the way, instead they are store in a chunk object
373 |         (in memory), when the chunk reaches the event number limit, all the events are written to
374 |         disk in a sorted way. A new chunk will be used and its content will be written in another
375 |         file.
376 |         """
377 |         # sanitize output
378 |         rows_to_write = [row.replace("\n", "\\n") for row in (date, self.hostname, sourcetype, description, sourcefile)]
379 |         self.csvWriter.writerow(rows_to_write)
380 |         if self.current_chunk.is_full():
381 |             self._flush_chunk_and_new_chunk()
382 | 
383 |     def _flush_chunk(self) -> None:
384 |         self.fd_plugin_file.writelines(self.current_chunk.raw_lines)
385 |         self.fd_plugin_file.close()
386 |         self.written_rows_count += len(self.current_chunk.raw_lines)
387 | 
388 |     def _flush_chunk_and_new_chunk(self) -> None:
389 |         self._flush_chunk()
390 |         self.current_chunk.new_chunk()
391 |         self._setup_next_output_file()
392 | 
393 |     def _filter_files_based_on_first_bytes(self) -> None:
394 |         if len(self.file_header) == 0:
395 |             return
396 | 
397 |         for art in Path(self.tmpDirectory.name).glob("**/all_extraction/**/*"):
398 |             if not art.is_file():
399 |                 continue
400 |             must_delete = False
401 |             with Path(art).open("rb") as fd:
402 |                 first_bytes_of_file = fd.read(len(self.file_header))
403 |                 if first_bytes_of_file != self.file_header:
404 |                     must_delete = True
405 |             if must_delete:
406 |                 art.unlink()
407 | 
408 |     def _load_config(self, config: PluginConfig) -> None:
409 |         self.archives = config.archives
410 |         self.sub_archives = config.sub_archives
411 |         self.match_pattern = config.match_pattern
412 |         self.sourcetype = config.sourcetype
413 | 
414 |     def add_to_timeline(self) -> int:
415 |         """Create the result file with the result of argument parsing."""
416 |         self.logger.debug("%s started", self.__class__.__name__)
417 |         self.csvWriter = csv.writer(self.current_chunk, delimiter=",", quotechar='"')
418 |         self._setup_next_output_file()
419 |         self._deflate_archives()
420 |         self._filter_files_based_on_first_bytes()
421 |         self._parse_all_artefacts()
422 |         self._flush_chunk()
423 |         self.logger.debug("%s ended", self.__class__.__name__)
424 |         return self.written_rows_count
425 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/I30InfoToTimeline.py:
--------------------------------------------------------------------------------
  1 | """Plugin to parse I30Info files."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import _csv
  6 | import csv
  7 | import string
  8 | from io import StringIO
  9 | from pathlib import Path
 10 | from typing import TYPE_CHECKING
 11 | 
 12 | if TYPE_CHECKING:
 13 |     from threading import Lock
 14 | 
 15 |     from orc2timeline.config import PluginConfig
 16 | 
 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
 18 | 
 19 | 
 20 | class I30InfoToTimeline(GenericToTimeline):
 21 |     def __init__(
 22 |         self,
 23 |         config: PluginConfig,
 24 |         orclist: list[str],
 25 |         output_file_path: str,
 26 |         hostname: str,
 27 |         tmp_dir: str,
 28 |         lock: Lock,
 29 |     ) -> None:
 30 |         """Construct."""
 31 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
 32 | 
 33 |     def _generate_one_csv_line(
 34 |         self,
 35 |         i30_info_row: dict[str, str],
 36 |         i30_info_path_source: str,
 37 |         same_timestamps_group: list[str],
 38 |         ref_timestamp: str,
 39 |     ) -> None:
 40 |         fn = [
 41 |             ("FileNameLastModificationDate", "M"),
 42 |             ("FileNameLastAccessDate", "A"),
 43 |             ("FileNameLastAttrModificationDate", "C"),
 44 |             ("FileNameCreationDate", "B"),
 45 |         ]
 46 | 
 47 |         event = Event(timestamp_str=ref_timestamp, source=i30_info_path_source)
 48 |         meaning = ""
 49 |         for t in fn:
 50 |             if t[0] in same_timestamps_group:
 51 |                 meaning += t[1]
 52 |             else:
 53 |                 meaning += "."
 54 | 
 55 |         event.description = "Entry in slackspace - $FN: {} - Name: {} - MFT segment num: {} - Parent FRN: {} ".format(
 56 |             meaning,
 57 |             i30_info_row["Name"],
 58 |             str(int(i30_info_row["FRN"], 16) & 0xFFFFFFFFFFFF),
 59 |             i30_info_row["ParentFRN"],
 60 |         )
 61 |         self._add_event(event)
 62 | 
 63 |     def _parse_line(self, i30_info_row: dict[str, str], artefact: Path) -> None:
 64 |         # CarvedEntry
 65 |         if "CarvedEntry" in i30_info_row and i30_info_row["CarvedEntry"] == "Y":
 66 |             timestamp_fields = [
 67 |                 "FileNameCreationDate",
 68 |                 "FileNameLastModificationDate",
 69 |                 "FileNameLastAccessDate",
 70 |                 "FileNameLastAttrModificationDate",
 71 |             ]
 72 |             while len(timestamp_fields) > 0:
 73 |                 ref_field = timestamp_fields.pop()
 74 |                 ref_timestamp = i30_info_row[ref_field]
 75 |                 same_timestamps_group = [ref_field]
 76 |                 same_timestamps_group.extend(
 77 |                     field for field in timestamp_fields if ref_timestamp == i30_info_row[field]
 78 |                 )
 79 | 
 80 |                 # generate an event for a groupe sharing the same timestamp
 81 |                 self._generate_one_csv_line(
 82 |                     i30_info_row,
 83 |                     Path(artefact).name,
 84 |                     same_timestamps_group,
 85 |                     ref_timestamp,
 86 |                 )
 87 | 
 88 |                 for field in same_timestamps_group:
 89 |                     if field != ref_field:
 90 |                         timestamp_fields.remove(field)
 91 | 
 92 |     def _parse_artefact(self, artefact: Path) -> None:
 93 |         # It is compulsary to use new chunk because if an error occurs
 94 |         # all files in self.output_files_list will be deleted an artefact
 95 |         # will be reprocessed.
 96 |         # Processing as it follow ensures that events extracted from previous
 97 |         # artefacts will not be deleted is an error occurs while processing
 98 |         # current artefact.
 99 |         self.output_files_list = []
100 |         self._flush_chunk_and_new_chunk()
101 |         try:
102 |             with Path(artefact).open(encoding="utf-8") as fd:
103 |                 csv_reader = csv.DictReader(fd)
104 |                 for i30_info_row in csv_reader:
105 |                     self._parse_line(i30_info_row, artefact)
106 |         # when file contains NULL character, old versions of csv can crash
107 |         except (_csv.Error, UnicodeDecodeError) as e:
108 |             with Path(artefact).open(encoding="utf-8", errors="ignore") as fd:
109 |                 self.logger.critical("csv error caught alternative way for host %s: %s", self.hostname, e)
110 |                 self._delete_all_result_files()
111 |                 data = fd.read()
112 |                 clean_data = "".join(c for c in data if c in string.printable)
113 |                 data_io = StringIO(clean_data)
114 |                 csv_reader = csv.DictReader(data_io)
115 |                 for i30_info_row in csv_reader:
116 |                     self._parse_line(i30_info_row, artefact)
117 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/NTFSInfoToTimeline.py:
--------------------------------------------------------------------------------
  1 | """Plugin to parse NTFSInfo files."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import _csv
  6 | import csv
  7 | import string
  8 | from io import StringIO
  9 | from pathlib import Path
 10 | from typing import TYPE_CHECKING, Any
 11 | 
 12 | if TYPE_CHECKING:
 13 |     from threading import Lock
 14 | 
 15 |     from orc2timeline.config import PluginConfig
 16 | 
 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
 18 | 
 19 | 
 20 | class NTFSInfoToTimeline(GenericToTimeline):
 21 |     def __init__(
 22 |         self,
 23 |         config: PluginConfig,
 24 |         orclist: list[str],
 25 |         output_file_path: str,
 26 |         hostname: str,
 27 |         tmp_dir: str,
 28 |         lock: Lock,
 29 |     ) -> None:
 30 |         """Construct."""
 31 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
 32 | 
 33 |     def _set_separator(self, parentname: str) -> None:
 34 |         if len(parentname) == 0:
 35 |             self.separator = "\\"
 36 |         elif len(parentname) == 1:
 37 |             if parentname != "\\":
 38 |                 self.separator = "\\"
 39 |             else:
 40 |                 self.separator = ""
 41 |         elif parentname[-1] != "\\":
 42 |             self.separator = "\\"
 43 |         else:
 44 |             self.separator = ""
 45 | 
 46 |     def _generate_one_csv_line(
 47 |         self,
 48 |         ntfs_info_row: dict[str, str],
 49 |         ntfs_info_path_source: str,
 50 |         same_timestamps_group: list[str],
 51 |         ref_timestamp: str,
 52 |     ) -> None:
 53 |         si = [
 54 |             ("LastModificationDate", "M"),
 55 |             ("LastAccessDate", "A"),
 56 |             ("LastAttrChangeDate", "C"),
 57 |             ("CreationDate", "B"),
 58 |         ]
 59 |         fn = [
 60 |             ("FileNameLastModificationDate", "M"),
 61 |             ("FileNameLastAccessDate", "A"),
 62 |             ("FileNameLastAttrModificationDate", "C"),
 63 |             ("FileNameCreationDate", "B"),
 64 |         ]
 65 | 
 66 |         event = Event(timestamp_str=ref_timestamp, source=ntfs_info_path_source)
 67 | 
 68 |         fn_flag = ntfs_info_row.get("FilenameFlags")
 69 |         if fn_flag is not None and fn_flag == "2":
 70 |             return
 71 | 
 72 |         meaning = "$SI: "
 73 |         for t in si:
 74 |             if t[0] in same_timestamps_group:
 75 |                 meaning += t[1]
 76 |             else:
 77 |                 meaning += "."
 78 |         meaning += " - $FN: "
 79 |         for t in fn:
 80 |             if t[0] in same_timestamps_group:
 81 |                 meaning += t[1]
 82 |             else:
 83 |                 meaning += "."
 84 | 
 85 |         if not hasattr(self, "separator"):
 86 |             self._set_separator(ntfs_info_row["ParentName"])
 87 |         name = ntfs_info_row["ParentName"] + self.separator + ntfs_info_row["File"]
 88 | 
 89 |         size_in_bytes: str | None = "unknown"
 90 | 
 91 |         try:
 92 |             size_in_bytes = ntfs_info_row.get("SizeInBytes")
 93 |         except ValueError as e:
 94 |             self.logger.debug("Error while getting FRN or Size. Error: %s", e)
 95 | 
 96 |         event.description = f"{meaning} - Name: {name} - Size in bytes: {size_in_bytes}"
 97 |         self._add_event(event)
 98 | 
 99 |     def __parse_artefact(self, csv_reader: Any, artefact: Path) -> None:  # noqa: ANN401
100 |         for ntfs_info_row in csv_reader:
101 |             timestamp_fields = [
102 |                 "CreationDate",
103 |                 "LastModificationDate",
104 |                 "LastAccessDate",
105 |                 "LastAttrChangeDate",
106 |                 "FileNameCreationDate",
107 |                 "FileNameLastModificationDate",
108 |                 "FileNameLastAccessDate",
109 |                 "FileNameLastAttrModificationDate",
110 |             ]
111 | 
112 |             while len(timestamp_fields) > 0:
113 |                 ref_field = timestamp_fields.pop()
114 |                 ref_timestamp = ntfs_info_row[ref_field]
115 |                 same_timestamps_group = [ref_field]
116 |                 same_timestamps_group.extend(
117 |                     field for field in timestamp_fields if ref_timestamp == ntfs_info_row[field]
118 |                 )
119 | 
120 |                 self._generate_one_csv_line(
121 |                     ntfs_info_row,
122 |                     Path(artefact).name,
123 |                     same_timestamps_group,
124 |                     ref_timestamp,
125 |                 )
126 | 
127 |                 for field in same_timestamps_group:
128 |                     if field != ref_field:
129 |                         timestamp_fields.remove(field)
130 | 
131 |     def _parse_artefact(self, artefact: Path) -> None:
132 |         # It is compulsary to use new chunk because if an error occurs
133 |         # all files in self.output_files_list will be deleted an artefact
134 |         # will be reprocessed.
135 |         # Processing as it follow ensures that events extracted from previous
136 |         # artefacts will not be deleted is an error occurs while processing
137 |         # current artefact.
138 |         self.output_files_list = []
139 |         self._flush_chunk_and_new_chunk()
140 |         try:
141 |             with Path(artefact).open(encoding="utf-8") as fd:
142 |                 csv_reader = csv.DictReader(fd)
143 |                 self.__parse_artefact(csv_reader, artefact)
144 |         # when file contains NULL character, old versions of csv can crash
145 |         except (_csv.Error, UnicodeDecodeError) as e:
146 |             with Path(artefact).open(encoding="utf-8", errors="ignore") as fd:
147 |                 self.logger.critical("csv error caught alternative way for host %s: %s", self.hostname, e)
148 |                 self._delete_all_result_files()
149 |                 data = fd.read()
150 |                 clean_data = "".join(c for c in data if c in string.printable)
151 |                 data_io = StringIO(clean_data)
152 |                 csv_reader = csv.DictReader(data_io)
153 |                 self.__parse_artefact(csv_reader, artefact)
154 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/RegistryToTimeline-important-keys.txt:
--------------------------------------------------------------------------------
 1 | HKEY_CURRENT_USER\Environment
 2 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\Run
 3 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\RunEx
 4 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnce
 5 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnceEx
 6 | HKEY_CURRENT_USER\Software\Microsoft\HtmlHelp Author
 7 | HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Windows
 8 | HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Winlogon
 9 | HKEY_CURRENT_USER\Software\Microsoft\Windows\Windows Error Reporting\Hangs
10 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Run
11 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\RunEx
12 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnce
13 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnceEx
14 | HKEY_LOCAL_MACHINE\SYSTEM\ControlSet001\Control\Lsa
15 | HKEY_LOCAL_MACHINE\SYSTEM\ControlSet002\Control\Lsa
16 | HKEY_LOCAL_MACHINE\SYSTEM\ControlSet003\Control\Lsa
17 | HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\Lsa
18 | HKEY_LOCAL_MACHINE\Software\Classes\CLSID\{52A2AAAE-085D-4187-97EA-8C30DB990436}\InprocServer32
19 | HKEY_LOCAL_MACHINE\Software\Classes\Wow6432Node\CLSID\{52A2AAAE-085D-4187-97EA-8C30DB990436}\InprocServer32
20 | HKEY_LOCAL_MACHINE\Software\Microsoft\Command Processor
21 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\AeDebug
22 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Windows
23 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon
24 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon\Notify
25 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon\Shell
26 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon\Userinit
27 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows\CurrentVersion\Explorer\MyComputer
28 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows\Windows Error Reporting\Hangs
29 | HKEY_LOCAL_MACHINE\Software\WOW6432Node\Microsoft\Windows NT\CurrentVersion\AeDebug
30 | HKEY_LOCAL_MACHINE\Software\WOW6432Node\Microsoft\Windows NT\CurrentVersion\Winlogon
31 | HKEY_LOCAL_MACHINE\Software\Wow6432Node\Microsoft\Command Processor
32 | HKEY_LOCAL_MACHINE\Software\Wow6432Node\Microsoft\Windows NT\CurrentVersion\Windows
33 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\ContentIndex\Language
34 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\LsaExtensionConfig\LsaSrv
35 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\Terminal Server\WinStations\RDP-Tcp
36 | HKEY_LOCAL_MACHINE\System\ControlSet001\Services\DNS\Parameters
37 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\ContentIndex\Language
38 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\LsaExtensionConfig\LsaSrv
39 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\Terminal Server\WinStations\RDP-Tcp
40 | HKEY_LOCAL_MACHINE\System\ControlSet002\Services\DNS\Parameters
41 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\ContentIndex\Language
42 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\LsaExtensionConfig\LsaSrv
43 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\Terminal Server\WinStations\RDP-Tcp
44 | HKEY_LOCAL_MACHINE\System\ControlSet003\Services\DNS\Parameters
45 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\LsaExtensionConfig\LsaSrv
46 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\LsaExtensionConfig\LsaSrv
47 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\LsaExtensionConfig\LsaSrv
48 | HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\LsaExtensionConfig\LsaSrv
49 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\Session Manager\AppCertDlls
50 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\Session Manager\AppCertDlls
51 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\Session Manager\AppCertDlls
52 | HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\Session Manager\AppCertDlls
53 | HKEY_LOCAL_MACHINE\System\ControlSet001\Services\DNS\Parameters
54 | HKEY_LOCAL_MACHINE\System\ControlSet002\Services\DNS\Parameters
55 | HKEY_LOCAL_MACHINE\System\ControlSet003\Services\DNS\Parameters
56 | HKEY_LOCAL_MACHINE\System\CurrentControlSet\Services\DNS\Parameters
57 | \Environment
58 | \Software\Microsoft\Command Processor
59 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/RegistryToTimeline.py:
--------------------------------------------------------------------------------
  1 | """Plugin to parse hives."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | from pathlib import Path
  6 | from typing import TYPE_CHECKING
  7 | 
  8 | if TYPE_CHECKING:
  9 |     from threading import Lock
 10 | 
 11 |     from orc2timeline.config import PluginConfig
 12 | 
 13 | from dfwinreg import definitions as dfwinreg_definition
 14 | from dfwinreg import regf as dfwinreg_regf
 15 | from dfwinreg import registry as dfwinreg_registry
 16 | 
 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
 18 | 
 19 | Type = {}
 20 | Type[0x0001] = "RegSZ"
 21 | Type[0x0002] = "RegExpandSZ"
 22 | Type[0x0003] = "RegBin"
 23 | Type[0x0004] = "RegDWord"
 24 | Type[0x0007] = "RegMultiSZ"
 25 | Type[0x000B] = "RegQWord"
 26 | Type[0x0000] = "RegNone"
 27 | Type[0x0005] = "RegBigEndian"
 28 | Type[0x0006] = "RegLink"
 29 | Type[0x0008] = "RegResourceList"
 30 | Type[0x0009] = "RegFullResourceDescriptor"
 31 | Type[0x000A] = "RegResourceRequirementsList"
 32 | Type[0x0010] = "RegFileTime"
 33 | 
 34 | 
 35 | def _decode_utf16le(s: bytes) -> str:
 36 |     if b"\x00\x00" in s:
 37 |         index = s.index(b"\x00\x00")
 38 |         if index > 2:  # noqa: PLR2004
 39 |             if s[index - 2] != b"\x00"[0]:  # py2+3 # noqa: SIM108
 40 |                 #  61 00 62 00 63 64 00 00
 41 |                 #                    ^  ^-- end of string
 42 |                 #                    +-- index
 43 |                 s = s[: index + 2]
 44 |             else:
 45 |                 #  61 00 62 00 63 00 00 00
 46 |                 #                 ^     ^-- end of string
 47 |                 #                 +-- index
 48 |                 s = s[: index + 3]
 49 |     if (len(s) % 2) != 0:
 50 |         s = s + b"\x00"
 51 |     res = s.decode("utf16", errors="ignore")
 52 |     return res.partition("\x00")[0]
 53 | 
 54 | 
 55 | def _readable_multi_sz(value: bytes) -> str:
 56 |     new_value = value[:-4]
 57 |     res = ""
 58 |     for word in new_value.split(b"\x00\x00\x00"):
 59 |         res += _decode_utf16le(word)
 60 |         res += "|"
 61 | 
 62 |     return res[:-1]
 63 | 
 64 | 
 65 | def _readable_reg_value(value: dfwinreg_regf.REGFWinRegistryValue) -> bytes | str:
 66 |     simple_types = {dfwinreg_definition.REG_EXPAND_SZ, dfwinreg_definition.REG_SZ, dfwinreg_definition.REG_LINK}
 67 |     if value.data_type in simple_types:
 68 |         return _decode_utf16le(value.data)
 69 |     if value.data_type == dfwinreg_definition.REG_MULTI_SZ:
 70 |         return _readable_multi_sz(value.data)
 71 | 
 72 |     return bytes(value.data)
 73 | 
 74 | 
 75 | class RegistryToTimeline(GenericToTimeline):
 76 |     def __init__(
 77 |         self,
 78 |         config: PluginConfig,
 79 |         orclist: list[str],
 80 |         output_file_path: str,
 81 |         hostname: str,
 82 |         tmp_dir: str,
 83 |         lock: Lock,
 84 |     ) -> None:
 85 |         """Construct."""
 86 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
 87 | 
 88 |         self.file_header = bytes([0x72, 0x65, 0x67, 0x66])
 89 | 
 90 |         self.importantKeysFile = Path(Path(__file__).parent) / "RegistryToTimeline-important-keys.txt"
 91 |         self.importantKeys = self._parse_important_keys_file(self.importantKeysFile)
 92 | 
 93 |     def _parse_important_keys_file(self, file_path: Path) -> list[str]:
 94 |         result = []
 95 |         if file_path.exists():
 96 |             with Path(file_path).open() as f:
 97 |                 for line in f:
 98 |                     my_line = line.strip()
 99 |                     if my_line.startswith("#") or len(my_line) == 0:
100 |                         continue
101 | 
102 |                     result.append(my_line)
103 |         return result
104 | 
105 |     def _print_only_key(self, key: dfwinreg_regf.REGFWinRegistryValue, artefact: Path) -> None:
106 |         try:
107 |             event = Event(
108 |                 timestamp_str=key.last_written_time.CopyToDateTimeString()[:-4],
109 |                 source=self._get_original_path(artefact),
110 |                 description=key.path,
111 |             )
112 |             self._add_event(event)
113 |         except Exception as e:  # noqa: BLE001
114 |             key_path = "Unknown"
115 |             if key_path:
116 |                 key_path = key.path
117 |             self.logger.critical("Unable to print key %s from %s. Error: %s", key_path, artefact, e)
118 | 
119 |     def _print_all_keyvalues(self, key: dfwinreg_regf.REGFWinRegistryValue, artefact: Path) -> None:
120 |         for value in key.GetValues():
121 |             readable_type = Type[value.data_type]
122 |             readable_data = _readable_reg_value(value)
123 |             event = Event(
124 |                 timestamp_str=key.last_written_time.CopyToDateTimeString()[:-4],
125 |                 source=self._get_original_path(artefact),
126 |                 description=(
127 |                     f"KeyPath: {key.path} - ValueName: {value.name} - "
128 |                     f"ValueType: {readable_type} - ValueData: {readable_data!s}"
129 |                 ),
130 |             )
131 |             self._add_event(event)
132 | 
133 |     def _parse_key(self, key: dfwinreg_regf.REGFWinRegistryValue, artefact: Path) -> None:
134 |         if key is not None:
135 |             self._print_only_key(key, artefact)
136 |             if key.path in self.importantKeys:
137 |                 self._print_all_keyvalues(key, artefact)
138 | 
139 |             for subkey_index in range(key.number_of_subkeys):
140 |                 try:
141 |                     subkey = key.GetSubkeyByIndex(subkey_index)
142 |                     self._parse_key(subkey, artefact)
143 |                 except OSError as e:
144 |                     self.logger.debug("Error while parsing registry keys: %s", e)
145 | 
146 |     def _parse_artefact(self, artefact: Path) -> None:
147 |         with Path(artefact).open("rb") as f:
148 |             try:
149 |                 reg_file = dfwinreg_regf.REGFWinRegistryFile(emulate_virtual_keys=False)
150 |                 reg_file.Open(f)
151 |                 win_registry = dfwinreg_registry.WinRegistry()
152 |                 key_path_prefix = win_registry.GetRegistryFileMapping(reg_file)
153 |                 reg_file.SetKeyPathPrefix(key_path_prefix)
154 |                 root_key = reg_file.GetRootKey()
155 |                 self._parse_key(root_key, artefact)
156 |             except Exception as e:  # noqa: BLE001
157 |                 self.logger.warning(
158 |                     "Error while parsing %s: %s",
159 |                     Path(artefact).name,
160 |                     e,
161 |                 )
162 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/USNInfoToTimeline.py:
--------------------------------------------------------------------------------
 1 | """Plugin to parse USNInfo files."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import _csv
 6 | import csv
 7 | import string
 8 | from io import StringIO
 9 | from pathlib import Path
10 | from typing import TYPE_CHECKING, Any
11 | 
12 | if TYPE_CHECKING:
13 |     from threading import Lock
14 | 
15 |     from orc2timeline.config import PluginConfig
16 | 
17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline
18 | 
19 | 
20 | class USNInfoToTimeline(GenericToTimeline):
21 |     def __init__(
22 |         self,
23 |         config: PluginConfig,
24 |         orclist: list[str],
25 |         output_file_path: str,
26 |         hostname: str,
27 |         tmp_dir: str,
28 |         lock: Lock,
29 |     ) -> None:
30 |         """Construct."""
31 |         super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock)
32 | 
33 |     def _parse_usn_file(self, csv_reader: Any, artefact: Path) -> None:  # noqa: ANN401
34 |         for row in csv_reader:
35 |             # Not pretty but it's a way to skip header
36 |             if row["USN"] == "USN":
37 |                 continue
38 |             event = Event(
39 |                 timestamp_str=row["TimeStamp"],
40 |                 source=Path(artefact).name,
41 |             )
42 |             mft_segment_number = 0
43 |             try:
44 |                 mft_segment_number = int(row["FRN"], 16) & 0xFFFFFFFF
45 |             except ValueError as e:
46 |                 self.logger.warning("Error while getting FRN. Error: %s", e)
47 |             full_path = row["FullPath"]
48 |             reason = row["Reason"]
49 |             event.description = f"{full_path} - {reason} - MFT segment num : {mft_segment_number}"
50 | 
51 |             self._add_event(event)
52 | 
53 |     def _parse_artefact(self, artefact: Path) -> None:
54 |         # It is compulsary to use new chunk because if an error occurs
55 |         # all files in self.output_files_list will be deleted an artefact
56 |         # will be reprocessed.
57 |         # Processing as it follow ensures that events extracted from previous
58 |         # artefacts will not be deleted is an error occurs while processing
59 |         # current artefact.
60 |         self.output_files_list = []
61 |         self._flush_chunk_and_new_chunk()
62 |         try:
63 |             with Path(artefact).open(encoding="utf-8") as fd:
64 |                 csv_reader = csv.DictReader(fd)
65 |                 self._parse_usn_file(csv_reader, artefact)
66 |         # when file contains NULL character, old versions of csv can crash
67 |         except (_csv.Error, UnicodeDecodeError) as e:
68 |             with Path(artefact).open(encoding="utf-8", errors="ignore") as fd:
69 |                 self.logger.critical("csv error caught alternative way for host %s: %s", self.hostname, e)
70 |                 self._delete_all_result_files()
71 |                 data = fd.read()
72 |                 clean_data = "".join(c for c in data if c in string.printable)
73 |                 data_io = StringIO(clean_data)
74 |                 csv_reader = csv.DictReader(data_io)
75 |                 self._parse_usn_file(csv_reader, artefact)
76 | 


--------------------------------------------------------------------------------
/src/orc2timeline/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | """Directory with plugins code."""
2 | 


--------------------------------------------------------------------------------
/src/orc2timeline/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/src/orc2timeline/py.typed


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Package for test project."""
2 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | """Configuration for all tests."""
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | import pytest
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def resources_path() -> Path:
10 |     """Fixture for create a path to test resources."""
11 |     return Path(__file__).parent / "resources"
12 | 


--------------------------------------------------------------------------------
/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z


--------------------------------------------------------------------------------
/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z


--------------------------------------------------------------------------------
/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z


--------------------------------------------------------------------------------
/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z


--------------------------------------------------------------------------------
/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z


--------------------------------------------------------------------------------
/tests/data/null_csv/ORC_Server_FAKEMACHINE_Detail.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_Detail.7z


--------------------------------------------------------------------------------
/tests/data/null_csv/ORC_Server_FAKEMACHINE_General.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_General.7z


--------------------------------------------------------------------------------
/tests/data/null_csv/ORC_Server_FAKEMACHINE_Little.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_Little.7z


--------------------------------------------------------------------------------
/tests/data/null_csv/ORC_Server_FAKEMACHINE_Memory.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_Memory.7z


--------------------------------------------------------------------------------
/tests/data/null_csv/ORC_Server_FAKEMACHINE_SAM.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_SAM.7z


--------------------------------------------------------------------------------
/tests/output/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/output/.gitignore


--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
  1 | """Test for command line interface."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import gzip
  6 | import hashlib
  7 | import subprocess
  8 | import sys
  9 | from pathlib import Path
 10 | 
 11 | from click.testing import CliRunner
 12 | 
 13 | from orc2timeline import __version__, entrypoint
 14 | 
 15 | 
 16 | def _zcat_and_sha1(file: str) -> str:
 17 |     buf_size = 65536
 18 |     with gzip.open(file, "rb") as fd:
 19 |         my_sha1 = hashlib.sha1()  # noqa: S324
 20 |         while True:
 21 |             data = fd.read(buf_size)
 22 |             if not data:
 23 |                 break
 24 |             my_sha1.update(data)
 25 | 
 26 |     return str(my_sha1.hexdigest())
 27 | 
 28 | 
 29 | def _run_process(args: list[str]) -> tuple[str, str, int]:
 30 |     process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 31 |     out, err = process.communicate()
 32 |     code = process.returncode
 33 |     return out.decode("utf"), err.decode("utf"), code
 34 | 
 35 | 
 36 | def test_cli_version() -> None:
 37 |     """Test if the command line interface is installed correctly."""
 38 |     ver = f"version {__version__}"
 39 |     out = subprocess.check_output(
 40 |         (
 41 |             "orc2timeline",
 42 |             "--version",
 43 |         ),
 44 |         text=True,
 45 |         shell=False,
 46 |     )
 47 |     assert ver in out
 48 |     out = subprocess.check_output(
 49 |         (
 50 |             sys.executable,
 51 |             "-m",
 52 |             "orc2timeline",
 53 |             "--version",
 54 |         ),
 55 |         text=True,
 56 |         shell=False,
 57 |     )
 58 |     assert ver in out
 59 |     runner = CliRunner()
 60 |     result = runner.invoke(entrypoint, ["--version"])
 61 |     out = result.output
 62 |     assert ver in out
 63 | 
 64 | 
 65 | def test_import() -> None:
 66 |     """Test if module entrypoint has correct imports."""
 67 |     import orc2timeline.__main__  # noqa: F401
 68 | 
 69 | 
 70 | def test_dir_input_dir_is_a_file() -> None:
 71 |     """Test is error a properly triggered when a file is given instead of the input dir."""
 72 |     out, err, code = _run_process(
 73 |         [
 74 |             "orc2timeline",
 75 |             "process_dir",
 76 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
 77 |             "tests/output/",
 78 |         ],
 79 |     )
 80 | 
 81 |     assert (
 82 |         "Invalid value for 'INPUT_DIR': Directory "
 83 |         "'tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z' is a file." in err
 84 |     )
 85 | 
 86 | 
 87 | def test_dir_output_dir_is_a_file() -> None:
 88 |     """Test is error a properly triggered when a file is given instead of the output dir."""
 89 |     Path("tests/output/file_instead_of_dir").touch()
 90 |     out, err, code = _run_process(
 91 |         [
 92 |             "orc2timeline",
 93 |             "process_dir",
 94 |             "tests/data/conf_7_archives/",
 95 |             "tests/output/file_instead_of_dir",
 96 |         ],
 97 |     )
 98 | 
 99 |     Path("tests/output/file_instead_of_dir").unlink()
100 | 
101 |     assert "Invalid value for 'OUTPUT_DIR': Directory 'tests/output/file_instead_of_dir' is a file." in err
102 | 
103 | 
104 | def test_dir_no_job() -> None:
105 |     """Test if processing directory with 1 job works correctly."""
106 |     out, err, code = _run_process(
107 |         [
108 |             "orc2timeline",
109 |             "process_dir",
110 |             "--overwrite",
111 |             "tests/data/conf_7_archives/",
112 |             "tests/output/",
113 |         ],
114 |     )
115 | 
116 |     assert "== Printing final summary of generated timelines:" in err
117 |     assert "====== Hostname: FAKEMACHINE - 1149 events" in err
118 |     assert "========== FAKEMACHINE RegistryToTimeline 683" in err
119 |     assert "========== FAKEMACHINE EventLogsToTimeline 125" in err
120 |     assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err
121 |     assert "========== FAKEMACHINE USNInfoToTimeline 99" in err
122 |     assert "========== FAKEMACHINE I30InfoToTimeline 54" in err
123 |     assert "====== Total for FAKEMACHINE: 1149" in err
124 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
125 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
126 | 
127 |     for f in Path("tests/output").glob("**"):
128 |         if f.is_file():
129 |             f.unlink()
130 | 
131 | 
132 | def test_dir_1_jobs() -> None:
133 |     """Test if processing directory with 1 job works correctly."""
134 |     out, err, code = _run_process(
135 |         [
136 |             "orc2timeline",
137 |             "process_dir",
138 |             "--overwrite",
139 |             "-j 1",
140 |             "tests/data/conf_7_archives/",
141 |             "tests/output/",
142 |         ],
143 |     )
144 | 
145 |     assert "== Printing final summary of generated timelines:" in err
146 |     assert "====== Hostname: FAKEMACHINE - 1149 events" in err
147 |     assert "========== FAKEMACHINE RegistryToTimeline 683" in err
148 |     assert "========== FAKEMACHINE EventLogsToTimeline 125" in err
149 |     assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err
150 |     assert "========== FAKEMACHINE USNInfoToTimeline 99" in err
151 |     assert "========== FAKEMACHINE I30InfoToTimeline 54" in err
152 |     assert "====== Total for FAKEMACHINE: 1149" in err
153 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
154 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
155 | 
156 |     for f in Path("tests/output").glob("**"):
157 |         if f.is_file():
158 |             f.unlink()
159 | 
160 | 
161 | def test_dir_5_jobs() -> None:
162 |     """Test if processing directory with 5 jobs works correctly."""
163 |     out, err, code = _run_process(
164 |         [
165 |             "orc2timeline",
166 |             "process_dir",
167 |             "--overwrite",
168 |             "-j 5",
169 |             "tests/data/conf_7_archives/",
170 |             "tests/output/",
171 |         ],
172 |     )
173 | 
174 |     assert "== Printing final summary of generated timelines:" in err
175 |     assert "====== Hostname: FAKEMACHINE - 1149 events" in err
176 |     assert "========== FAKEMACHINE RegistryToTimeline 683" in err
177 |     assert "========== FAKEMACHINE EventLogsToTimeline 125" in err
178 |     assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err
179 |     assert "========== FAKEMACHINE USNInfoToTimeline 99" in err
180 |     assert "========== FAKEMACHINE I30InfoToTimeline 54" in err
181 |     assert "====== Total for FAKEMACHINE: 1149" in err
182 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
183 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
184 | 
185 |     for f in Path("tests/output").glob("**"):
186 |         if f.is_file():
187 |             f.unlink()
188 | 
189 | 
190 | def test_dir_twice_same_hostname() -> None:
191 |     """Test if error is properly triggered when two Orc with the same hostname are in input directory."""
192 |     out, err, code = _run_process(
193 |         [
194 |             "orc2timeline",
195 |             "process_dir",
196 |             "tests/data/",
197 |             "tests/output/",
198 |         ],
199 |     )
200 | 
201 |     assert "CRITICAL - Unable to process directory if the same host is used many times." in err
202 |     assert "CRITICAL - Hint, these hosts seem to be the source of the problem : {'FAKEMACHINE'}" in err
203 | 
204 | 
205 | def test_dir_output_file_already_exists() -> None:
206 |     """Test if the error meggase is displayed when process_dir is called for result file that already exists."""
207 |     Path("tests/output/FAKEMACHINE.csv.gz").touch()
208 |     out, err, code = _run_process(
209 |         [
210 |             "orc2timeline",
211 |             "process_dir",
212 |             "tests/data/conf_7_archives/",
213 |             "tests/output/",
214 |         ],
215 |     )
216 | 
217 |     assert (
218 |         "Output file 'tests/output/FAKEMACHINE.csv.gz' already exists, processing"
219 |         " will be ignored for host FAKEMACHINE use '--overwrite' if you know what you are doing." in err
220 |     )
221 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
222 | 
223 | 
224 | def test_simple_5_jobs() -> None:
225 |     """Test if processing the test ORCs with 5 jobs works correctly."""
226 |     out, err, code = _run_process(
227 |         [
228 |             "orc2timeline",
229 |             "process",
230 |             "--overwrite",
231 |             "-j 5",
232 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
233 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z",
234 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z",
235 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z",
236 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z",
237 |             "tests/output/FAKEMACHINE.csv.gz",
238 |         ],
239 |     )
240 | 
241 |     assert "== Printing final summary of generated timelines:" in err
242 |     assert "====== Hostname: FAKEMACHINE - 1149 events" in err
243 |     assert "========== FAKEMACHINE RegistryToTimeline 683" in err
244 |     assert "========== FAKEMACHINE EventLogsToTimeline 125" in err
245 |     assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err
246 |     assert "========== FAKEMACHINE USNInfoToTimeline 99" in err
247 |     assert "========== FAKEMACHINE I30InfoToTimeline 54" in err
248 |     assert "====== Total for FAKEMACHINE: 1149" in err
249 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
250 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
251 | 
252 | 
253 | def test_simple_1_job() -> None:
254 |     """Test if processing the test ORCs with 1 job works correctly."""
255 |     out, err, code = _run_process(
256 |         [
257 |             "orc2timeline",
258 |             "process",
259 |             "--overwrite",
260 |             "-j 1",
261 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
262 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z",
263 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z",
264 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z",
265 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z",
266 |             "tests/output/FAKEMACHINE.csv.gz",
267 |         ],
268 |     )
269 | 
270 |     assert "== Printing final summary of generated timelines:" in err
271 |     assert "====== Hostname: FAKEMACHINE - 1149 events" in err
272 |     assert "========== FAKEMACHINE RegistryToTimeline 683" in err
273 |     assert "========== FAKEMACHINE EventLogsToTimeline 125" in err
274 |     assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err
275 |     assert "========== FAKEMACHINE USNInfoToTimeline 99" in err
276 |     assert "========== FAKEMACHINE I30InfoToTimeline 54" in err
277 |     assert "====== Total for FAKEMACHINE: 1149" in err
278 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
279 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
280 | 
281 | 
282 | def test_simple_no_job() -> None:
283 |     """Test if processing the test ORCs with 1 job works correctly."""
284 |     out, err, code = _run_process(
285 |         [
286 |             "orc2timeline",
287 |             "process",
288 |             "--overwrite",
289 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
290 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z",
291 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z",
292 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z",
293 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z",
294 |             "tests/output/FAKEMACHINE.csv.gz",
295 |         ],
296 |     )
297 | 
298 |     assert "== Printing final summary of generated timelines:" in err
299 |     assert "====== Hostname: FAKEMACHINE - 1149 events" in err
300 |     assert "========== FAKEMACHINE RegistryToTimeline 683" in err
301 |     assert "========== FAKEMACHINE EventLogsToTimeline 125" in err
302 |     assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err
303 |     assert "========== FAKEMACHINE USNInfoToTimeline 99" in err
304 |     assert "========== FAKEMACHINE I30InfoToTimeline 54" in err
305 |     assert "====== Total for FAKEMACHINE: 1149" in err
306 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
307 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
308 | 
309 | 
310 | def test_simple_log_file() -> None:
311 |     """Test if processing the test ORCs with 1 job works correctly."""
312 |     out, err, code = _run_process(
313 |         [
314 |             "orc2timeline",
315 |             "--log-file",
316 |             "tests/output/blabla.log",
317 |             "process",
318 |             "--overwrite",
319 |             "-j 1",
320 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
321 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z",
322 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z",
323 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z",
324 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z",
325 |             "tests/output/FAKEMACHINE.csv.gz",
326 |         ],
327 |     )
328 | 
329 |     if Path("tests/output/blabla.log").exists():
330 |         with Path("tests/output/blabla.log").open("r") as f:
331 |             data = f.read()
332 |             assert "== Printing final summary of generated timelines:" in data
333 |             assert "====== Hostname: FAKEMACHINE - 1149 events" in data
334 |             assert "========== FAKEMACHINE RegistryToTimeline 683" in data
335 |             assert "========== FAKEMACHINE EventLogsToTimeline 125" in data
336 |             assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in data
337 |             assert "========== FAKEMACHINE USNInfoToTimeline 99" in data
338 |             assert "========== FAKEMACHINE I30InfoToTimeline 54" in data
339 |             assert "====== Total for FAKEMACHINE: 1149" in data
340 | 
341 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
342 |     Path("tests/output/blabla.log").unlink()
343 | 
344 | 
345 | def test_simple_input_file_doesnt_exist() -> None:
346 |     """Test if the error is triggered when orc2timeline is used with wrong parameters."""
347 |     out, err, code = _run_process(
348 |         ["orc2timeline", "process", "tests/data/DOES_NOT_EXIST", "tests/output/FAKEMACHINE.csv.gz"],
349 |     )
350 | 
351 |     assert "Error: Invalid value for '[FILE_LIST]...': File 'tests/data/DOES_NOT_EXIST' does not exist." in err
352 | 
353 | 
354 | def test_simple_output_file_already_exists() -> None:
355 |     """Test if the error is triggered when orc2timeline is used with wrong parameters."""
356 |     Path("tests/output/FAKEMACHINE.csv.gz").touch()
357 |     out, err, code = _run_process(
358 |         [
359 |             "orc2timeline",
360 |             "process",
361 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
362 |             "tests/output/FAKEMACHINE.csv.gz",
363 |         ],
364 |     )
365 | 
366 |     assert (
367 |         "Error: Invalid value: 'OUTPUT_PATH': File 'tests/output/FAKEMACHINE.csv.gz' already exists, use '--overwrite' if you know what you are doing."  # noqa: E501
368 |         in err
369 |     )
370 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
371 | 
372 | 
373 | def test_simple_output_dir_does_not_exist() -> None:
374 |     """Test if the error is triggered when orc2timeline is used with wrong parameters."""
375 |     out, err, code = _run_process(
376 |         [
377 |             "orc2timeline",
378 |             "process",
379 |             "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z",
380 |             "tests/DOES_NOT_EXIST/FAKEMACHINE.csv.gz",
381 |         ],
382 |     )
383 | 
384 |     assert (
385 |         "Error: Invalid value: 'OUTPUT_PATH': Directory 'tests/DOES_NOT_EXIST' does not exist or is not a directory."
386 |         in err
387 |     )
388 | 
389 | 
390 | def test_show_conf() -> None:
391 |     """Test if show_conf works properly."""
392 |     out, err, code = _run_process(
393 |         [
394 |             "orc2timeline",
395 |             "show_conf",
396 |         ],
397 |     )
398 | 
399 | 
400 | def test_show_conf_file() -> None:
401 |     """Test if show_conf_file works properly."""
402 |     out, err, code = _run_process(
403 |         [
404 |             "orc2timeline",
405 |             "show_conf_file",
406 |         ],
407 |     )
408 | 


--------------------------------------------------------------------------------
/tests/test_config.py:
--------------------------------------------------------------------------------
  1 | """Test config parser."""
  2 | 
  3 | from __future__ import annotations
  4 | 
  5 | import subprocess
  6 | from pathlib import Path
  7 | 
  8 | 
  9 | def _run_process(args: list[str]) -> tuple[str, str, int]:
 10 |     process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 11 |     out, err = process.communicate()
 12 |     code = process.returncode
 13 |     return out.decode("utf"), err.decode("utf"), code
 14 | 
 15 | 
 16 | def _get_conf_file_path() -> Path:
 17 |     out, err, code = _run_process(
 18 |         [
 19 |             "orc2timeline",
 20 |             "show_conf_file",
 21 |         ],
 22 |     )
 23 | 
 24 |     file_path = out.splitlines()[-1]
 25 |     return Path(file_path)
 26 | 
 27 | 
 28 | def test_conf_file_do_not_exist() -> None:
 29 |     """Test config parsing when file does not exist."""
 30 |     file_path = _get_conf_file_path()
 31 |     conf_file_path = Path(file_path)
 32 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
 33 |     conf_file_path.rename(str(conf_file_path_bak))
 34 | 
 35 |     out, err, code = _run_process(
 36 |         [
 37 |             "orc2timeline",
 38 |             "process_dir",
 39 |             "--overwrite",
 40 |             "tests/data/conf_7_archives/",
 41 |             "tests/output/",
 42 |         ],
 43 |     )
 44 | 
 45 |     conf_file_path_bak.rename(str(conf_file_path))
 46 | 
 47 |     assert "Cannot read configuration file" in err
 48 |     assert "(file does not exist)" in err
 49 | 
 50 | 
 51 | def test_conf_file_is_a_dir() -> None:
 52 |     """Test config parsing when configuration is in fact a directory."""
 53 |     file_path = _get_conf_file_path()
 54 |     conf_file_path = Path(file_path)
 55 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
 56 |     conf_file_path.rename(str(conf_file_path_bak))
 57 |     conf_file_path.mkdir()
 58 | 
 59 |     out, err, code = _run_process(
 60 |         [
 61 |             "orc2timeline",
 62 |             "process_dir",
 63 |             "--overwrite",
 64 |             "tests/data/conf_7_archives/",
 65 |             "tests/output/",
 66 |         ],
 67 |     )
 68 | 
 69 |     conf_file_path.rmdir()
 70 |     conf_file_path_bak.rename(str(conf_file_path))
 71 | 
 72 |     assert "Cannot read configuration file" in err
 73 |     assert "(is not a file)" in err
 74 | 
 75 | 
 76 | def test_conf_file_wrong_yaml() -> None:
 77 |     """Test config when yaml parsing goes wrong."""
 78 |     content = '''Plugins:
 79 |   - RegistryToTimeline:
 80 |   archives: ["SAM", "Little", "Detail", "Offline"]
 81 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
 82 |       match_pattern: ".*data$"
 83 |       sourcetype: "Registry"'''
 84 |     file_path = _get_conf_file_path()
 85 |     conf_file_path = Path(file_path)
 86 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
 87 |     conf_file_path.rename(str(conf_file_path_bak))
 88 |     with conf_file_path.open("w") as conf_file:
 89 |         conf_file.write(content)
 90 | 
 91 |     out, err, code = _run_process(
 92 |         [
 93 |             "orc2timeline",
 94 |             "process_dir",
 95 |             "--overwrite",
 96 |             "tests/data/conf_7_archives/",
 97 |             "tests/output/",
 98 |         ],
 99 |     )
100 | 
101 |     conf_file_path.unlink()
102 |     conf_file_path_bak.rename(str(conf_file_path))
103 | 
104 |     assert "An error occured while parsing configuration" in err
105 | 
106 | 
107 | def test_conf_file_empty_archive() -> None:
108 |     """Test configuration parsing when archive is empty."""
109 |     content = '''Plugins:
110 |   - RegistryToTimeline:
111 |       archives: []
112 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
113 |       match_pattern: ".*data$"
114 |       sourcetype: "Registry"'''
115 |     file_path = _get_conf_file_path()
116 |     conf_file_path = Path(file_path)
117 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
118 |     conf_file_path.rename(str(conf_file_path_bak))
119 |     with conf_file_path.open("w") as conf_file:
120 |         conf_file.write(content)
121 | 
122 |     out, err, code = _run_process(
123 |         [
124 |             "orc2timeline",
125 |             "process_dir",
126 |             "--overwrite",
127 |             "tests/data/conf_7_archives/",
128 |             "tests/output/",
129 |         ],
130 |     )
131 | 
132 |     conf_file_path.unlink()
133 |     conf_file_path_bak.rename(str(conf_file_path))
134 | 
135 |     assert "configuration describes plugin without any archive." in err
136 | 
137 | 
138 | def test_conf_file_sub_archives_empty() -> None:
139 |     """Test configuration parsing when sub_archive is empty."""
140 |     content = '''Plugins:
141 |   - RegistryToTimeline:
142 |       archives: ["SAM", "Little", "Detail", "Offline"]
143 |       sub_archives: []
144 |       match_pattern: ".*data$"
145 |       sourcetype: "Registry"'''
146 |     file_path = _get_conf_file_path()
147 |     conf_file_path = Path(file_path)
148 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
149 |     conf_file_path.rename(str(conf_file_path_bak))
150 |     with conf_file_path.open("w") as conf_file:
151 |         conf_file.write(content)
152 | 
153 |     out, err, code = _run_process(
154 |         [
155 |             "orc2timeline",
156 |             "process_dir",
157 |             "--overwrite",
158 |             "tests/data/conf_7_archives/",
159 |             "tests/output/",
160 |         ],
161 |     )
162 | 
163 |     conf_file_path.unlink()
164 |     conf_file_path_bak.rename(str(conf_file_path))
165 | 
166 |     assert "FAKEMACHINE RegistryToTimeline 0" in err
167 | 
168 | 
169 | def test_conf_file_empty_plugin_name() -> None:
170 |     """Test configuration when plugin name is empty."""
171 |     content = '''Plugins:
172 |   - "":
173 |       archives: ["SAM", "Little", "Detail", "Offline"]
174 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
175 |       match_pattern: ".*data$"
176 |       sourcetype: "Registry"'''
177 |     file_path = _get_conf_file_path()
178 |     conf_file_path = Path(file_path)
179 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
180 |     conf_file_path.rename(str(conf_file_path_bak))
181 |     with conf_file_path.open("w") as conf_file:
182 |         conf_file.write(content)
183 | 
184 |     out, err, code = _run_process(
185 |         [
186 |             "orc2timeline",
187 |             "process_dir",
188 |             "--overwrite",
189 |             "tests/data/conf_7_archives/",
190 |             "tests/output/",
191 |         ],
192 |     )
193 | 
194 |     conf_file_path.unlink()
195 |     conf_file_path_bak.rename(str(conf_file_path))
196 | 
197 |     assert "Empty plugin name in configuration is not allowed." in err
198 | 
199 | 
200 | def test_conf_file_fake_plugin() -> None:
201 |     """Test configuration when plugin file does not exist."""
202 |     content = '''Plugins:
203 |   - "FAKEPLUGIN":
204 |       archives: ["SAM", "Little", "Detail", "Offline"]
205 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
206 |       match_pattern: ".*data$"
207 |       sourcetype: "Registry"'''
208 |     file_path = _get_conf_file_path()
209 |     conf_file_path = Path(file_path)
210 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
211 |     conf_file_path.rename(str(conf_file_path_bak))
212 |     with conf_file_path.open("w") as conf_file:
213 |         conf_file.write(content)
214 | 
215 |     out, err, code = _run_process(
216 |         [
217 |             "orc2timeline",
218 |             "process_dir",
219 |             "--overwrite",
220 |             "tests/data/conf_7_archives/",
221 |             "tests/output/",
222 |         ],
223 |     )
224 | 
225 |     conf_file_path.unlink()
226 |     conf_file_path_bak.rename(str(conf_file_path))
227 | 
228 |     assert "Plugin FAKEPLUGIN:" in err
229 |     assert "orc2timeline/plugins/FAKEPLUGIN.py does not exist." in err
230 | 
231 | 
232 | def test_conf_file_empty_sourcetype() -> None:
233 |     """Test error in."""
234 |     content = '''Plugins:
235 |   - RegistryToTimeline:
236 |       archives: ["SAM", "Little", "Detail", "Offline"]
237 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
238 |       match_pattern: ".*data$"
239 |       sourcetype: ""'''
240 |     file_path = _get_conf_file_path()
241 |     conf_file_path = Path(file_path)
242 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
243 |     conf_file_path.rename(str(conf_file_path_bak))
244 |     with conf_file_path.open("w") as conf_file:
245 |         conf_file.write(content)
246 | 
247 |     out, err, code = _run_process(
248 |         [
249 |             "orc2timeline",
250 |             "process_dir",
251 |             "--overwrite",
252 |             "tests/data/conf_7_archives/",
253 |             "tests/output/",
254 |         ],
255 |     )
256 | 
257 |     conf_file_path.unlink()
258 |     conf_file_path_bak.rename(str(conf_file_path))
259 | 
260 |     assert "empty sourcetype is not allowed." in err
261 | 
262 | 
263 | def test_conf_file_empty_match_pattern() -> None:
264 |     """Test configuration when match_pattern is empty."""
265 |     content = '''Plugins:
266 |   - RegistryToTimeline:
267 |       archives: ["SAM", "Little", "Detail", "Offline"]
268 |       sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"]
269 |       match_pattern: ""
270 |       sourcetype: "Registry"'''
271 |     file_path = _get_conf_file_path()
272 |     conf_file_path = Path(file_path)
273 |     conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak"
274 |     conf_file_path.rename(str(conf_file_path_bak))
275 |     with conf_file_path.open("w") as conf_file:
276 |         conf_file.write(content)
277 | 
278 |     out, err, code = _run_process(
279 |         [
280 |             "orc2timeline",
281 |             "process_dir",
282 |             "--overwrite",
283 |             "tests/data/conf_7_archives/",
284 |             "tests/output/",
285 |         ],
286 |     )
287 | 
288 |     conf_file_path.unlink()
289 |     conf_file_path_bak.rename(str(conf_file_path))
290 | 
291 |     assert "empty match_pattern is not allowed." in err
292 | 


--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
 1 | """Test core module."""
 2 | 
 3 | import gzip
 4 | import hashlib
 5 | from pathlib import Path
 6 | 
 7 | from orc2timeline import process
 8 | 
 9 | 
10 | def _zcat_and_sha1(file: str) -> str:
11 |     buf_size = 65536
12 |     with gzip.open(file, "rb") as fd:
13 |         my_sha1 = hashlib.sha1()  # noqa: S324
14 |         while True:
15 |             data = fd.read(buf_size)
16 |             if not data:
17 |                 break
18 |             my_sha1.update(data)
19 | 
20 |     return str(my_sha1.hexdigest())
21 | 
22 | 
23 | def test_process_1_job() -> None:
24 |     """Test import mode with 1 job."""
25 |     file_list = [
26 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z"),
27 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z"),
28 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z"),
29 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z"),
30 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z"),
31 |     ]
32 | 
33 |     if Path("tests/output/FAKEMACHINE.csv.gz").exists():
34 |         Path("tests/output/FAKEMACHINE.csv.gz").unlink()
35 | 
36 |     process(file_list, "tests/output/FAKEMACHINE.csv.gz", "FAKEMACHINE", 1)
37 | 
38 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
39 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
40 | 
41 | 
42 | def test_process_5_jobs() -> None:
43 |     """Test import mode with 5 jobs."""
44 |     file_list = [
45 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z"),
46 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z"),
47 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z"),
48 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z"),
49 |         Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z"),
50 |     ]
51 | 
52 |     if Path("tests/output/FAKEMACHINE.csv.gz").exists():
53 |         Path("tests/output/FAKEMACHINE.csv.gz").unlink()
54 | 
55 |     process(file_list, "tests/output/FAKEMACHINE.csv.gz", "FAKEMACHINE", 5)
56 | 
57 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
58 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
59 | 
60 | 
61 | def test_null_in_csv_files() -> None:
62 |     """Test import mode with 1 job."""
63 |     file_list = [
64 |         Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_General.7z"),
65 |         Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_Detail.7z"),
66 |         Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_Little.7z"),
67 |         Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_Memory.7z"),
68 |         Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_SAM.7z"),
69 |     ]
70 | 
71 |     if Path("tests/output/FAKEMACHINE.csv.gz").exists():
72 |         Path("tests/output/FAKEMACHINE.csv.gz").unlink()
73 | 
74 |     process(file_list, "tests/output/FAKEMACHINE.csv.gz", "FAKEMACHINE", 1)
75 | 
76 |     assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb"
77 |     Path("tests/output/FAKEMACHINE.csv.gz").unlink()
78 | 


--------------------------------------------------------------------------------