├── .editorconfig ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── 0_Intro.md ├── 1_Tutorial.md ├── 2_Installation_and_requirements.md ├── 3_Architecture.md ├── 4_Configuration.md ├── 5_Existing_plugins.md ├── 6_Develop_your_own_plugin.md ├── 7_Licenses.md └── 8_FAQ.md ├── pyproject.toml ├── src └── orc2timeline │ ├── __init__.py │ ├── __main__.py │ ├── cli.py │ ├── conf │ └── Orc2Timeline.yaml │ ├── config.py │ ├── core.py │ ├── info.py │ ├── plugins │ ├── EventLogsToTimeline-eventmap.txt │ ├── EventLogsToTimeline.py │ ├── GenericToTimeline.py │ ├── I30InfoToTimeline.py │ ├── NTFSInfoToTimeline.py │ ├── RegistryToTimeline-important-keys.txt │ ├── RegistryToTimeline.py │ ├── USNInfoToTimeline.py │ └── __init__.py │ └── py.typed └── tests ├── __init__.py ├── conftest.py ├── data ├── conf_7_archives │ ├── ORC_Server_FAKEMACHINE_Detail.7z │ ├── ORC_Server_FAKEMACHINE_General.7z │ ├── ORC_Server_FAKEMACHINE_Little.7z │ ├── ORC_Server_FAKEMACHINE_Memory.7z │ └── ORC_Server_FAKEMACHINE_SAM.7z └── null_csv │ ├── ORC_Server_FAKEMACHINE_Detail.7z │ ├── ORC_Server_FAKEMACHINE_General.7z │ ├── ORC_Server_FAKEMACHINE_Little.7z │ ├── ORC_Server_FAKEMACHINE_Memory.7z │ └── ORC_Server_FAKEMACHINE_SAM.7z ├── output └── .gitignore ├── test_cli.py ├── test_config.py └── test_core.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # https://editorconfig.org/ 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | indent_size = 2 8 | indent_style = space 9 | 10 | [*.py] 11 | indent_size = 4 12 | max_line_length = 120 13 | insert_final_newline = true 14 | trim_trailing_whitespace = true 15 | 16 | [Dockerfile] 17 | indent_size = 4 18 | 19 | [*.{yml,yaml,toml,json,jsonc,jsonl,js,ts}] 20 | indent_size = 2 21 | insert_final_newline = true 22 | trim_trailing_whitespace = true 23 | 24 | [*.{bat,cmd,ps1}] 25 | end_of_line = crlf 26 | insert_final_newline = true 27 | trim_trailing_whitespace = true 28 | 29 | [*.{md,txt,rst}] 30 | insert_final_newline = true 31 | trim_trailing_whitespace = false 32 | 33 | [*.tsv] 34 | indent_style = tab 35 | 36 | [Makefile] 37 | indent_style = tab 38 | insert_final_newline = true 39 | trim_trailing_whitespace = true 40 | 41 | [LICENSE] 42 | insert_final_newline = false 43 | trim_trailing_whitespace = true 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # cached files 2 | __pycache__/ 3 | *.py[cod] 4 | .cache 5 | 6 | # installation package 7 | *.egg-info/ 8 | dist/ 9 | build/ 10 | 11 | # environments 12 | .env 13 | .venv 14 | env/ 15 | venv/ 16 | ENV/ 17 | env.bak/ 18 | venv.bak/ 19 | 20 | # pycharm 21 | .idea/ 22 | 23 | # vscode 24 | .vscode/ 25 | *.code-workspace 26 | 27 | # mypy 28 | .mypy_cache/ 29 | .dmypy.json 30 | dmypy.json 31 | mypy.ini 32 | 33 | # test caches 34 | .tox/ 35 | .pytest_cache/ 36 | .coverage 37 | htmlcov 38 | report.xml 39 | coverage.xml 40 | 41 | # Docs 42 | public/ 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include .editorconfig 2 | include .gitignore 3 | include LICENSE 4 | include Makefile 5 | include MANIFEST.in 6 | include pyproject.toml 7 | include README.rst 8 | include src/orc2timeline/py.typed 9 | recursive-include tests * 10 | recursive-include docs * 11 | recursive-exclude * __pycache__ 12 | recursive-exclude * *.py[co] 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # OS dependent configuration 3 | # ---------------------------------------------------------------------- 4 | 5 | VENV=venv/bin/ 6 | LIB=venv/Lib/site-packages/ 7 | MARKER=venv/marker 8 | EXE= 9 | ifeq ($(OS),Windows_NT) 10 | VENV=venv/Scripts/ 11 | LIB=venv/Lib/site-packages/ 12 | MARKER=venv/marker 13 | EXE=.exe 14 | endif 15 | 16 | 17 | # ---------------------------------------------------------------------- 18 | # Python interpreter detection 19 | # ---------------------------------------------------------------------- 20 | 21 | ARG_COMMAND="import sys;print(sys.version_info[:2]>=(3, 8))" 22 | 23 | ifeq (ok,$(shell test -e /dev/null 2>&1 && echo ok)) 24 | NULL_STDERR=2>/dev/null 25 | else 26 | NULL_STDERR=2>NUL 27 | endif 28 | 29 | ifndef PY 30 | 31 | ifndef _PY 32 | ifeq (True,$(shell py -3 -c $(ARG_COMMAND) $(NULL_STDERR))) 33 | _PY=py -3 34 | endif 35 | endif 36 | 37 | ifndef _PY 38 | ifeq (True,$(shell python3 -c $(ARG_COMMAND) $(NULL_STDERR))) 39 | _PY=python3 40 | endif 41 | endif 42 | 43 | ifndef _PY 44 | ifeq (True,$(shell python -c $(ARG_COMMAND) $(NULL_STDERR))) 45 | PY=python 46 | endif 47 | 48 | endif 49 | 50 | ifndef _PY 51 | $(error Could not detect Python 3.8 or greather interpreter automatically, please use PY environment variable.) 52 | endif 53 | 54 | PY=$(shell $(_PY) -c "import os,sys;print(sys.base_prefix.replace(os.sep,'/') + ('/python.exe' if os.name == 'nt' else '/bin/python3'))") 55 | 56 | endif 57 | 58 | ifneq (True,$(shell $(PY) -c $(ARG_COMMAND) $(NULL_STDERR))) 59 | $(error $(PY) is not a valid Python 3.8 or greather interpreter) 60 | endif 61 | 62 | # ---------------------------------------------------------------------- 63 | # Configuration 64 | # ---------------------------------------------------------------------- 65 | 66 | GIT=git 67 | PIP=$(PY) -m pip 68 | VENV_PY=$(VENV)python$(EXE) 69 | VENV_PIP=$(VENV)pip$(EXE) 70 | 71 | RM_GLOB := $(PY) -c "import shutil,sys,pathlib;[shutil.rmtree(sp, ignore_errors=False) if sp.is_dir() else sp.unlink() for p in sys.argv[1:]for sp in pathlib.Path().resolve().glob(p)]" 72 | BROWSER := $(PY) -c "import os,webbrowser,sys;from urllib.request import pathname2url;webbrowser.open('file:'+pathname2url(os.path.abspath(sys.argv[1])))" 73 | EXTRACT_HELP := $(PY) -c "import re,sys;m=[re.match(r'^([a-zA-Z_-]+):.*?\#\# (.*)$$',line)for line in sys.stdin];print('\n'.join('{:14} {}'.format(*g.groups())for g in m if g))" 74 | LS := $(PY) -c "import sys,os;print('\n'.join(os.listdir(os.path.abspath(sys.argv[1]))))" 75 | TOUCH := $(PY) -c "import sys;open(sys.argv[1],'ab')" 76 | 77 | TOX=$(VENV)tox$(EXE) 78 | SPHINX=$(VENV)sphinx-build$(EXE) 79 | COVERAGE=$(VENV)coverage$(EXE) 80 | TWINE=$(VENV)twine$(EXE) 81 | 82 | 83 | # ---------------------------------------------------------------------- 84 | # Automatic installation 85 | # ---------------------------------------------------------------------- 86 | 87 | .git: 88 | $(GIT) init 89 | $(GIT) add * 90 | $(GIT) commit -m "Initial commit" 91 | $(GIT) branch -M main 92 | 93 | $(MARKER): 94 | $(MAKE) clean 95 | $(MAKE) .git 96 | $(PIP) install virtualenv 97 | $(PY) -m virtualenv venv 98 | $(VENV_PIP) install 'setuptools>=62.0.0' 'pip>=21.3' 99 | $(VENV_PIP) install -e .[lint] 100 | 101 | $(TOUCH) $(MARKER) 102 | 103 | $(VENV): $(MARKER) 104 | 105 | $(VENV_PY): $(MARKER) 106 | 107 | $(VENV_PIP): $(MARKER) 108 | 109 | $(TOX): $(VENV_PIP) 110 | $(VENV_PIP) install -e .[tox] 111 | 112 | $(PRECOMMIT): $(VENV_PIP) 113 | 114 | $(COVERAGE): $(VENV_PIP) 115 | $(VENV_PIP) install -e .[cov] 116 | 117 | $(TWINE): $(VENV_PIP) 118 | $(VENV_PIP) install -e .[deploy] 119 | 120 | $(LIB)build: $(VENV_PIP) 121 | $(VENV_PIP) install -e .[build] 122 | 123 | 124 | # ---------------------------------------------------------------------- 125 | # Commands 126 | # ---------------------------------------------------------------------- 127 | 128 | .DEFAULT_GOAL := help 129 | 130 | .PHONY: clean 131 | clean: ## Remove all build, test, coverage, venv and Python artifacts. 132 | $(RM_GLOB) 'venv/*/python.?e?x?e?' 'venv' 'build/' 'dist/' 'public/' '.eggs/' '.tox/' '.coverage' 'htmlcov/' '.pytest_cache' '.mypy_cache' '.ruff_cache' '**/*.egg-info' '**/*.egg' '**/__pycache__' '**/*~' '**/*.pyc' '**/*.pyo' 133 | 134 | .PHONY: cov 135 | cov: $(TOX) ## Check code coverage. 136 | tox -e cov 137 | 138 | .PHONY: dist 139 | dist: clean $(LIB)build ## Builds source and wheel package. 140 | $(VENV_PY) -m build 141 | $(LS) dist/ 142 | 143 | .PHONY: format 144 | format: $(TOX) ## Format style with tox, ruff, black. 145 | $(TOX) -e format 146 | 147 | .PHONY: help 148 | help: ## Show current message. 149 | @$(EXTRACT_HELP) < $(MAKEFILE_LIST) 150 | 151 | .PHONY: install 152 | install: ## Install the package to the active Python's site-packages. 153 | $(PIP) install . 154 | 155 | .PHONY: lint 156 | lint: $(TOX) ## Check style with tox, ruff, black and mypy. 157 | $(TOX) -e lint 158 | 159 | .PHONY: open-cov 160 | open-cov: cov ## Open coverage report. 161 | $(BROWSER) htmlcov/index.html 162 | 163 | .PHONY: setup 164 | setup: clean $(VENV_PY) ## Create virtual environment. 165 | 166 | .PHONY: tests 167 | tests: $(TOX) ## Run unit and functional tests. 168 | $(TOX) -e tests 169 | 170 | .PHONY: tests-all 171 | tests-all: $(TOX) ## Run all tests in parallel (lint and tests). 172 | $(TOX) -p 173 | 174 | .PHONY: uninstall 175 | uninstall: ## Install the package to the active Python's site-packages. 176 | $(PIP) uninstall orc2timeline 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # orc2timeline 2 | 3 | **orc2timeline** stands for "ORC to timeline", ORC refers to [DFIR-ORC](https://github.com/DFIR-ORC/dfir-orc) which is a tool used to parse and collect critical **artefacts of a Windows system** during an **incident response**. 4 | 5 | orc2timeline can take one or several ORC as input and **generate one timeline per host**. 6 | 7 | ## Installation 8 | 9 | ``` 10 | git clone https://github.com/ANSSI-FR/orc2timeline.git 11 | cd orc2timeline 12 | pip install . 13 | ``` 14 | 15 | ## Examples 16 | 17 | Let us consider the following file tree: 18 | ``` 19 | $ tree ~ 20 | ~ 21 | └── Documents 22 | ├── ORC 23 | │   ├── DFIR-ORC_Server_ServerName.domain_Browsers.7z 24 | │   ├── DFIR-ORC_Server_ServerName.domain_Detail.7z 25 | │   ├── DFIR-ORC_Server_ServerName.domain_General.7z 26 | │   ├── DFIR-ORC_Server_ServerName.domain_Little.7z 27 | │   ├── DFIR-ORC_Server_ServerName.domain_Powershell.7z 28 | │   ├── DFIR-ORC_Server_ServerName.domain_SAM.7z 29 | │   └── DFIR-ORC_Workstation_MachineName.domain_Offline.7z 30 | └── output_directory 31 | 32 | 3 directories, 7 files 33 | ``` 34 | 35 | Process all the ORC contained in a directory (orc2timeline will infer hostname from file names and group files by host to process them): 36 | ``` 37 | $ orc2timeline --tmp-dir=/tmp process_dir -j 4 ~/Documents/ORC ~/Documents/output_directory 38 | ``` 39 | 40 | This command will create the following files: 41 | ``` 42 | ~ 43 | └── Documents 44 | └── output_directory 45 | ├── MachineName.domain.csv.gz 46 | └── ServerName.domain.csv.gz 47 | ``` 48 | 49 | ## Documentation 50 | 51 | A more detailed documentation is provided if needed : 52 | 53 | ### [Introduction](docs/0_Intro.md) 54 | ### [Tutorial](docs/1_Tutorial.md) 55 | ### [Installation and requirements](docs/2_Installation_and_requirements.md) 56 | ### [Architecture](docs/3_Architecture.md) 57 | ### [Configuration](docs/4_Configuration.md) 58 | ### [Existing plugins](docs/5_Existing_plugins.md) 59 | ### [Develop your own plugin](docs/6_Develop_your_own_plugin.md) 60 | ### [Licenses](docs/7_Licenses.md) 61 | ### [Frequently Asked Questions](docs/8_FAQ.md) 62 | 63 | -------------------------------------------------------------------------------- /docs/0_Intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | **orc2timeline** stands for "ORC to timeline", ORC refers to DFIR-ORC which is a tool used to parse and collect critical **artefacts of a Windows system** during an **incident response**. 4 | 5 | While DFIR-ORC allows to gather all the data needed to operate a successful incident response, no opensource tool was released to **help analyst to dissect archives that result of DFIR-ORC.exe** execution. 6 | 7 | As a reminder, in the following we will use the term ORC to refer to a set of archives that is the output of DFIR-ORC.exe for a single host. 8 | 9 | orc2timeline can take one or several ORC as input and generate one timeline per host. 10 | 11 | This means that **orc2timeline decompresses targeted files** contained in ORC archives, **parses them** to extract interesting information and creates one or many event for a given artefact. One event must contain a timestamp. A **timeline** will then be created, **sorted by date** and **compressed in gzip** format to allow forensics analysis. 12 | 13 | The **output timeline** is a **csv file** with the four following columns: 14 | - `Timestamp` (Time when the event occurred); 15 | - `Hostname` (Name of the host, this can be useful when merging two or more timelines); 16 | - `SourceType` (Type of event) ; 17 | - `Description` (Description and details about the event); 18 | - `SourceFile` (Original path of the artefact if it exists, path in ORC archive otherwise). 19 | 20 | orc2timeline can be run with a **list of file as input** and a **path to result file as output**. Files mentioned in input list must belong to the **same ORC run** (for a single host). 21 | 22 | To process multiple ORC, it is also possible to specify an **input directory**, it is then necessary to specify an **output directory**. The **list of hosts** to process will be **inferred** from the **recursive list of files** in the given directory. For now orc2timeline can not process a directory if two ORC of the same host are in different subdirectories. The **subtree** of the input directory will be **reproduced** in the output directory. One output file per host will be created in the given output directory. 23 | 24 | Since artefact processing can be **time and resource consuming**, orc2timeline was designed to run on **multiple threads**. The usage of orc2timeline **could cause disk space or RAM exhaustion**, therefore testing its impact in your own environment is necessary and it **should not be run in a critical production environment**. 25 | 26 | The goal of orc2timeline is the provide a framework that **knows how to extract specific pieces of data from an ORC collection** and create at least one event from it. **Plugins rely on external dependencies, that are deliberately not redevelop.** 27 | -------------------------------------------------------------------------------- /docs/1_Tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorials 2 | 3 | Let us consider the following file tree: 4 | ``` 5 | $ tree ~ 6 | ~ 7 | └── Documents 8 | ├── ORC 9 | │   ├── DFIR-ORC_Server_ServerName.domain_Browsers.7z 10 | │   ├── DFIR-ORC_Server_ServerName.domain_Detail.7z 11 | │   ├── DFIR-ORC_Server_ServerName.domain_General.7z 12 | │   ├── DFIR-ORC_Server_ServerName.domain_Little.7z 13 | │   ├── DFIR-ORC_Server_ServerName.domain_Powershell.7z 14 | │   ├── DFIR-ORC_Server_ServerName.domain_SAM.7z 15 | │   └── DFIR-ORC_Workstation_MachineName.domain_Offline.7z 16 | └── output_directory 17 | 18 | 3 directories, 7 files 19 | ``` 20 | 21 | ## Process a single ORC 22 | 23 | #### Process one ORC (input files must belong to the same execution of DFIR-ORC.exe for a single host): 24 | ``` 25 | $ orc2timeline process Documents/ORC/DFIR-ORC_Server_ServerName.domain_Powershell.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_Little.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_Browsers.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_General.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_Detail.7z Documents/ORC/DFIR-ORC_Server_ServerName.domain_SAM.7z Documents/output_directory/ServerName.domain.csv.gz 26 | ``` 27 | 28 | or 29 | 30 | ``` 31 | $ orc2timeline process ~/Documents/ORC/DFIR-ORC_Server_ServerName.domain_*.7z ~/Documents/output_directory/ServerName.domain.csv.gz 32 | ``` 33 | 34 | If you try to process archives that do not belong to the same host, an exception will be raised and program will exit: 35 | ``` 36 | $ orc2timeline process --overwrite Documents/ORC/DFIR-ORC_* Documents/output_directory/ServerName.domain.csv.gz 37 | [2012-12-21 23:59:59,999] WARNING - --jobs option was not given, thus only one thread will be used. Therefore processing could take a while. 38 | Usage: orc2timeline process [OPTIONS] [FILE_LIST]... OUTPUT_PATH 39 | Try 'orc2timeline process --help' for help. 40 | 41 | Error: Invalid value: Bad file list, all files must belong to the same host. Parsed hosts : {'ServerName.domain', 'MachineName.domain'} 42 | ``` 43 | 44 | #### Use multiple threads 45 | 46 | Use 4 threads to process one ORC, overwrite output file if it already exists, use ~/temp as temporary directory: 47 | ``` 48 | $ TMPDIR=~/temp orc2timeline process -j 4 --overwrite Documents/ORC/DFIR-ORC_Server_ServerName.domain_* Documents/output_directory/ServerName.domain.csv.gz 49 | ``` 50 | 51 | ## Process many ORC with a single command 52 | 53 | Process all the ORC contained in a directory (orc2timeline will infer hostname from file names and group files by host to process them): 54 | ``` 55 | $ orc2timeline --tmp-dir=/tmp/data process_dir -j 4 ~/Documents/ORC ~/Documents/output_directory 56 | ``` 57 | 58 | This previous command will create the following files: 59 | ``` 60 | ~ 61 | └── Documents 62 | └── output_directory 63 | ├── MachineName.domain.csv.gz 64 | └── ServerName.domain.csv.gz 65 | ``` 66 | 67 | ## Show configuration 68 | 69 | Command that show the path to configuration file : 70 | ``` 71 | orc2timeline show_conf_file 72 | ``` 73 | 74 | Command that shows the configuration (content of configuration file) that will be used : 75 | ``` 76 | orc2timeline show_conf 77 | ``` 78 | 79 | **NB** : if you want to run orc2timeline with a custom configuration, you **must** modify the configuration file inplace, there is no way to give a custom path to another configuration file. 80 | 81 | ## Command that combine a lot of options 82 | 83 | This command will process all the ORC contained in `./Documents/ORC` and write the timelines in `./Documents/`. Four threads will be used, if a timeline already exists it will be overwritten. `/tmp/data/` will be used as temporary directory and output log level is DEBUG (maximum value). 84 | ``` 85 | $ orc2timeline --log-level=DEBUG --tmp-dir=/tmp/data process_dir --overwrite -j 4 ./Documents/ORC ./Documents/output_directory 86 | ``` 87 | -------------------------------------------------------------------------------- /docs/2_Installation_and_requirements.md: -------------------------------------------------------------------------------- 1 | # Installation and requirements 2 | 3 | orc2timeline requires **python3** and **python3-pip**. On Debian based distribution, these packages can be installed like this: 4 | ``` 5 | apt update && apt install python3 python3-pip 6 | ``` 7 | 8 | Make sure that the **latest version of pip** is installed, if not, it can be upgraded with the following command: 9 | ``` 10 | pip install --upgrade pip 11 | ``` 12 | 13 | orc2timeline can be installed **system-wide** or in a **virtual environment** (to avoid dependency issues) like any other python project. After cloning the repository with git, just run the `pip install .` command. This should download and install dependencies described in `pyproject.toml` file, after that the command `orc2timeline` should be in your path. 14 | 15 | Supported and tested Operating Systems are: 16 | - Debian 11 17 | - Debian 12 18 | - Ubuntu 20.04 19 | - Ubuntu 22.04 20 | - Ubuntu 24.04 21 | 22 | If an error occurs while using orc2timeline with one of these OS, feel free to **create an issue or a pull-request**. 23 | 24 | If your favorite OS is not in the list, do not give up, it just means that it has not been tested **yet**. 25 | 26 | ## Installation without a virtual environment: 27 | 28 | ``` 29 | git clone https://github.com/ANSSI-FR/orc2timeline.git 30 | cd orc2timeline 31 | pip install . 32 | ``` 33 | 34 | ## Installation with a virtual environment generated with virtualenv tool: 35 | 36 | ``` 37 | git clone https://github.com/ANSSI-FR/orc2timeline.git 38 | cd orc2timeline 39 | virtualenv -p python3 venv 40 | source venv/bin/activate 41 | pip install . 42 | ``` 43 | 44 | ## View and edit dependencies for debugging or developing purposes 45 | 46 | If you want to know or edit the dependencies, they can be found in `pyproject.toml` file, in the "dependencies" section. 47 | ``` 48 | [...] 49 | 50 | dependencies = [ # Duplicate in pre-commit-config.yaml 51 | "click>=8.1.0", 52 | "dateparser==1.2.0", 53 | "py7zr==0.21.0", 54 | "libevtx-python==20240204", 55 | "libesedb-python==20240420", 56 | "dfwinreg==20240229", 57 | "six==1.16.0", 58 | "python-registry==1.3.1", 59 | "pytz==2024.1", 60 | "pyyaml==6.0.1", 61 | ] 62 | 63 | [...] 64 | ``` 65 | -------------------------------------------------------------------------------- /docs/3_Architecture.md: -------------------------------------------------------------------------------- 1 | # Architecture 2 | 3 | ## Language 4 | 5 | orc2timeline is written in **python (version 3)**. Since the goal of this tool is **to rely on external dependencies** to parse artefacts, it seemed relevant to choose a **widely adopted language** to take advantage of the **large amount of libraries** available. 6 | 7 | Moreover, considering the adoption of python it seems perfect to **ease maintenance and evolutions** of the project. 8 | 9 | ## Plugin 10 | 11 | orc2timeline works with plugins. This means that when launched, orc2timeline will **read configuration** to know the **list of plugins** (and the configuration for every plugin) to run. After that list is built, every plugin instance (there **can be several plugin instances for one plugin**) will be run using **all the available threads** given. 12 | 13 | Each plugin writes a temporary intermediate file that contains an extract of the final timeline (csv file ordered by date). Once all the plugins are executed, all the **plugin timelines for a given host are consolidated into a final host timeline**. All lines are **deduplicated and sorted by date**. During this consolidation, **one thread can be used per host** treated. 14 | -------------------------------------------------------------------------------- /docs/4_Configuration.md: -------------------------------------------------------------------------------- 1 | # Configuration file 2 | 3 | ### Introduction 4 | 5 | Depending on the **Orc configuration** you use, you may have to **customize the configuration**. The given configuration works with the configuration of DFIR-ORC that is published on [GitHub](https://github.com/DFIR-ORC/dfir-orc-config). 6 | 7 | The configuration file is a **yaml file** that is read every time orc2timeline is run during the preliminary phase of the execution. The file can be modified, but **must stay inplace**. To know the path where to find this file the following command can be used : `orc2timeline show_conf_file`. To validate the modifications, `orc2timeline show_conf` command can be used to view the configuration that will be used. 8 | 9 | ### Explanations 10 | 11 | The following snippet of the configuration file will be explained: 12 | ``` 13 | Plugins: 14 | - EventLogsToTimeline: 15 | archives: ["General", "Little"] 16 | sub_archives: ["Event.7z", "Event_Little.7z"] 17 | match_pattern: ".*evtx.*" 18 | sourcetype: "Event" 19 | 20 | - NewPlugin: 21 | [...] 22 | ``` 23 | 24 | The file begins with the keyword `Plugins`, it contains a list of plugin. In this example `EventLogsToTimeline` is configured, it means that `src/orc2timeline/plugins/EventLogsToTimeline.py` file will be loaded (a complete guide to write a plugin exists [here](6_Develop_your_own_plugin.md)). 25 | 26 | The plugin has **four attributes**: 27 | - `archives`: list of archive types to dissect (example: from `General`, the `DFIR-ORC_Server_MACHINENAME_General.7z` will be used); 28 | - `sub_archives`: list of archives to decompress from the primary archive (the final artefacts are inside these sub\_archives), if the files are directly contained in the primary archive this attribute **can be omitted**; 29 | - `match_pattern`: regex pattern used to filter which files must be processed; 30 | - `sourcetype`: string that will be used for the column SourceType for this plugin. 31 | 32 | All the combinations between `archives` and `sub_archives` will be used to create plugin instances. With the previous example, the following instances will be created: 33 | - `EventLogsToTimeline(archives="General", sub_archives="Event.7z", ...)`; 34 | - `EventLogsToTimeline(archives="General", sub_archives="Event_Little.7z", ...)`; 35 | - `EventLogsToTimeline(archives="Little", sub_archives="Event.7z", ...)`; 36 | - `EventLogsToTimeline(archives="Little", sub_archives="Event_Little.7z", ...)`. 37 | 38 | Considering the following layout: 39 | 40 | ``` 41 | DFIR-ORC_Server_MACHINENAME_General.7z 42 | ├── Event.7z 43 | │   └── file1.evtx 44 | └── Other.7z 45 | └── file5.evtx 46 | DFIR-ORC_Server_MACHINENAME_Little.7z 47 | ├── Event.7z 48 | │   └── file2.evtx 49 | └── Event_Little.7z 50 | ├── file3.evtx 51 | └── file4.evt 52 | ``` 53 | 54 | The files `file1.evtx`, `file2.evtx`, `file3.evtx` will be processed. `file4.evt` will not be processed because it does not match the `match_pattern`, `file5.evtx` will not be processed because `Other.7z` is not mentioned in the `sub_archives` list. 55 | 56 | The **same plugin can be described many times** in the configuration file. The following snippet or configuration is equivalent to the previous one: 57 | ``` 58 | Plugins: 59 | - EventLogsToTimeline: 60 | archives: ["General"] 61 | sub_archives: ["Event.7z", "Event_Little.7z"] 62 | match_pattern: ".*evtx.*" 63 | sourcetype: "Event" 64 | 65 | - EventLogsToTimeline: 66 | archives: ["Little"] 67 | sub_archives: ["Event.7z", "Event_Little.7z"] 68 | match_pattern: ".*evtx.*" 69 | sourcetype: "Event" 70 | 71 | - NewPlugin: 72 | [...] 73 | ``` 74 | 75 | **Warning !** The following snippet is **NOT** equivalent to the first one: 76 | ``` 77 | Plugins: 78 | - EventLogsToTimeline: 79 | archives: ["General"] 80 | sub_archives: ["Event.7z", "Event_Little.7z"] 81 | match_pattern: ".*evtx.*" 82 | sourcetype: "Event" 83 | 84 | - EventLogsToTimeline: 85 | archives: ["Little"] 86 | sub_archives: ["Event_Little.7z"] 87 | match_pattern: ".*evtx.*" 88 | sourcetype: "Event" 89 | 90 | - NewPlugin: 91 | [...] 92 | ``` 93 | 94 | Because `file2.evtx` from precedent example **would not be parsed anymore**. 95 | 96 | ### Syntactic sugar 97 | 98 | For **readability** purpose, it may be useful to split the configuration of a plugin in two **distinct** configuration specifications. For example, the Offline configuration could be detached from the "live" configuration. The two following snippets are equivalent : 99 | 100 | **All in one** configuration: 101 | ``` 102 | [...] 103 | - EventLogsToTimeline: 104 | archives: ["General", "Little", "Offline"] 105 | sub_archives: ["Event.7z"] 106 | match_pattern: ".*evtx.*" 107 | sourcetype: "Event" 108 | [...] 109 | ``` 110 | 111 | **Two-piece** configuration: 112 | ``` 113 | [...] 114 | - EventLogsToTimeline: 115 | archives: ["General", "Little"] 116 | sub_archives: ["Event.7z"] 117 | match_pattern: ".*evtx.*" 118 | sourcetype: "Event" 119 | 120 | - EventLogsToTimeline: 121 | archives: ["Offline"] 122 | sub_archives: ["Event.7z"] 123 | match_pattern: ".*evtx.*" 124 | sourcetype: "Event" 125 | [...] 126 | ``` 127 | 128 | ### One configuration to rule them all. 129 | 130 | orc2timeline's configuration allows the user to **set multiple DFIR-ORC configurations in the same file**. As long as parameters are **narrow enough** and the two configurations **do not conflict** with each other, they can live in the same file. 131 | 132 | Of course this will result in multiple plugin instances that will not match any artefact, but this should not deteriorate performance and the final result will remain valid. 133 | -------------------------------------------------------------------------------- /docs/5_Existing_plugins.md: -------------------------------------------------------------------------------- 1 | # Existing plugins 2 | 3 | orc2timeline is designed to work with plugins. Plugin files are located in `src/orc2timeline/plugins/` directory, one file per plugin. 4 | 5 | **One plugin** is meant to process **one type of artefact** collected by DFIR-ORC. The location of these artefacts **must be predictable**, so that the plugin can efficiently extract it from the archives. 6 | 7 | Plugins may be divided in two categories: DFIR-ORC-artefact plugins and Windows-artefact plugins. 8 | 9 | ## DFIR-ORC-artefact plugins 10 | 11 | These plugins are meant to process files that are **generated during DFIR-ORC execution**. Those files are not actual artefacts but **the result of DFIR-ORC parsers**, they gather information that are very relevant for forensics analysis. 12 | 13 | ### NTFSInfoToTimeline plugin 14 | 15 | This plugin processes files located in: 16 | - the `Little` archive, inside `NTFSInfo_detail.7z`; 17 | - the `General` archive, inside `NTFSInfo_quick.7z`; 18 | - the `Detail` archive, inside `NTFSInfo_detail.7z`; 19 | - the `Offline` archive, inside `NTFSInfo_detail.7z`. 20 | 21 | The treated csv file should be the result of DFIR-ORC's NTFSInfo command. 22 | 23 | Configuration snippet: 24 | ``` 25 | [...] 26 | - NTFSInfoToTimeline: 27 | archives: ["Detail", "General", "Little", "Offline"] 28 | sub_archives: ["NTFSInfo_detail.7z", "NTFSInfo_quick.7z"] 29 | match_pattern: "^.*NTFSInfo[^/]*\\.csv$" 30 | sourcetype: "MFT" 31 | [...] 32 | ``` 33 | 34 | For each entry in this csv file, one event is created per file and per different timestamp. This means that events with the same file\_path and timestamp will be grouped in a single event. 35 | 36 | Output example: 37 | ``` 38 | 2021-01-05 10:35:26.012,FAKEMACHINE,MFT,$SI: .A.B - $FN: MACB - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Bits-Client%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv 39 | 2021-01-05 10:35:26.996,FAKEMACHINE,MFT,$SI: .A.B - $FN: MACB - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Diagnosis-DPS%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv 40 | 2022-10-24 01:48:19.929,FAKEMACHINE,MFT,$SI: M.C. - $FN: .... - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Diagnosis-DPS%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv 41 | 2022-10-24 14:12:54.482,FAKEMACHINE,MFT,$SI: M.C. - $FN: .... - Name: \Windows\System32\winevt\Logs\Microsoft-Windows-Bits-Client%4Operational.evtx - Size in bytes: 69632,NTFSInfo_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv 42 | ``` 43 | 44 | ### I30InfoToTimeline plugin 45 | 46 | This plugin processes files located in: 47 | - the `Detail` archive, inside `NTFSInfo_i30Info.7z`; 48 | - the `Offline` archive, inside `NTFSInfo_i30Info.7z`. 49 | 50 | The treated csv file should be the result of DFIR-ORC's NTFSInfo with `/i30info` argument. 51 | 52 | Configuration snippet: 53 | ``` 54 | [...] 55 | - I30InfoToTimeline: 56 | archives: ["Detail", "Offline"] 57 | sub_archives: ["NTFSInfo_i30Info.7z"] 58 | match_pattern: "^I30Info.*\\.csv$" 59 | sourcetype: "I30" 60 | [...] 61 | ``` 62 | 63 | For each entry in this csv file, one event is created per file and per different timestamp. This means that events with the same file\_path and timestamp will be grouped in a single event. 64 | 65 | Output example: 66 | ``` 67 | 2009-07-14 03:20:08.961,FAKEMACHINE,I30,Entry in slackspace - $FN: ...B - Name: Windows - MFT segment num: 379 - Parent FRN: 0x0005000000000005 ,I30Info_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv 68 | 2021-01-05 19:24:19.796,FAKEMACHINE,I30,Entry in slackspace - $FN: MACB - Name: WinPEpge.sys - MFT segment num: 54 - Parent FRN: 0x0005000000000005 ,I30Info_00000000_DiskInterface_0xc87c5cca7c5cb542_.csv 69 | 2021-01-05 19:24:33.593,FAKEMACHINE,I30,Entry in slackspace - $FN: MAC. - Name: Windows 70 | ``` 71 | 72 | ### USNInfoToTimeline plugin 73 | 74 | This plugin processes files located in: 75 | - the `Little` archive, inside `USNInfo.7z`; 76 | - the `Detail` archive, inside `USNInfo.7z`; 77 | - the `Offline` archive inside `USNInfo.7z`. 78 | 79 | The treated csv file should be the result of DFIR-ORC's USNInfo command. 80 | 81 | Configuration snippet: 82 | ``` 83 | [...] 84 | - USNInfoToTimeline: 85 | archives: ["Detail", "Little", "Offline"] 86 | sub_archives: ["USNInfo.7z"] 87 | match_pattern: "^USNInfo.*\\.csv$" 88 | sourcetype: "USN journal" 89 | [...] 90 | ``` 91 | 92 | For each entry in this csv file, one event is created per file and per different timestamp. This means that events with the same file\_path and timestamp will be grouped in a single event. 93 | 94 | Output example: 95 | ``` 96 | 2023-11-30 16:12:58.609,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - CLOSE|DATA_EXTEND|DATA_OVERWRITE|DATA_TRUNCATION|FILE_CREATE|SECURITY_CHANGE - MFT segment num : 77487,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv 97 | 2023-11-30 16:12:58.609,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - CLOSE|FILE_DELETE - MFT segment num : 77487,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv 98 | 2023-11-30 16:17:52.133,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - FILE_CREATE - MFT segment num : 2259,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv 99 | 2023-11-30 16:17:52.242,W11-22H2U,USN journal,\ProgramData\Microsoft\Windows Defender\Scans\mpenginedb.db-wal - DATA_EXTEND|FILE_CREATE - MFT segment num : 2259,USNInfo_00000000_DiskInterface_0x48f2eac0f2eab0fc_.csv 100 | ``` 101 | 102 | ## Windows-artefact plugins 103 | 104 | DFIR-ORC collects files that may help DFIR analysis. Extracting the relevant pieces of data out of those files can be tricky since they are not meant to be parsed, and can be in proprietary format. orc2timeline **relies on opensource parsers**, the choice was made not to redevelop all the parsers and **take advantage of existing libraries**. 105 | 106 | The plugins to parse Registry Hives and Event Logs are released. Many more could be developed for processing other types of artefacts such as LNK files, Jumplists... Developing these plugins is left as an exercise to the reader (contributions are welcome). 107 | 108 | 109 | ### RegistryToTimeline plugin 110 | 111 | This plugin processes registry hives, it creates one event per registry key, the last modification date of the key is used as a timestamp. 112 | 113 | The file named `RegistryToTimeline-important-keys.txt` allows to specify keys for which an event will be printed in the final timeline for each key value. The **key path must be exact**, regex are not supported. 114 | 115 | For more sophisticated treatments on key paths or key values, a new plugin must be developed. This new plugin could inherit `RegistryToTimeline` to benefit from existing functions. 116 | 117 | This plugin processes files located in: 118 | - the `Little` archive, inside `SystemHives_little.7z`; 119 | - the `Detail` archive, inside `SystemHives.7z` and `UserHives.7z`; 120 | - the `SAM` archive, inside `SAM.7z`; 121 | - the `Offline` archive inside `SystemHives.7z`, `UserHives.7z`, `SAM.7z`. 122 | 123 | Configuration snippet: 124 | ``` 125 | [...] 126 | - RegistryToTimeline: 127 | archives: ["SAM", "Little", "Detail", "Offline"] 128 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 129 | match_pattern: ".*data$" 130 | sourcetype: "Registry" 131 | [...] 132 | ``` 133 | 134 | Output example: 135 | ``` 136 | 2009-07-14 04:49:35.659,FAKEMACHINE,Registry,HKEY_CURRENT_USER\Environment,\Windows\ServiceProfiles\LocalService\NTUSER.DAT 137 | 2009-07-14 04:49:35.659,FAKEMACHINE,Registry,KeyPath: HKEY_CURRENT_USER\Environment - KeyName: TEMP - KeyType: RegExpandSZ - KeyValue: %USERPROFILE%\AppData\Local\Temp,\Windows\ServiceProfiles\LocalService\NTUSER.DAT 138 | 2009-07-14 04:49:35.659,FAKEMACHINE,Registry,KeyPath: HKEY_CURRENT_USER\Environment - KeyName: TMP - KeyType: RegExpandSZ - KeyValue: %USERPROFILE%\AppData\Local\Temp,\Windows\ServiceProfiles\LocalService\NTUSER.DAT 139 | 2009-07-14 04:49:35.674,FAKEMACHINE,Registry,HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Winlogon,\Windows\ServiceProfiles\LocalService\NTUSER.DAT 140 | 2009-07-14 04:49:35.674,FAKEMACHINE,Registry,KeyPath: HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Winlogon - KeyName: ExcludeProfileDirs - KeyType: RegSZ - KeyValue: AppData\Local;AppData\LocalLow;$Recycle.Bin,\Windows\ServiceProfiles\LocalService\NTUSER.DAT 141 | ``` 142 | 143 | 144 | ### EventLogsToTimeline plugin 145 | 146 | This plugin processes Windows log events, for each `evtx` file, this plugin parses all the events to create one line per event in the final timeline. 147 | 148 | The file `EventLogsToTimeline-eventmap.txt` allows the analyst to specify tuples (Channel/Event ID) for which events description will be prefixed with a custom string. 149 | 150 | This plugin processes files located in: 151 | - the `General` archive, inside Event.7z; 152 | - the `Little` archive, inside Event.7z; 153 | - the `Offline` archive, inside Event.7z. 154 | 155 | Configuration snippet: 156 | ``` 157 | [...] 158 | - EventLogsToTimeline: 159 | archives: ["General", "Little", "Offline"] 160 | sub_archives: ["Event.7z"] 161 | match_pattern: ".*evtx.*" 162 | sourcetype: "Event" 163 | [...] 164 | ``` 165 | 166 | Output example: 167 | ``` 168 | 2021-02-12 15:56:30.372,FAKEMACHINE,Event,Microsoft-Windows-Servicing:1 S-1-5-18 (KBWUClient-SelfUpdate-Aux Staged Installed WindowsUpdateAgent),\Windows\System32\winevt\Logs\Setup.evtx 169 | 2021-02-12 15:56:32.512,FAKEMACHINE,Event,Microsoft-Windows-Servicing:4 S-1-5-18 (KBWUClient-SelfUpdate-Aux Installed 0x0 WindowsUpdateAgent),\Windows\System32\winevt\Logs\Setup.evtx 170 | 2022-10-24 01:46:29.681,FAKEMACHINE,Event,Microsoft-Windows-Servicing:2 S-1-5-18 (KBWUClient-SelfUpdate-Aux Installed 0x0 WindowsUpdateAgent),\Windows\System32\winevt\Logs\Setup.evtx 171 | ``` 172 | -------------------------------------------------------------------------------- /docs/6_Develop_your_own_plugin.md: -------------------------------------------------------------------------------- 1 | # Develop your own plugin 2 | 3 | orc2timeline works with plugins in order to ease features integration. Therefore, adding the parsing of an artefact can be done by modifying only two files. First the plugin file must be created, then the plugin configuration must be appended to the configuration file. 4 | 5 | ## MyPlugin.py 6 | 7 | ### File path and file name 8 | 9 | The first file **must** be named by the plugin name: if your plugin will process LNK files, it could be named `LNKToTimeline`, therefore the file will be named `LNKToTimeline.py`. 10 | 11 | The location of this file must be `/src/orc2timeline/plugins/LNKToTimeline.py`. 12 | 13 | In the following example, we assume that we have a very convenient library named `magic_lnk_library` that contains all the functions and class we need to parse lnk files. 14 | 15 | ## One plugin equals one class 16 | 17 | ### GenericToTimeline 18 | 19 | This file, is a python module that can contain multiple classes. This module **must contain a class that is named after the file name**, this class **must inherit** from `GenericToTimeline`. 20 | 21 | `GenericToTimeline` is a module that contains two classes: 22 | - `Event` (describes an event that represents one line in the final timeline); 23 | - `GenericToTimeline` (implements a collection of functions that will be useful during the plugin development). 24 | 25 | Example: 26 | ``` 27 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 28 | 29 | class LNKToTimeline(GenericToTimeline): 30 | def __init__( 31 | self, 32 | config: PluginConfig, 33 | orclist: list[str], 34 | output_file_path: str, 35 | hostname: str, 36 | tmp_dir: str, 37 | lock: Lock, 38 | ) -> None: 39 | """Construct.""" 40 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 41 | 42 | ``` 43 | 44 | ### Event 45 | 46 | As stated above, `Event` class **must** be used to add an event to the final timeline. It is a very simple class but all the attributes of this class must be completed. 47 | 48 | The event object **must** be added with the function `_add_event` of the class `GenericToTimeline`. 49 | 50 | How to add an event : 51 | ``` 52 | event = Event() 53 | event.description = "Good description" 54 | event.timestamp = datetime.now() 55 | # the following line could replace the previous line 56 | # event.timestamp_str = "2012-12-21 23:59:59.999" 57 | event.source = "/path/to/artefact" 58 | self._add_event(event) 59 | ``` 60 | 61 | ## Helpful and mandatory functions 62 | 63 | One function of your class that is **absolutely mandatory to override** is `_parse_artefact`, because the original one does **nothing**. 64 | 65 | Another function that **must** be called is `_add_event`, it take an Event as argument, and **adds it to the final timeline**. 66 | 67 | Based on the configuration, the artefact files will be extracted accordingly to GenericToTimeline's mechanisms. These files will then be passed one by one as argument to the function `_parse_artefact`. 68 | 69 | `self._get_original_path` can be used to retrieve the path of the artefact as it was on the original filesystem. If an error occurs, this function returns the path inside the archive instead. 70 | 71 | Example: 72 | ``` 73 | def _parse_artefact(self, artefact: Path) -> None: 74 | timestamp = magic_lnk_library.get_relevant_timestamp_from_file(artefact) 75 | source=self._get_original_path(artefact) 76 | description = magic_lnk_library.get_relevant_description_from_file(artefact) 77 | 78 | event = Event( 79 | timestamp=timestamp, 80 | source=source, 81 | description=description, 82 | ) 83 | 84 | self._add_event(event) 85 | ``` 86 | 87 | ## File header filter 88 | 89 | In your plugin class (LNKToTimeline in our example), it is possible to add an optional attribute called `file_header`. It is a byte array that is an additional filter on files that should be processed. 90 | 91 | If the file header matches the byte array, it will be processed, otherwise the file will be ignored. 92 | 93 | For our example, LNK files begin with the length of the header (0x4c) followed by the GUID {00021401-0000-0000-c000-000000000046}. Therefore, the header of the LNK files is `4c00 0000 0114 0200 0000 0000 c000 0000 0000 0046`. 94 | 95 | Example: 96 | ``` 97 | self.file_header = bytes([0x4c, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46]) 98 | ``` 99 | 100 | ## Plugin configuration 101 | 102 | DFIR-ORC configuration analysis shows that lnk files are collected: 103 | - in `General` archive in `Artefacts.7z`; 104 | - in `Offline` archive in `Artefacts.7z`. 105 | 106 | All the collected files contain `lnk` is their names. 107 | 108 | We could add the following snippet to orc2timeline's configuration: 109 | ``` 110 | - LNKToTimeline: 111 | archives: ["General", "Offline"] 112 | sub_archives: ["Artefacts.7z"] 113 | match_pattern: "^.*lnk.*$" 114 | sourcetype: "LNK" 115 | ``` 116 | 117 | ## Final example 118 | 119 | Considering all the above, here is the final result of our example plugin. 120 | 121 | ### LNKToTimeline.py 122 | ``` 123 | ####################################### 124 | # Following lines are only for typing # 125 | ####################################### 126 | """Plugin to parse LNK files.""" 127 | from __future__ import annotations 128 | 129 | from typing import TYPE_CHECKING 130 | 131 | if TYPE_CHECKING: 132 | from pathlib import Path 133 | from threading import Lock 134 | 135 | from orc2timeline.config import PluginConfig 136 | ####################################### 137 | 138 | 139 | import magic_lnk_library 140 | 141 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 142 | 143 | 144 | class LNKToTimeline(GenericToTimeline): 145 | def __init__( 146 | self, 147 | config: PluginConfig, 148 | orclist: list[str], 149 | output_file_path: str, 150 | hostname: str, 151 | tmp_dir: str, 152 | lock: Lock, 153 | ) -> None: 154 | """Construct.""" 155 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 156 | 157 | def _parse_artefact(self, artefact: Path) -> None: 158 | timestamp = magic_lnk_library.get_relevant_timestamp_from_file(artefact) 159 | source = self._get_original_path(artefact) 160 | description = magic_lnk_library.get_relevant_description_from_file(artefact) 161 | 162 | event = Event( 163 | timestamp=timestamp, 164 | source=source, 165 | description=description, 166 | ) 167 | 168 | self._add_event(event) 169 | 170 | ``` 171 | 172 | -------------------------------------------------------------------------------- /docs/7_Licenses.md: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | 167 | -------------------------------------------------------------------------------- /docs/8_FAQ.md: -------------------------------------------------------------------------------- 1 | # Frequently asked questions 2 | 3 | ### Processing one ORC takes a long time, is it normal ? 4 | 5 | Yes ! Parsing a large amount of data takes time. 6 | 7 | Processing an Offline ORC of 1,7G on a laptop with i5 CPU (1.60GHz) with a single thread takes 12 minutes. When using 4 threads on the same laptop, it takes less than 8 minutes. 8 | 9 | Processing an ORC of 500M can take 20 minutes when using 4 threads. 10 | 11 | ### Why does doubling the number of threads not halve the time of processing by two ? 12 | 13 | The processing can be divided in two parts. The first part is plugin execution, it ends only when the last plugin instance reaches its end. Only after that, orc2timeline begins to merge plugin timelines into final timelines. 14 | 15 | If a plugin instance takes significantly longer, it will have an impact on orc2timeline execution time. 16 | 17 | Nevertheless, it is worth mentioning that the more ORC are processed in parallel, the more effective orc2timeline will be. 18 | 19 | Do not hesitate to run orc2timeline against a directory with a large number of ORC in it. 20 | 21 | ### My laptop freezes while running orc2timeline ? 22 | 23 | orc2timeline can slow down your laptop. There can be two reasons that may explain this behavior. First orc2timeline may cause RAM exhaustion, second orc2timeline may use too much CPU. 24 | 25 | You can choose to use less threads, orc2timeline should then use less CPU and memory. 26 | 27 | Concerning memory consumption, another adjustment may be useful: you could decrease the size of chunks (line 198 of file GenericToTimeline.py), orc2timeline will use less memory, but will be less effective. 28 | 29 | ### My disk is out of space ? 30 | 31 | orc2timeline writes a lot of things on the disk, it may require a large amount of space. `TMP_DIR` global variable can be used to specify a directory to write temporary files to. The option `--tmp-directory` has the same effect. 32 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------- 2 | # Packaging 3 | # https://packaging.python.org/en/latest/tutorials/packaging-projects 4 | # ---------------------------------------------------------------------- 5 | 6 | [build-system] 7 | requires = ["setuptools>=67.0.0", "wheel"] 8 | build-backend = "setuptools.build_meta" 9 | 10 | [project] 11 | name = "orc2timeline" 12 | description = "Generate a timeline from list of Orc files" 13 | authors = [ 14 | {name = "Berenger Foucher", email = "berenger.foucher@ssi.gouv.fr" } 15 | ] 16 | maintainers = [ 17 | {name = "Berenger Foucher", email = "berenger.foucher@ssi.gouv.fr" } 18 | ] 19 | dependencies = [ 20 | "click>=8.1.0", 21 | "dateparser==1.2.1", 22 | "py7zr==0.22.0", 23 | "libevtx-python==20240504", 24 | "libesedb-python==20240420", 25 | "dfwinreg==20240229", 26 | "six==1.17.0", 27 | "pytz==2025.2", 28 | ] 29 | 30 | readme = "README.rst" 31 | requires-python = ">=3.8" 32 | keywords = ["python"] 33 | license = {file = "LICENSE"} 34 | 35 | # See https://pypi.org/classifiers/ 36 | classifiers = [ 37 | 38 | #"Development Status :: 1 - Planning", 39 | #"Development Status :: 2 - Pre-Alpha", 40 | "Development Status :: 3 - Alpha", 41 | #"Development Status :: 4 - Beta", 42 | #"Development Status :: 5 - Production/Stable", 43 | #"Development Status :: 6 - Mature", 44 | #"Development Status :: 7 - Inactive", 45 | 46 | # Default usage 47 | "Environment :: Console", 48 | 49 | # Framework used 50 | "Framework :: Pytest", 51 | "Framework :: Sphinx", 52 | "Framework :: tox", 53 | 54 | # Indicate who your project is intended for 55 | "Intended Audience :: Developers", 56 | "Intended Audience :: System Administrators", 57 | 58 | # Target OS 59 | "Operating System :: OS Independent", 60 | 61 | # Version available for this project 62 | "Programming Language :: Python :: 3", 63 | "Programming Language :: Python :: 3.8", 64 | "Programming Language :: Python :: 3.9", 65 | "Programming Language :: Python :: 3.10", 66 | "Programming Language :: Python :: 3.11", 67 | "Programming Language :: Python :: 3.12", 68 | "Programming Language :: Python :: 3 :: Only", 69 | 70 | # What is the language used in the project 71 | "Natural Language :: English", 72 | 73 | "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", 74 | 75 | # The project is typed 76 | "Typing :: Typed" 77 | ] 78 | dynamic = ["version"] 79 | 80 | [project.urls] 81 | Homepage = "https://github.com/ANSSI-FR/orc2timeline" 82 | Issues = "https://github.com/ANSSI-FR/orc2timeline/issues" 83 | Documentation = "https://github.com/ANSSI-FR/DECODE/tree/main/docs" 84 | Source = "https://github.com/ANSSI-FR/orc2timeline" 85 | 86 | [project.scripts] 87 | orc2timeline = "orc2timeline.cli:entrypoint" 88 | 89 | [project.optional-dependencies] 90 | tests = [ 91 | "pytest>=7.3.0", 92 | "pytest-mock>=3.10.0", 93 | ] 94 | cov = [ 95 | "orc2timeline[tests]", 96 | "coverage[toml]>=6.5.0", 97 | "pytest-cov>=4.0.0", 98 | ] 99 | lint = [ 100 | "orc2timeline[tests]", 101 | "mypy>=1.2.0", 102 | "black>=23.0.0", 103 | "ruff>=v0.0.275", 104 | "types-setuptools>=57.0", 105 | ] 106 | tox = [ 107 | "tox>=4.0.0", 108 | ] 109 | build = [ 110 | "build>=0.10.0", 111 | ] 112 | deploy = [ 113 | "twine>=4.0.0", 114 | ] 115 | dev = [ 116 | "orc2timeline[tests,cov,lint,tox,build,deploy]", 117 | ] 118 | # For add optional dependencies, uncomment the next section 119 | #[project.optional-dependencies] 120 | 121 | 122 | # ---------------------------------------------------------------------- 123 | # Setuptools 124 | # https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html 125 | # ---------------------------------------------------------------------- 126 | 127 | [tool.setuptools] 128 | include-package-data = true 129 | 130 | [tool.setuptools.dynamic] 131 | version = {attr = "orc2timeline.info.__version__"} 132 | 133 | [tool.setuptools.packages.find] 134 | where = ["src"] 135 | 136 | [tool.setuptools.package-data] 137 | orc2timeline = ["plugins/**/*.txt", "conf/*.yaml"] 138 | 139 | # ---------------------------------------------------------------------- 140 | # Tox 141 | # https://pypi.org/project/tox 142 | # ---------------------------------------------------------------------- 143 | 144 | [tool.tox] 145 | legacy_tox_ini = """ 146 | [tox] 147 | min_version = 4.0 148 | envlist = lint,tests 149 | 150 | [testenv] 151 | deps = .[tests] 152 | commands = 153 | pytest 154 | 155 | [testenv:lint] 156 | deps = .[lint] 157 | commands = 158 | ruff check . 159 | black --diff . 160 | mypy . 161 | 162 | [testenv:format] 163 | deps = .[lint] 164 | commands = 165 | black . 166 | ruff check --fix . 167 | 168 | [testenv:cov] 169 | deps = .[cov] 170 | commands = 171 | pytest -s --cov {envsitepackagesdir}/orc2timeline --cov-report html --cov-report term --cov-append 172 | """ 173 | 174 | 175 | # ---------------------------------------------------------------------- 176 | # Pytest 177 | # https://docs.pytest.org/en/7.3.x/ 178 | # ---------------------------------------------------------------------- 179 | 180 | [tool.pytest.ini_options] 181 | log_cli = true 182 | log_cli_level = "DEBUG" 183 | #asyncio_mode = "auto" 184 | 185 | 186 | # ---------------------------------------------------------------------- 187 | # Black 188 | # https://pypi.org/project/black 189 | # ---------------------------------------------------------------------- 190 | 191 | [tool.black] 192 | line-length = 120 193 | target-version = ["py38", "py39", "py310", "py311"] 194 | 195 | # Enable linting on pyi files 196 | include = "\\.pyi?$" 197 | 198 | 199 | # ---------------------------------------------------------------------- 200 | # Mypy 201 | # https://pypi.org/project/mypy 202 | # ---------------------------------------------------------------------- 203 | 204 | [tool.mypy] 205 | python_version = 3.8 206 | exclude = [ 207 | ".bzr", 208 | ".direnv", 209 | ".eggs", 210 | ".git", 211 | ".hg", 212 | ".mypy_cache", 213 | ".nox", 214 | ".pants.d", 215 | ".pytype", 216 | ".ruff_cache", 217 | ".svn", 218 | ".tox", 219 | ".venv", 220 | "__pypackages__", 221 | "_build", 222 | "buck-out", 223 | "build", 224 | "dist", 225 | "node_modules", 226 | "venv", 227 | ] 228 | enable_error_code = ["ignore-without-code", "truthy-bool", "redundant-expr"] 229 | 230 | # Disallow dynamic typing 231 | disallow_any_unimported = false 232 | disallow_any_expr = false # All attribut of argparse.Namespace are Any 233 | disallow_any_decorated = false # Too many package doesn't have typed decorator 234 | disallow_any_generics = true 235 | disallow_subclassing_any = true 236 | 237 | # Disallow untyped definitions and calls 238 | disallow_untyped_calls = true 239 | disallow_untyped_defs = true 240 | disallow_incomplete_defs = true 241 | check_untyped_defs = true 242 | disallow_untyped_decorators = false # Too many decorator are untyped 243 | 244 | # None and optional handling 245 | no_implicit_optional = true 246 | 247 | # Configuring warnings 248 | warn_unused_ignores = true 249 | warn_no_return = true 250 | warn_return_any = true 251 | warn_redundant_casts = true 252 | 253 | # Misc things 254 | strict_equality = true 255 | 256 | # Config file 257 | warn_unused_configs = true 258 | 259 | # Exemple for missing types 260 | [[tool.mypy.overrides]] 261 | module = ["py7zr", "dfwinreg", "pyevtx", "pytz", "pyesedb", "dateparser", "yaml"] 262 | ignore_missing_imports = true 263 | 264 | # ---------------------------------------------------------------------- 265 | # Ruff 266 | # https://pypi.org/project/ruff 267 | # ---------------------------------------------------------------------- 268 | 269 | [tool.ruff] 270 | exclude = [ 271 | ".bzr", 272 | ".direnv", 273 | ".eggs", 274 | ".git", 275 | ".hg", 276 | ".mypy_cache", 277 | ".nox", 278 | ".pants.d", 279 | ".pytype", 280 | ".ruff_cache", 281 | ".svn", 282 | ".tox", 283 | ".venv", 284 | "__pypackages__", 285 | "_build", 286 | "buck-out", 287 | "build", 288 | "dist", 289 | "node_modules", 290 | "venv", 291 | ] 292 | line-length = 120 293 | target-version = "py38" 294 | 295 | [tool.ruff.lint] 296 | select = ["ALL"] 297 | # D203 and D211 are incompatible 298 | # D212 and D213 are incompatible 299 | # D400 [*] First line should end with a period 300 | # D101 Missing docstring in public class 301 | ignore = ["D203", "D213", "D400", "D101", "PERF203", "N999"] 302 | fixable = ["ALL"] 303 | unfixable = [] 304 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 305 | isort.known-first-party = ["orc2timeline"] 306 | mccabe.max-complexity = 12 307 | 308 | [tool.ruff.lint.per-file-ignores] 309 | # E402 Module level import not at top of file 310 | # INP001 File `docs\conf.py` is part of an implicit namespace package. Add an `__init__.py`. 311 | # A001 Variable `copyright` is shadowing a python builtin 312 | # PTH100 `os.path.abspath()` should be replaced by `Path.resolve()` 313 | "docs/conf.py" = ["E402", "INP001", "A001", "PTH100"] 314 | # S101 Use of `assert` detected 315 | # S603 `subprocess` call: check for execution of untrusted input 316 | "tests/*.py" = ["S101", "S603"] 317 | # Q003 [*] Change outer quotes to avoid escaping inner quotes 318 | # E501 Line too long 319 | "*/info.py" = ["Q003", "E501"] 320 | # E501 Line too long 321 | "*/__main__.py" = ["E501"] 322 | 323 | # ---------------------------------------------------------------------- 324 | # Pylint 325 | # https://pylint.pycqa.org/en/latest/index.html 326 | # ---------------------------------------------------------------------- 327 | 328 | # We dont use pylint, so we disabled it 329 | [tool.pylint.main] 330 | ignore-patterns = ["*"] 331 | 332 | [tool.ruff.lint.pylint] 333 | max-args = 7 334 | -------------------------------------------------------------------------------- /src/orc2timeline/__init__.py: -------------------------------------------------------------------------------- 1 | """Main module.""" 2 | 3 | from .cli import entrypoint 4 | from .core import process 5 | from .info import __author__, __copyright__, __description__, __email__, __version__ 6 | 7 | __all__ = [ 8 | "__author__", 9 | "__copyright__", 10 | "__description__", 11 | "__email__", 12 | "__version__", 13 | "entrypoint", 14 | "process", 15 | ] 16 | -------------------------------------------------------------------------------- /src/orc2timeline/__main__.py: -------------------------------------------------------------------------------- 1 | """Entrypoint with `python -m cookiepython`.""" 2 | 3 | import sys 4 | 5 | from .cli import entrypoint 6 | 7 | if __name__ == "__main__": 8 | # Patch pickle for anyio.to_process.run_sync 9 | # ImportError: attempted relative import with no known parent package 10 | sys.modules["__main__"] = entrypoint # type: ignore[assignment] # pragma: no cover 11 | entrypoint() # pragma: no cover 12 | -------------------------------------------------------------------------------- /src/orc2timeline/cli.py: -------------------------------------------------------------------------------- 1 | """Module for command line interface.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import os 7 | import re 8 | from pathlib import Path 9 | 10 | import click 11 | 12 | from .config import Config 13 | from .core import OrcArgument, process, process_dir 14 | from .info import __copyright__, __description__, __version__ 15 | 16 | LOG_LEVELS = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] 17 | ORC_REGEX = r"^(?:DFIR\-)?ORC_[^_]*_(.*)_[^_]*\.7z$" 18 | RESULT_EXTENSION = ".csv.gz" 19 | 20 | LOGGER = logging.getLogger(__name__) 21 | 22 | 23 | @click.group(name="orc2timeline", help=__description__, epilog=f"{__version__} - {__copyright__}") 24 | @click.version_option(__version__) 25 | @click.option( 26 | "tmp_dir", 27 | "--tmp-dir", 28 | envvar="TMPDIR", 29 | type=click.Path(dir_okay=True, file_okay=False, exists=True, writable=True, readable=True), 30 | help="Directory where to write temporary files into. TMPDIR global variable can also be used.", 31 | ) 32 | @click.option( 33 | "--log-level", 34 | metavar="level", 35 | type=click.Choice(LOG_LEVELS), 36 | default="INFO", 37 | show_default=True, 38 | help="Print log messages of this level and higher", 39 | ) 40 | @click.option("--log-file", help="Log file to store DEBUG level messages", metavar="file") 41 | def entrypoint(tmp_dir: str, log_level: str, log_file: str | None) -> None: 42 | """Cli function.""" 43 | # Setup logging 44 | if log_file: 45 | # Send everything (DEBUG included) in the log file and keep only log_level messages on the console 46 | logging.basicConfig( 47 | level=logging.DEBUG, 48 | format="[%(asctime)s] %(levelname)-8s - %(name)s - %(message)s", 49 | filename=log_file, 50 | filemode="w", 51 | ) 52 | # define a Handler which writes messages of log_level or higher to the sys.stderr 53 | console = logging.StreamHandler() 54 | console.setLevel(log_level) 55 | # set a format which is simpler for console use 56 | formatter = logging.Formatter("[%(asctime)s] %(levelname)-8s - %(message)s") 57 | # tell the handler to use this format 58 | console.setFormatter(formatter) 59 | # add the handler to the root logger 60 | logging.root.addHandler(console) 61 | else: 62 | logging.basicConfig( 63 | level=log_level, 64 | format="[%(asctime)s] %(levelname)-8s - %(message)s", 65 | ) 66 | 67 | if tmp_dir is not None: 68 | os.environ["TMPDIR"] = tmp_dir 69 | 70 | 71 | @entrypoint.command("show_conf_file") 72 | def cmd_show_conf_file() -> None: 73 | """Show path to configuration file.""" 74 | click.echo("Configuration file is located at the following path:") 75 | click.echo(Config().config_file) 76 | 77 | 78 | @entrypoint.command("show_conf") 79 | def cmd_show_conf() -> None: 80 | """Show the configuration file content.""" 81 | conf_path = Config().config_file 82 | click.echo("Configuration file content:") 83 | click.echo("=======================================================================") 84 | with conf_path.open("r") as f: 85 | data = f.read() 86 | click.echo(data) 87 | click.echo("=======================================================================") 88 | 89 | 90 | @entrypoint.command("process") 91 | @click.option("-j", "--jobs", type=int, default=-1, help="Number of threads to use") 92 | @click.argument("file_list", type=click.Path(dir_okay=False, exists=True), nargs=-1) 93 | @click.argument("output_path", type=click.Path(dir_okay=False, exists=False), nargs=1) 94 | @click.option( 95 | "--overwrite", 96 | is_flag=True, 97 | show_default=True, 98 | default=False, 99 | help="Overwrite destination file if it already exists", 100 | ) 101 | def cmd_process(jobs: int, file_list: str, output_path: str, *, overwrite: bool) -> None: 102 | """Command to process a list of files.""" 103 | if (not Path(output_path).parent.exists()) or (not Path(output_path).parent.is_dir()): 104 | msg = ( 105 | f"'OUTPUT_PATH': Directory '{click.format_filename(Path(output_path).parent.as_posix())}'" 106 | " does not exist or is not a directory." 107 | ) 108 | raise click.BadParameter(msg) 109 | if not overwrite and Path(output_path).exists(): 110 | msg = ( 111 | f"'OUTPUT_PATH': File '{click.format_filename(output_path)}' already exists," 112 | " use '--overwrite' if you know what you are doing." 113 | ) 114 | raise click.BadParameter(msg) 115 | if jobs == -1: 116 | LOGGER.warning( 117 | "--jobs option was not given, thus only one thread will be used. Therefore processing could take a while.", 118 | ) 119 | 120 | hostname_set = set() 121 | clean_file_list = [] 122 | for file in file_list: 123 | hostname = "" 124 | try: 125 | re_extract = re.search(ORC_REGEX, Path(file).name) 126 | if re_extract is not None: 127 | hostname = re_extract.group(1) 128 | clean_file_list.append(Path(file)) 129 | else: 130 | msg = ( 131 | rf"Impossible to extract hostname from filename '{file}', file will be ignored." 132 | rf" Tip: filename must match regex '{ORC_REGEX}'" 133 | ) 134 | LOGGER.info(msg) 135 | 136 | except AttributeError: 137 | msg = rf"Impossible to extract hostname from filename '{file}', filename must match regex '{ORC_REGEX}'" 138 | LOGGER.info(msg) 139 | 140 | if hostname != "": 141 | hostname_set.add(hostname) 142 | 143 | if len(hostname_set) != 1: 144 | msg = f"Bad file list, all files must belong to the same host. Parsed hosts: {hostname_set}" 145 | raise click.BadParameter(msg) 146 | 147 | process(clean_file_list, output_path, hostname_set.pop(), jobs) 148 | 149 | 150 | @entrypoint.command("process_dir") 151 | @click.option("-j", "--jobs", type=int, default=-1, help="Number of threads to use") 152 | @click.argument("input_dir", type=click.Path(dir_okay=True, file_okay=False, exists=True), nargs=1) 153 | @click.argument("output_dir", type=click.Path(dir_okay=True, file_okay=False, exists=True), nargs=1) 154 | @click.option( 155 | "--overwrite", 156 | is_flag=True, 157 | show_default=True, 158 | default=False, 159 | help="Overwrite destination file if it already exists", 160 | ) 161 | def cmd_process_dir(jobs: int, input_dir: str, output_dir: str, *, overwrite: bool) -> None: 162 | """Process all ORCs in INPUT_DIRECTORY, writes output files in OUTPUT_DIR.""" 163 | if jobs == -1: 164 | LOGGER.warning( 165 | "--jobs option was not given, thus only one thread will be used. Therefore processing could take a while.", 166 | ) 167 | 168 | orc_arguments = _crawl_input_dir_and_return_megastruct(input_dir, output_dir) 169 | 170 | final_orc_arguments = [] 171 | for orc_argument in orc_arguments: 172 | if orc_argument.output_path.exists() and not overwrite: 173 | # verify if destination output already exists 174 | # create output directory if it does not exist 175 | LOGGER.warning( 176 | "Output file '%s' already exists, processing will be ignored for host %s" 177 | " use '--overwrite' if you know what you are doing.", 178 | orc_argument.output_path.as_posix(), 179 | orc_argument.hostname, 180 | ) 181 | continue 182 | if not orc_argument.output_path.parent.exists(): 183 | orc_argument.output_path.parent.mkdir(parents=True, exist_ok=True) 184 | final_orc_arguments.append(orc_argument) 185 | 186 | process_dir(final_orc_arguments, jobs) 187 | 188 | 189 | def _crawl_input_dir_and_return_megastruct(input_dir: str, output_dir: str) -> list[OrcArgument]: 190 | orc_arguments: dict[str, OrcArgument] = {} 191 | for file_in_sub_dir in Path(input_dir).glob("**/*"): 192 | re_extract = re.search(ORC_REGEX, Path(file_in_sub_dir).name) 193 | hostname = "" 194 | if re_extract is not None: 195 | hostname = re_extract.group(1) 196 | if hostname != "": 197 | output_sub_path = Path(file_in_sub_dir.parent).relative_to(input_dir) / (hostname + RESULT_EXTENSION) 198 | output_total_path = str(Path(output_dir) / output_sub_path) 199 | if orc_arguments.get(output_total_path) is None: 200 | new_orc_argument = OrcArgument(hostname=hostname, output_path=Path(output_total_path)) 201 | orc_arguments[output_total_path] = new_orc_argument 202 | orc_arguments[output_total_path].orc_paths.append(Path(file_in_sub_dir)) 203 | 204 | return list(orc_arguments.values()) 205 | -------------------------------------------------------------------------------- /src/orc2timeline/conf/Orc2Timeline.yaml: -------------------------------------------------------------------------------- 1 | Plugins: 2 | - RegistryToTimeline: 3 | archives: ["SAM", "Little", "Detail", "Offline"] 4 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 5 | match_pattern: ".*data$" 6 | sourcetype: "Registry" 7 | 8 | - EventLogsToTimeline: 9 | archives: ["General", "Little", "Offline"] 10 | sub_archives: ["Event.7z"] 11 | match_pattern: ".*evtx.*data$" 12 | sourcetype: "Event" 13 | 14 | - NTFSInfoToTimeline: 15 | archives: ["Detail", "General", "Little", "Offline"] 16 | sub_archives: ["NTFSInfo_detail.7z", "NTFSInfo_quick.7z"] 17 | match_pattern: "^.*NTFSInfo[^/]*\\.csv$" 18 | sourcetype: "MFT" 19 | 20 | - USNInfoToTimeline: 21 | archives: ["Detail", "Little", "Offline"] 22 | sub_archives: ["USNInfo.7z"] 23 | match_pattern: "^USNInfo.*\\.csv$" 24 | sourcetype: "USN journal" 25 | 26 | - I30InfoToTimeline: 27 | archives: ["Detail", "Offline"] 28 | sub_archives: ["NTFSInfo_i30Info.7z"] 29 | match_pattern: "^I30Info.*\\.csv$" 30 | sourcetype: "I30" 31 | -------------------------------------------------------------------------------- /src/orc2timeline/config.py: -------------------------------------------------------------------------------- 1 | """Module for configuration.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | import sys 7 | from pathlib import Path 8 | 9 | import yaml 10 | 11 | DEFAULT_CONFIG_FILE = "Orc2Timeline.yaml" 12 | ROOT_DIR = Path(__file__).resolve().parent 13 | LOGGER = logger = logging.getLogger(__name__) 14 | 15 | 16 | class Orc2TimelineConfigError(Exception): 17 | pass 18 | 19 | 20 | class Config: 21 | def __init__(self) -> None: 22 | """Create Config object.""" 23 | self.plugin_conf_list: list[PluginConfig] = [] 24 | config_file = ROOT_DIR / "conf" / DEFAULT_CONFIG_FILE 25 | 26 | if not config_file.exists(): 27 | LOGGER.error('Cannot read configuration file "%s" (file does not exist)', config_file) 28 | error_str = f'Cannot read configuration file "{config_file}" (file does not exist)' 29 | raise Orc2TimelineConfigError(error_str) 30 | 31 | if not config_file.is_file(): 32 | LOGGER.error('Cannot read configuration file "%s" (is not a file)', config_file) 33 | error_str = f'Cannot read configuration file "{config_file}" (is not a file)' 34 | raise Orc2TimelineConfigError(error_str) 35 | 36 | try: 37 | with config_file.open("r") as conf_file: 38 | self.global_config = yaml.safe_load(conf_file) 39 | self._parse_global_config() 40 | except yaml.error.MarkedYAMLError: 41 | LOGGER.critical("An error occured while parsing configuration (file: %s)", str(config_file)) 42 | raise 43 | 44 | self.config_file = config_file 45 | 46 | def _parse_global_config(self) -> None: 47 | for plugin_conf_text in self.global_config["Plugins"]: 48 | for plug in plugin_conf_text: 49 | if plugin_conf_text[plug]["archives"] is None or len(plugin_conf_text[plug]["archives"]) == 0: 50 | msg = f"Plugin {plug}: configuration describes plugin without any archive." 51 | raise Orc2TimelineConfigError(msg) 52 | for archive in plugin_conf_text[plug]["archives"]: 53 | if ( 54 | plugin_conf_text[plug].get("sub_archives") is None 55 | or len(plugin_conf_text[plug].get("sub_archives")) == 0 56 | ): 57 | plugin_conf = PluginConfig( 58 | plug, 59 | [archive], 60 | plugin_conf_text[plug]["match_pattern"], 61 | plugin_conf_text[plug]["sourcetype"], 62 | [], 63 | ) 64 | self.plugin_conf_list.append(plugin_conf) 65 | else: 66 | if not isinstance(plugin_conf_text[plug].get("sub_archives", []), list): 67 | msg = f"Plugin {plug}: sub_archives is not a list." 68 | raise Orc2TimelineConfigError(msg) 69 | 70 | for sub_archive in plugin_conf_text[plug].get("sub_archives", []): 71 | plugin_conf = PluginConfig( 72 | plug, 73 | [archive], 74 | plugin_conf_text[plug]["match_pattern"], 75 | plugin_conf_text[plug]["sourcetype"], 76 | [sub_archive], 77 | ) 78 | self.plugin_conf_list.append(plugin_conf) 79 | if len(self.plugin_conf_list) == 0: 80 | LOGGER.critical("Plugin list seems empty, exiting.") 81 | sys.exit(1) 82 | 83 | 84 | class PluginConfig: 85 | def __init__( 86 | self, 87 | plugin_name: str, 88 | archives: list[str], 89 | match_pattern: str, 90 | sourcetype: str, 91 | sub_archives: list[str], 92 | ) -> None: 93 | """Create PluginConfig object.""" 94 | self.plugin_name = plugin_name 95 | self.archives = archives 96 | self.sub_archives = sub_archives 97 | self.match_pattern = match_pattern 98 | self.sourcetype = sourcetype 99 | 100 | if self.sub_archives is None: 101 | self.sub_archives = [] 102 | 103 | if self.plugin_name == "": 104 | msg = "Empty plugin name in configuration is not allowed." 105 | raise Orc2TimelineConfigError(msg) 106 | if not Path(ROOT_DIR, "plugins", self.plugin_name + ".py").is_file(): 107 | msg = ( 108 | f"Plugin {self.plugin_name}: {Path(ROOT_DIR, 'plugins', self.plugin_name + '.py').as_posix()}" 109 | f" does not exist." 110 | ) 111 | raise Orc2TimelineConfigError(msg) 112 | if len(self.archives) == 0: 113 | msg = f"Plugin {self.plugin_name}: archives should not be empty." 114 | raise Orc2TimelineConfigError(msg) 115 | if self.sourcetype == "": 116 | msg = f"Plugin {self.plugin_name}: empty sourcetype is not allowed." 117 | raise Orc2TimelineConfigError(msg) 118 | if self.match_pattern == "": 119 | msg = ( 120 | f"Plugin {self.plugin_name}: empty match_pattern is not allowed. " 121 | 'Hint: ".*" can be used to match all the files.' 122 | ) 123 | raise Orc2TimelineConfigError(msg) 124 | -------------------------------------------------------------------------------- /src/orc2timeline/core.py: -------------------------------------------------------------------------------- 1 | """Core module.""" 2 | 3 | from __future__ import annotations 4 | 5 | import concurrent.futures 6 | import csv 7 | import gzip 8 | import heapq 9 | import logging 10 | import multiprocessing 11 | import os 12 | import shutil 13 | import sys 14 | import tempfile 15 | from importlib import import_module 16 | from pathlib import Path 17 | from tempfile import TemporaryDirectory 18 | from typing import TYPE_CHECKING, Any, TextIO 19 | 20 | if TYPE_CHECKING: 21 | from threading import Lock as LockType 22 | 23 | from .config import Config 24 | 25 | ROOT_DIR = Path(__file__).resolve().parent 26 | TEMP_DIRECTORY: Any = None 27 | LOGGER = logging.getLogger(__name__) 28 | 29 | 30 | def _add_header_to_csv_file(output_path: str) -> None: 31 | """Add header at the beginning of csv file.""" 32 | header = ["Timestamp", "Hostname", "SourceType", "Description", "SourceFile"] 33 | with gzip.open(output_path, "wt", newline="") as f: 34 | csv_dict_writer = csv.DictWriter(f, delimiter=",", quotechar='"', fieldnames=header) 35 | csv_dict_writer.writeheader() 36 | 37 | 38 | def _map_open(input_file: Path) -> TextIO: 39 | return input_file.open(encoding="utf-8") 40 | 41 | 42 | def _merge_sorted_files(paths: list[Path], output_path: str, temp_dir: str) -> int: 43 | """Merge sorted files contained in paths list to output_path file and return number of unique lines.""" 44 | events_count = 0 45 | intermediate_file = tempfile.NamedTemporaryFile( # noqa: SIM115 46 | dir=temp_dir, 47 | encoding="utf-8", 48 | mode="w+", 49 | delete=False, 50 | ) 51 | old_intermediate_file_name = "" 52 | while len(paths) != 0: 53 | sub_paths = [] 54 | # We merge files by batch so that we do not reach the limitation of files opened at the same time 55 | # arbitrary value is 300 because 512 is the maximum value on Windows 56 | sub_paths = [paths.pop() for _ in range(min(300, len(paths)))] 57 | if old_intermediate_file_name != "": 58 | sub_paths.append(Path(old_intermediate_file_name)) 59 | 60 | files = map(_map_open, sub_paths) 61 | previous_comparable = "" 62 | for line in heapq.merge(*files): 63 | comparable = line 64 | if previous_comparable != comparable: 65 | intermediate_file.write(line) 66 | previous_comparable = comparable 67 | if len(paths) == 0: 68 | events_count += 1 69 | old_intermediate_file_name = intermediate_file.name 70 | intermediate_file.close() 71 | for f in files: 72 | f.close() 73 | intermediate_file = tempfile.NamedTemporaryFile( # noqa: SIM115 74 | dir=temp_dir, 75 | encoding="utf-8", 76 | mode="w+", 77 | delete=False, 78 | ) 79 | 80 | _add_header_to_csv_file(output_path) 81 | with Path(old_intermediate_file_name).open(encoding="utf-8") as infile, gzip.open( 82 | output_path, 83 | "at", 84 | encoding="utf-8", 85 | newline="", 86 | ) as outfile: 87 | shutil.copyfileobj(infile, outfile) 88 | 89 | return events_count 90 | 91 | 92 | def _merge_timelines_for_host(hostname: str, output_path: str, tmp_dir: tempfile.TemporaryDirectory[str]) -> int: 93 | """Merge subtimelines for a given host. 94 | 95 | Merge all files that match 'timeline_{hostname}_*' regex 96 | for hostname in temporary directory to output_path file. 97 | """ 98 | files_to_merge = list(Path(tmp_dir.name).glob(f"**/timeline_{hostname}_*")) 99 | LOGGER.info("Merging all timelines generated per artefact for host %s", hostname) 100 | 101 | result = _merge_sorted_files( 102 | files_to_merge, 103 | output_path, 104 | tmp_dir.name, 105 | ) 106 | 107 | for file in files_to_merge: 108 | file.unlink() 109 | 110 | return result 111 | 112 | 113 | def _is_list_uniq(host_list: list[str]) -> bool: 114 | """Return True if all elements are different in host_lists.""" 115 | return len(host_list) == len(set(host_list)) 116 | 117 | 118 | def _get_duplicate_values_from_list(input_list: list[str]) -> set[str]: 119 | """Return a sublist of input_list containing duplicate values of this list.""" 120 | seen = set() 121 | dupes = set() 122 | for x in input_list: 123 | if x in seen: 124 | dupes.add(x) 125 | else: 126 | seen.add(x) 127 | return dupes 128 | 129 | 130 | def _load_plugins( 131 | config: Config, 132 | orc_arguments: list[OrcArgument], 133 | tmp_dir: TemporaryDirectory[str], 134 | lock: LockType | None, 135 | ) -> list[Any]: 136 | plugin_classes_list = [] 137 | for orc_argument in orc_arguments: 138 | hostname = orc_argument.hostname 139 | for plugin_config in config.plugin_conf_list: 140 | mod = import_module(f"orc2timeline.plugins.{plugin_config.plugin_name}") 141 | plugin_class = getattr(mod, plugin_config.plugin_name, None) 142 | if plugin_class is not None: 143 | plugin_timeline_path = Path(tmp_dir.name) / f"timeline_{hostname}_{plugin_class.__name__}" 144 | plugin_classes_list.append( 145 | plugin_class( 146 | plugin_config, 147 | orc_argument.orc_paths, 148 | plugin_timeline_path, 149 | hostname, 150 | tmp_dir.name, 151 | lock, 152 | ), 153 | ) 154 | 155 | return plugin_classes_list 156 | 157 | 158 | def _run_plugin( 159 | plugin: Any, # noqa: ANN401 160 | ) -> Any: # noqa: ANN401 161 | return plugin.add_to_timeline() 162 | 163 | 164 | class OrcArgument: 165 | """Define all the needed parameters to process ORC and create timeline.""" 166 | 167 | def __init__(self, hostname: str = "", output_path: Path = Path(), orc_paths: list[Path] | None = None) -> None: 168 | """Construct.""" 169 | self.hostname = hostname 170 | self.output_path = output_path 171 | if orc_paths is None: 172 | self.orc_paths = [] 173 | else: 174 | self.orc_paths = orc_paths 175 | 176 | 177 | def process(file_list: list[Path], output_path: str, hostname: str, jobs: int) -> int: 178 | """Create a timeline for one host. 179 | 180 | Create timeline in output_path file from Orc given in file_list 181 | for a specific host (hostname), jobs variable is used to indicate 182 | how many threads can be used. 183 | """ 184 | orc_argument = OrcArgument(orc_paths=file_list, hostname=hostname, output_path=Path(output_path)) 185 | return _process_inner(orc_argument, jobs) 186 | 187 | 188 | def _process_inner(orc_argument: OrcArgument, jobs: int) -> int: 189 | """Create timeline from OrcArgument object with "jobs" threads.""" 190 | LOGGER.info("Processing files for host: %s", orc_argument.hostname) 191 | lock = None 192 | if jobs > 1: 193 | lock = multiprocessing.Manager().Lock() 194 | temp_directory_parent = os.environ.get("TMPDIR") 195 | tmp_dir = tempfile.TemporaryDirectory(dir=temp_directory_parent, prefix="Orc2TimelineTempDir_") 196 | orc_arguments = [orc_argument] 197 | plugin_classes_list = _load_plugins(Config(), orc_arguments, tmp_dir, lock) 198 | 199 | # all_results is a list of tuple(host, plugin_name, number_of_events) that is later used to print final summary 200 | all_results = [] 201 | if jobs <= 1: 202 | all_results.extend( 203 | [ 204 | ( 205 | orc_argument.hostname, 206 | plugin.__class__.__name__, 207 | _run_plugin(plugin), 208 | ) 209 | for plugin in plugin_classes_list 210 | ], 211 | ) 212 | else: 213 | with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as pool: 214 | # store parallel plugin execution results 215 | futures = [pool.submit(_run_plugin, plugin) for plugin in plugin_classes_list] 216 | concurrent.futures.wait(futures) 217 | futures_results = [future.result() for future in futures] 218 | # loop to match plugin results to initial parameters 219 | for plugin, res in zip(plugin_classes_list, futures_results): 220 | all_results.append((orc_argument.hostname, plugin.__class__.__name__, res)) 221 | 222 | total_result = _merge_timelines_for_host(orc_argument.hostname, str(orc_argument.output_path), tmp_dir) 223 | total_results_per_host = {orc_argument.hostname: total_result} 224 | 225 | _print_summaries(total_results_per_host, all_results) 226 | 227 | return total_result 228 | 229 | 230 | def process_dir(orc_arguments: list[OrcArgument], jobs: int) -> int: 231 | """Process all plugins for all hosts.""" 232 | lock = None 233 | if jobs > 1: 234 | lock = multiprocessing.Manager().Lock() 235 | 236 | temp_directory_parent = os.environ.get("TMPDIR") 237 | tmp_dir = tempfile.TemporaryDirectory(dir=temp_directory_parent, prefix="Orc2TimelineTempDir_") 238 | plugin_classes_list = _load_plugins(Config(), orc_arguments, tmp_dir, lock) 239 | 240 | _check_orc_list_and_print_intro(orc_arguments) 241 | 242 | # all_results is a list of tuple(host, plugin_name, number_of_events) that is later used to print final summary 243 | all_results = [] 244 | if jobs <= 1: 245 | all_results.extend( 246 | [ 247 | ( 248 | plugin.hostname, 249 | plugin.__class__.__name__, 250 | _run_plugin(plugin), 251 | ) 252 | for plugin in plugin_classes_list 253 | ], 254 | ) 255 | else: 256 | with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as pool: 257 | # store parallel plugin execution results 258 | futures = [] 259 | index_list = [] 260 | for plugin in plugin_classes_list: 261 | # need to keep trace of argument to make it match with results later 262 | index_list.append((plugin.hostname, plugin.__class__.__name__)) 263 | futures.append( 264 | pool.submit(_run_plugin, plugin), 265 | ) 266 | 267 | concurrent.futures.wait(futures) 268 | future_results = [future.result() for future in futures] 269 | # building all_results by using simultanueously index_list and plugin results (futures) 270 | for index_num, index_tup in enumerate(index_list): 271 | hostname = index_tup[0] 272 | plugin = index_tup[1] 273 | all_results.append((hostname, plugin, future_results[index_num])) 274 | 275 | total_results = 0 276 | # dictionnary total_results_per_host[hostname] = total_number_of_events_for_this_host 277 | total_results_per_host = _merge_timelines_with_jobs(orc_arguments, jobs, tmp_dir) 278 | total_results = sum(total_results_per_host.values()) 279 | 280 | _print_summaries(total_results_per_host, all_results) 281 | 282 | return total_results 283 | 284 | 285 | def _get_all_results_filtered_by_host(all_results: list[tuple[str, str, int]], host: str) -> list[tuple[str, str, int]]: 286 | """Return sublist of all_results where first element of tuple (hostname) match given host.""" 287 | return [result for result in all_results if host == result[0]] 288 | 289 | 290 | def _get_all_results_filtered_by_plugin( 291 | all_results: list[tuple[str, str, int]], 292 | plugin: str, 293 | ) -> list[tuple[str, str, int]]: 294 | """Return sublist of all_results where second element of tuple (hostname) match given host.""" 295 | return [result for result in all_results if plugin == result[1]] 296 | 297 | 298 | def _check_orc_list_and_print_intro(orc_arguments: list[OrcArgument]) -> None: 299 | """Verify that there is no duplicates in given orc_arguments (stops the program if there is) and print intro.""" 300 | host_list = [orc_argument.hostname for orc_argument in orc_arguments] 301 | if not _is_list_uniq(host_list): 302 | dupes = _get_duplicate_values_from_list(host_list) 303 | LOGGER.critical("Unable to process directory if the same host is used many times.") 304 | LOGGER.critical("Hint, these hosts seem to be the source of the problem : %s", dupes) 305 | sys.exit(2) 306 | 307 | _print_intro(orc_arguments) 308 | 309 | 310 | def _print_intro(orc_arguments: list[OrcArgument]) -> None: 311 | """Print simple intro that sums up the files that will be used to generate timelines.""" 312 | for orc_argument in orc_arguments: 313 | LOGGER.info("==============================================") 314 | LOGGER.info("Host: %s", orc_argument.hostname) 315 | LOGGER.info("Files used: [%s]", ", ".join(str(path) for path in orc_argument.orc_paths)) 316 | LOGGER.info("Result file: %s", orc_argument.output_path) 317 | 318 | 319 | def _print_summaries(total_results_per_host: dict[str, int], all_results: list[tuple[str, str, int]]) -> None: 320 | """Print summaries for every treated Orc at the end of the program execution. 321 | 322 | Parameters 323 | ---------- 324 | total_results_per_host: dict[str, int] 325 | Dictionary with hostname as key and total_events_for_this_host (after deduplication) as value 326 | all_results: list[tuple[str, str, int]] 327 | List of tuple (hostname, plugin_name, events_number) 328 | 329 | """ 330 | LOGGER.info("== Printing final summary of generated timelines:") 331 | host_list = sorted(set(total_results_per_host.keys())) 332 | for host in host_list: 333 | LOGGER.info( 334 | "=======================================================================", 335 | ) 336 | LOGGER.info("====== Hostname: %s - %s events", host, total_results_per_host[host]) 337 | results_filtered_by_host = _get_all_results_filtered_by_host(all_results, host) 338 | plugin_list = sorted({plugin[1] for plugin in results_filtered_by_host}) 339 | for plugin in plugin_list: 340 | results_filtered_by_plugin = _get_all_results_filtered_by_plugin(results_filtered_by_host, plugin) 341 | sum_for_plugin = sum([int(plugin[2]) for plugin in results_filtered_by_plugin]) 342 | # for plugin in results_filtered_by_host: 343 | LOGGER.info("========== %s %s %s", host, plugin, sum_for_plugin) 344 | LOGGER.info("====== Total for %s: %s", host, total_results_per_host[host]) 345 | 346 | LOGGER.info( 347 | "=======================================================================", 348 | ) 349 | LOGGER.info("====== Total: %s events processed", sum(total_results_per_host.values())) 350 | LOGGER.info( 351 | "=======================================================================", 352 | ) 353 | 354 | 355 | def _merge_timelines_with_jobs( 356 | orc_arguments: list[OrcArgument], 357 | jobs: int, 358 | tmp_dir: tempfile.TemporaryDirectory[str], 359 | ) -> dict[str, int]: 360 | """Create final timeline for every host by merging subtimelines. 361 | 362 | For a list of OrcArgument, for every host, this function will merge all 363 | the subtimelines that were generated by the execution of the plugins 364 | and create the final timeline. 365 | """ 366 | result_list = [] 367 | 368 | if jobs <= 1: 369 | result_list = [ 370 | _merge_timelines_for_host(orc_argument.hostname, str(orc_argument.output_path), tmp_dir) 371 | for orc_argument in orc_arguments 372 | ] 373 | else: 374 | with concurrent.futures.ProcessPoolExecutor(max_workers=jobs) as pool: 375 | futures = [ 376 | pool.submit(_merge_timelines_for_host, orc_argument.hostname, str(orc_argument.output_path), tmp_dir) 377 | for orc_argument in orc_arguments 378 | ] 379 | concurrent.futures.wait(futures) 380 | result_list = [future.result() for future in futures] 381 | 382 | # return value is a dictionnary with hostname as key 383 | # and number of events for this host as value dict_res : dict{str, int} 384 | return dict(zip([orc_argument.hostname for orc_argument in orc_arguments], result_list)) 385 | -------------------------------------------------------------------------------- /src/orc2timeline/info.py: -------------------------------------------------------------------------------- 1 | """Metadata for orc2timeline.""" 2 | 3 | # fmt: off 4 | __project__ = "orc2timeline" 5 | __author__ = "Berenger Foucher" 6 | __maintainer__ = "Berenger Foucher" 7 | __description__ = "Generate a timeline from list of Orc files" 8 | __email__ = "berenger.foucher@ssi.gouv.fr" 9 | __version__ = "1.0.0" 10 | __copyright__ = "Copyright 2023, ANSSI" 11 | __license__ = "LGPL-3.0-or-later" 12 | # fmt: on 13 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/EventLogsToTimeline-eventmap.txt: -------------------------------------------------------------------------------- 1 | Microsoft-Windows-Security-Auditing/4624:[Logon] 2 | Microsoft-Windows-Security-Auditing/4625:[Failed Login] 3 | Microsoft-Windows-Security-Auditing/1102:[Log Cleared] 4 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/EventLogsToTimeline.py: -------------------------------------------------------------------------------- 1 | """Plugin to parse windows event logs.""" 2 | 3 | from __future__ import annotations 4 | 5 | import logging 6 | from pathlib import Path 7 | from typing import TYPE_CHECKING 8 | 9 | if TYPE_CHECKING: 10 | from threading import Lock 11 | 12 | from orc2timeline.config import PluginConfig 13 | 14 | from typing import Any, Iterator 15 | 16 | import pyevtx 17 | 18 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 19 | 20 | LOGGER = logging.getLogger(__name__) 21 | 22 | 23 | def _get_event_id(event: Any) -> int | None: # noqa: ANN401 24 | try: 25 | raw_event_id = event.get_event_identifier() 26 | except OSError: 27 | LOGGER.debug("Error while trying to recover event identifier") 28 | return None 29 | # Mask the facility code, reserved, customer and severity bits. Only keeps the status code. 30 | return int(0xFFFF & raw_event_id) 31 | 32 | 33 | def _get_args(event: Any) -> list[str]: # noqa: ANN401 34 | args = [] 35 | args_number = 0 36 | try: 37 | args_number = event.get_number_of_strings() 38 | except OSError as e: 39 | if "unable to retrieve number of strings" in str(e): 40 | LOGGER.debug( 41 | "Unable to retrieve args_number for event. Error: %s", 42 | e, 43 | ) 44 | return [] 45 | raise 46 | 47 | for i in range(args_number): 48 | argi = None 49 | try: 50 | argi = event.get_string(i) 51 | except OSError as err: 52 | if "pyevtx_record_get_string_by_index: unable to determine size of string:" in str(err): 53 | LOGGER.debug("Unable to get string argument from event. Error: %s", err) 54 | else: 55 | raise 56 | 57 | if argi: 58 | argi = argi.replace("\r\n", "\\r\\n") 59 | argi = argi.replace("\n", "\\n") 60 | argi = argi.replace("\r", "\\r") 61 | args.append(argi) 62 | else: 63 | args.append("") 64 | 65 | return args 66 | 67 | 68 | class EventLogsToTimeline(GenericToTimeline): 69 | def __init__( 70 | self, 71 | config: PluginConfig, 72 | orclist: list[str], 73 | output_file_path: str, 74 | hostname: str, 75 | tmp_dir: str, 76 | lock: Lock, 77 | ) -> None: 78 | """Construct.""" 79 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 80 | 81 | self.event_tags_file = Path(__file__).parent / "EventLogsToTimeline-eventmap.txt" 82 | self.event_tags = self._parse_event_tags_file(self.event_tags_file) 83 | 84 | def _build_description_field(self, event_provider: str, event_id: int, user_id: str, args: list[str]) -> str: 85 | description = f"{event_provider}:{event_id}" 86 | 87 | if (event_provider in self.event_tags) and (event_id in self.event_tags[event_provider]): 88 | description += f" {self.event_tags[event_provider][event_id]}" 89 | 90 | description += f" {user_id}" 91 | 92 | if len(args) != 0: 93 | args_string = "|".join(args) 94 | description += f" ({args_string})" 95 | 96 | return description 97 | 98 | def _parse_artefact(self, artefact: Path) -> None: 99 | for event in self._evtx_events(artefact): 100 | evt_dict = self._evtx_get_event_object( 101 | event, 102 | artefact, 103 | recovered=False, 104 | ) 105 | 106 | if evt_dict and evt_dict.description and evt_dict.description != "": 107 | self._add_event(evt_dict) 108 | 109 | for event in self._evtx_recovered_events(artefact): 110 | evt_dict = self._evtx_get_event_object( 111 | event, 112 | artefact, 113 | recovered=True, 114 | ) 115 | if evt_dict and evt_dict.description and evt_dict.description != "": 116 | self._add_event(evt_dict) 117 | 118 | def _evtx_recovered_events(self, evtx_file_path: Path) -> Iterator[Any]: 119 | with Path(evtx_file_path).open("rb") as f: 120 | evtx_file = pyevtx.file() 121 | try: 122 | evtx_file.open_file_object(f) 123 | except OSError: 124 | self.logger.critical( 125 | "Error while opening the event log file %s", 126 | evtx_file_path, 127 | ) 128 | else: 129 | for i in range(evtx_file.number_of_recovered_records): 130 | try: 131 | evtx = evtx_file.get_recovered_record(i) 132 | except OSError as e: 133 | self.logger.debug( 134 | "Error while parsing a recovered event record in %s. Error: %s", 135 | evtx_file_path, 136 | e, 137 | ) 138 | continue 139 | yield evtx 140 | evtx_file.close() 141 | 142 | def _evtx_events(self, evtx_file_path: Path) -> Iterator[Any]: 143 | with Path(evtx_file_path).open("rb") as f: 144 | evtx_file = pyevtx.file() 145 | try: 146 | evtx_file.open_file_object(f) 147 | except OSError: 148 | self.logger.critical( 149 | "Error while opening the event log file %s", 150 | evtx_file_path, 151 | ) 152 | else: 153 | for i in range(evtx_file.number_of_records): 154 | try: 155 | evtx = evtx_file.get_record(i) 156 | except OSError as e: 157 | self.logger.debug( 158 | "Error while parsing an event record in %s. Error: %s", 159 | evtx_file_path, 160 | e, 161 | ) 162 | continue 163 | yield evtx 164 | evtx_file.close() 165 | 166 | def _evtx_get_event_object( 167 | self, 168 | event_input: Any, # noqa: ANN401 169 | event_file: Path, 170 | *, 171 | recovered: bool, 172 | ) -> Event | None: 173 | event_result = Event(source=self._get_original_path(event_file)) 174 | 175 | try: 176 | event_result.timestamp = event_input.get_written_time() 177 | except ValueError: 178 | self.logger.critical("Unable to get written time from event in %s", event_file) 179 | return None 180 | 181 | # Event ID 182 | event_id = _get_event_id(event_input) 183 | if event_id is None: 184 | return None 185 | 186 | # Get the non formatted arguments 187 | args = [] 188 | args = _get_args(event_input) 189 | 190 | event_provider = "Unknown" 191 | try: 192 | event_provider = event_input.get_source_name() 193 | except OSError as err: 194 | if "pyevtx_record_get_source_name: unable to determine size of source name as UTF-8 string." in str(err): 195 | self.logger.debug("Unable to get source name from event") 196 | else: 197 | raise 198 | user_id = event_input.get_user_security_identifier() 199 | 200 | event_result.description = self._build_description_field(event_provider, event_id, user_id, args) 201 | if recovered: 202 | event_result.description += " (Recovered)" 203 | 204 | return event_result 205 | 206 | def _parse_event_tags_file(self, event_tags_file: Path) -> dict[str, dict[int, str]]: 207 | """Parse a file containing information to add tags to some event.""" 208 | event_tags = {} 209 | if event_tags_file.exists(): 210 | with event_tags_file.open() as f: 211 | for line in f.readlines(): 212 | my_line = line.strip() 213 | 214 | # commented-out line 215 | if my_line.startswith("#") or len(my_line) == 0: 216 | continue 217 | 218 | splitted_line = my_line.split(":") 219 | if len(splitted_line) != 2: # noqa: PLR2004 220 | self.logger.warning( 221 | 'Wrong format for a line in %s: "%s"', 222 | event_tags_file, 223 | my_line, 224 | ) 225 | continue 226 | 227 | event, tag = splitted_line 228 | 229 | splitted_event = event.split("/") 230 | if len(splitted_event) != 2: # noqa: PLR2004 231 | self.logger.warning( 232 | 'Wrong format for a line in %s: "%s"', 233 | event_tags_file, 234 | my_line, 235 | ) 236 | continue 237 | 238 | event_provider, event_id = splitted_event[0], int(splitted_event[1]) 239 | 240 | if event_provider not in event_tags: 241 | event_tags[event_provider] = {event_id: tag} 242 | else: 243 | event_tags[event_provider][event_id] = tag 244 | 245 | return event_tags 246 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/GenericToTimeline.py: -------------------------------------------------------------------------------- 1 | """Generic plugin, all real plugin will inherit from this plugin.""" 2 | 3 | from __future__ import annotations 4 | 5 | import bisect 6 | import csv 7 | import logging 8 | import os 9 | import random 10 | import re 11 | import string 12 | import tempfile 13 | import time 14 | from datetime import datetime 15 | from pathlib import Path 16 | from typing import TYPE_CHECKING, Callable, Iterator 17 | 18 | import py7zr 19 | import pytz 20 | 21 | if TYPE_CHECKING: 22 | from threading import Lock 23 | 24 | from orc2timeline.config import PluginConfig 25 | 26 | MAX_FILE_NAME_LENGTH = 255 27 | 28 | 29 | def _delete_everything_in_dir(path: Path) -> None: 30 | """Mimic the command rm -r path.""" 31 | for subpath in path.iterdir(): 32 | if subpath.is_dir(): 33 | _delete_everything_in_dir(subpath) 34 | else: 35 | subpath.unlink() 36 | path.rmdir() 37 | 38 | 39 | def _get_relevant_archives(orc_list: list[str], archive_list: list[str]) -> Iterator[tuple[str, str]]: 40 | """Return Iterator that is a tuple of str. 41 | 42 | Return: 43 | ------ 44 | Iterator[tuple[str, str]] 45 | first element of tuple: path to orc archive 46 | second element of tuple: archive type (Details, Memory, Little, General...) 47 | 48 | """ 49 | for orc in orc_list: 50 | for archive in archive_list: 51 | if archive.casefold() in Path(orc).name.casefold(): 52 | yield orc, archive 53 | 54 | 55 | def _extract_sub_archives_from_archive(archive_path: str, extraction_path: Path, sub_archive: str) -> None: 56 | def _sub_archive_filter(f: str) -> bool: 57 | return f.casefold() == sub_archive.casefold() 58 | 59 | _extract_filtered_files_from_archive(archive_path, extraction_path, _sub_archive_filter) 60 | 61 | 62 | def _extract_matching_files_from_archive(archive_path: str, extraction_path: Path, match_pattern: str) -> None: 63 | filter_pattern = re.compile(match_pattern, re.IGNORECASE) 64 | 65 | def _re_filter(input_str: str) -> bool: 66 | return bool(filter_pattern.match(input_str)) 67 | 68 | _extract_filtered_files_from_archive(archive_path, extraction_path, _re_filter) 69 | 70 | 71 | def _extract_getthis_file_from_archive(archive_path: str, extraction_path: Path) -> None: 72 | def _get_this_filter(f: str) -> bool: 73 | return f.casefold() == "GetThis.csv".casefold() 74 | 75 | _extract_filtered_files_from_archive(archive_path, extraction_path, _get_this_filter) 76 | 77 | 78 | def _extract_filtered_files_from_archive( 79 | archive_path: str, 80 | extraction_path: Path, 81 | filter_function: Callable[[str], bool], 82 | ) -> None: 83 | try: 84 | with py7zr.SevenZipFile(archive_path, mode="r") as z: 85 | allfiles = z.getnames() 86 | targets = [f for f in allfiles if filter_function(f)] 87 | z.extract( 88 | targets=targets, 89 | path=extraction_path, 90 | ) 91 | except OSError as e: 92 | if "File name too long:" in str(e) or (os.name == "nt" and "Invalid argument" in str(e)): 93 | _extract_safe(archive_path, extraction_path, filter_function) 94 | else: 95 | raise 96 | 97 | 98 | def _extract_safe(archive_name: str, output_dir: Path, filter_function: Callable[[str], bool]) -> None: 99 | """Extract files from archive in a safe way. 100 | 101 | This function extracts files from the archive that is located in archive_name. 102 | All files that match filter_function (this function should return True) are extracted in 103 | a safe way. output_dir is the directory that will be used to write uncompressed files. 104 | 105 | To extract files in a safer way, the files that name does not exceed MAX_FILE_NAME_LENGTH 106 | are extracted in the simplest way. 107 | Matching files that name is too long are extracted using read function, and name will be truncated 108 | from the beginning until length is less than MAX_FILE_NAME_LENGTH. 109 | """ 110 | with py7zr.SevenZipFile(archive_name, "r") as z: 111 | allfiles = z.getnames() 112 | files_to_extract = [] 113 | exception_file = [] 114 | targets = [f for f in allfiles if filter_function(f)] 115 | for i in targets: 116 | if len(Path(i).name) < MAX_FILE_NAME_LENGTH: 117 | files_to_extract.append(i) 118 | else: 119 | exception_file.append(i) 120 | 121 | with py7zr.SevenZipFile(archive_name, "r") as z: 122 | z.extract(targets=files_to_extract, path=output_dir) 123 | z.reset() 124 | res = z.read(targets=exception_file) 125 | for data in res: 126 | new_path = output_dir / Path(data).parent 127 | new_path.mkdir(parents=True, exist_ok=True) 128 | new_filename = Path(data).name[(len(Path(data).name) - MAX_FILE_NAME_LENGTH) :] 129 | new_filepath = new_path / new_filename 130 | with new_filepath.open("wb") as result_file: 131 | result_file.write(res[data].read()) 132 | 133 | 134 | class Event: 135 | def __init__( 136 | self, 137 | timestamp: datetime | None = None, 138 | timestamp_str: str = "", 139 | sourcetype: str = "", 140 | description: str = "", 141 | source: str = "", 142 | ) -> None: 143 | """Construct.""" 144 | self.timestamp = timestamp 145 | self.timestamp_str = timestamp_str 146 | self.sourcetype = sourcetype 147 | self.description = description 148 | self.source = source 149 | 150 | 151 | class SortedChunk: 152 | """Store events temporary in a sorted way. 153 | 154 | This class describes an object that is used to store the events temporary in a sorted way. 155 | When the number of events reaches the limit (10 000 be default), the content of the chunk 156 | is written an disk. 157 | """ 158 | 159 | def __init__(self, max_size: int) -> None: 160 | """Construct.""" 161 | self.raw_lines: list[str] = [] 162 | self.max_size: int = max_size 163 | 164 | def write(self, s: str) -> None: 165 | """Write in sorted chink.""" 166 | bisect.insort(self.raw_lines, s) 167 | 168 | def new_chunk(self) -> None: 169 | """Create new chunk.""" 170 | self.raw_lines = [] 171 | 172 | def is_full(self) -> bool: 173 | """Check if chunk is full.""" 174 | return len(self.raw_lines) > self.max_size 175 | 176 | 177 | class GenericToTimeline: 178 | def __init__( 179 | self, 180 | config: PluginConfig, 181 | orclist: list[str], 182 | output_file_path: str, 183 | hostname: str, 184 | tmp_dir: str, 185 | lock: Lock | None, 186 | ) -> None: 187 | """Construct.""" 188 | self.orclist = orclist 189 | self.hostname = hostname 190 | self.lock = lock 191 | self.written_rows_count = 0 192 | self.current_chunk = SortedChunk(10000) # Default 10,000 lines at once 193 | self.output_file_nb = 0 194 | self.output_files_list: list[Path] = [] 195 | self.nonce = "".join(random.choices(string.ascii_uppercase + string.digits, k=5)) # noqa: S311 196 | self.output_file_path = f"{output_file_path}_{self.nonce}_nb{self.output_file_nb}" 197 | self.output_file_prefix = self.output_file_path 198 | 199 | self.archives: list[str] = [] 200 | self.sub_archives: list[str] = [] 201 | self.match_pattern = "" 202 | self.file_header = bytes([]) 203 | 204 | self.sourcetype = "" 205 | 206 | self.tmpDirectory = tempfile.TemporaryDirectory( 207 | dir=tmp_dir, 208 | prefix=f"orc2timeline_{self.__class__.__name__}_", 209 | ) 210 | 211 | self.logger = logging.getLogger() 212 | self.eventList: set[dict[str, str]] = set() 213 | self.originalPath: dict[str, str] = {} 214 | 215 | self._load_config(config) 216 | 217 | def _setup_next_output_file(self) -> None: 218 | """Switch output file to new one. 219 | 220 | When writing lines during the plugin execution, lines are not written straight ahead. 221 | Instead they are stored in a Chunk object (which hold sorted lines in memory), when this Chunk 222 | is full (10 000 events by default) all the events are written to disk and a new Chunk 223 | will be used (with a new output file). 224 | 225 | It is compulsary that a new file is used at every new chunk because the functions written 226 | in core.py consider that every subtimeline is already sorted when create the final timeline. 227 | 228 | File names follow this rule: timeline_{hostname}_{plugin_name}_nb{file_number} 229 | """ 230 | self.output_file_nb += 1 231 | self.output_file_path = f"{self.output_file_prefix}_{self.nonce}_nb{self.output_file_nb}" 232 | self.fd_plugin_file = Path(self.output_file_path).open("w", encoding="utf-8", newline="") # noqa: SIM115 233 | self.output_files_list.append(Path(self.output_file_path)) 234 | 235 | def _delete_all_result_files(self) -> None: 236 | """Flush current chunk and delete all result files. 237 | 238 | This is can be necessary when an unpredictable error occurs during plugin execution. 239 | After calling this function, processing can be re-run from the beginning without worrying 240 | of previous execution. 241 | """ 242 | self._flush_chunk() 243 | for output_file in self.output_files_list: 244 | self.logger.critical("Delete %s", self.output_files_list) 245 | output_file.unlink() 246 | self.logger.critical("Reinitialization of chunks") 247 | 248 | self.current_chunk = SortedChunk(10000) 249 | self.csvWriter = csv.writer(self.current_chunk, delimiter=",", quotechar='"') 250 | self.output_files_list = [] 251 | self._setup_next_output_file() 252 | 253 | def _deflate_archives(self) -> None: 254 | """Deflate files from Orc. 255 | 256 | For all Orcs contained in self.orclist: 257 | Select archive that match self.archives. 258 | Deflate sub_archive from archive 259 | Deflate files that match self.match_pattern from sub_archive in extraction_path 260 | 261 | extraction_path is built ad it follows: 262 | {tmp_dir}/{orc2timeline_tmp_dir}/{plugin_tmp_dir}/all_extraction 263 | """ 264 | for orc, archive in _get_relevant_archives(self.orclist, self.archives): 265 | path_to_create = Path(self.tmpDirectory.name) / archive 266 | if not path_to_create.exists(): 267 | path_to_create.mkdir(parents=True) 268 | extraction_path = path_to_create / "all_extraction" 269 | if len(self.sub_archives) == 0: 270 | # we look for matching files without subarchive 271 | try: 272 | _extract_matching_files_from_archive(orc, extraction_path, self.match_pattern) 273 | except Exception as e: # noqa: BLE001 274 | self.logger.critical( 275 | "Unable to open %s archive. Error: %s", 276 | orc, 277 | e, 278 | ) 279 | else: 280 | for sub_archive in self.sub_archives: 281 | try: 282 | sub_extraction_path = ( 283 | Path(self.tmpDirectory.name) / archive / (sub_archive + "_" + str(time.time())) 284 | ) 285 | 286 | _extract_sub_archives_from_archive(orc, sub_extraction_path, sub_archive) 287 | for f2 in Path(sub_extraction_path).glob("*"): 288 | if f2.name.casefold() == sub_archive.casefold(): 289 | _extract_matching_files_from_archive(str(f2), extraction_path, self.match_pattern) 290 | _extract_getthis_file_from_archive(str(f2), extraction_path) 291 | self._parse_then_delete_getthis_file( 292 | extraction_path / "GetThis.csv", 293 | ) 294 | _delete_everything_in_dir(sub_extraction_path) 295 | except Exception as e: # noqa: BLE001 296 | err_msg = f"Unable to deflate {sub_archive} from {orc}. Error: {e}" 297 | if "Invalid argument" in str(e): 298 | err_msg += " (this may happen when compressed file is empty)" 299 | self.logger.critical(err_msg) 300 | 301 | def _parse_artefact(self, artefact: Path) -> None: 302 | """Artefact specific function. 303 | 304 | The content of this function is specific to every plugin. Events will not be parsed 305 | the same way LNK files are. Therefore this function should not be implemented in 306 | the Generic plugin. 307 | 308 | When writing a specific plugin, this function is the only one that should be overwritten. 309 | """ 310 | 311 | def _get_original_path(self, path: Path) -> str: 312 | original_formatted_path = str(path.relative_to(Path(self.tmpDirectory.name)).as_posix()) 313 | return str(self.originalPath.get(path.name, original_formatted_path)) 314 | 315 | def _parse_then_delete_getthis_file(self, path_to_file: Path) -> None: 316 | try: 317 | with Path(path_to_file).open(encoding="utf-8") as infile: 318 | for line in csv.reader(infile): 319 | self.originalPath[Path(line[5].replace("\\", "/")).name] = line[4] 320 | path_to_file.unlink() 321 | except Exception as e: # noqa: BLE001 322 | self.logger.debug(str(e)) 323 | 324 | def _parse_all_artefacts(self) -> None: 325 | for art in Path(self.tmpDirectory.name).glob("**/all_extraction/**/*"): 326 | if not art.is_file(): 327 | continue 328 | file_path_split = Path(art).parts 329 | try: 330 | file_name = file_path_split[-1] 331 | archive_name = "unknown" 332 | # Get archive name from artefact path (for logging purposes only) 333 | for i in range(len(file_path_split)): 334 | if file_path_split[i] == "all_extraction": 335 | archive_name = file_path_split[i - 1] 336 | except Exception: # noqa: BLE001 337 | archive_name = "unknown" 338 | self.logger.debug( 339 | "[%s] [%s] parsing : %s", 340 | self.hostname, 341 | archive_name, 342 | file_name, 343 | ) 344 | self._parse_artefact(art) 345 | 346 | def _add_event(self, event: Event) -> None: 347 | timestamp = "" 348 | 349 | if event.timestamp is None and event.timestamp_str == "": 350 | self.logger.critical("None Timestamp given for event %s", event) 351 | timestamp = datetime.fromtimestamp(0, tz=pytz.UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] 352 | 353 | if event.timestamp_str != "": 354 | timestamp = event.timestamp_str 355 | elif event.timestamp is not None: 356 | try: 357 | timestamp = event.timestamp.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] 358 | except ValueError as e: 359 | self.logger.critical(e) 360 | timestamp = datetime.fromtimestamp(0, tz=pytz.UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] 361 | 362 | self._write_line( 363 | timestamp, 364 | self.sourcetype, 365 | event.description, 366 | event.source, 367 | ) 368 | 369 | def _write_line(self, date: str, sourcetype: str, description: str, sourcefile: str) -> None: 370 | """Write event to timeline. 371 | 372 | The events are not written to disk along the way, instead they are store in a chunk object 373 | (in memory), when the chunk reaches the event number limit, all the events are written to 374 | disk in a sorted way. A new chunk will be used and its content will be written in another 375 | file. 376 | """ 377 | # sanitize output 378 | rows_to_write = [row.replace("\n", "\\n") for row in (date, self.hostname, sourcetype, description, sourcefile)] 379 | self.csvWriter.writerow(rows_to_write) 380 | if self.current_chunk.is_full(): 381 | self._flush_chunk_and_new_chunk() 382 | 383 | def _flush_chunk(self) -> None: 384 | self.fd_plugin_file.writelines(self.current_chunk.raw_lines) 385 | self.fd_plugin_file.close() 386 | self.written_rows_count += len(self.current_chunk.raw_lines) 387 | 388 | def _flush_chunk_and_new_chunk(self) -> None: 389 | self._flush_chunk() 390 | self.current_chunk.new_chunk() 391 | self._setup_next_output_file() 392 | 393 | def _filter_files_based_on_first_bytes(self) -> None: 394 | if len(self.file_header) == 0: 395 | return 396 | 397 | for art in Path(self.tmpDirectory.name).glob("**/all_extraction/**/*"): 398 | if not art.is_file(): 399 | continue 400 | must_delete = False 401 | with Path(art).open("rb") as fd: 402 | first_bytes_of_file = fd.read(len(self.file_header)) 403 | if first_bytes_of_file != self.file_header: 404 | must_delete = True 405 | if must_delete: 406 | art.unlink() 407 | 408 | def _load_config(self, config: PluginConfig) -> None: 409 | self.archives = config.archives 410 | self.sub_archives = config.sub_archives 411 | self.match_pattern = config.match_pattern 412 | self.sourcetype = config.sourcetype 413 | 414 | def add_to_timeline(self) -> int: 415 | """Create the result file with the result of argument parsing.""" 416 | self.logger.debug("%s started", self.__class__.__name__) 417 | self.csvWriter = csv.writer(self.current_chunk, delimiter=",", quotechar='"') 418 | self._setup_next_output_file() 419 | self._deflate_archives() 420 | self._filter_files_based_on_first_bytes() 421 | self._parse_all_artefacts() 422 | self._flush_chunk() 423 | self.logger.debug("%s ended", self.__class__.__name__) 424 | return self.written_rows_count 425 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/I30InfoToTimeline.py: -------------------------------------------------------------------------------- 1 | """Plugin to parse I30Info files.""" 2 | 3 | from __future__ import annotations 4 | 5 | import _csv 6 | import csv 7 | import string 8 | from io import StringIO 9 | from pathlib import Path 10 | from typing import TYPE_CHECKING 11 | 12 | if TYPE_CHECKING: 13 | from threading import Lock 14 | 15 | from orc2timeline.config import PluginConfig 16 | 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 18 | 19 | 20 | class I30InfoToTimeline(GenericToTimeline): 21 | def __init__( 22 | self, 23 | config: PluginConfig, 24 | orclist: list[str], 25 | output_file_path: str, 26 | hostname: str, 27 | tmp_dir: str, 28 | lock: Lock, 29 | ) -> None: 30 | """Construct.""" 31 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 32 | 33 | def _generate_one_csv_line( 34 | self, 35 | i30_info_row: dict[str, str], 36 | i30_info_path_source: str, 37 | same_timestamps_group: list[str], 38 | ref_timestamp: str, 39 | ) -> None: 40 | fn = [ 41 | ("FileNameLastModificationDate", "M"), 42 | ("FileNameLastAccessDate", "A"), 43 | ("FileNameLastAttrModificationDate", "C"), 44 | ("FileNameCreationDate", "B"), 45 | ] 46 | 47 | event = Event(timestamp_str=ref_timestamp, source=i30_info_path_source) 48 | meaning = "" 49 | for t in fn: 50 | if t[0] in same_timestamps_group: 51 | meaning += t[1] 52 | else: 53 | meaning += "." 54 | 55 | event.description = "Entry in slackspace - $FN: {} - Name: {} - MFT segment num: {} - Parent FRN: {} ".format( 56 | meaning, 57 | i30_info_row["Name"], 58 | str(int(i30_info_row["FRN"], 16) & 0xFFFFFFFFFFFF), 59 | i30_info_row["ParentFRN"], 60 | ) 61 | self._add_event(event) 62 | 63 | def _parse_line(self, i30_info_row: dict[str, str], artefact: Path) -> None: 64 | # CarvedEntry 65 | if "CarvedEntry" in i30_info_row and i30_info_row["CarvedEntry"] == "Y": 66 | timestamp_fields = [ 67 | "FileNameCreationDate", 68 | "FileNameLastModificationDate", 69 | "FileNameLastAccessDate", 70 | "FileNameLastAttrModificationDate", 71 | ] 72 | while len(timestamp_fields) > 0: 73 | ref_field = timestamp_fields.pop() 74 | ref_timestamp = i30_info_row[ref_field] 75 | same_timestamps_group = [ref_field] 76 | same_timestamps_group.extend( 77 | field for field in timestamp_fields if ref_timestamp == i30_info_row[field] 78 | ) 79 | 80 | # generate an event for a groupe sharing the same timestamp 81 | self._generate_one_csv_line( 82 | i30_info_row, 83 | Path(artefact).name, 84 | same_timestamps_group, 85 | ref_timestamp, 86 | ) 87 | 88 | for field in same_timestamps_group: 89 | if field != ref_field: 90 | timestamp_fields.remove(field) 91 | 92 | def _parse_artefact(self, artefact: Path) -> None: 93 | # It is compulsary to use new chunk because if an error occurs 94 | # all files in self.output_files_list will be deleted an artefact 95 | # will be reprocessed. 96 | # Processing as it follow ensures that events extracted from previous 97 | # artefacts will not be deleted is an error occurs while processing 98 | # current artefact. 99 | self.output_files_list = [] 100 | self._flush_chunk_and_new_chunk() 101 | try: 102 | with Path(artefact).open(encoding="utf-8") as fd: 103 | csv_reader = csv.DictReader(fd) 104 | for i30_info_row in csv_reader: 105 | self._parse_line(i30_info_row, artefact) 106 | # when file contains NULL character, old versions of csv can crash 107 | except (_csv.Error, UnicodeDecodeError) as e: 108 | with Path(artefact).open(encoding="utf-8", errors="ignore") as fd: 109 | self.logger.critical("csv error caught alternative way for host %s: %s", self.hostname, e) 110 | self._delete_all_result_files() 111 | data = fd.read() 112 | clean_data = "".join(c for c in data if c in string.printable) 113 | data_io = StringIO(clean_data) 114 | csv_reader = csv.DictReader(data_io) 115 | for i30_info_row in csv_reader: 116 | self._parse_line(i30_info_row, artefact) 117 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/NTFSInfoToTimeline.py: -------------------------------------------------------------------------------- 1 | """Plugin to parse NTFSInfo files.""" 2 | 3 | from __future__ import annotations 4 | 5 | import _csv 6 | import csv 7 | import string 8 | from io import StringIO 9 | from pathlib import Path 10 | from typing import TYPE_CHECKING, Any 11 | 12 | if TYPE_CHECKING: 13 | from threading import Lock 14 | 15 | from orc2timeline.config import PluginConfig 16 | 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 18 | 19 | 20 | class NTFSInfoToTimeline(GenericToTimeline): 21 | def __init__( 22 | self, 23 | config: PluginConfig, 24 | orclist: list[str], 25 | output_file_path: str, 26 | hostname: str, 27 | tmp_dir: str, 28 | lock: Lock, 29 | ) -> None: 30 | """Construct.""" 31 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 32 | 33 | def _set_separator(self, parentname: str) -> None: 34 | if len(parentname) == 0: 35 | self.separator = "\\" 36 | elif len(parentname) == 1: 37 | if parentname != "\\": 38 | self.separator = "\\" 39 | else: 40 | self.separator = "" 41 | elif parentname[-1] != "\\": 42 | self.separator = "\\" 43 | else: 44 | self.separator = "" 45 | 46 | def _generate_one_csv_line( 47 | self, 48 | ntfs_info_row: dict[str, str], 49 | ntfs_info_path_source: str, 50 | same_timestamps_group: list[str], 51 | ref_timestamp: str, 52 | ) -> None: 53 | si = [ 54 | ("LastModificationDate", "M"), 55 | ("LastAccessDate", "A"), 56 | ("LastAttrChangeDate", "C"), 57 | ("CreationDate", "B"), 58 | ] 59 | fn = [ 60 | ("FileNameLastModificationDate", "M"), 61 | ("FileNameLastAccessDate", "A"), 62 | ("FileNameLastAttrModificationDate", "C"), 63 | ("FileNameCreationDate", "B"), 64 | ] 65 | 66 | event = Event(timestamp_str=ref_timestamp, source=ntfs_info_path_source) 67 | 68 | fn_flag = ntfs_info_row.get("FilenameFlags") 69 | if fn_flag is not None and fn_flag == "2": 70 | return 71 | 72 | meaning = "$SI: " 73 | for t in si: 74 | if t[0] in same_timestamps_group: 75 | meaning += t[1] 76 | else: 77 | meaning += "." 78 | meaning += " - $FN: " 79 | for t in fn: 80 | if t[0] in same_timestamps_group: 81 | meaning += t[1] 82 | else: 83 | meaning += "." 84 | 85 | if not hasattr(self, "separator"): 86 | self._set_separator(ntfs_info_row["ParentName"]) 87 | name = ntfs_info_row["ParentName"] + self.separator + ntfs_info_row["File"] 88 | 89 | size_in_bytes: str | None = "unknown" 90 | 91 | try: 92 | size_in_bytes = ntfs_info_row.get("SizeInBytes") 93 | except ValueError as e: 94 | self.logger.debug("Error while getting FRN or Size. Error: %s", e) 95 | 96 | event.description = f"{meaning} - Name: {name} - Size in bytes: {size_in_bytes}" 97 | self._add_event(event) 98 | 99 | def __parse_artefact(self, csv_reader: Any, artefact: Path) -> None: # noqa: ANN401 100 | for ntfs_info_row in csv_reader: 101 | timestamp_fields = [ 102 | "CreationDate", 103 | "LastModificationDate", 104 | "LastAccessDate", 105 | "LastAttrChangeDate", 106 | "FileNameCreationDate", 107 | "FileNameLastModificationDate", 108 | "FileNameLastAccessDate", 109 | "FileNameLastAttrModificationDate", 110 | ] 111 | 112 | while len(timestamp_fields) > 0: 113 | ref_field = timestamp_fields.pop() 114 | ref_timestamp = ntfs_info_row[ref_field] 115 | same_timestamps_group = [ref_field] 116 | same_timestamps_group.extend( 117 | field for field in timestamp_fields if ref_timestamp == ntfs_info_row[field] 118 | ) 119 | 120 | self._generate_one_csv_line( 121 | ntfs_info_row, 122 | Path(artefact).name, 123 | same_timestamps_group, 124 | ref_timestamp, 125 | ) 126 | 127 | for field in same_timestamps_group: 128 | if field != ref_field: 129 | timestamp_fields.remove(field) 130 | 131 | def _parse_artefact(self, artefact: Path) -> None: 132 | # It is compulsary to use new chunk because if an error occurs 133 | # all files in self.output_files_list will be deleted an artefact 134 | # will be reprocessed. 135 | # Processing as it follow ensures that events extracted from previous 136 | # artefacts will not be deleted is an error occurs while processing 137 | # current artefact. 138 | self.output_files_list = [] 139 | self._flush_chunk_and_new_chunk() 140 | try: 141 | with Path(artefact).open(encoding="utf-8") as fd: 142 | csv_reader = csv.DictReader(fd) 143 | self.__parse_artefact(csv_reader, artefact) 144 | # when file contains NULL character, old versions of csv can crash 145 | except (_csv.Error, UnicodeDecodeError) as e: 146 | with Path(artefact).open(encoding="utf-8", errors="ignore") as fd: 147 | self.logger.critical("csv error caught alternative way for host %s: %s", self.hostname, e) 148 | self._delete_all_result_files() 149 | data = fd.read() 150 | clean_data = "".join(c for c in data if c in string.printable) 151 | data_io = StringIO(clean_data) 152 | csv_reader = csv.DictReader(data_io) 153 | self.__parse_artefact(csv_reader, artefact) 154 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/RegistryToTimeline-important-keys.txt: -------------------------------------------------------------------------------- 1 | HKEY_CURRENT_USER\Environment 2 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\Run 3 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\RunEx 4 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnce 5 | HKEY_CURRENT_USER\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnceEx 6 | HKEY_CURRENT_USER\Software\Microsoft\HtmlHelp Author 7 | HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Windows 8 | HKEY_CURRENT_USER\Software\Microsoft\Windows NT\CurrentVersion\Winlogon 9 | HKEY_CURRENT_USER\Software\Microsoft\Windows\Windows Error Reporting\Hangs 10 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\Run 11 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\RunEx 12 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnce 13 | HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\RunOnceEx 14 | HKEY_LOCAL_MACHINE\SYSTEM\ControlSet001\Control\Lsa 15 | HKEY_LOCAL_MACHINE\SYSTEM\ControlSet002\Control\Lsa 16 | HKEY_LOCAL_MACHINE\SYSTEM\ControlSet003\Control\Lsa 17 | HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\Lsa 18 | HKEY_LOCAL_MACHINE\Software\Classes\CLSID\{52A2AAAE-085D-4187-97EA-8C30DB990436}\InprocServer32 19 | HKEY_LOCAL_MACHINE\Software\Classes\Wow6432Node\CLSID\{52A2AAAE-085D-4187-97EA-8C30DB990436}\InprocServer32 20 | HKEY_LOCAL_MACHINE\Software\Microsoft\Command Processor 21 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\AeDebug 22 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Windows 23 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon 24 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon\Notify 25 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon\Shell 26 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows NT\CurrentVersion\Winlogon\Userinit 27 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows\CurrentVersion\Explorer\MyComputer 28 | HKEY_LOCAL_MACHINE\Software\Microsoft\Windows\Windows Error Reporting\Hangs 29 | HKEY_LOCAL_MACHINE\Software\WOW6432Node\Microsoft\Windows NT\CurrentVersion\AeDebug 30 | HKEY_LOCAL_MACHINE\Software\WOW6432Node\Microsoft\Windows NT\CurrentVersion\Winlogon 31 | HKEY_LOCAL_MACHINE\Software\Wow6432Node\Microsoft\Command Processor 32 | HKEY_LOCAL_MACHINE\Software\Wow6432Node\Microsoft\Windows NT\CurrentVersion\Windows 33 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\ContentIndex\Language 34 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\LsaExtensionConfig\LsaSrv 35 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\Terminal Server\WinStations\RDP-Tcp 36 | HKEY_LOCAL_MACHINE\System\ControlSet001\Services\DNS\Parameters 37 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\ContentIndex\Language 38 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\LsaExtensionConfig\LsaSrv 39 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\Terminal Server\WinStations\RDP-Tcp 40 | HKEY_LOCAL_MACHINE\System\ControlSet002\Services\DNS\Parameters 41 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\ContentIndex\Language 42 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\LsaExtensionConfig\LsaSrv 43 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\Terminal Server\WinStations\RDP-Tcp 44 | HKEY_LOCAL_MACHINE\System\ControlSet003\Services\DNS\Parameters 45 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\LsaExtensionConfig\LsaSrv 46 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\LsaExtensionConfig\LsaSrv 47 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\LsaExtensionConfig\LsaSrv 48 | HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\LsaExtensionConfig\LsaSrv 49 | HKEY_LOCAL_MACHINE\System\ControlSet001\Control\Session Manager\AppCertDlls 50 | HKEY_LOCAL_MACHINE\System\ControlSet002\Control\Session Manager\AppCertDlls 51 | HKEY_LOCAL_MACHINE\System\ControlSet003\Control\Session Manager\AppCertDlls 52 | HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\Session Manager\AppCertDlls 53 | HKEY_LOCAL_MACHINE\System\ControlSet001\Services\DNS\Parameters 54 | HKEY_LOCAL_MACHINE\System\ControlSet002\Services\DNS\Parameters 55 | HKEY_LOCAL_MACHINE\System\ControlSet003\Services\DNS\Parameters 56 | HKEY_LOCAL_MACHINE\System\CurrentControlSet\Services\DNS\Parameters 57 | \Environment 58 | \Software\Microsoft\Command Processor 59 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/RegistryToTimeline.py: -------------------------------------------------------------------------------- 1 | """Plugin to parse hives.""" 2 | 3 | from __future__ import annotations 4 | 5 | from pathlib import Path 6 | from typing import TYPE_CHECKING 7 | 8 | if TYPE_CHECKING: 9 | from threading import Lock 10 | 11 | from orc2timeline.config import PluginConfig 12 | 13 | from dfwinreg import definitions as dfwinreg_definition 14 | from dfwinreg import regf as dfwinreg_regf 15 | from dfwinreg import registry as dfwinreg_registry 16 | 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 18 | 19 | Type = {} 20 | Type[0x0001] = "RegSZ" 21 | Type[0x0002] = "RegExpandSZ" 22 | Type[0x0003] = "RegBin" 23 | Type[0x0004] = "RegDWord" 24 | Type[0x0007] = "RegMultiSZ" 25 | Type[0x000B] = "RegQWord" 26 | Type[0x0000] = "RegNone" 27 | Type[0x0005] = "RegBigEndian" 28 | Type[0x0006] = "RegLink" 29 | Type[0x0008] = "RegResourceList" 30 | Type[0x0009] = "RegFullResourceDescriptor" 31 | Type[0x000A] = "RegResourceRequirementsList" 32 | Type[0x0010] = "RegFileTime" 33 | 34 | 35 | def _decode_utf16le(s: bytes) -> str: 36 | if b"\x00\x00" in s: 37 | index = s.index(b"\x00\x00") 38 | if index > 2: # noqa: PLR2004 39 | if s[index - 2] != b"\x00"[0]: # py2+3 # noqa: SIM108 40 | # 61 00 62 00 63 64 00 00 41 | # ^ ^-- end of string 42 | # +-- index 43 | s = s[: index + 2] 44 | else: 45 | # 61 00 62 00 63 00 00 00 46 | # ^ ^-- end of string 47 | # +-- index 48 | s = s[: index + 3] 49 | if (len(s) % 2) != 0: 50 | s = s + b"\x00" 51 | res = s.decode("utf16", errors="ignore") 52 | return res.partition("\x00")[0] 53 | 54 | 55 | def _readable_multi_sz(value: bytes) -> str: 56 | new_value = value[:-4] 57 | res = "" 58 | for word in new_value.split(b"\x00\x00\x00"): 59 | res += _decode_utf16le(word) 60 | res += "|" 61 | 62 | return res[:-1] 63 | 64 | 65 | def _readable_reg_value(value: dfwinreg_regf.REGFWinRegistryValue) -> bytes | str: 66 | simple_types = {dfwinreg_definition.REG_EXPAND_SZ, dfwinreg_definition.REG_SZ, dfwinreg_definition.REG_LINK} 67 | if value.data_type in simple_types: 68 | return _decode_utf16le(value.data) 69 | if value.data_type == dfwinreg_definition.REG_MULTI_SZ: 70 | return _readable_multi_sz(value.data) 71 | 72 | return bytes(value.data) 73 | 74 | 75 | class RegistryToTimeline(GenericToTimeline): 76 | def __init__( 77 | self, 78 | config: PluginConfig, 79 | orclist: list[str], 80 | output_file_path: str, 81 | hostname: str, 82 | tmp_dir: str, 83 | lock: Lock, 84 | ) -> None: 85 | """Construct.""" 86 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 87 | 88 | self.file_header = bytes([0x72, 0x65, 0x67, 0x66]) 89 | 90 | self.importantKeysFile = Path(Path(__file__).parent) / "RegistryToTimeline-important-keys.txt" 91 | self.importantKeys = self._parse_important_keys_file(self.importantKeysFile) 92 | 93 | def _parse_important_keys_file(self, file_path: Path) -> list[str]: 94 | result = [] 95 | if file_path.exists(): 96 | with Path(file_path).open() as f: 97 | for line in f: 98 | my_line = line.strip() 99 | if my_line.startswith("#") or len(my_line) == 0: 100 | continue 101 | 102 | result.append(my_line) 103 | return result 104 | 105 | def _print_only_key(self, key: dfwinreg_regf.REGFWinRegistryValue, artefact: Path) -> None: 106 | try: 107 | event = Event( 108 | timestamp_str=key.last_written_time.CopyToDateTimeString()[:-4], 109 | source=self._get_original_path(artefact), 110 | description=key.path, 111 | ) 112 | self._add_event(event) 113 | except Exception as e: # noqa: BLE001 114 | key_path = "Unknown" 115 | if key_path: 116 | key_path = key.path 117 | self.logger.critical("Unable to print key %s from %s. Error: %s", key_path, artefact, e) 118 | 119 | def _print_all_keyvalues(self, key: dfwinreg_regf.REGFWinRegistryValue, artefact: Path) -> None: 120 | for value in key.GetValues(): 121 | readable_type = Type[value.data_type] 122 | readable_data = _readable_reg_value(value) 123 | event = Event( 124 | timestamp_str=key.last_written_time.CopyToDateTimeString()[:-4], 125 | source=self._get_original_path(artefact), 126 | description=( 127 | f"KeyPath: {key.path} - ValueName: {value.name} - " 128 | f"ValueType: {readable_type} - ValueData: {readable_data!s}" 129 | ), 130 | ) 131 | self._add_event(event) 132 | 133 | def _parse_key(self, key: dfwinreg_regf.REGFWinRegistryValue, artefact: Path) -> None: 134 | if key is not None: 135 | self._print_only_key(key, artefact) 136 | if key.path in self.importantKeys: 137 | self._print_all_keyvalues(key, artefact) 138 | 139 | for subkey_index in range(key.number_of_subkeys): 140 | try: 141 | subkey = key.GetSubkeyByIndex(subkey_index) 142 | self._parse_key(subkey, artefact) 143 | except OSError as e: 144 | self.logger.debug("Error while parsing registry keys: %s", e) 145 | 146 | def _parse_artefact(self, artefact: Path) -> None: 147 | with Path(artefact).open("rb") as f: 148 | try: 149 | reg_file = dfwinreg_regf.REGFWinRegistryFile(emulate_virtual_keys=False) 150 | reg_file.Open(f) 151 | win_registry = dfwinreg_registry.WinRegistry() 152 | key_path_prefix = win_registry.GetRegistryFileMapping(reg_file) 153 | reg_file.SetKeyPathPrefix(key_path_prefix) 154 | root_key = reg_file.GetRootKey() 155 | self._parse_key(root_key, artefact) 156 | except Exception as e: # noqa: BLE001 157 | self.logger.warning( 158 | "Error while parsing %s: %s", 159 | Path(artefact).name, 160 | e, 161 | ) 162 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/USNInfoToTimeline.py: -------------------------------------------------------------------------------- 1 | """Plugin to parse USNInfo files.""" 2 | 3 | from __future__ import annotations 4 | 5 | import _csv 6 | import csv 7 | import string 8 | from io import StringIO 9 | from pathlib import Path 10 | from typing import TYPE_CHECKING, Any 11 | 12 | if TYPE_CHECKING: 13 | from threading import Lock 14 | 15 | from orc2timeline.config import PluginConfig 16 | 17 | from orc2timeline.plugins.GenericToTimeline import Event, GenericToTimeline 18 | 19 | 20 | class USNInfoToTimeline(GenericToTimeline): 21 | def __init__( 22 | self, 23 | config: PluginConfig, 24 | orclist: list[str], 25 | output_file_path: str, 26 | hostname: str, 27 | tmp_dir: str, 28 | lock: Lock, 29 | ) -> None: 30 | """Construct.""" 31 | super().__init__(config, orclist, output_file_path, hostname, tmp_dir, lock) 32 | 33 | def _parse_usn_file(self, csv_reader: Any, artefact: Path) -> None: # noqa: ANN401 34 | for row in csv_reader: 35 | # Not pretty but it's a way to skip header 36 | if row["USN"] == "USN": 37 | continue 38 | event = Event( 39 | timestamp_str=row["TimeStamp"], 40 | source=Path(artefact).name, 41 | ) 42 | mft_segment_number = 0 43 | try: 44 | mft_segment_number = int(row["FRN"], 16) & 0xFFFFFFFF 45 | except ValueError as e: 46 | self.logger.warning("Error while getting FRN. Error: %s", e) 47 | full_path = row["FullPath"] 48 | reason = row["Reason"] 49 | event.description = f"{full_path} - {reason} - MFT segment num : {mft_segment_number}" 50 | 51 | self._add_event(event) 52 | 53 | def _parse_artefact(self, artefact: Path) -> None: 54 | # It is compulsary to use new chunk because if an error occurs 55 | # all files in self.output_files_list will be deleted an artefact 56 | # will be reprocessed. 57 | # Processing as it follow ensures that events extracted from previous 58 | # artefacts will not be deleted is an error occurs while processing 59 | # current artefact. 60 | self.output_files_list = [] 61 | self._flush_chunk_and_new_chunk() 62 | try: 63 | with Path(artefact).open(encoding="utf-8") as fd: 64 | csv_reader = csv.DictReader(fd) 65 | self._parse_usn_file(csv_reader, artefact) 66 | # when file contains NULL character, old versions of csv can crash 67 | except (_csv.Error, UnicodeDecodeError) as e: 68 | with Path(artefact).open(encoding="utf-8", errors="ignore") as fd: 69 | self.logger.critical("csv error caught alternative way for host %s: %s", self.hostname, e) 70 | self._delete_all_result_files() 71 | data = fd.read() 72 | clean_data = "".join(c for c in data if c in string.printable) 73 | data_io = StringIO(clean_data) 74 | csv_reader = csv.DictReader(data_io) 75 | self._parse_usn_file(csv_reader, artefact) 76 | -------------------------------------------------------------------------------- /src/orc2timeline/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | """Directory with plugins code.""" 2 | -------------------------------------------------------------------------------- /src/orc2timeline/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/src/orc2timeline/py.typed -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Package for test project.""" 2 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Configuration for all tests.""" 2 | 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | 8 | @pytest.fixture 9 | def resources_path() -> Path: 10 | """Fixture for create a path to test resources.""" 11 | return Path(__file__).parent / "resources" 12 | -------------------------------------------------------------------------------- /tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z -------------------------------------------------------------------------------- /tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z -------------------------------------------------------------------------------- /tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z -------------------------------------------------------------------------------- /tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z -------------------------------------------------------------------------------- /tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z -------------------------------------------------------------------------------- /tests/data/null_csv/ORC_Server_FAKEMACHINE_Detail.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_Detail.7z -------------------------------------------------------------------------------- /tests/data/null_csv/ORC_Server_FAKEMACHINE_General.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_General.7z -------------------------------------------------------------------------------- /tests/data/null_csv/ORC_Server_FAKEMACHINE_Little.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_Little.7z -------------------------------------------------------------------------------- /tests/data/null_csv/ORC_Server_FAKEMACHINE_Memory.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_Memory.7z -------------------------------------------------------------------------------- /tests/data/null_csv/ORC_Server_FAKEMACHINE_SAM.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/data/null_csv/ORC_Server_FAKEMACHINE_SAM.7z -------------------------------------------------------------------------------- /tests/output/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ANSSI-FR/orc2timeline/42c38c03fba5d6f80d0ae5f083b7e9e8c14df0d2/tests/output/.gitignore -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | """Test for command line interface.""" 2 | 3 | from __future__ import annotations 4 | 5 | import gzip 6 | import hashlib 7 | import subprocess 8 | import sys 9 | from pathlib import Path 10 | 11 | from click.testing import CliRunner 12 | 13 | from orc2timeline import __version__, entrypoint 14 | 15 | 16 | def _zcat_and_sha1(file: str) -> str: 17 | buf_size = 65536 18 | with gzip.open(file, "rb") as fd: 19 | my_sha1 = hashlib.sha1() # noqa: S324 20 | while True: 21 | data = fd.read(buf_size) 22 | if not data: 23 | break 24 | my_sha1.update(data) 25 | 26 | return str(my_sha1.hexdigest()) 27 | 28 | 29 | def _run_process(args: list[str]) -> tuple[str, str, int]: 30 | process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 31 | out, err = process.communicate() 32 | code = process.returncode 33 | return out.decode("utf"), err.decode("utf"), code 34 | 35 | 36 | def test_cli_version() -> None: 37 | """Test if the command line interface is installed correctly.""" 38 | ver = f"version {__version__}" 39 | out = subprocess.check_output( 40 | ( 41 | "orc2timeline", 42 | "--version", 43 | ), 44 | text=True, 45 | shell=False, 46 | ) 47 | assert ver in out 48 | out = subprocess.check_output( 49 | ( 50 | sys.executable, 51 | "-m", 52 | "orc2timeline", 53 | "--version", 54 | ), 55 | text=True, 56 | shell=False, 57 | ) 58 | assert ver in out 59 | runner = CliRunner() 60 | result = runner.invoke(entrypoint, ["--version"]) 61 | out = result.output 62 | assert ver in out 63 | 64 | 65 | def test_import() -> None: 66 | """Test if module entrypoint has correct imports.""" 67 | import orc2timeline.__main__ # noqa: F401 68 | 69 | 70 | def test_dir_input_dir_is_a_file() -> None: 71 | """Test is error a properly triggered when a file is given instead of the input dir.""" 72 | out, err, code = _run_process( 73 | [ 74 | "orc2timeline", 75 | "process_dir", 76 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 77 | "tests/output/", 78 | ], 79 | ) 80 | 81 | assert ( 82 | "Invalid value for 'INPUT_DIR': Directory " 83 | "'tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z' is a file." in err 84 | ) 85 | 86 | 87 | def test_dir_output_dir_is_a_file() -> None: 88 | """Test is error a properly triggered when a file is given instead of the output dir.""" 89 | Path("tests/output/file_instead_of_dir").touch() 90 | out, err, code = _run_process( 91 | [ 92 | "orc2timeline", 93 | "process_dir", 94 | "tests/data/conf_7_archives/", 95 | "tests/output/file_instead_of_dir", 96 | ], 97 | ) 98 | 99 | Path("tests/output/file_instead_of_dir").unlink() 100 | 101 | assert "Invalid value for 'OUTPUT_DIR': Directory 'tests/output/file_instead_of_dir' is a file." in err 102 | 103 | 104 | def test_dir_no_job() -> None: 105 | """Test if processing directory with 1 job works correctly.""" 106 | out, err, code = _run_process( 107 | [ 108 | "orc2timeline", 109 | "process_dir", 110 | "--overwrite", 111 | "tests/data/conf_7_archives/", 112 | "tests/output/", 113 | ], 114 | ) 115 | 116 | assert "== Printing final summary of generated timelines:" in err 117 | assert "====== Hostname: FAKEMACHINE - 1149 events" in err 118 | assert "========== FAKEMACHINE RegistryToTimeline 683" in err 119 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in err 120 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err 121 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in err 122 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in err 123 | assert "====== Total for FAKEMACHINE: 1149" in err 124 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 125 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 126 | 127 | for f in Path("tests/output").glob("**"): 128 | if f.is_file(): 129 | f.unlink() 130 | 131 | 132 | def test_dir_1_jobs() -> None: 133 | """Test if processing directory with 1 job works correctly.""" 134 | out, err, code = _run_process( 135 | [ 136 | "orc2timeline", 137 | "process_dir", 138 | "--overwrite", 139 | "-j 1", 140 | "tests/data/conf_7_archives/", 141 | "tests/output/", 142 | ], 143 | ) 144 | 145 | assert "== Printing final summary of generated timelines:" in err 146 | assert "====== Hostname: FAKEMACHINE - 1149 events" in err 147 | assert "========== FAKEMACHINE RegistryToTimeline 683" in err 148 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in err 149 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err 150 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in err 151 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in err 152 | assert "====== Total for FAKEMACHINE: 1149" in err 153 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 154 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 155 | 156 | for f in Path("tests/output").glob("**"): 157 | if f.is_file(): 158 | f.unlink() 159 | 160 | 161 | def test_dir_5_jobs() -> None: 162 | """Test if processing directory with 5 jobs works correctly.""" 163 | out, err, code = _run_process( 164 | [ 165 | "orc2timeline", 166 | "process_dir", 167 | "--overwrite", 168 | "-j 5", 169 | "tests/data/conf_7_archives/", 170 | "tests/output/", 171 | ], 172 | ) 173 | 174 | assert "== Printing final summary of generated timelines:" in err 175 | assert "====== Hostname: FAKEMACHINE - 1149 events" in err 176 | assert "========== FAKEMACHINE RegistryToTimeline 683" in err 177 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in err 178 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err 179 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in err 180 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in err 181 | assert "====== Total for FAKEMACHINE: 1149" in err 182 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 183 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 184 | 185 | for f in Path("tests/output").glob("**"): 186 | if f.is_file(): 187 | f.unlink() 188 | 189 | 190 | def test_dir_twice_same_hostname() -> None: 191 | """Test if error is properly triggered when two Orc with the same hostname are in input directory.""" 192 | out, err, code = _run_process( 193 | [ 194 | "orc2timeline", 195 | "process_dir", 196 | "tests/data/", 197 | "tests/output/", 198 | ], 199 | ) 200 | 201 | assert "CRITICAL - Unable to process directory if the same host is used many times." in err 202 | assert "CRITICAL - Hint, these hosts seem to be the source of the problem : {'FAKEMACHINE'}" in err 203 | 204 | 205 | def test_dir_output_file_already_exists() -> None: 206 | """Test if the error meggase is displayed when process_dir is called for result file that already exists.""" 207 | Path("tests/output/FAKEMACHINE.csv.gz").touch() 208 | out, err, code = _run_process( 209 | [ 210 | "orc2timeline", 211 | "process_dir", 212 | "tests/data/conf_7_archives/", 213 | "tests/output/", 214 | ], 215 | ) 216 | 217 | assert ( 218 | "Output file 'tests/output/FAKEMACHINE.csv.gz' already exists, processing" 219 | " will be ignored for host FAKEMACHINE use '--overwrite' if you know what you are doing." in err 220 | ) 221 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 222 | 223 | 224 | def test_simple_5_jobs() -> None: 225 | """Test if processing the test ORCs with 5 jobs works correctly.""" 226 | out, err, code = _run_process( 227 | [ 228 | "orc2timeline", 229 | "process", 230 | "--overwrite", 231 | "-j 5", 232 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 233 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z", 234 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z", 235 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z", 236 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z", 237 | "tests/output/FAKEMACHINE.csv.gz", 238 | ], 239 | ) 240 | 241 | assert "== Printing final summary of generated timelines:" in err 242 | assert "====== Hostname: FAKEMACHINE - 1149 events" in err 243 | assert "========== FAKEMACHINE RegistryToTimeline 683" in err 244 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in err 245 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err 246 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in err 247 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in err 248 | assert "====== Total for FAKEMACHINE: 1149" in err 249 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 250 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 251 | 252 | 253 | def test_simple_1_job() -> None: 254 | """Test if processing the test ORCs with 1 job works correctly.""" 255 | out, err, code = _run_process( 256 | [ 257 | "orc2timeline", 258 | "process", 259 | "--overwrite", 260 | "-j 1", 261 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 262 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z", 263 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z", 264 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z", 265 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z", 266 | "tests/output/FAKEMACHINE.csv.gz", 267 | ], 268 | ) 269 | 270 | assert "== Printing final summary of generated timelines:" in err 271 | assert "====== Hostname: FAKEMACHINE - 1149 events" in err 272 | assert "========== FAKEMACHINE RegistryToTimeline 683" in err 273 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in err 274 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err 275 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in err 276 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in err 277 | assert "====== Total for FAKEMACHINE: 1149" in err 278 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 279 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 280 | 281 | 282 | def test_simple_no_job() -> None: 283 | """Test if processing the test ORCs with 1 job works correctly.""" 284 | out, err, code = _run_process( 285 | [ 286 | "orc2timeline", 287 | "process", 288 | "--overwrite", 289 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 290 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z", 291 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z", 292 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z", 293 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z", 294 | "tests/output/FAKEMACHINE.csv.gz", 295 | ], 296 | ) 297 | 298 | assert "== Printing final summary of generated timelines:" in err 299 | assert "====== Hostname: FAKEMACHINE - 1149 events" in err 300 | assert "========== FAKEMACHINE RegistryToTimeline 683" in err 301 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in err 302 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in err 303 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in err 304 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in err 305 | assert "====== Total for FAKEMACHINE: 1149" in err 306 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 307 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 308 | 309 | 310 | def test_simple_log_file() -> None: 311 | """Test if processing the test ORCs with 1 job works correctly.""" 312 | out, err, code = _run_process( 313 | [ 314 | "orc2timeline", 315 | "--log-file", 316 | "tests/output/blabla.log", 317 | "process", 318 | "--overwrite", 319 | "-j 1", 320 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 321 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z", 322 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z", 323 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z", 324 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z", 325 | "tests/output/FAKEMACHINE.csv.gz", 326 | ], 327 | ) 328 | 329 | if Path("tests/output/blabla.log").exists(): 330 | with Path("tests/output/blabla.log").open("r") as f: 331 | data = f.read() 332 | assert "== Printing final summary of generated timelines:" in data 333 | assert "====== Hostname: FAKEMACHINE - 1149 events" in data 334 | assert "========== FAKEMACHINE RegistryToTimeline 683" in data 335 | assert "========== FAKEMACHINE EventLogsToTimeline 125" in data 336 | assert "========== FAKEMACHINE NTFSInfoToTimeline 413" in data 337 | assert "========== FAKEMACHINE USNInfoToTimeline 99" in data 338 | assert "========== FAKEMACHINE I30InfoToTimeline 54" in data 339 | assert "====== Total for FAKEMACHINE: 1149" in data 340 | 341 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 342 | Path("tests/output/blabla.log").unlink() 343 | 344 | 345 | def test_simple_input_file_doesnt_exist() -> None: 346 | """Test if the error is triggered when orc2timeline is used with wrong parameters.""" 347 | out, err, code = _run_process( 348 | ["orc2timeline", "process", "tests/data/DOES_NOT_EXIST", "tests/output/FAKEMACHINE.csv.gz"], 349 | ) 350 | 351 | assert "Error: Invalid value for '[FILE_LIST]...': File 'tests/data/DOES_NOT_EXIST' does not exist." in err 352 | 353 | 354 | def test_simple_output_file_already_exists() -> None: 355 | """Test if the error is triggered when orc2timeline is used with wrong parameters.""" 356 | Path("tests/output/FAKEMACHINE.csv.gz").touch() 357 | out, err, code = _run_process( 358 | [ 359 | "orc2timeline", 360 | "process", 361 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 362 | "tests/output/FAKEMACHINE.csv.gz", 363 | ], 364 | ) 365 | 366 | assert ( 367 | "Error: Invalid value: 'OUTPUT_PATH': File 'tests/output/FAKEMACHINE.csv.gz' already exists, use '--overwrite' if you know what you are doing." # noqa: E501 368 | in err 369 | ) 370 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 371 | 372 | 373 | def test_simple_output_dir_does_not_exist() -> None: 374 | """Test if the error is triggered when orc2timeline is used with wrong parameters.""" 375 | out, err, code = _run_process( 376 | [ 377 | "orc2timeline", 378 | "process", 379 | "tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z", 380 | "tests/DOES_NOT_EXIST/FAKEMACHINE.csv.gz", 381 | ], 382 | ) 383 | 384 | assert ( 385 | "Error: Invalid value: 'OUTPUT_PATH': Directory 'tests/DOES_NOT_EXIST' does not exist or is not a directory." 386 | in err 387 | ) 388 | 389 | 390 | def test_show_conf() -> None: 391 | """Test if show_conf works properly.""" 392 | out, err, code = _run_process( 393 | [ 394 | "orc2timeline", 395 | "show_conf", 396 | ], 397 | ) 398 | 399 | 400 | def test_show_conf_file() -> None: 401 | """Test if show_conf_file works properly.""" 402 | out, err, code = _run_process( 403 | [ 404 | "orc2timeline", 405 | "show_conf_file", 406 | ], 407 | ) 408 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | """Test config parser.""" 2 | 3 | from __future__ import annotations 4 | 5 | import subprocess 6 | from pathlib import Path 7 | 8 | 9 | def _run_process(args: list[str]) -> tuple[str, str, int]: 10 | process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 11 | out, err = process.communicate() 12 | code = process.returncode 13 | return out.decode("utf"), err.decode("utf"), code 14 | 15 | 16 | def _get_conf_file_path() -> Path: 17 | out, err, code = _run_process( 18 | [ 19 | "orc2timeline", 20 | "show_conf_file", 21 | ], 22 | ) 23 | 24 | file_path = out.splitlines()[-1] 25 | return Path(file_path) 26 | 27 | 28 | def test_conf_file_do_not_exist() -> None: 29 | """Test config parsing when file does not exist.""" 30 | file_path = _get_conf_file_path() 31 | conf_file_path = Path(file_path) 32 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 33 | conf_file_path.rename(str(conf_file_path_bak)) 34 | 35 | out, err, code = _run_process( 36 | [ 37 | "orc2timeline", 38 | "process_dir", 39 | "--overwrite", 40 | "tests/data/conf_7_archives/", 41 | "tests/output/", 42 | ], 43 | ) 44 | 45 | conf_file_path_bak.rename(str(conf_file_path)) 46 | 47 | assert "Cannot read configuration file" in err 48 | assert "(file does not exist)" in err 49 | 50 | 51 | def test_conf_file_is_a_dir() -> None: 52 | """Test config parsing when configuration is in fact a directory.""" 53 | file_path = _get_conf_file_path() 54 | conf_file_path = Path(file_path) 55 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 56 | conf_file_path.rename(str(conf_file_path_bak)) 57 | conf_file_path.mkdir() 58 | 59 | out, err, code = _run_process( 60 | [ 61 | "orc2timeline", 62 | "process_dir", 63 | "--overwrite", 64 | "tests/data/conf_7_archives/", 65 | "tests/output/", 66 | ], 67 | ) 68 | 69 | conf_file_path.rmdir() 70 | conf_file_path_bak.rename(str(conf_file_path)) 71 | 72 | assert "Cannot read configuration file" in err 73 | assert "(is not a file)" in err 74 | 75 | 76 | def test_conf_file_wrong_yaml() -> None: 77 | """Test config when yaml parsing goes wrong.""" 78 | content = '''Plugins: 79 | - RegistryToTimeline: 80 | archives: ["SAM", "Little", "Detail", "Offline"] 81 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 82 | match_pattern: ".*data$" 83 | sourcetype: "Registry"''' 84 | file_path = _get_conf_file_path() 85 | conf_file_path = Path(file_path) 86 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 87 | conf_file_path.rename(str(conf_file_path_bak)) 88 | with conf_file_path.open("w") as conf_file: 89 | conf_file.write(content) 90 | 91 | out, err, code = _run_process( 92 | [ 93 | "orc2timeline", 94 | "process_dir", 95 | "--overwrite", 96 | "tests/data/conf_7_archives/", 97 | "tests/output/", 98 | ], 99 | ) 100 | 101 | conf_file_path.unlink() 102 | conf_file_path_bak.rename(str(conf_file_path)) 103 | 104 | assert "An error occured while parsing configuration" in err 105 | 106 | 107 | def test_conf_file_empty_archive() -> None: 108 | """Test configuration parsing when archive is empty.""" 109 | content = '''Plugins: 110 | - RegistryToTimeline: 111 | archives: [] 112 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 113 | match_pattern: ".*data$" 114 | sourcetype: "Registry"''' 115 | file_path = _get_conf_file_path() 116 | conf_file_path = Path(file_path) 117 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 118 | conf_file_path.rename(str(conf_file_path_bak)) 119 | with conf_file_path.open("w") as conf_file: 120 | conf_file.write(content) 121 | 122 | out, err, code = _run_process( 123 | [ 124 | "orc2timeline", 125 | "process_dir", 126 | "--overwrite", 127 | "tests/data/conf_7_archives/", 128 | "tests/output/", 129 | ], 130 | ) 131 | 132 | conf_file_path.unlink() 133 | conf_file_path_bak.rename(str(conf_file_path)) 134 | 135 | assert "configuration describes plugin without any archive." in err 136 | 137 | 138 | def test_conf_file_sub_archives_empty() -> None: 139 | """Test configuration parsing when sub_archive is empty.""" 140 | content = '''Plugins: 141 | - RegistryToTimeline: 142 | archives: ["SAM", "Little", "Detail", "Offline"] 143 | sub_archives: [] 144 | match_pattern: ".*data$" 145 | sourcetype: "Registry"''' 146 | file_path = _get_conf_file_path() 147 | conf_file_path = Path(file_path) 148 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 149 | conf_file_path.rename(str(conf_file_path_bak)) 150 | with conf_file_path.open("w") as conf_file: 151 | conf_file.write(content) 152 | 153 | out, err, code = _run_process( 154 | [ 155 | "orc2timeline", 156 | "process_dir", 157 | "--overwrite", 158 | "tests/data/conf_7_archives/", 159 | "tests/output/", 160 | ], 161 | ) 162 | 163 | conf_file_path.unlink() 164 | conf_file_path_bak.rename(str(conf_file_path)) 165 | 166 | assert "FAKEMACHINE RegistryToTimeline 0" in err 167 | 168 | 169 | def test_conf_file_empty_plugin_name() -> None: 170 | """Test configuration when plugin name is empty.""" 171 | content = '''Plugins: 172 | - "": 173 | archives: ["SAM", "Little", "Detail", "Offline"] 174 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 175 | match_pattern: ".*data$" 176 | sourcetype: "Registry"''' 177 | file_path = _get_conf_file_path() 178 | conf_file_path = Path(file_path) 179 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 180 | conf_file_path.rename(str(conf_file_path_bak)) 181 | with conf_file_path.open("w") as conf_file: 182 | conf_file.write(content) 183 | 184 | out, err, code = _run_process( 185 | [ 186 | "orc2timeline", 187 | "process_dir", 188 | "--overwrite", 189 | "tests/data/conf_7_archives/", 190 | "tests/output/", 191 | ], 192 | ) 193 | 194 | conf_file_path.unlink() 195 | conf_file_path_bak.rename(str(conf_file_path)) 196 | 197 | assert "Empty plugin name in configuration is not allowed." in err 198 | 199 | 200 | def test_conf_file_fake_plugin() -> None: 201 | """Test configuration when plugin file does not exist.""" 202 | content = '''Plugins: 203 | - "FAKEPLUGIN": 204 | archives: ["SAM", "Little", "Detail", "Offline"] 205 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 206 | match_pattern: ".*data$" 207 | sourcetype: "Registry"''' 208 | file_path = _get_conf_file_path() 209 | conf_file_path = Path(file_path) 210 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 211 | conf_file_path.rename(str(conf_file_path_bak)) 212 | with conf_file_path.open("w") as conf_file: 213 | conf_file.write(content) 214 | 215 | out, err, code = _run_process( 216 | [ 217 | "orc2timeline", 218 | "process_dir", 219 | "--overwrite", 220 | "tests/data/conf_7_archives/", 221 | "tests/output/", 222 | ], 223 | ) 224 | 225 | conf_file_path.unlink() 226 | conf_file_path_bak.rename(str(conf_file_path)) 227 | 228 | assert "Plugin FAKEPLUGIN:" in err 229 | assert "orc2timeline/plugins/FAKEPLUGIN.py does not exist." in err 230 | 231 | 232 | def test_conf_file_empty_sourcetype() -> None: 233 | """Test error in.""" 234 | content = '''Plugins: 235 | - RegistryToTimeline: 236 | archives: ["SAM", "Little", "Detail", "Offline"] 237 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 238 | match_pattern: ".*data$" 239 | sourcetype: ""''' 240 | file_path = _get_conf_file_path() 241 | conf_file_path = Path(file_path) 242 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 243 | conf_file_path.rename(str(conf_file_path_bak)) 244 | with conf_file_path.open("w") as conf_file: 245 | conf_file.write(content) 246 | 247 | out, err, code = _run_process( 248 | [ 249 | "orc2timeline", 250 | "process_dir", 251 | "--overwrite", 252 | "tests/data/conf_7_archives/", 253 | "tests/output/", 254 | ], 255 | ) 256 | 257 | conf_file_path.unlink() 258 | conf_file_path_bak.rename(str(conf_file_path)) 259 | 260 | assert "empty sourcetype is not allowed." in err 261 | 262 | 263 | def test_conf_file_empty_match_pattern() -> None: 264 | """Test configuration when match_pattern is empty.""" 265 | content = '''Plugins: 266 | - RegistryToTimeline: 267 | archives: ["SAM", "Little", "Detail", "Offline"] 268 | sub_archives: ["SAM.7z", "SystemHives_little.7z", "UserHives.7z", "SystemHives.7z"] 269 | match_pattern: "" 270 | sourcetype: "Registry"''' 271 | file_path = _get_conf_file_path() 272 | conf_file_path = Path(file_path) 273 | conf_file_path_bak = Path(file_path).parent / "Orc2Timeline.yaml.bak" 274 | conf_file_path.rename(str(conf_file_path_bak)) 275 | with conf_file_path.open("w") as conf_file: 276 | conf_file.write(content) 277 | 278 | out, err, code = _run_process( 279 | [ 280 | "orc2timeline", 281 | "process_dir", 282 | "--overwrite", 283 | "tests/data/conf_7_archives/", 284 | "tests/output/", 285 | ], 286 | ) 287 | 288 | conf_file_path.unlink() 289 | conf_file_path_bak.rename(str(conf_file_path)) 290 | 291 | assert "empty match_pattern is not allowed." in err 292 | -------------------------------------------------------------------------------- /tests/test_core.py: -------------------------------------------------------------------------------- 1 | """Test core module.""" 2 | 3 | import gzip 4 | import hashlib 5 | from pathlib import Path 6 | 7 | from orc2timeline import process 8 | 9 | 10 | def _zcat_and_sha1(file: str) -> str: 11 | buf_size = 65536 12 | with gzip.open(file, "rb") as fd: 13 | my_sha1 = hashlib.sha1() # noqa: S324 14 | while True: 15 | data = fd.read(buf_size) 16 | if not data: 17 | break 18 | my_sha1.update(data) 19 | 20 | return str(my_sha1.hexdigest()) 21 | 22 | 23 | def test_process_1_job() -> None: 24 | """Test import mode with 1 job.""" 25 | file_list = [ 26 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z"), 27 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z"), 28 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z"), 29 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z"), 30 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z"), 31 | ] 32 | 33 | if Path("tests/output/FAKEMACHINE.csv.gz").exists(): 34 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 35 | 36 | process(file_list, "tests/output/FAKEMACHINE.csv.gz", "FAKEMACHINE", 1) 37 | 38 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 39 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 40 | 41 | 42 | def test_process_5_jobs() -> None: 43 | """Test import mode with 5 jobs.""" 44 | file_list = [ 45 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_General.7z"), 46 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Detail.7z"), 47 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Little.7z"), 48 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_Memory.7z"), 49 | Path("tests/data/conf_7_archives/ORC_Server_FAKEMACHINE_SAM.7z"), 50 | ] 51 | 52 | if Path("tests/output/FAKEMACHINE.csv.gz").exists(): 53 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 54 | 55 | process(file_list, "tests/output/FAKEMACHINE.csv.gz", "FAKEMACHINE", 5) 56 | 57 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 58 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 59 | 60 | 61 | def test_null_in_csv_files() -> None: 62 | """Test import mode with 1 job.""" 63 | file_list = [ 64 | Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_General.7z"), 65 | Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_Detail.7z"), 66 | Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_Little.7z"), 67 | Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_Memory.7z"), 68 | Path("tests/data/null_csv/ORC_Server_FAKEMACHINE_SAM.7z"), 69 | ] 70 | 71 | if Path("tests/output/FAKEMACHINE.csv.gz").exists(): 72 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 73 | 74 | process(file_list, "tests/output/FAKEMACHINE.csv.gz", "FAKEMACHINE", 1) 75 | 76 | assert _zcat_and_sha1("tests/output/FAKEMACHINE.csv.gz") == "6c9f7897fef29c7006a4cb992117abaeba8fa8eb" 77 | Path("tests/output/FAKEMACHINE.csv.gz").unlink() 78 | --------------------------------------------------------------------------------