├── evtxdump ├── __init__.py ├── linux │ └── x64 │ │ ├── fd │ │ └── evtx_dump ├── windows │ └── x64 │ │ ├── fd.exe │ │ └── evtx_dump.exe ├── evtx - LICENSE-MIT └── evtxdump.py ├── env.sample ├── requirements.txt ├── LICENSE-MIT ├── .gitignore ├── README.md ├── splunk_helper.py └── evtx2splunk.py /evtxdump/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /evtxdump/linux/x64/fd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whikernel/evtx2splunk/HEAD/evtxdump/linux/x64/fd -------------------------------------------------------------------------------- /evtxdump/linux/x64/evtx_dump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whikernel/evtx2splunk/HEAD/evtxdump/linux/x64/evtx_dump -------------------------------------------------------------------------------- /evtxdump/windows/x64/fd.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whikernel/evtx2splunk/HEAD/evtxdump/windows/x64/fd.exe -------------------------------------------------------------------------------- /evtxdump/windows/x64/evtx_dump.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whikernel/evtx2splunk/HEAD/evtxdump/windows/x64/evtx_dump.exe -------------------------------------------------------------------------------- /env.sample: -------------------------------------------------------------------------------- 1 | SPLUNK_URL = mydomain.com 2 | SPLUNK_PORT = 8000 3 | SPLUNK_MPORT = 8089 4 | SPLUNK_SSL = False 5 | SPLUNK_USER = user 6 | SPLUNK_PASS = userpass 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2021.10.8 2 | chardet==4.0.0 3 | idna==3.3 4 | python-dotenv==0.15.0 5 | requests==2.26.0 6 | semantic-version==2.8.5 7 | git+https://github.com/georgestarcher/Splunk-Class-httpevent.git 8 | urllib3>=1.26.5 9 | toml==0.10.2 10 | tqdm==4.59.0 11 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 Whitekernel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /evtxdump/evtx - LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Omer Ben-Amram 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | *.json 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | .idea/ -------------------------------------------------------------------------------- /evtxdump/evtxdump.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | 4 | """ 5 | Splunk HEC Helper, part of evtx2splunk 6 | 7 | Thanks to Ektoplasma for its contribution 8 | 9 | """ 10 | __progname__ = "evtx2splunk" 11 | __date__ = "2020-01-10" 12 | __version__ = "0.1" 13 | __author__ = "Ektoplasma" 14 | 15 | import logging as log 16 | import subprocess 17 | from pathlib import Path 18 | 19 | 20 | class EvtxDump(object): 21 | """ 22 | Wrapper around evtx_dump, a tool writen in go for speed conversion of evtx 23 | """ 24 | def __init__(self, output_path: Path=None, path_evtx_dump: Path=None, fdfind: str ="fdfind"): 25 | """ 26 | Init method of the EvtxDump class. Just saves some input args 27 | :param output_path: Path - Output path of the files 28 | :param path_evtx_dump: Path - Path of the evtx path binary 29 | :param fdfind: Path - ffind 30 | """ 31 | self._output_path = output_path 32 | self._evtx_dump = path_evtx_dump 33 | self._fdfind = fdfind 34 | 35 | def run(self, evtxdata: Path): 36 | """ 37 | Dispatch depending whether it's a file or a directory 38 | :param evtxdata: 39 | :return: 40 | """ 41 | if evtxdata.is_file(): 42 | return self._convert_file(evtxdata) 43 | 44 | elif evtxdata.is_dir(): 45 | return self._convert_files(evtxdata) 46 | 47 | else: 48 | log.error("Data is neither a file nor a folder, not supported") 49 | return False 50 | 51 | def _convert_file(self, evtxdata: Path): 52 | """ 53 | Convert a file to json thanks to evtx_dump 54 | :param evtxdata: Path - Path to the evtx file 55 | :return: True if successful, else False 56 | """ 57 | 58 | completed = False 59 | filename = evtxdata.name + ".json" 60 | 61 | out_file = Path(self._output_path, filename) 62 | 63 | if out_file.exists(): 64 | log.error("Destination file already exists") 65 | return completed 66 | 67 | try: 68 | command = (self._evtx_dump, evtxdata, "-o", "jsonl", "-f", out_file, "--no-confirm-overwrite") 69 | completed = subprocess.check_call(command) 70 | except Exception as e: 71 | log.error(e) 72 | 73 | return completed 74 | 75 | def _convert_files(self, evtxdata: Path): 76 | """ 77 | Convert a set of files to json thanks to evtx_dump and ffind 78 | :param evtxdata: Path - Path to a folder containing EVTX 79 | :return: True if successful, else False 80 | """ 81 | 82 | completed = False 83 | list_evtx = evtxdata.rglob("*.evtx*") 84 | 85 | for evtx in list_evtx: 86 | 87 | filename = evtx.name + ".json" 88 | 89 | out_file = Path(self._output_path, filename) 90 | 91 | if out_file.exists(): 92 | log.error("Destination file already exists") 93 | return 1 94 | 95 | try: 96 | command = (self._fdfind, r".*\.evtx\w*", evtxdata, "-x", 97 | self._evtx_dump, "-o", "jsonl", "{}", "--no-confirm-overwrite", 98 | "-f", Path(self._output_path, "{/.}.json")) 99 | completed = subprocess.check_call(command) 100 | except Exception as e: 101 | log.error(e) 102 | 103 | return completed 104 | 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # evtx2splunk 2 | Ingest EVTX files into a Splunk instance. 3 | 4 | This tool is based on the work of : 5 | - [Omer BenAmram](https://github.com/omerbenamram/) 6 | - [Blardy](https://github.com/blardy/evtx2elk) 7 | 8 | *Thanks to [Ekto](https://github.com/Ektoplasma) for its contribution.* 9 | 10 | 11 | **Key features** 12 | - Splunk HEC support with token auto-creation 13 | - Splunk index auto-creation 14 | - Multiprocessing support 15 | - Caching for evtx reuse without reconverting 16 | - Windows and Linux compatibility 17 | - Rely on the great and fast *evtx_dump* Rust tool of Omer 18 | - **New** : Evtx message resolutions from database 19 | 20 | **Note**: *evtx2splunk* converts the EVTX to JSON and stores them in a temporary place. 21 | Hence, up to the size of source EVTX can be created during the process. These files are removed at the end of the process, except if `keep_cache` is enabled. 22 | 23 | ## Installation 24 | **Usage of a *venv* is recommended to avoid conflicts. Please use Python 3.7 or later.** 25 | 1. Clone the repo : `git clone https://github.com/whikernel/evtx2splunk.git && cd evtx2splunk` 26 | 2. Install the requirements: `pip3 install -r ./requirements.txt` 27 | 3. Copy env configuration : `cp env.sample .env` and fill it with your Splunk configuration 28 | 4. Enable HEC on the Splunk server (see configuration section) 29 | 5. Run evtx2splunk :-) 30 | 31 | ## Usage 32 | Ingest a folder containing evtx files into `case_0001` index. 33 | ``` 34 | # Default 35 | python3 evtx2splunk.py --input /data/evtx/folder --index case_0001 36 | 37 | # Keep cache 38 | python3 evtx2splunk.py --input /data/evtx/folder --index case_0001 --keep_cache 39 | 40 | # Reuse cache and keep it 41 | python3 evtx2splunk.py --input /data/evtx/folder --index case_0001 --keep_cache --use_cache 42 | 43 | # Disable message resolution 44 | python3 evtx2splunk.py --input /data/evtx/folder --index case_0001 --no_resolve 45 | 46 | # Generates the JSON Evtx message file 47 | python3 build_resolver.py -d winevt-kb.db 48 | ``` 49 | 50 | ## Options 51 | - `--input`: Folder containing EVTX files to parse or unitary file 52 | - `--index`: Splunk index to push the evtx 53 | - `--nb_process`: Number of ingest processes to create. Default to number of cores 54 | - `--keep_cache`: Keep JSON cache for future use - Might take a lot of space 55 | - `--use_cache` : Use the cache saved previously. Add `--keep_cache` to avoid erase of the case at the end. 56 | - `--test` : Enable test mode. Do not push the events into to Splunk to preserve license. 57 | - `--no_resolve` : Disable the messages resolution 58 | 59 | ## Configuration 60 | The environment variables should follow : 61 | ``` 62 | SPLUNK_URL = Domain or IP hosting the Splunk. Please input without the HTTP or HTTPS - for instance `localhost` or `mydomain.com` 63 | SPLUNK_PORT = Splunk HTTP port - 8000 by default (unused at the moment in the script) 64 | SPLUNK_MPORT = Splunk Management port - 8089 by default 65 | SPLUNK_SSL = If set to True, the SSL certificate will be checked. Set to False for autogenerated certs. 66 | SPLUNK_USER = Splunk user with the rights to make configuration changes (add HEC token, indexes,etc) 67 | SPLUNK_PASS = User password 68 | ``` 69 | Please also note that HEC needs to be enabled on Splunk before use : Settings > Data Input > HTTP Event Collector > Global Settings > All tokens : Enabled 70 | 71 | ## Improvements to come 72 | - ~~Use the `evtx` python binding instead of the binaries~~ : Huge loss of performance after testing 73 | - Add the possibility to dynamically add fields 74 | - Add the possibility to dynamically change the computer name 75 | - Add the possibility to recreate an already-existing index 76 | -------------------------------------------------------------------------------- /splunk_helper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | 4 | """ 5 | Splunk HEC Helper, part of evtx2splunk 6 | """ 7 | 8 | __progname__ = "evtx2splunk" 9 | __date__ = "2020-01-10" 10 | __version__ = "0.1" 11 | __author__ = "whitekernel - PAM" 12 | 13 | from typing import Any 14 | 15 | import requests 16 | from requests.auth import HTTPBasicAuth 17 | import logging as log 18 | from xml.dom.minidom import parse, parseString 19 | 20 | from requests.packages.urllib3.exceptions import InsecureRequestWarning 21 | 22 | 23 | class SplunkHelper(object): 24 | def __init__(self, splunk_url: str, splunk_port: int, splunk_ssl_verify: bool, username: str, password: str): 25 | """ 26 | Init class of the helper. 27 | :param splunk_url: URL of the Splunk instance 28 | :param splunk_port: port of the splunk instance 29 | :param splunk_ssl_verify: True to check ssl certificate 30 | :param username: Administrative account 31 | :param password: Password account 32 | """ 33 | self._surl = "https://{url}:{port}/".format(url=splunk_url, 34 | port=splunk_port) 35 | self._suser = username 36 | self._spwd = password 37 | self._ssl_verify = splunk_ssl_verify 38 | if not self._ssl_verify: 39 | requests.packages.urllib3.disable_warnings(InsecureRequestWarning) 40 | 41 | self.link_up = self.test_connection() 42 | self._hec_token = None 43 | 44 | def _uri(self, uri: str): 45 | """ 46 | Return a complete URL from an URI 47 | :param uri: URI 48 | :return: A string with the complete URL 49 | """ 50 | return self._surl + uri 51 | 52 | def _request(self, uri: str, method: str = "GET", data: Any = None): 53 | """ 54 | Make a request and handle the errors 55 | :param uri: URI to request 56 | :param method: Method to index 57 | :param data: Data for post method 58 | :return: Tuple (Bool, Response) 59 | """ 60 | ret = False 61 | response = None 62 | 63 | try: 64 | 65 | if method == "GET": 66 | response = requests.get(url=self._uri(uri=uri), 67 | verify=self._ssl_verify, 68 | auth=HTTPBasicAuth(self._suser, self._spwd), 69 | timeout=2) 70 | elif method == "POST": 71 | response = requests.post(url=self._uri(uri=uri), 72 | data=data, 73 | verify=self._ssl_verify, 74 | auth=HTTPBasicAuth(self._suser, self._spwd), 75 | timeout=2) 76 | except Exception as e: 77 | log.error(e) 78 | log.error("Unable to connect to Splunk. Please check URL and ports") 79 | return ret, None 80 | 81 | if response.status_code == 401: 82 | log.error("Unable to connect to Splunk. Please check administrative credentials") 83 | 84 | elif response.status_code == 200 or response.status_code == 201: 85 | ret = True 86 | 87 | else: 88 | log.error("Server error. See message below. Status {status}".format(status=response.status_code)) 89 | 90 | return ret, response 91 | 92 | def test_connection(self): 93 | """ 94 | Test connection to the Splunk instance 95 | :return: True if successful, else False 96 | """ 97 | ret, _ = self._request(uri='services/data/inputs/http') 98 | return ret 99 | 100 | def create_index(self, index: str): 101 | """ 102 | Create an index in the Splunk instance. 103 | :return: True if created successfully 104 | """ 105 | if not self.link_up: 106 | return False 107 | 108 | # Check if we have one index with this name already 109 | ret, response = self._request(uri='services/data/indexes/{index}'.format(index=index)) 110 | if ret: 111 | log.info("{index} already created. Continuing".format(index=index)) 112 | return True 113 | 114 | data = { 115 | "name": index 116 | } 117 | 118 | ret, response = self._request(uri='services/data/indexes', 119 | method="POST", 120 | data=data) 121 | if ret: 122 | log.info("Index {index} created successfully".format(index=index)) 123 | return True 124 | 125 | else: 126 | log.error("Unable to create index {index}".format(index=index)) 127 | log.error("{message}".format(message=response.text)) 128 | return False 129 | 130 | def get_or_create_hect(self): 131 | """ 132 | Look for an HEC token or create one if none is available 133 | :return: 134 | """ 135 | hect = None 136 | if not self.link_up: 137 | return hect 138 | 139 | if self._hec_token: 140 | return self._hec_token 141 | 142 | # Check if a token is already registered under the same name 143 | ret, response = self._request(uri='services/data/inputs/http/evtx2splunk') 144 | if ret: 145 | 146 | dom = parseString(response.text) 147 | 148 | for e in dom.getElementsByTagName("s:key"): 149 | 150 | if e.getAttribute("name") == "token": 151 | 152 | # We have a key, return the node value which 153 | # is the HEC token 154 | log.info("HEC token found") 155 | self._hec_token = e.firstChild.nodeValue 156 | return self._hec_token 157 | 158 | # If we are here, we don't have a token yet, so create it 159 | data = { 160 | "name": "evtx2splunk" 161 | } 162 | 163 | ret, response = self._request(uri='services/data/inputs/http', 164 | method="POST", 165 | data=data) 166 | if ret: 167 | 168 | dom = parseString(response.text) 169 | 170 | for e in dom.getElementsByTagName("s:key"): 171 | 172 | if e.getAttribute("name") == "token": 173 | # We have a key, return the node value which 174 | # is the HEC token 175 | log.info("HEC token created successfully") 176 | self._hec_token = e.firstChild.nodeValue 177 | return self._hec_token 178 | 179 | log.error("Unable to create HEC token") 180 | log.error("{message}".format(message=response.text)) 181 | 182 | return hect 183 | 184 | def register_index_to_hec(self, index: str): 185 | """ 186 | Register an index to the HEC listener 187 | :param index: Index to add 188 | :return: True if added successfully, else False 189 | """ 190 | # Retrieve the HEC token 191 | if not self._hec_token: 192 | self._hec_token = self.get_or_create_hect() 193 | 194 | # Retrieve the list of indexes allowed to be pushed with the HEC token 195 | ret, response = self._request(uri='services/data/inputs/http/evtx2splunk') 196 | indexes = [] 197 | if ret: 198 | 199 | dom = parseString(response.text) 200 | 201 | for e in dom.getElementsByTagName("s:key"): 202 | 203 | if e.getAttribute("name") == "indexes": 204 | indexes = [item.firstChild.nodeValue for item in e.getElementsByTagName("s:item")] 205 | 206 | # Check if the index is already associated 207 | if index in indexes: 208 | log.info("Index already registered, continuing") 209 | return True 210 | 211 | # Add the index and push 212 | indexes.append(index) 213 | data = { 214 | "indexes": ",".join(indexes), 215 | "index": indexes[0] 216 | } 217 | ret, response = self._request(uri='services/data/inputs/http/evtx2splunk', 218 | method="POST", 219 | data=data) 220 | 221 | if ret: 222 | log.info("Index associated successfully") 223 | return True 224 | 225 | log.error("Unable to create associate index with the HEC token") 226 | log.error("{message}".format(message=response.text)) 227 | return False 228 | -------------------------------------------------------------------------------- /evtx2splunk.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | 4 | """ 5 | Ingest EVTX file(s) into an Splunk 6 | Based on Blardy work (https://github.com/blardy/evtx2elk) 7 | Based on Dan Gunter work (https://dragos.com/blog/20180717EvtxToElk.html) 8 | 9 | Special thanks to Ektoplasma for its contribution 10 | """ 11 | __progname__ = "evtx2splunk" 12 | __date__ = "2020-01-10" 13 | __version__ = "0.1" 14 | __author__ = "whitekernel - PAM" 15 | 16 | 17 | import argparse 18 | import json 19 | import time 20 | import os 21 | import logging as log 22 | import sys 23 | import shutil 24 | from datetime import datetime, timezone 25 | from functools import partial 26 | from glob import glob 27 | from multiprocessing.dummy import Pool 28 | from multiprocessing import cpu_count 29 | from pathlib import Path 30 | from typing import TextIO 31 | import tqdm 32 | 33 | from splunk_http_event_collector import http_event_collector 34 | from dotenv import load_dotenv 35 | 36 | from evtxdump.evtxdump import EvtxDump 37 | from splunk_helper import SplunkHelper 38 | 39 | 40 | LOG_FORMAT = '%(asctime)s %(levelname)s %(funcName)s: %(message)s' 41 | LOG_VERBOSITY = { 42 | 'DEBUG': log.DEBUG, 43 | 'INFO': log.INFO, 44 | 'WARNING': log.WARNING, 45 | 'ERROR': log.ERROR, 46 | 'CRITICAL': log.CRITICAL, 47 | } 48 | log.basicConfig(format=LOG_FORMAT, level=log.INFO, datefmt='%Y-%m-%d %I:%M:%S') 49 | 50 | 51 | class Evtx2Splunk(object): 52 | """ 53 | Convert EVTX to JSON and index the data in Splunk 54 | Features auto create of index and HEC token 55 | """ 56 | 57 | def __init__(self): 58 | """ 59 | Init functton of the Evtx2Splunk class 60 | """ 61 | self._sh = None 62 | self._hec_server = None 63 | self._nb_ingestors = 1 64 | self._is_test = False 65 | self._resolve = True 66 | self._resolver = {} 67 | self.myevent = [] 68 | 69 | def configure(self, index:str, nb_ingestors: int, testing: bool, no_resolve: bool): 70 | """ 71 | Configure the instance of SplunkHelper 72 | :param nb_ingestors: NB of ingestors to use 73 | :param testing: If yes, no file would be injected into splunk to preserve licenses 74 | :param index: Index where to push the files 75 | :param no_resolve: Disable Event ids resolution 76 | :return: True if successfully configured else False 77 | """ 78 | # Load the environment variables for .env 79 | load_dotenv() 80 | 81 | self._nb_ingestors = nb_ingestors 82 | 83 | if no_resolve : 84 | log.info("Event ID resolution disabled") 85 | self._resolve = False 86 | 87 | elif not Path("evtx_data.json").exists(): 88 | log.error("Event ID data file not found") 89 | log.error("Will without resolution") 90 | self._resolve = False 91 | 92 | if self._resolve: 93 | with open("evtx_data.json", "r") as fdata: 94 | try: 95 | self._resolver = json.load(fdata) 96 | except Exception as e: 97 | log.error("Unable to read event data file. Error {e}".format(e=e)) 98 | return False 99 | 100 | self._is_test = testing 101 | if self._is_test: 102 | log.warning("Testing mode enabled. NO data will be injected into Splunk") 103 | 104 | log.info("Init SplunkHelper") 105 | self._sh = SplunkHelper(splunk_url=os.getenv("SPLUNK_URL"), 106 | splunk_port=os.getenv("SPLUNK_MPORT"), 107 | splunk_ssl_verify=os.getenv("SPLUNK_SSL") == "True", 108 | username=os.getenv("SPLUNK_USER"), 109 | password=os.getenv("SPLUNK_PASS")) 110 | 111 | # The SplunkHelper instantiation holds a link_up 112 | # flag that indicated whether it could successfully reach 113 | # the specified SPlunk instance 114 | if self._sh.link_up: 115 | 116 | # Fetch or create the HEC token from Splunk 117 | hect = self._sh.get_or_create_hect() 118 | 119 | # Create a new index 120 | if self._sh.create_index(index=index): 121 | 122 | # Associate the index to the HEC token so the script can send 123 | # the logs to it 124 | self._sh.register_index_to_hec(index=index) 125 | 126 | # Instantiate HEC class and configure 127 | self._hec_server = http_event_collector(token=hect, 128 | http_event_server=os.getenv("SPLUNK_URL")) 129 | self._hec_server.http_event_server_ssl = True 130 | self._hec_server.index = index 131 | self._hec_server.input_type = "json" 132 | self._hec_server.popNullFields = True 133 | 134 | return True 135 | 136 | return False 137 | 138 | def send_jevtx_file_to_splunk(self, records_stream: TextIO, source: str, sourcetype: str): 139 | """ 140 | From a record stream - aka file json stream - read and update the stream with enhanced data 141 | then push to splunk 142 | :param records_stream: TextIO - Input JSON stream to index 143 | :param source: Str representing the source indexed as in the Splunk sense 144 | :param sourcetype: Str representing the source type to index - always JSON here 145 | :param source_size: Size of the input file 146 | :return: True if the indexing was successfully else False 147 | """ 148 | 149 | try: 150 | if records_stream is not None: 151 | 152 | is_host_set = False 153 | 154 | # Prepare Splunk payload to send 155 | payload = {} 156 | payload.update({"source": source}) 157 | payload.update({"sourcetype": sourcetype}) 158 | 159 | # Send batch of events it will be handled consecutively 160 | # and sent to the Splunk HEC endpoint 161 | 162 | for record_line in records_stream: 163 | 164 | try: 165 | record = json.loads(record_line) 166 | except ValueError: 167 | continue 168 | 169 | if is_host_set is False: 170 | payload.update({"host": record["Event"]["System"]["Computer"]}) 171 | is_host_set = True 172 | 173 | # Must convert the timestamp in epoch format... seconds.milliseconds 174 | # examples evtx time "2020-06-16T12:54:38.766579Z" "'%Y-%m-%dT%H:%M:%S.%fZ' 175 | # But sometimes, milliseconds are not present 176 | try: 177 | dt_obj = datetime.strptime( 178 | record["Event"]["System"]["TimeCreated"]["#attributes"]["SystemTime"], 179 | '%Y-%m-%dT%H:%M:%S.%fZ') 180 | except: 181 | dt_obj = datetime.strptime( 182 | record["Event"]["System"]["TimeCreated"]["#attributes"]["SystemTime"], 183 | '%Y-%m-%dT%H:%M:%SZ') 184 | 185 | try: 186 | 187 | # Splunk does not want microseconds but it can be sent anyway 188 | dt_obj = dt_obj.replace(tzinfo=timezone.utc) 189 | epoch = dt_obj.timestamp() 190 | 191 | except Exception as e: 192 | log.warning("Timestamp warning. {error}".format(error=e)) 193 | log.warning("Falling back to default") 194 | # Use case for NTFS : 1601-01-01 00:00:00.000 195 | dt_obj = datetime.now() 196 | dt_obj = dt_obj.replace(tzinfo=timezone.utc) 197 | epoch = dt_obj.timestamp() 198 | 199 | record["module"] = record["Event"]["System"]["Channel"] 200 | 201 | if self._resolve: 202 | message = self.format_resolve(record) 203 | if message: 204 | record["message"] = message 205 | 206 | payload.update({"time": epoch}) 207 | payload.update({"event": record}) 208 | 209 | # Finally send the stream 210 | if not self._is_test: 211 | self._hec_server.batchEvent(payload) 212 | else: 213 | log.debug("Test mode. Would have injected : {payload}".format(payload=payload)) 214 | 215 | return True 216 | 217 | else: 218 | return False 219 | 220 | except Exception as e: 221 | log.warning(e) 222 | return False 223 | 224 | def format_resolve(self, record): 225 | """ 226 | Return a formatted string of the record if formatting is available 227 | :param record: Record to format 228 | :return: Formatted string of the record 229 | """ 230 | try: 231 | provider = record["Event"]["System"]["Provider"]["#attributes"]["Name"] 232 | event_id = record["Event"]["System"]["EventID"] 233 | 234 | if type(event_id) == dict: 235 | event_id = record["Event"]["System"]["EventID"]["#text"] 236 | 237 | if provider in self._resolver: 238 | 239 | if self._resolver[provider].get(str(event_id)): 240 | message = self._resolver[provider].get(str(event_id)) 241 | return message 242 | 243 | except Exception as e: 244 | log.error(e) 245 | 246 | return "" 247 | 248 | def ingest(self, input_files: str, keep_cache: bool, use_cache: bool): 249 | """ 250 | Main function of the class. List the files, call the converter 251 | and then multiprocess the input. 252 | :param input_files: Path to a file or a folder to ingest 253 | :param keep_cache: Set to true to keep json temporary folder at the end of the process 254 | :return: Nothing 255 | """ 256 | # Get the folder to index 257 | input_folder = Path(input_files) 258 | 259 | # Temporary files are placed in the same directory, not in tmp as there is a 260 | # a risk over overloading tmp dir depending on the partitioning 261 | if input_folder.is_file(): 262 | output_folder = input_folder.parents[0] / "json_evtx" 263 | self._nb_ingestors = 1 264 | 265 | elif input_folder.is_dir(): 266 | output_folder = input_folder / "json_evtx" 267 | 268 | else: 269 | log.error("Input is neither a file or a directory") 270 | return 271 | 272 | if not use_cache: 273 | log.info("Starting EVTX conversion. Nothing will be output until the end of conversion") 274 | if sys.platform == "win32": 275 | evtxdump = EvtxDump(output_folder, Path("evtxdump/windows/x64/evtx_dump.exe"), 276 | fdfind="evtxdump/windows/x64/fd.exe") 277 | else: 278 | evtxdump = EvtxDump(output_folder, Path("evtxdump/linux/x64/evtx_dump"), 279 | fdfind="evtxdump/linux/x64/fd") 280 | 281 | evtxdump.run(input_folder) 282 | 283 | else: 284 | log.warning("Using cached files") 285 | 286 | # Files are converted, now build a list of the files to index 287 | # dispatch by size 288 | evtx_files = [files for files in output_folder.rglob('*.json')] 289 | 290 | sublists = self.dispatch_files_bysize(self._nb_ingestors, evtx_files) 291 | self.desc = "" 292 | 293 | # Create pool of processes and partial the input 294 | master_pool = Pool(self._nb_ingestors) 295 | master_partial = partial(self.ingest_worker, sublists) 296 | 297 | master_pool.map(master_partial, range(self._nb_ingestors)) 298 | master_pool.close() 299 | 300 | # Assure to flush all the threads before we end the function 301 | self._hec_server.flushBatch() 302 | 303 | # Clean the temporary folder if not indicated not to do so 304 | if not keep_cache: 305 | shutil.rmtree(output_folder, ignore_errors=True) 306 | 307 | def ingest_worker(self, sublist: list, index: int): 308 | """ 309 | Ingestor worker that actually index a set of JSON files into Splunk 310 | Meant to be Pool-ed 311 | :param sublist: list - List of sublist of files to index 312 | :param index: int - index of the sublist ot index 313 | :return: Tuple CountSuccess,TotalCount 314 | """ 315 | count = 0 316 | sum = 0 317 | desc = "" 318 | file_log = tqdm.tqdm(total=0, position=index*2, bar_format='{desc}') 319 | with tqdm.tqdm(total=len(sublist[index]), position=(index*2)+1, desc=desc, unit="files") as progress: 320 | for jevtx_file in sublist[index]: 321 | 322 | sum += 1 323 | with open(jevtx_file, "r") as jevtx_stream: 324 | 325 | if not self._is_test: 326 | desc = "[Worker {index}] Processing {evtx}".format(index=index, evtx=jevtx_file.name) 327 | else: 328 | desc = "[Worker {index}] [TEST] Processing {evtx}".format(index=index, evtx=jevtx_file.name) 329 | 330 | ret_t = self.send_jevtx_file_to_splunk(records_stream=jevtx_stream, 331 | source="event_" + jevtx_file.name, 332 | sourcetype="json" 333 | ) 334 | count += 1 if ret_t else 0 335 | file_log.set_description_str(desc) 336 | progress.update(1) 337 | 338 | return count, sum 339 | 340 | @staticmethod 341 | def list_files(file: Path, folder: Path, extension='*.evtx'): 342 | """ 343 | It returns a list of files based on teh given input path and filter on extension 344 | :param file: Unitary file to index 345 | :param folder: Folder to index 346 | :param extension: Extensions of the files to index - evtx by default 347 | :return: A list of files to index 348 | """ 349 | if file: 350 | return [file] 351 | elif folder: 352 | return [y for x in os.walk(folder) for y in glob(os.path.join(x[0], extension))] 353 | else: 354 | return [] 355 | 356 | @staticmethod 357 | def dispatch_files_bysize(nb_list: int, files: list): 358 | """ 359 | It creates N list of files based on filesize to average the size between lists. 360 | :param nb_list: Number of lists to create 361 | :param files: List of files to dispatch 362 | :return: List of list 363 | """ 364 | 365 | log.info('Having {} files to dispatch in {} lists'.format(len(files), nb_list)) 366 | 367 | sublists = {} 368 | for list_id in range(0, nb_list): 369 | sublists[list_id] = { 370 | 'files': [], 371 | 'size': 0 372 | } 373 | 374 | def _get_smallest_sublist(sublists): 375 | """ 376 | get the smallest sublist 377 | """ 378 | smallest_list_id = 0 379 | for list_id, sublist in sublists.items(): 380 | if sublist['size'] < sublists[smallest_list_id]['size']: 381 | smallest_list_id = list_id 382 | 383 | return smallest_list_id 384 | 385 | for file in files: 386 | log.debug('dispatching {}'.format(file)) 387 | list_id = _get_smallest_sublist(sublists) 388 | sublists[list_id]['files'].append(file) 389 | sublists[list_id]['size'] += os.stat(file).st_size 390 | 391 | for list_id, sublist in sublists.items(): 392 | log.info( 393 | ' List [{}] Having {} files for a size of {}'.format(list_id, len(sublist['files']), sublist['size'])) 394 | 395 | return [sublist['files'] for list_id, sublist in sublists.items()] 396 | 397 | 398 | if __name__ == "__main__": 399 | 400 | parser = argparse.ArgumentParser() 401 | parser.add_argument("-v", "--verbosity", help="increase output verbosity", choices=LOG_VERBOSITY, default='INFO') 402 | 403 | parser.add_argument('--input', help="Evtx file to parse") 404 | 405 | parser.add_argument('--nb_process', type=int, default=cpu_count(), 406 | help="Number of ingest processes to spawn, only useful for more than 1 file") 407 | 408 | parser.add_argument('--index', default="winevt", help="index to use for ingest process") 409 | 410 | parser.add_argument('--keep_cache', action="store_true", 411 | help="Keep JSON cache for future use - Might take a lot of space") 412 | 413 | parser.add_argument('--use_cache', action="store_true", 414 | help="Use the cached files") 415 | 416 | parser.add_argument('--test', action="store_true", 417 | help="Testing mode. No data is sent to Splunk but index and HEC token are created.") 418 | 419 | parser.add_argument('--no_resolve', action="store_true", 420 | help="Disable the event id resolution. If the data file is not found, will be disabled automatically") 421 | 422 | args = parser.parse_args() 423 | log.basicConfig(format=LOG_FORMAT, level=LOG_VERBOSITY[args.verbosity], datefmt='%Y-%m-%d %I:%M:%S') 424 | 425 | start_time = time.time() 426 | 427 | e2s = Evtx2Splunk() 428 | 429 | if e2s.configure(index=args.index, nb_ingestors=args.nb_process, testing=args.test, no_resolve=args.no_resolve): 430 | e2s.ingest(input_files=args.input, keep_cache=args.keep_cache, use_cache=args.use_cache) 431 | 432 | end_time = time.time() 433 | 434 | log.info("Finished in {time}".format(time=end_time-start_time)) 435 | 436 | --------------------------------------------------------------------------------