├── .gitignore ├── .gitmodules ├── Dockerfile ├── LICENSE ├── README.md ├── docs ├── _config.yml ├── _toc.yml ├── cdm │ ├── entities │ │ ├── alert.md │ │ ├── any.md │ │ ├── certificate.md │ │ ├── cloud.md │ │ ├── destination.md │ │ ├── destination_nat.md │ │ ├── device.md │ │ ├── dns.md │ │ ├── etl.md │ │ ├── event.md │ │ ├── file.md │ │ ├── geo.md │ │ ├── group.md │ │ ├── hash.md │ │ ├── http.md │ │ ├── intro.md │ │ ├── ip.md │ │ ├── kerberos.md │ │ ├── logon.md │ │ ├── mac.md │ │ ├── meta.md │ │ ├── module.md │ │ ├── network.md │ │ ├── pipe.md │ │ ├── port.md │ │ ├── process.md │ │ ├── registry.md │ │ ├── rule.md │ │ ├── source.md │ │ ├── source_nat.md │ │ ├── target.md │ │ ├── threat.md │ │ ├── tls.md │ │ ├── url.md │ │ ├── user.md │ │ └── user_agent.md │ ├── guidelines │ │ ├── data_types.md │ │ ├── domain_or_hostname_or_fqdn.md │ │ ├── entity_structure.md │ │ ├── intro.md │ │ ├── source_or_destination_or_target.md │ │ └── table_structure.md │ ├── intro.md │ └── tables │ │ ├── intro.md │ │ └── network_session.md ├── dd │ ├── dictionaries │ │ └── linux │ │ │ ├── intro.md │ │ │ └── sysmon │ │ │ ├── event-1.md │ │ │ ├── event-11.md │ │ │ ├── event-16.md │ │ │ ├── event-23.md │ │ │ ├── event-3.md │ │ │ ├── event-4.md │ │ │ ├── event-5.md │ │ │ ├── event-9.md │ │ │ └── intro.md │ ├── guidelines │ │ ├── authoring_data_dictionaries.md │ │ ├── contributing_data_dictionaries.md │ │ └── intro.md │ ├── intro.md │ └── notebooks │ │ ├── intro.md │ │ └── security_events_correlation.ipynb ├── dm │ ├── intro.md │ ├── mitre_attack │ │ ├── attack_ds_events_mappings.md │ │ ├── attack_techniques_to_events.ipynb │ │ └── intro.md │ └── ossem_relationships_to_events.md ├── images │ └── logo │ │ ├── favicon.ico │ │ └── logo.png └── intro.md └── resources ├── images ├── CarbonBlackDataModel.png ├── EndgameDataModel.png ├── OSSEM_logo.png ├── SysmonDataModel.png ├── attck_datasource_eventlogs_example.png ├── datasource_dataobject_example.png ├── event-1.png ├── event-10.png ├── 
event-11.png ├── event-12.png ├── event-13.png ├── event-14.png ├── event-15.png ├── event-16.png ├── event-17.png ├── event-18.png ├── event-19.png ├── event-2.png ├── event-20.png ├── event-21.png ├── event-22.png ├── event-255.png ├── event-3.png ├── event-4.png ├── event-400.png ├── event-403.png ├── event-4103.png ├── event-4104.png ├── event-5.png ├── event-6.png ├── event-600.png ├── event-7.png ├── event-8.png └── event-9.png ├── parsers ├── SysmonKQLParserV12.0.txt ├── SysmonKQLParserV13.01.txt ├── SysmonKQLParserV13.10.txt ├── SysmonKQLParserV13.22.txt ├── SysmonKQLParserV13.34.txt └── SysmonKQLParserV14.0.txt ├── schemas └── sysmon │ ├── linux │ └── sysmonv1.0.0.xml │ └── windows │ ├── sysmonv11.0_4.30.xml │ ├── sysmonv11.10_4.32.xml │ ├── sysmonv11.11_4.32.xml │ ├── sysmonv12.03_4.40.xml │ ├── sysmonv12_4.40.xml │ ├── sysmonv13.01_4.50.xml │ ├── sysmonv13.10_4.60.xml │ ├── sysmonv13.21_4.70.xml │ ├── sysmonv13.22_4.70.xml │ ├── sysmonv13.34_4.81.xml │ ├── sysmonv14.0_4.82.xml │ └── sysmonv14.14_4.83.xml └── scripts ├── md_to_yaml.py ├── ossem2logstash.py ├── ossemATTCKDM.py ├── ossemSysmonKQLParser.py ├── ossem_converter.py ├── ossem_converter2.py ├── templates ├── attack │ ├── ds_mapping_template.md │ └── ds_template.md ├── attack_ds_event_mappings.md ├── cim_entity_template.md ├── data_dictionary_template.md ├── ddm_relationships_template.md ├── entity.md ├── kql │ └── sysmon_parser.txt ├── logstash │ └── sysmon.conf ├── ossem_relationships_to_events.md ├── readme_template.md ├── table.md ├── toc_template.json └── xlsx_to_yaml_template.xlsx └── xlsx_to_yaml.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | resources/.DS_Store 3 | /.idea/ 4 | /.testing/ 5 | /.vscode/ 6 | /common_information_model/.z_copies.md 7 | /z.sort/ 8 | /env 9 | docs/_build/ -------------------------------------------------------------------------------- /.gitmodules: 
-------------------------------------------------------------------------------- 1 | [submodule "OSSEM-CDM"] 2 | path = OSSEM-CDM 3 | url = https://github.com/OTRF/OSSEM-CDM 4 | branch = master 5 | [submodule "OSSEM-DM"] 6 | path = OSSEM-DM 7 | url = https://github.com/OTRF/OSSEM-DM 8 | branch = main 9 | [submodule "OSSEM-DD"] 10 | path = OSSEM-DD 11 | url = https://github.com/OTRF/OSSEM-DD 12 | branch = main 13 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Mordor script: Jupyter Environment Dockerfile 2 | # Author: Roberto Rodriguez (@Cyb3rWard0g) 3 | # License: GPL-3.0 4 | 5 | FROM cyb3rward0g/jupyter-base:0.0.6 6 | LABEL maintainer="Roberto Rodriguez @Cyb3rWard0g" 7 | LABEL description="Dockerfile OSSEM Project." 8 | 9 | ARG NB_USER 10 | ARG NB_UID 11 | ENV NB_USER jovyan 12 | ENV NB_UID 1000 13 | ENV HOME /home/${NB_USER} 14 | ENV PATH "$HOME/.local/bin:$PATH" 15 | 16 | USER root 17 | 18 | RUN adduser --disabled-password \ 19 | --gecos "Default user" \ 20 | --uid ${NB_UID} \ 21 | ${NB_USER} 22 | 23 | USER ${NB_USER} 24 | 25 | RUN mkdir -p ${HOME}/docs/notebooks \ 26 | && python3 -m pip install requests PyYAML attackcti==0.3.4.3 pandas==1.1.4 bokeh==2.2.3 networkx==2.5 openhunt==1.7.7 --user 27 | 28 | COPY docs/dm ${HOME}/docs/dm 29 | 30 | USER root 31 | 32 | RUN chown -R ${NB_USER}:${NB_USER} ${HOME} ${JUPYTER_DIR} 33 | 34 | WORKDIR ${HOME} 35 | 36 | USER ${NB_USER} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Open Threat Research Forge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Source Security Events Metadata (OSSEM) 2 | 3 | [![Open Source Love](https://badges.frapsoft.com/os/v3/open-source.svg?v=103)](https://github.com/ellerbrock/open-source-badges/) 4 | ![Open_Threat_Research Community](https://img.shields.io/badge/Open_Threat_Research-Community-brightgreen.svg) 5 | [![Twitter](https://img.shields.io/twitter/follow/OSSEM_Project.svg?style=social&label=Follow)](https://twitter.com/OSSEM_Project) 6 | 7 | A community-led project focused primarily on the documentation, standardization and modeling of security event logs. 
8 | 9 | 10 | 11 | ## https://ossemproject.com/intro.html 12 | 13 | # Goals 14 | 15 | * Define and share a common data model in order to improve the data standardization and transformation of security event logs 16 | * Define and share data structures and relationships identified in security events logs 17 | * Provide detailed information in a dictionary format about several security event logs to the community 18 | * Learn more about security event logs (Windows, Linux, MacOS, Azure, AWS, etc) 19 | 20 | # Project Structure 21 | 22 | * [Data Dictionaries (DD)](https://github.com/OTRF/OSSEM-DD): 23 | * Contains specific information about several security event logs organized by operating system and their respective data providers. 24 | * Each dictionary describes a single event log and its corresponding field names. 25 | * It provides the foundational concepts to create a data wiki in an organization. 26 | * [Common Data Model (CDM)](https://github.com/OTRF/OSSEM-CDM) 27 | * Facilitates the normalization of data by providing a standard way to parse security event logs. 28 | * The project is organized by [schema entities](https://github.com/OTRF/OSSEM-CDM/tree/master/schemas/entities) identified in several data sources. 29 | * The definitions of each schema entity and its respective attributes (field names) are mostly general descriptions that could help and expedite event logs parsing procedures. 30 | * The project also provides the concept of [schema tables](https://github.com/OTRF/OSSEM-CDM/tree/master/schemas/tables) to aggregate common entities and parse similar data sources. For example, HTTP, Port and User Agent entities can be used to normalize network traffic metadata captured in a network environment. 
31 | * [Detection Model (DM)](https://github.com/OTRF/OSSEM-DM): 32 | * Focuses on identifying [relationships](https://github.com/OTRF/OSSEM-DM/tree/main/relationships) among security events to facilitate the development of data analytics and help validate the detection of adversary techniques. 33 | 34 | # Sponsors 35 | 36 | [](https://www.tines.com/?utm_source=oss&utm_medium=sponsorship&utm_campaign=Cyb3rWard0g) 37 | 38 | # Author 39 | 40 | * Roberto Rodriguez [@Cyb3rWard0g](https://twitter.com/Cyb3rWard0g) 41 | 42 | # Current Committers 43 | 44 | * Jose Luis Rodriguez [@Cyb3rPandaH](https://twitter.com/Cyb3rPandaH) 45 | * Nate Guagenti [@neu5ron](https://twitter.com/neu5ron) 46 | * Ricardo Dias [@hxnoyd](https://twitter.com/hxnoyd) 47 | 48 | # Projects Using OSSEM 49 | 50 | * [HELK](https://github.com/Cyb3rWard0g/HELK) 51 | * [Azure Sentinel Normalization](https://docs.microsoft.com/en-us/azure/sentinel/normalization-schema) 52 | 53 | # Resources 54 | 55 | * [Ready to hunt? First, Show me your data!](https://cyberwardog.blogspot.com/2017/12/ready-to-hunt-first-show-me-your-data.html) 56 | * [What's new in Windows 10, versions 1507 and 1511](https://docs.microsoft.com/en-us/windows/whats-new/whats-new-windows-10-version-1507-and-1511#bkmk-lsass) 57 | * [Download Security Audit Events for Windows (Spreadsheet)](https://www.microsoft.com/en-us/download/details.aspx?id=50034) 58 | * [Advanced Security Audit Policy Settings](https://docs.microsoft.com/en-us/windows/security/threat-protection/auditing/advanced-security-audit-policy-settings) 59 | * [Monitoring Active Directory for Signs of Compromise](https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/monitoring-active-directory-for-signs-of-compromise#audit-account-management) 60 | * [Audit Policy Recommendations](https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/audit-policy-recommendations) 61 | * [Use Windows Event Forwarding to help 
with intrusion detection](https://docs.microsoft.com/en-us/windows/security/threat-protection/use-windows-event-forwarding-to-assist-in-intrusion-detection) 62 | * [Minimum recommended minimum audit policy](https://docs.microsoft.com/en-us/windows/security/threat-protection/use-windows-event-forwarding-to-assist-in-intrusion-detection#a-href-idbkmk-appendixaaappendix-a---minimum-recommended-minimum-audit-policy) 63 | * [Windows ITPro Docs - Threat Protection](https://github.com/MicrosoftDocs/windows-itpro-docs/tree/master/windows/security/threat-protection) 64 | * [MITRE ATT&CKcon 2018: Hunters ATT&CKing with the Data](https://youtu.be/QCDBjFJ_C3g) 65 | * [MITRE ATT&CKcon 2.0: Ready to ATT&CK? Bring Your Own Data (BYOD) and Validate Your Data Analytics!](https://youtu.be/eM0c_Gil-38) 66 | * [Defining ATT&CK Data Sources, Part I: Enhancing the Current State](https://medium.com/mitre-attack/defining-attack-data-sources-part-i-4c39e581454f) 67 | * [Defining ATT&CK Data Sources, Part II: Operationalizing the Methodology](https://medium.com/mitre-attack/defining-attack-data-sources-part-ii-1fc98738ba5b) 68 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # Book settings 3 | title: "" 4 | logo: images/logo/logo.png 5 | author: Roberto Rodriguez @Cyb3rWard0g 6 | email: "" 7 | description: >- # this means to ignore newlines until "baseurl:" 8 | The official Jupyter Book for the Open Source Security Events Metadata (OSSEM). A community-led project that focuses primarily on the documentation and standardization of security event logs from diverse data sources and operating systems. 
9 | execute: 10 | execute_notebooks: 'off' 11 | 12 | # Define the name of the latex output file for PDF builds 13 | latex: 14 | latex_documents: 15 | targetname: book.tex 16 | 17 | # Information about where the book exists on the web 18 | repository: 19 | url: https://github.com/OTRF/OSSEM # Online location of your book 20 | path_to_book: docs # Optional path to your book, relative to the repository root 21 | branch: master # Which branch of the repository should be used when creating links (optional) 22 | 23 | # Add GitHub buttons to your book 24 | # See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository 25 | html: 26 | favicon: images/logo/favicon.ico 27 | home_page_in_navbar: false 28 | use_edit_page_button: true 29 | use_repository_button: true 30 | use_issues_button: true 31 | baseurl: https://ossemproject.com/ 32 | 33 | launch_buttons: 34 | notebook_interface: "classic" # The interface interactive links will activate ["classic", "jupyterlab"] 35 | binderhub_url: "https://mybinder.org" 36 | colab_url: "https://colab.research.google.com" 37 | thebe: true 38 | 39 | parse: 40 | myst_enable_extensions: # default extensions to enable in the myst parser. 
See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html 41 | - amsmath 42 | - colon_fence 43 | - deflist 44 | - dollarmath 45 | - html_admonition 46 | - html_image 47 | - linkify 48 | - replacements 49 | - smartquotes 50 | - substitution -------------------------------------------------------------------------------- /docs/_toc.yml: -------------------------------------------------------------------------------- 1 | format: jb-book 2 | root: intro 3 | parts: 4 | - caption: Data Dictionaries 5 | chapters: 6 | - file: dd/intro 7 | - file: dd/guidelines/intro 8 | sections: 9 | - file: dd/guidelines/contributing_data_dictionaries 10 | - file: dd/guidelines/authoring_data_dictionaries 11 | - file: dd/notebooks/intro 12 | sections: 13 | - file: dd/notebooks/security_events_correlation 14 | - file: dd/dictionaries/linux/intro 15 | sections: 16 | - file: dd/dictionaries/linux/sysmon/intro 17 | sections: 18 | - file: dd/dictionaries/linux/sysmon/event-1 19 | - file: dd/dictionaries/linux/sysmon/event-11 20 | - file: dd/dictionaries/linux/sysmon/event-16 21 | - file: dd/dictionaries/linux/sysmon/event-23 22 | - file: dd/dictionaries/linux/sysmon/event-3 23 | - file: dd/dictionaries/linux/sysmon/event-4 24 | - file: dd/dictionaries/linux/sysmon/event-5 25 | - file: dd/dictionaries/linux/sysmon/event-9 26 | - caption: Common Data Model 27 | chapters: 28 | - file: cdm/intro 29 | - file: cdm/guidelines/intro 30 | sections: 31 | - file: cdm/guidelines/entity_structure 32 | - file: cdm/guidelines/table_structure 33 | - file: cdm/guidelines/data_types 34 | - file: cdm/guidelines/domain_or_hostname_or_fqdn 35 | - file: cdm/guidelines/source_or_destination_or_target 36 | - file: cdm/entities/intro 37 | sections: 38 | - file: cdm/entities/alert 39 | - file: cdm/entities/any 40 | - file: cdm/entities/certificate 41 | - file: cdm/entities/cloud 42 | - file: cdm/entities/destination 43 | - file: cdm/entities/destination_nat 44 | - file: cdm/entities/device 45 | - 
file: cdm/entities/dns 46 | - file: cdm/entities/etl 47 | - file: cdm/entities/event 48 | - file: cdm/entities/file 49 | - file: cdm/entities/geo 50 | - file: cdm/entities/group 51 | - file: cdm/entities/hash 52 | - file: cdm/entities/http 53 | - file: cdm/entities/ip 54 | - file: cdm/entities/kerberos 55 | - file: cdm/entities/logon 56 | - file: cdm/entities/mac 57 | - file: cdm/entities/meta 58 | - file: cdm/entities/module 59 | - file: cdm/entities/network 60 | - file: cdm/entities/pipe 61 | - file: cdm/entities/port 62 | - file: cdm/entities/process 63 | - file: cdm/entities/registry 64 | - file: cdm/entities/rule 65 | - file: cdm/entities/source 66 | - file: cdm/entities/source_nat 67 | - file: cdm/entities/target 68 | - file: cdm/entities/threat 69 | - file: cdm/entities/tls 70 | - file: cdm/entities/url 71 | - file: cdm/entities/user 72 | - file: cdm/entities/user_agent 73 | - file: cdm/tables/intro 74 | sections: 75 | - file: cdm/tables/network_session 76 | - caption: Detection Model 77 | chapters: 78 | - file: dm/intro 79 | - file: dm/ossem_relationships_to_events 80 | - file: dm/mitre_attack/intro 81 | sections: 82 | - file: dm/mitre_attack/attack_techniques_to_events 83 | - file: dm/mitre_attack/attack_ds_events_mappings 84 | -------------------------------------------------------------------------------- /docs/cdm/entities/alert.md: -------------------------------------------------------------------------------- 1 | # alert 2 | 3 | Alert fields that describe/normalize an indicator from a tool of a possible issue. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | alert_category | string | The category of an alert | ```Malware``` | 10 | | alert_description | string | The expanded description of the alert event | ```This is event x``` | 11 | | alert_id | integer | Alert identifier defined by the tool or system that triggered the alert. 
Alert ids might repeat across different data sources | ```1234``` | 12 | alert_message | string | The message provided by the alert | ```A file exhibiting behavior of the evil/actor command and control framework 2 was detected.``` | 13 | alert_severity | string | The severity of an alert | ```Priority 5``` | 14 | alert_signature | string | The name or title of an alert | ```EvilActor:CnCv2``` | 15 | alert_version | string | A signature or alert version | ```1.2``` | 16 | -------------------------------------------------------------------------------- /docs/cdm/entities/any.md: -------------------------------------------------------------------------------- 1 | # any 2 | 3 | Fields used to define metadata for a single field to include data from multiple fields with similar/same values/data. This data is most commonly created from an ETL pipeline. Any fields below that contain a '*' are searches and not actual fields (key/values). This is because certain values are not desirable to copy/duplicate. However, because of a common schema we can still find all values for a specific common type, without duplicating or copying everything to one field! 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | any_event_id | string | Allows searching a single field for all log IDs. All log ID fields copied/duplicated to a single field as an array. | ```````` | 10 | | any_hash | string | Allows searching a single field for all hashes. All hash fields copied/duplicated to a single field as an array. | ```````` | 11 | | any_ip_addr | ip | Allows searching a single field for all IPs. All IP fields copied/duplicated to a single field as an array. | ```````` | 12 | | any_ip_addr | ip | IP address assigned to the device generating the event and/or the IP address in the network packet. 
This could be used in the context of source, destination, device and even NAT when it is provided by an intermediary NAT device such as a firewall. | ```192.168.1.2``` | 13 | any_ip_dhcp_assigned_ip_addr | ip | IP address assigned by the DHCP server. | ```192.168.1.2``` | 14 | any_ip_geo.as_org | string | Allows searching a single field for all BGP AS Organization Names. All AS name fields copied/duplicated to a single field as an array. | ```````` | 15 | any_ip_geo.asn | integer | Allows searching a single field for all BGP AS Numbers. All AS number fields copied/duplicated to a single field as an array. | ```````` | 16 | any_ip_is_ipv6 | boolean | If IP address is IP version 6 | ```false``` | 17 | any_mac_addr | string | Allows searching a single field for all MAC addresses. All MAC address fields copied/duplicated to a single field as an array. | ```````` | 18 | any_user | string | Allows searching a single field for all users. All user fields copied/duplicated to a single field as an array. | ```````` | 19 | any_vlan_id | integer | Allows searching a single field for all VLAN IDs. All VLAN ID fields copied/duplicated to a single field as an array. | `````` | 20 | -------------------------------------------------------------------------------- /docs/cdm/entities/certificate.md: -------------------------------------------------------------------------------- 1 | # certificate 2 | 3 | This document is a work in progress, but it is foundational. Specifically, the main foundations of certificate information are already in here. 
4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | certificate_hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 10 | | certificate_hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 11 | | certificate_hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 12 | | certificate_hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 13 | | certificate_hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 14 | | certificate_issuer | string | Information about the CA that issued the certificate | ```CN=neu5ron.local,OU=Admin``` | 15 | | certificate_serial_number | string | Serial number, this is chosen by the CA (certificate authority) which issued the certificate. Therefore, this can be relatively arbitrary if the CA does not follow a standard or is malicious. | ```5157550``` | 16 | | certificate_subject | string | Information about the entity (subject) the certificate was issued to | ```CN=natetoken,OU=Admin,DC=neu5ron,DC=local``` | 17 | -------------------------------------------------------------------------------- /docs/cdm/entities/cloud.md: -------------------------------------------------------------------------------- 1 | # cloud 2 | 3 | Event fields used to identify/normalize infrastructure and application in the cloud from different cloud providers. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | cloud_app_id | string | The ID of the application for an HTTP application as identified by a proxy. This value is usually specific to the proxy used. 
| ```124``` | 10 | cloud_app_name | string | The name of an application provided by a cloud service. | ```AppOne``` | 11 | cloud_app_operation | string | The operation the user performed in the context of the application for an HTTP application as identified by a proxy. This value is usually specific to the proxy used. | ```DELETE``` | 12 | cloud_app_risk_level | string | The risk level associated with an HTTP application as identified by a proxy. This value is usually specific to the proxy used. | ```3``` | 13 | -------------------------------------------------------------------------------- /docs/cdm/entities/destination_nat.md: -------------------------------------------------------------------------------- 1 | # destination_nat 2 | 3 | Event fields used to define/normalize the destination NAT (network address translation) in a network connection event. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | dst_nat_ip_addr | ip | IP address assigned to the device generating the event and/or the IP address in the network packet. This could be used in the context of source, destination, device and even NAT when it is provided by an intermediary NAT device such as a firewall. | ```192.168.1.2``` | 10 | | dst_nat_ip_dhcp_assigned_ip_addr | ip | IP address assigned by the DHCP server. | ```192.168.1.2``` | 11 | | dst_nat_ip_is_ipv6 | boolean | If IP address is IP version 6 | ```false``` | 12 | | dst_nat_original_value | string | original value of a destination NAT before any modifications. For example, if wanting to cleanup a network share and keep the IP - this field would be used to keep the original value | ```8.8.8.8``` | 13 | | dst_nat_port_name | string | Name of the port used in a network connection. This is usually determined by IANA common port assignment. Therefore, it is only a guess and NOT necessarily the application/protocol that is actually in use. 
| ```netbios-dgm``` | 14 | | dst_nat_port_number | integer | Port number used in a network connection. This could be used in the context of source, destination and even NAT when it is provided by an intermediary NAT device such as a firewall. | ```138``` | 15 | -------------------------------------------------------------------------------- /docs/cdm/entities/device.md: -------------------------------------------------------------------------------- 1 | # device 2 | 3 | Events used to normalize events for the device or endpoint that generated the event (source or destination). 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | dvc_action | string | If reported by an intermediary device such as a firewall, the action taken by device. | ```allow``` | 10 | | dvc_domain | string | Name of the domain the device is part of. | ```hunt.wardog.com``` | 11 | | dvc_fqdn | string | The fully qualified domain name of the host | ```WKHR001.hunt.wardog.com``` | 12 | | dvc_hostname | string | The host name from which the event/log came from. There may be multiple host names in an event (i.e. syslog could have forwarder host name), this field is to be the most true log host name (i.e. NOT the forwarders name). | ```bobs.uncle-pc``` | 13 | | dvc_inbound_interface | string | If reported by an intermediary device such as a firewall, the network interface used by it for the connection to the source device | ```eth0``` | 14 | | dvc_interface_guid | string | GUID of the network interface which was used for authentication request | ```{2BB33827-6BB6-48DB-8DE6-DB9E0B9F9C9B}``` | 15 | | dvc_interface_name | string | the name (description) of the network interface that was used for authentication request. 
You can get the list of all available network adapters using "ipconfig /all" command | ```Microsoft Hyper-V Network Adapter``` | 16 | | dvc_ip_addr | ip | IP address assigned to the device generating the event and/or the IP address in the network packet. This could be used in the context of source, destination, device and even NAT when it is provided by an intermediary NAT device such as a firewall. | ```192.168.1.2``` | 17 | | dvc_ip_dhcp_assigned_ip_addr | ip | IP address assigned by the DHCP server. | ```192.168.1.2``` | 18 | | dvc_ip_is_ipv6 | boolean | If IP address is IP version 6 | ```false``` | 19 | | dvc_mac_addr | string | MAC address of the device where the event was generated or network interface where a connection starts or ends. | ```00:11:22:33:44:55``` | 20 | | dvc_model_name | string | The model name of the device | ```Samsung Galaxy Note``` | 21 | | dvc_model_number | string | The model number of the device | ```10``` | 22 | | dvc_os | string | The OS of the device | ```iOS``` | 23 | | dvc_outbound_interface | string | If reported by an intermediary device such as a firewall, the network interface used by it for the connection to the destination device. | ```Ethernet 4``` | 24 | | dvc_type | string | The type of the device | ```mobile``` | 25 | -------------------------------------------------------------------------------- /docs/cdm/entities/dns.md: -------------------------------------------------------------------------------- 1 | # dns 2 | 3 | Event fields used to define metadata in DNS events. This commonly includes data in logs that contain DNS queries. Including, but not limited to, Zeek dns.log, Suricata DNS, Sysmon EventID 22, Windows DNS debug/trace logs. In the verbiage below, request is used to denote the client (or forwarded address if applicable) that is making the DNS request. 
This would commonly be the client/source that is looking up a domain. The response/answer is used to denote the server that responded with the answer or responded to the request/client. It is important to remember that in DNS logs there are multiple servers that may be involved in the response. This is similar to how packets are forwarded through routers. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | dns_additional_authoritative_name | string | additional authoritative response data from the supplemental information in the "additional" section of the DNS response defined in https://tools.ietf.org/html/rfc2181#section-5.4.1 | ```google.com``` | 10 | | dns_additional_name | string | additional response data from the supplemental information in the "additional" section of the DNS response defined in https://tools.ietf.org/html/rfc2181#section-5.4.1 | ```10.10.10.1``` | 11 | | dns_flags | array_string | An array of DNS flags if the data source does not parse them or set as boolean | ```[ "1", "0" ]``` | 12 | | dns_flags_authenticated | boolean | The "AD" flag. Indicates in a response that all data included in the answer and authority sections of the response have been authenticated by the server according to the policies of that server. see https://tools.ietf.org/html/rfc3655#section-6.1 for more information. This is related to DNSSEC | ```false``` | 13 | | dns_flags_authoritative | boolean | The "AA" flag. Whether the response (answer) from the server was authoritative | ```true``` | 14 | | dns_flags_checking_disabled | boolean | The "CD" flag. Indicates checking disabled for DNSSEC | ```true``` | 15 | | dns_flags_recursion_available | boolean | The "RA" flag. Indicates the server supports recursive queries | ```false``` | 16 | | dns_flags_recursion_desired | boolean | The "RD" flag. Client requested recursion for the lookup/request | ```true``` | 17 | | dns_flags_truncated | boolean | The "TC" flag. 
Indicating (from the server) that response was more than permitted for the single sessions channel, this is usually 512 bytes. | ```true``` | 18 | dns_flags_z | integer | The "Z" flag. This is a reserved field for older DNS implementations https://tools.ietf.org/html/rfc5395 | ```0``` | 19 | dns_query_class | string | The class of the dns record requested in decimal format, normally this should be 1. Query class is related to zone information, therefore most clients would request this type of class | ```1``` | 20 | dns_query_class_name | integer | The class of the dns record requested as a string, normally this should be C_INTERNET. Query class is related to zone information, therefore most clients would request this type of class | ```C_INTERNET``` | 21 | dns_query_name | string | what was queried | ```google.com``` | 22 | dns_query_type | string | The type of dns requested in decimal format | ```28``` | 23 | dns_query_type_name | integer | The type of dns requested as a string | ```AAAA``` | 24 | dns_rejected | boolean | The server responded to the query but no answers were given. If not in the log source, could also be determined by a successful dns response code and no answers/replies returned | ```false``` | 25 | dns_response_code | integer | The response code returned from the server for the request in decimal format | ```0``` | 26 | dns_response_code_name | string | The response code returned from the server for the request as a string | ```NOERROR``` | 27 | dns_response_name | array_string | The results returned for the dns query. can contain a mix of IPs or domains | ```8.8.8.8``` | 28 | dns_response_ttl | array_float | The time to live (TTL) for each response_name | ```````` | 29 | dns_rtt | float | round trip time (RTT) of the dns query to answer | ```0.006946``` | 30 | dns_transaction_id | integer | Hexadecimal identifier assigned by the program that generated the DNS query. Is 16-bit. 
Can be used to match up DNS requests across software/clients | ```4D11``` | 31 | | dns_transaction_id_hex | string | transaction_id in decimal format | ```19729``` | 32 | -------------------------------------------------------------------------------- /docs/cdm/entities/etl.md: -------------------------------------------------------------------------------- 1 | # etl 2 | 3 | Event fields used to define/normalize specific metadata about the event during the processing of an ETL (Extract, Transform, Load) pipeline. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | etl_format_applied | string | Formatting or encoding applied during the ETL processing. Also referred to as CODEC in some use cases. Can be an array if multiple formats were applied/determined | ```[ "sylog", "json" ]``` | 10 | | etl_format_is_cef | boolean | During ETL processing, event is determined to be CEF (format) | ```false``` | 11 | | etl_format_is_json | boolean | During ETL processing, event is determined to be JSON (format) | ```true``` | 12 | | etl_format_is_syslog | boolean | During ETL processing, event is determined to be Syslog (format). Technically you could send data encoded in different format over syslog (ie: CEF or JSON), therefore an event/log can have this tag/field as well as other format fields | ```true``` | 13 | | etl_format_is_xml | boolean | During ETL processing, event is determined to be XML (format) | ```true``` | 14 | | etl_host_agent_type | string | Type of forwarder from the client (i.e. winlogbeat, nxlog, rsyslog, etc) | ```nxlog``` | 15 | | etl_host_agent_uid | string | UID for the host's software/agent a part of the event | ```fe4fb818-088f-4529-a343-b94baf057a53``` | 16 | | etl_info_tags | string | Use for any additional information about an event/log during ETL/processing pipeline. Commonly, you would use this for things that are rare but happen (i.e. parsing error for non conforming RFC). 
Use this field to alert or give context to a user/analyst when looking at the data. | ```inferred network_protocol as udp``` | 17 | | etl_input_application_name | string | Application name used to receive or gather the log for the very first portion of the ETL processing (i.e. kafka, beats, syslog) | ```kafka``` | 18 | | etl_input_application_protocol | string | Application protocol used to receive or gather the log for the very first portion of the ETL processing (ex: syslog, http, sftp) | ```kafka``` | 19 | | etl_input_port | integer | Port (network) used to receive or gather the log for the very first portion of the ETL processing | ```9092``` | 20 | | etl_input_protocol | string | Protocol (network) used to receive or gather the log for the very first portion of the ETL processing (ie: tcp, udp, icmp) | ```tcp``` | 21 | | etl_input_src_port | integer | The Port (network) the client/source used to send the log for the very first portion of the ETL processing. Mostly used in syslog | ```48231``` | 22 | | etl_kafka_consumer_group | string | Consumer group that the etl was apart of from consuming from a Kafka topic | ```helk_logstash``` | 23 | | etl_kafka_key | string | Record key, if any | `````` | 24 | | etl_kafka_offset | long | Kafka partition for the event | ```204802842``` | 25 | | etl_kafka_partition | integer | Kafka partition for the event | ```1``` | 26 | | etl_kafka_time | date | Depending on your Kafka broker configuration, this can be either when the record was created (default) or when it was received by the broker | ```4/11/2018 5:49:25``` | 27 | | etl_kafka_topic | string | Kafka topic name | ```winevent``` | 28 | | etl_pipeline | string | Used to keep track of tags related to transforms, enrichment, or modifications made in an ETL pipeline | ```all-add_processed_timestamp``` | 29 | | etl_processed_time | date | The first time the event gets processed by the ETL (processing pipeline) | ```4/11/2018 5:49:25``` | 30 | | etl_version | string | The 
schema or transform versioning that is being applied | ```v1.0.1``` | 31 | -------------------------------------------------------------------------------- /docs/cdm/entities/event.md: -------------------------------------------------------------------------------- 1 | # event 2 | 3 | Event attributes used to define/normalize specific metadata of the event itself. This also includes information about the host where the event was originally generated. In scenarios where an event is forwarded (Windows Event Forwarding, Syslog, etc), the ETL entity must be used. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | event_category_type | string | A description of the event, which can help with categorization. If the vendor defines a category/grouping for its log. i.e. Zeek has a few category types for its many logs (network-protocols, network-observations, etc...). Example. sysmon event id 12 is EventType field is this. | ```network-protocols``` | 10 | | event_category_type | string | If the event contains a category, then this it. i.e For the Windows Security channel, this could be something such as Audit object access. For Zeek conn.log, this would be network-protocols. | ```Audit Object Access``` | 11 | | event_count | integer | The number of aggregated events, if applicable | ```10``` | 12 | | event_creation_time | datetime | original time when event/log was created as reported from the log source itself | ```2017-01-21 09:12:34``` | 13 | | event_duration | float | The length/duration of the event in seconds (e.g., 1 min is 60.0) | ```60``` | 14 | | event_end_time | datetime | The time in which the event ended | ```2017-04-12 12:00:00``` | 15 | | event_error | string | Information about an error | ```an error occurred``` | 16 | | event_error_code | integer | Integer that defines a particular error | ```4564``` | 17 | | event_id | integer | event identifier for specific event logs. 
Event ids might repeat across different data sources. This is most common in Windows using EventID | ```4688``` | 18 | | event_message | string | A general message or description, either included in, or generated from the record | ```TCP access denied``` | 19 | | event_original_message | string | The (original) log message from the source before any ETL manipulations/modifications | ```a long message``` | 20 | | event_original_time | datetime | original time when event/log was created as reported from the log source itself. | ```4/11/2018 5:46:18``` | 21 | | event_original_uid | string | Original unique ID specific to the log/event as recorded from the source. | ```CMzY3i4YoNZ3mT5yu5``` | 22 | | event_product | string | The product generating the event. Vendor and product might be the same for some data sources. | ```Windows``` | 23 | | event_product_version | string | The version of the product generating the event | ```10``` | 24 | | event_recorded_time | datetime | The time the log was recorded on disk or data plane or if there is another timestamp with the log (common scenario if there is a a manager of many devices or the log itself tracks log time and log written/recorded time) (e.g., 1 min is 60.0). | ```4/11/2018 5:46:18``` | 25 | | event_report_url | string | url of the full analysis report, if applicable | ```https://192.168.1.1/reports/ade-123-afa.log``` | 26 | | event_resource_group | string | The resource group to which the device generating the record belongs. This might be an AWS account, or an Azure subscription or Resource Group | ```DBVM``` | 27 | | event_resource_id | string | The resource ID of the device generating the message. | ```/subscriptions/aaabbbcc-dddd-eeee-1234-1234567890ab/resourcegroups/shokobo/providers/microsoft.compute/virtualmachines/sysmachine``` | 28 | | event_result | string | The result reported for the activity. 
Empty value when not applicable | ```success``` | 29 | | event_result_details | string | Reason or details for the result reported in event_result | ```Wrong Password``` | 30 | | event_schema_version | string | Azure Sentinel Schema Version | ```0.1``` | 31 | | event_severity | string | The severity of the event as defined manually or usually via the original log, commonly this would be syslog severity. The number codes should be converted to their corresponding string value. | ```high``` | 32 | | event_start_time | datetime | The time in which the event started | ```2017-01-21 09:12:34``` | 33 | | event_status | string | Defines the status of a particular event | ```User logon with expired account``` | 34 | | event_status_code | integer | Integer that defines a particular status | ```3221225875``` | 35 | | event_sub_category_type | string | If the event contains a sub-category, then this is it. i.e. for the Windows Security channel, this could be something such as Audit Registry. | ```Audit Registry``` | 36 | | event_sub_status | string | Additional status information | ```Account expired 300 days ago``` | 37 | | event_sub_status_code | integer | Integer that defines a particular event_sub_status | ```0``` | 38 | | event_sub_type | string | If there are subsets of an event log type, this field carries the next level value. i.e. for Windows, it would be the Security channel. | ```Security``` | 39 | | event_time_ingested | datetime | The time the event was ingested to SIEM or data pipeline. | ```2157-01-21 09:12:34``` | 40 | | event_timestamp | datetime | The most accurate timestamp of the log. Commonly this will be the original reporting timestamp from the log. However, if you believe the log timestamp has been altered or skewed (ie: either due to timezone issues or NTP skew) then replace this field with the most likely timestamp.
Always keep the original log timestamp in the field creation_timestamp | ```2017-01-21 09:12:34``` | 41 | | event_timezone | string | Timezone of the event if it can be determined. Format such as UTC, UTC+1, UTC-5, etc.. | ```UTC``` | 42 | | event_type | string | Type of event being collected. i.e For Windows it would be the Event Provider (Microsoft-Windows-Security-Auditing). I.e. Paloalto, it would be the type of event such as Traffic or Threat. I.e. Zeek Logs, one example could be the conn.log. | ```Microsoft-Windows-Security-Auditing``` | 43 | | event_type_detailed | string | Additional description of type if applicable | ```````` | 44 | | event_uid | string | Original unique ID specific to the log/event assigned to the event (not original). | ```516a64e3-8360-4f1e-a67c-d96b3d52df54``` | 45 | | event_vendor | string | The vendor of the product generating the event | ```Microsoft``` | 46 | -------------------------------------------------------------------------------- /docs/cdm/entities/file.md: -------------------------------------------------------------------------------- 1 | # file 2 | 3 | Event fields used to define/normalize metadata about files either locally or over the wire (Network Traffic). This entity and attributes can extend other entities such as source and destination. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | file_accessed_time | date | When the file was last accessed . Also known as `atime` | ```2016-11-25 18:21:47``` | 10 | | file_changed_time | date | When the file was last changed. Also known as `ctime` | ```2016-11-25 18:21:47``` | 11 | | file_company | string | Company name a file belongs to | ```Microsoft Corporation``` | 12 | | file_creation_time | date | When the file was created. 
Also known as `crtime` | ```2016-11-25 18:21:47``` | 13 | | file_description | string | Description of a file | ```Console Window Host``` | 14 | | file_directory | string | Directory of file(s). It does not include the file name | ```C:\users\wardog\``` | 15 | | file_extension | string | The extension name or type of the file. | ```exe``` | 16 | | file_hard_links | integer | Number of hard links | ```3``` | 17 | | file_hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 18 | | file_hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 19 | | file_hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 20 | | file_hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 21 | | file_hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 22 | | file_inode | integer | Filesystem inode number | `````` | 23 | | file_link_name | string | path of the hard link | ```C:\Docs\My.exe``` | 24 | | file_mime_type | string | MIME type name specified for the file | ```application/msword``` | 25 | | file_modified_time | date | When the file was last modified. Also known as `mtime` | ```2016-11-25 18:21:47``` | 26 | | file_name | string | name of the file without its full path. This could be a local file or one transmitted over the network. | ```a.exe``` | 27 | | file_path | string | full path of a file including the name of the file. This could be a local file or one transmitted over the network. 
| ```C:\users\wardog\z.exe``` | 28 | | file_previous_accessed_time | date | When the file was previously accessed | ```2016-11-25 18:21:47``` | 29 | | file_previous_changed_time | date | When the file was previously changed | ```2016-11-25 18:21:47``` | 30 | | file_previous_creation_time | date | When the file was previously created | ```2016-11-25 18:21:47``` | 31 | | file_previous_modified_time | date | When the file was previously modified | ```2016-11-25 18:21:47``` | 32 | | file_previous_name | string | The file's previous name | ```cmd.exe``` | 33 | | file_previous_path | string | The file's previous path | ```C:\\Windows\system32\cmd.exe``` | 34 | | file_product | string | The file's product name | ```Microsoft® Windows® Operating System``` | 35 | | file_size | integer | Size of the file, in bytes. | ```45``` | 36 | | file_symlink | integer | 1 if the path is a symlink, otherwise 0 | ```0``` | 37 | | file_symlink_name | string | path of the symlink | ```C:\Docs\My.exe``` | 38 | | file_system_block_size | integer | Block size of filesystem | `````` | 39 | | file_system_type | string | The file system type, ex: fat32, ntfs, vmfs, ext3, ext4, xfs | ```ntfs``` | 40 | | file_version | string | file version. i.e. image loaded version | ```10.0.16299.15 (WinBuild.160101.0800)``` | 41 | -------------------------------------------------------------------------------- /docs/cdm/entities/geo.md: -------------------------------------------------------------------------------- 1 | # geo 2 | 3 | Event fields used to define/normalize metadata about a geographical location. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | geo_city | string | The city associated to the IP address in the network session. | ```San Miguel``` | 10 | | geo_continent | string | The continent associated with the IP address in the network session. 
| ```South America``` | 11 | | geo_country | string | The country associated with the IP address in the network session. | ```Peru``` | 12 | | geo_country_capital | string | The capital of the country associated with the IP address in the network session. | ```Lima``` | 13 | | geo_country_code | string | Country code | ```51``` | 14 | | geo_latitude | string | The latitude of the geographical coordinate associated with the IP address in the network session. | ```38.8951``` | 15 | | geo_longitude | string | The longitude of the geographical coordinate associated with the IP address in the network session. | ```-77.0364``` | 16 | | geo_region | string | The region within a country associated with the IP address in the network session. | ```East US``` | 17 | -------------------------------------------------------------------------------- /docs/cdm/entities/group.md: -------------------------------------------------------------------------------- 1 | # group 2 | 3 | Event fields used to define/normalize metadata about a security group, or distribution group that is created, changed, or deleted. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | group_domain | string | domain or computer name of the group | ```CONTOSO``` | 10 | | group_name | string | the name of a security group, or a distribution group that is created, changed, or deleted | ```AccountOperators``` | 11 | | group_sam_name | string | this is a name of the group used to support clients and servers from previous versions of Windows (pre-Windows 2000 logon name). The value of sAMAccountName attribute of new group object. For example: ServiceDesk. For local groups it is simply a name of new group | ```AccountOperators``` | 12 | | group_sid | string | SID of a group | ```S-1-5-21-3457937927-2839227994-823803824-6605``` | 13 | | group_sid_history | string | contains previous SIDs used for the object if the object was moved from another domain.
Whenever an object is moved from one domain to another, a new SID is created and becomes the objectSID. The previous SID is added to the sIDHistory property. This parameter contains the value of sIDHistory attribute of a group object. This parameter might not be captured in the event, and in that case appears as "-". For local groups it is not applicable and always has "-" value. | ```-``` | 14 | -------------------------------------------------------------------------------- /docs/cdm/entities/hash.md: -------------------------------------------------------------------------------- 1 | # hash 2 | 3 | Event fields used to define/normalize data related to hashes of an image/binary/file. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 10 | | hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 11 | | hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 12 | | hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 13 | | hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 14 | -------------------------------------------------------------------------------- /docs/cdm/entities/http.md: -------------------------------------------------------------------------------- 1 | # http 2 | 3 | Event fields used to define/normalize metadata about HTTP (Hypertext Transfer Protocol) information. This is based on information in the layer 7 (HTTP) application, however can also include HTTP information from an endpoint/server. 
IIS, Apache, NGINX, proxy logs, and other variances of logs that have HTTP information would go in here. Also, if the HTTP connection is from a decrypted/MITM HTTPS/TLS session then portions of that information, where applicable, would go in here. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | http_content_type | string | The HTTP Response content type header for HTTP/HTTPS network sessions. | `````` | 10 | | http_cookie_variables | string | The values of (HTTP) cookies | ```T1NTRU0K``` | 11 | | http_informational_code | integer | integer response code of 100-199 | ```101``` | 12 | | http_informational_message | string | message/text of the integer response code that was 100-199 | ```Switching Protocols``` | 13 | | http_proxied_headers | string | All of the headers that may indicate if the request was proxied. i.e. FORWARDED;X-FORWARDED-FOR;X-FORWARDED-FROM;CLIENT-IP;VIA;XROXY-CONNECTION;PROXY-CONNECTION | ```Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36``` | 14 | | http_referrer_original | string | HTTP header "Referer". The HTTP referer header for HTTP/HTTPS network sessions. | ```https://sub.domain.tld/path/a/b/JavaScript``` | 15 | | http_request_body_bytes | integer | Amount of bytes that the source/client sent | ```2``` | 16 | | http_request_header_host | string | Value of the HOST header from the client. This should be copied to dst_host_name | ```www.activewebsoftwares.com``` | 17 | | http_request_header_names | string | List of any additional (or all) HTTP headers. Because a client can use any HTTP header they want and there are already hundreds of https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers it would be impossible to define a specific field for each one. 
| ```X-Forwarded-For``` | 18 | | http_request_header_origin | string | Value of the Origin header from the client | ```origin``` | 19 | | http_request_header_values | string | Values for the request_header_names parameters | ```10.1.1.1``` | 20 | | http_request_method | string | Type of HTTP request that was made. Other examples could be (anything) PUT, POST, HEAD, DELETE | ```GET``` | 21 | | http_request_time | integer | The amount of time in milliseconds it took to send the request to the server, if applicable. | ```700``` | 22 | | http_request_xff | string | The HTTP X-Forwarded-For header for HTTP/HTTPS network sessions. | ```203.0.113.195``` | 23 | | http_response_body_bytes | integer | Amount of bytes that the destination/server returned | ```87``` | 24 | | http_response_body_original | string | The raw HTTP (response) body | ```
This is title
Hello world ``` | 25 | | http_response_header_names | string | List of any additional (or all) HTTP headers. Because a server can use any HTTP header they want and there are already hundreds of https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers it would be impossible to define a specific field for each one. | ```X-Forwarded-For``` | 26 | | http_response_header_values | string | Values for the response_header_names parameters | ```10.1.1.1``` | 27 | | http_response_time | integer | The amount of time in milliseconds it took to receive a response from the server, if applicable. | ```800``` | 28 | | http_status_code | integer | HTTP Server reply code | ```200``` | 29 | | http_status_message | string | HTTP server reply message | ```OK``` | 30 | | http_user_agent_original | string | The User agent seen in the HTTP request. | ```Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36``` | 31 | | http_version | string | HTTP request version | ```1.1``` | 32 | -------------------------------------------------------------------------------- /docs/cdm/entities/intro.md: -------------------------------------------------------------------------------- 1 | # Entities -------------------------------------------------------------------------------- /docs/cdm/entities/ip.md: -------------------------------------------------------------------------------- 1 | # ip 2 | 3 | Event fields used to define/normalize metadata about IP addresses in a network. It follows the standard from the Destination, Source and device categories. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | ip_addr | ip | IP address assigned to the device generating the event and/or the IP address in the network packet. This could be used in the context of source, destination, device and even NAT when it is provided by an intermediary NAT device such as a firewall.
| ```192.168.1.2``` | 10 | | ip_dhcp_assigned_ip_addr | ip | IP address assigned by the DHCP server. | ```192.168.1.2``` | 11 | | ip_is_ipv6 | boolean | If IP address is IP version 6 | ```false``` | 12 | -------------------------------------------------------------------------------- /docs/cdm/entities/kerberos.md: -------------------------------------------------------------------------------- 1 | # kerberos 2 | 3 | Event fields used to define/normalize Kerberos Ticket Granting Service and Kerberos Ticket Granting Tickets. For certificate information within Kerberos see the ./x509_and_certificates.md 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | krb_service_name | string | the name of the account or computer for which the TGS ticket was requested | ```WIN2008R2$``` | 10 | | krb_ticket_encryption_type | string | the cryptographic suite that was used for issued TGS. | ```0x12``` | 11 | | krb_ticket_options | string | this is a set of different ticket flags in hexadecimal format. | ```0x40810010``` | 12 | | krb_ticket_pre_auth_type | integer | the code number of pre-Authentication type which was used in TGT request. | ```15``` | 13 | | krb_ticket_request_type | string | Request type - Authentication Service ("AS") or Ticket Granting Service ("TGS") | ```TGS``` | 14 | | krb_ticket_status | string | hexadecimal result code of TGS issue operation. | ```0x0``` | 15 | -------------------------------------------------------------------------------- /docs/cdm/entities/logon.md: -------------------------------------------------------------------------------- 1 | # logon 2 | 3 | Event fields used to define/normalize metadata about logon events. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | logon_authentication_lan_package_name | string | The name of the LAN Manager sub-package (NTLM-family protocol name) that was used during logon. 
Possible values are: NTLM V1, NTLM V2, LM. Only populated if Authentication Package = NTLM. | ```-``` | 10 | | logon_authentication_package_name | string | The name of the authentication package which was used for the logon authentication process. Default packages loaded on LSA startup are located in "HKLM\SYSTEM\CurrentControlSet\Control\Lsa\OSConfig" registry key. Other packages can be loaded at runtime. When a new package is loaded a "4610: An authentication package has been loaded by the Local Security Authority" (typically for NTLM) or "4622: A security package has been loaded by the Local Security Authority" (typically for Kerberos) event is logged to indicate that a new package has been loaded along with the package name. | ```Negotiate``` | 11 | | logon_device_claims | string | list of device claims for new logon session | ```-``` | 12 | | logon_elevated_token | string | a "Yes" or "No" flag. If "Yes" then the session this event represents is elevated and has administrator privileges. | ```%%1842``` | 13 | | logon_guid | string | a GUID that can help you correlate this event with another event that can contain the same Logon GUID, "4769(S, F): A Kerberos service ticket was requested event on a domain controller. It also can be used for correlation between a 4624 event and several other events (on the same computer) that can contain the same Logon GUID, "4648(S): A logon was attempted using explicit credentials" and "4964(S): Special groups have been assigned to a new logon." | ```{00000000-0000-0000-0000-000000000000}``` | 14 | | logon_id | integer | hexadecimal value that can help you correlate this event with recent events that might contain the same Logon ID | ```0x8dcdc``` | 15 | | logon_impersonation_level | string | Impersonation level | ```%%1833``` | 16 | | logon_key_length | integer | the length of NTLM Session Security key. Typically it has 128 bit or 56 bit length. 
This parameter is always 0 if "Authentication Package" = "Kerberos", because it is not applicable for Kerberos protocol. This field will also have "0" value if Kerberos was negotiated using Negotiate authentication package. | ```0``` | 17 | | logon_process_name | string | The name of the trusted logon process that was used for the logon. See event "4611: A trusted logon process has been registered with the Local Security Authority" description for more information. | ```User32``` | 18 | | logon_restricted_admin_mode | string | Only populated for RemoteInteractive logon type sessions. This is a Yes/No flag indicating if the credentials provided were passed using Restricted Admin mode. Restricted Admin mode was added in Win8.1/2012R2 but this flag was added to the event in Win10. If not a RemoteInteractive logon, then this will be "-" string. | ```-``` | 19 | | logon_transmitted_services | string | the list of transmitted services. Transmitted services are populated if the logon was a result of a S4U (Service For User) logon process. S4U is a Microsoft extension to the Kerberos Protocol to allow an application service to obtain a Kerberos service ticket on behalf of a user - most commonly done by a front-end website to access an internal resource on behalf of a user. | ```-``` | 20 | | logon_type | integer | the type of logon which was performed | ```2``` | 21 | | logon_user_claims | string | list of user claims for new logon session. This field contains user claims if user account was logged in and device claims if computer account was logged in | ```ad://ext/cn:88d2b96fdb2b4c49 <%%1818> : "dadmin" ad://ext/Department:88d16a8edaa8c66b <%%1818> : "IT"``` | 22 | | logon_user_linked_id | integer | A hexadecimal value of the paired logon session. If there is no other logon session associated with this logon session, then the value is "0x0". 
| ```0x0``` | 23 | | logon_virtual_account | string | a "Yes" or "No" flag, which indicates if the account is a virtual account (e.g., "Managed Service Account"), which was introduced in Windows 7 and Windows Server 2008 R2 to provide the ability to identify the account that a given Service uses, instead of just using "NetworkService". | ```%%1843``` | 24 | -------------------------------------------------------------------------------- /docs/cdm/entities/mac.md: -------------------------------------------------------------------------------- 1 | # mac 2 | 3 | Event fields used to define/normalize metadata about MAC addresses in a network. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | mac_addr | string | MAC address of the device where the event was generated or network interface where a connection starts or ends. | ```00:11:22:33:44:55``` | 10 | -------------------------------------------------------------------------------- /docs/cdm/entities/meta.md: -------------------------------------------------------------------------------- 1 | # meta 2 | 3 | For example: url_category would be copied to meta_url_category 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | meta_alert | string | Data describing an alert | ```````` | 10 | | meta_as_number | integer | Autonomous System (AS) number (BGP AS Number) | ```````` | 11 | | meta_as_org | string | Autonomous System (AS) organization (BGP AS Name) | ```````` | 12 | | meta_category | string | Description to define a grouping of a value. 
Commonly used for URL/domain category (ie: Adult, Abuse, Parked, RFC-1918, etc) | ```````` | 13 | | meta_geo_location | geo_point | Geo longitude and latitude point of a field | ```````` | 14 | | meta_ttp | string | Tactic, technique, and procedure | ```````` | 15 | -------------------------------------------------------------------------------- /docs/cdm/entities/module.md: -------------------------------------------------------------------------------- 1 | # module 2 | 3 | Event fields used to define/normalize metadata about modules loaded into a process. A process module represents a .dll or .exe file that is loaded into a particular process. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | module_is_signed | boolean | is the module loaded signed? | ```TRUE``` | 10 | | module_name | string | name of the modules loaded into a process without the full path | ```msvcrt.dll``` | 11 | | module_path | string | full path of a module loaded into a process | ```C:\Windows\System32\msvcrt.dll``` | 12 | | module_signature | string | The signer | ```Microsoft Corporation``` | 13 | | module_signature_status | string | status of the signature | ```Valid``` | 14 | -------------------------------------------------------------------------------- /docs/cdm/entities/network.md: -------------------------------------------------------------------------------- 1 | # network 2 | 3 | Event fields used to define metadata about network information seen in a typical OSI layer. This includes data both from an endpoint as well as a network monitoring device/application (NSM, Firewall, IPS, IDS, etc...). This differentiates from data that is specific to Source and Destination specific information such as Source or Destination bytes, packets, IP address, mac address, TCP flags. 
4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | network_application_name | string | Layer 7 (application) name specific to service/name/software as provided by a device or user | ```google-drive``` | 10 | | network_application_protocol | string | Layer 7 (application) in the OSI model. Ex: HTTP,SMB,FTP,SSH, etc. | ```HTTP``` | 11 | | network_bytes | integer | Total bytes for the session. If this field does not exist in the log source, then it is possible in your ETL pipeline to combine the source and destination bytes. | ```102034``` | 12 | | network_connection_history | string | TCP Flags and other potential IP header info | `````` | 13 | | network_connection_history_detailed | string | Detailed description of the information in network_connection_history | `````` | 14 | | network_connection_state | string | The end state of the session/connection as defined in short abbreviation | `````` | 15 | | network_connection_state_detailed | string | Detailed description of the information in network_connection_state | `````` | 16 | | network_direction | string | User/Device defined name of the direction of the connection or session (Inbound or Outbound). | ```outbound``` | 17 | | network_duration | integer | The amount of time, in milliseconds, for the completion of the network session or connection. | ```1500``` | 18 | | network_fingerprint_network_community_id | string | Network community ID as outlined by the standard from https://github.com/corelight/community-id-spec. Standardized hashing of network tuple. The combination, most commonly, of Source IP, Source Port, Destination IP, Destination Port, and IP Protocol allows pivoting between multiple log types | ```1:EeVyZ07VGj1n0rld+xCLFdM+u8M=``` | 19 | | network_icmp_code | integer | For an ICMP message, ICMP message type numeric value (RFC 2780 or RFC 4443).
| ```34``` | 20 | | network_icmp_type | string | For an ICMP message, ICMP message type text representation (RFC 2780 or RFC 4443) | ```Destination Unreachable``` | 21 | | network_initiated | boolean | Whether the session was initiated or received. Most commonly used in relation to an endpoint/device. False = the endpoint did not initiate the session (ie: was scanned or RDP connection made to it) | ```TRUE``` | 22 | | network_inner_vlan_id | integer | Normally the VLAN can not be determined as source/destination and VLANs are stacked/wrapped. This is the VLAN "inside" | ```150``` | 23 | | network_ip_bytes | long | Total IP bytes, according to ip headers, for the session. If this field does not exist in the log source, then its possible in your ETL pipeline to combine the source and destination bytes | ```14564``` | 24 | | network_missed_bytes | long | bytes that a network sensor or other system/application may have missed | ```5``` | 25 | | network_outer_vlan_id | integer | Normally the VLAN can not be determined as source/destination and VLANs are stacked/wrapped. This is the VLAN on the "outside" | ```160``` | 26 | | network_packets | long | Total packets for the session. If this field does not exist in the log source, then its possible in your ETL pipeline to combine the source and destination packets | ```143``` | 27 | | network_protocol | string | Transport layer in the OSI model. Also known as, IP Protocol. Ex: TCP,UDP,ICMP,ICMP-v6, etc. Convert to lowercase | ```tcp``` | 28 | | network_rule_name | string | The name or ID of the rule by which DeviceAction was decided upon | ```AnyAnyDrop``` | 29 | | network_rule_number | integer | Matched rule number | ```23``` | 30 | | network_session_id | string | The session identifier as reported by the network sensor device. Typically, not available for connections. 
| ```S198_13_1_27_12321_D205_13_1_27_443_0012``` | 31 | -------------------------------------------------------------------------------- /docs/cdm/entities/pipe.md: -------------------------------------------------------------------------------- 1 | # pipe 2 | 3 | Event fields used to define/normalize metadata about pipes being created or connected for inter-process communication locally or remotely. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | pipe_flags | string | The flags indicating whether this pipe connection is a server or client end, and whether the pipe is for sending messages or bytes | `````` | 10 | | pipe_instances | integer | Number of instances of the named pipe | `````` | 11 | | pipe_max_instances | integer | The maximum number of instances creatable for this pipe | `````` | 12 | | pipe_name | string | name of pipe created or connected | ```\srvsvc``` | 13 | -------------------------------------------------------------------------------- /docs/cdm/entities/port.md: -------------------------------------------------------------------------------- 1 | # port 2 | 3 | Event fields used to define/normalize metadata about ports in a network connection. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | port_name | string | Name of the port used in a network connection. This is usually determined by IANA common port assignment. Therefore, it is a guess based on the port number and NOT necessarily the application or service actually in use. | ```netbios-dgm``` | 10 | | port_number | integer | Port number used in a network connection. This could be used in the context of source, destination and even NAT when it is provided by an intermediary NAT device such as a firewall.
| ```138``` | 11 | -------------------------------------------------------------------------------- /docs/cdm/entities/process.md: -------------------------------------------------------------------------------- 1 | # process 2 | 3 | Event fields used to define metadata about processes in a system. Isolated memory address space that is used to run a program. Inside a process's address space the system can load code modules, but must have at least one thread running to do so. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | process_call_trace | string | Stack trace of where open process is called | ```C:\WINDOWS\SYSTEM32\ntdll.dll+a0344 | C:\WINDOWS\System32\KERNELBASE.dll+64794| c:\windows\system32\lsm.dll+10e93| c:\windows\system32\lsm.dll+f9ea| C:\WINDOWS\System32\RPCRT4.dll+76d23| C:\WINDOWS\System32\RPCRT4.dll+d9390| C:\WINDOWS\System32\RPCRT4.dll+a81c| C:\WINDOWS\System32\RPCRT4.dll+273b4| C:\WINDOWS\System32\RPCRT4.dll+2654e| C:\WINDOWS\System32\RPCRT4.dll+26cfb| C:\WINDOWS\System32\RPCRT4.dll+3083f| C:\WINDOWS\System32\RPCRT4.dll+313a6| C:\WINDOWS\System32\RPCRT4.dll+2d12e| C:\WINDOWS\System32\RPCRT4.dll+2e853| C:\WINDOWS\System32\RPCRT4.dll+5cc68| C:\WINDOWS\SYSTEM32\ntdll.dll+365ce| C:\WINDOWS\SYSTEM32\ntdll.dll+34b46| C:\WINDOWS\System32\KERNEL32.DLL+11fe4| C:\WINDOWS\SYSTEM32\ntdll.dll+6efc1``` | 10 | | process_command_line | string | Command arguments that were executed by the process in the endpoint. | ```C:\WINDOWS\system32\conhost.exe 0xffffffff -ForceV1``` | 11 | | process_company | string | Company name metadata of the Image file | ```Microsoft Corporation``` | 12 | | process_current_directory | string | The full path to the current directory for the process. The string can also specify a UNC path.
| ```C:\Users\Panda\Test``` | 13 | | process_file_description | string | Description of the Image file | ```Console Window Host``` | 14 | | process_file_name | string | Name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full path of the Image file. | ```conhost.exe``` | 15 | | process_file_path | string | The complete path and name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. | ```C:\Windows\System32\conhost.exe``` | 16 | | process_file_product | string | The Image's file product name | ```Microsoft Windows Operating System``` | 17 | | process_file_version | string | Version of the Image file | ```10.0.16299.15 (WinBuild.160101.0800)``` | 18 | | process_granted_access | string | granted access code requested/used to open a target process | ```0x1000``` | 19 | | process_guid | string | Process global unique identifier used to identify a process across other operating systems. This can be created by group hashing values such as Process Name, Process Id, Process Start Time, Process Path and even Computer Name. Datasets such as Sysmon call this the ProcessGuid. This is similar to the output from the UUIDGEN command.
| ```A98268C1-9C2E-5ACD-0000-0010396CAB00``` | 20 | | process_hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 21 | | process_hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 22 | | process_hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 23 | | process_hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 24 | | process_hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 25 | | process_id | integer | Process unique identifier used by the current operating system to identify a process. | ```4756``` | 26 | | process_injected_address | string | The memory address where the subprocess is injected | ```0xFFFFBC6422DD9C20``` | 27 | | process_integrity_level | string | Integrity label assigned to a process | ```Medium``` | 28 | | process_is_hidden | boolean | Describes if the process is hidden. | ```True``` | 29 | | process_name | string | Name of the process derived from the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full path of the Image file.
| ```conhost.exe``` | 30 | | process_parent_call_trace | string | Stack trace of where open process is called | ```C:\WINDOWS\SYSTEM32\ntdll.dll+a0344 | C:\WINDOWS\System32\KERNELBASE.dll+64794| c:\windows\system32\lsm.dll+10e93| c:\windows\system32\lsm.dll+f9ea| C:\WINDOWS\System32\RPCRT4.dll+76d23| C:\WINDOWS\System32\RPCRT4.dll+d9390| C:\WINDOWS\System32\RPCRT4.dll+a81c| C:\WINDOWS\System32\RPCRT4.dll+273b4| C:\WINDOWS\System32\RPCRT4.dll+2654e| C:\WINDOWS\System32\RPCRT4.dll+26cfb| C:\WINDOWS\System32\RPCRT4.dll+3083f| C:\WINDOWS\System32\RPCRT4.dll+313a6| C:\WINDOWS\System32\RPCRT4.dll+2d12e| C:\WINDOWS\System32\RPCRT4.dll+2e853| C:\WINDOWS\System32\RPCRT4.dll+5cc68| C:\WINDOWS\SYSTEM32\ntdll.dll+365ce| C:\WINDOWS\SYSTEM32\ntdll.dll+34b46| C:\WINDOWS\System32\KERNEL32.DLL+11fe4| C:\WINDOWS\SYSTEM32\ntdll.dll+6efc1``` | 31 | | process_parent_command_line | string | Command arguments that were were executed by the process in the endpoint. | ```C:\WINDOWS\system32\conhost.exe 0xffffffff -ForceV1``` | 32 | | process_parent_company | string | Company name metadata of the Image file | ```Microsoft Corporation``` | 33 | | process_parent_current_directory | string | The full path to the current directory for the process. The string can also specify a UNC path. | ```C:\Users\Panda\Test``` | 34 | | process_parent_file_description | string | Description of the Image file | ```Console Window Host``` | 35 | | process_parent_file_name | string | Name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full patth of the Image file. | ```conhost.exe``` | 36 | | process_parent_file_path | string | The complete path and name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. 
| ```C:\Windows\System32\conhost.exe``` | 37 | | process_parent_file_product | string | The Image's file product name | ```Microsoft Windows Operating System``` | 38 | | process_parent_file_version | string | Version of the Image file | ```10.0.16299.15 (WinBuild.160101.0800)``` | 39 | | process_parent_granted_access | string | granted access code requested/used to open a target process | ```0x1000``` | 40 | | process_parent_guid | string | Process global unique identifer used to identify a process across other operating systems. This can be created by group hashing values such as Process Name, Process Id, Process Start Time, Process Path and even Computer Name. Datasets such as Sysmon call this the ProcessGuid. This is similar to the output from the UUIDGEN command. | ```A98268C1-9C2E-5ACD-0000-0010396CAB00``` | 41 | | process_parent_hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 42 | | process_parent_hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 43 | | process_parent_hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 44 | | process_parent_hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 45 | | process_parent_hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 46 | | process_parent_id | integer | Process unique identifier used by the current operating system to identify a process. 
| ```4756``` | 47 | | process_parent_injected_address | string | The memory address where the subprocess is injected | ```0xFFFFBC6422DD9C20``` | 48 | | process_parent_integrity_level | string | Integrity label assigned to a process | ```Medium``` | 49 | | process_parent_is_hidden | boolean | Describes if the process is hidden. | ```True``` | 50 | | process_parent_name | string | Name of the process derived from the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full path of the Image file. | ```conhost.exe``` | 51 | -------------------------------------------------------------------------------- /docs/cdm/entities/registry.md: -------------------------------------------------------------------------------- 1 | # registry 2 | 3 | Event fields used to define metadata about Windows registry entries in a system. The registry is a hierarchical database that contains data that is critical for the operation of Windows and the applications and services that run on Windows. The data is structured in a tree format. Each node in the tree is called a key. Each key can contain both subkeys and data entries called values. Sometimes, the presence of a key is all the data that an application requires; other times, an application opens a key and uses the values associated with the key. A key can have any number of values, and the values can be in any form. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | registry_hive_path | string | A hive is a logical group of keys, subkeys, and values in the registry that has a set of supporting files loaded into memory when the operating system is started or a user logs in. | ```HKEY_LOCAL_MACHINE\SAM``` | 10 | | registry_key_access_rights | string | The Windows security model enables you to control access to registry keys.
The valid access rights for registry keys include the DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER standard access rights. Registry keys do not support the SYNCHRONIZE standard access right. | ```KEY_ALL_ACCESS (0xF003F)``` | 11 | | registry_key_name | string | This field contains the key name without the full path. Take into consideration the name of the key value in the registry key path. | ```Run``` | 12 | | registry_key_name_modified | string | Original registry key name before being modified. | ```Run``` | 13 | | registry_key_path | string | Next-level down from registry root-keys. This field contains the full path of a registry key. This is a combination of the root key, hive, key, sub-key, and value. A key is a folder in the registry that contains other sub-keys. | ```HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Run\WardogPersistence``` | 14 | | registry_key_path_modified | string | Original registry key path before being modified. | ```HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Run\WardogPersistence``` | 15 | | registry_root_key | string | Root-Keys are the root, or primary divisions, of the registry. They do not contain configuration data; they contain the keys, subkeys, and values in which the data is stored. There are six root keys (HKCU, HKU, HKCR, HKLM, HKCC and HKPD) that store information related to currently logged on users, local accounts, performance, and even the current hardware profile. Root-key names represent Windows handles (H) to Keys (K). | ```HKLM or HKEY_LOCAL_MACHINE``` | 16 | | registry_value_data | string | Each registry key value consists of a value name and its associated data. Registry key value data store the actual configuration data for the operating system and the programs that run on the system. As such, they are different from subtrees, keys, and subkeys, which are merely containers. | ```C:\Path\malware``` | 17 | | registry_value_data_modified | string | Original registry key value data before being modified.
| ```C:\malware.exe``` | 18 | | registry_value_name | string | Registry values are the lowest-level element in the registry. They appear in the right pane of the registry editor window. Each entry consists of the value name, its Data Types in the Registry (which defines the length and format of data that the entry can store), and a field known as the data of the registry value. These are also known as registry entries. This field contains the key value name without the full registry key path. | ```WardogPersistence``` | 19 | | registry_value_name_modified | string | Original registry key value name before being modified. | ```WardogPersistence``` | 20 | | registry_value_type | string | values store different kinds of data such as REG_NONE (No value type), REG_SZ (Fixed-length Unicode string), REG_EXPAND_SZ (Variable-length Unicode string that can have embedded environment variables), etc. | ```REG_EXPAND_SZ``` | 21 | | registry_value_type_modified | string | Original registry key value type before being modified. | ```REG_EXPAND_SZ``` | 22 | -------------------------------------------------------------------------------- /docs/cdm/entities/rule.md: -------------------------------------------------------------------------------- 1 | # rule 2 | 3 | Event fields used to define/normalize metadata about rules. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | rule_name | string | The name or ID of the rule by which DeviceAction was decided upon | ```Any Any Drop``` | 10 | | rule_number | string | Matched rule number | ```7``` | 11 | -------------------------------------------------------------------------------- /docs/cdm/entities/source_nat.md: -------------------------------------------------------------------------------- 1 | # source_nat 2 | 3 | Event fields used to define the source NAT (network address translation) in a network connection event.
4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | src_nat_ip_addr | ip | IP address assigned to the device generating the event and/or the IP address in the network packet. This could be used in the context of source, destination, device and even NAT when it is provided by an intermediary NAT device such as a firewall. | ```192.168.1.2``` | 10 | | src_nat_ip_dhcp_assigned_ip_addr | ip | IP address assigned by the DHCP server. | ```192.168.1.2``` | 11 | | src_nat_ip_is_ipv6 | boolean | If IP address is IP version 6 | ```false``` | 12 | | src_nat_port_name | string | Name of the port used in a network connection. This is usually determined by IANA common port assignment. Therefore, it is a guess based on the port number and NOT necessarily the application or service actually in use. | ```netbios-dgm``` | 13 | | src_nat_port_number | integer | Port number used in a network connection. This could be used in the context of source, destination and even NAT when it is provided by an intermediary NAT device such as a firewall. | ```138``` | 14 | -------------------------------------------------------------------------------- /docs/cdm/entities/target.md: -------------------------------------------------------------------------------- 1 | # target 2 | 3 | Event fields used to define entities being targeted by other entities locally in a system. This is different from a network connection event. It is more related to events that involve relationships defined locally by entities such as files, processes, users, etc.
4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | target_process_call_trace | string | Stack trace of where open process is called | ```C:\WINDOWS\SYSTEM32\ntdll.dll+a0344 | C:\WINDOWS\System32\KERNELBASE.dll+64794| c:\windows\system32\lsm.dll+10e93| c:\windows\system32\lsm.dll+f9ea| C:\WINDOWS\System32\RPCRT4.dll+76d23| C:\WINDOWS\System32\RPCRT4.dll+d9390| C:\WINDOWS\System32\RPCRT4.dll+a81c| C:\WINDOWS\System32\RPCRT4.dll+273b4| C:\WINDOWS\System32\RPCRT4.dll+2654e| C:\WINDOWS\System32\RPCRT4.dll+26cfb| C:\WINDOWS\System32\RPCRT4.dll+3083f| C:\WINDOWS\System32\RPCRT4.dll+313a6| C:\WINDOWS\System32\RPCRT4.dll+2d12e| C:\WINDOWS\System32\RPCRT4.dll+2e853| C:\WINDOWS\System32\RPCRT4.dll+5cc68| C:\WINDOWS\SYSTEM32\ntdll.dll+365ce| C:\WINDOWS\SYSTEM32\ntdll.dll+34b46| C:\WINDOWS\System32\KERNEL32.DLL+11fe4| C:\WINDOWS\SYSTEM32\ntdll.dll+6efc1``` | 10 | | target_process_command_line | string | Command arguments that were were executed by the process in the endpoint. | ```C:\WINDOWS\system32\conhost.exe 0xffffffff -ForceV1``` | 11 | | target_process_company | string | Company name metadata of the Image file | ```Microsoft Corporation``` | 12 | | target_process_current_directory | string | The full path to the current directory for the process. The string can also specify a UNC path. | ```C:\Users\Panda\Test``` | 13 | | target_process_file_description | string | Description of the Image file | ```Console Window Host``` | 14 | | target_process_file_name | string | Name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full patth of the Image file. | ```conhost.exe``` | 15 | | target_process_file_path | string | The complete path and name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. 
| ```C:\Windows\System32\conhost.exe``` | 16 | | target_process_file_product | string | The Image's file product name | ```Microsoft Windows Operating System``` | 17 | | target_process_file_version | string | Version of the Image file | ```10.0.16299.15 (WinBuild.160101.0800)``` | 18 | | target_process_granted_access | string | granted access code requested/used to open a target process | ```0x1000``` | 19 | | target_process_guid | string | Process global unique identifer used to identify a process across other operating systems. This can be created by group hashing values such as Process Name, Process Id, Process Start Time, Process Path and even Computer Name. Datasets such as Sysmon call this the ProcessGuid. This is similar to the output from the UUIDGEN command. | ```A98268C1-9C2E-5ACD-0000-0010396CAB00``` | 20 | | target_process_hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 21 | | target_process_hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 22 | | target_process_hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 23 | | target_process_hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 24 | | target_process_hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 25 | | target_process_id | integer | Process unique identifier used by the current operating system to identify a process. 
| ```4756``` | 26 | | target_process_injected_address | string | The memory address where the subprocess is injected | ```0xFFFFBC6422DD9C20``` | 27 | | target_process_integrity_level | string | Integrity label assigned to a process | ```Medium``` | 28 | | target_process_is_hidden | boolean | Describes if the process is hidden. | ```True``` | 29 | | target_process_name | string | Name of the process derived from the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full patth of the Image file. | ```conhost.exe``` | 30 | | target_process_parent_call_trace | string | Stack trace of where open process is called | ```C:\WINDOWS\SYSTEM32\ntdll.dll+a0344 | C:\WINDOWS\System32\KERNELBASE.dll+64794| c:\windows\system32\lsm.dll+10e93| c:\windows\system32\lsm.dll+f9ea| C:\WINDOWS\System32\RPCRT4.dll+76d23| C:\WINDOWS\System32\RPCRT4.dll+d9390| C:\WINDOWS\System32\RPCRT4.dll+a81c| C:\WINDOWS\System32\RPCRT4.dll+273b4| C:\WINDOWS\System32\RPCRT4.dll+2654e| C:\WINDOWS\System32\RPCRT4.dll+26cfb| C:\WINDOWS\System32\RPCRT4.dll+3083f| C:\WINDOWS\System32\RPCRT4.dll+313a6| C:\WINDOWS\System32\RPCRT4.dll+2d12e| C:\WINDOWS\System32\RPCRT4.dll+2e853| C:\WINDOWS\System32\RPCRT4.dll+5cc68| C:\WINDOWS\SYSTEM32\ntdll.dll+365ce| C:\WINDOWS\SYSTEM32\ntdll.dll+34b46| C:\WINDOWS\System32\KERNEL32.DLL+11fe4| C:\WINDOWS\SYSTEM32\ntdll.dll+6efc1``` | 31 | | target_process_parent_command_line | string | Command arguments that were were executed by the process in the endpoint. | ```C:\WINDOWS\system32\conhost.exe 0xffffffff -ForceV1``` | 32 | | target_process_parent_company | string | Company name metadata of the Image file | ```Microsoft Corporation``` | 33 | | target_process_parent_current_directory | string | The full path to the current directory for the process. The string can also specify a UNC path. 
| ```C:\Users\Panda\Test``` | 34 | | target_process_parent_file_description | string | Description of the Image file | ```Console Window Host``` | 35 | | target_process_parent_file_name | string | Name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full patth of the Image file. | ```conhost.exe``` | 36 | | target_process_parent_file_path | string | The complete path and name of the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. | ```C:\Windows\System32\conhost.exe``` | 37 | | target_process_parent_file_product | string | The Image's file product name | ```Microsoft Windows Operating System``` | 38 | | target_process_parent_file_version | string | Version of the Image file | ```10.0.16299.15 (WinBuild.160101.0800)``` | 39 | | target_process_parent_granted_access | string | granted access code requested/used to open a target process | ```0x1000``` | 40 | | target_process_parent_guid | string | Process global unique identifer used to identify a process across other operating systems. This can be created by group hashing values such as Process Name, Process Id, Process Start Time, Process Path and even Computer Name. Datasets such as Sysmon call this the ProcessGuid. This is similar to the output from the UUIDGEN command. 
| ```A98268C1-9C2E-5ACD-0000-0010396CAB00``` | 41 | | target_process_parent_hash_imphash | string | IMPHASH hash of the image/binary/file | ```2505BD03D7BD285E50CE89CEC02B333B``` | 42 | | target_process_parent_hash_md5 | string | MD5 hash of the image/binary/file | ```6A255BEBF3DBCD13585538ED47DBAFD7``` | 43 | | target_process_parent_hash_sha1 | string | SHA1 hash of the image/binary/file | ```B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2``` | 44 | | target_process_parent_hash_sha256 | string | SHA256 hash of the image/binary/file | ```4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C``` | 45 | | target_process_parent_hash_sha512 | string | SHA512 hash of the image/binary/file | ```1AD1D79F85D8F6A50EA282F63898D652661DAA0C1FD361C22647CABC98A70E8CBCE83200D579D10DD0A3D46BE9496DCDFDDF28B0C5E9709343B032A8796FBECB``` | 46 | | target_process_parent_id | integer | Process unique identifier used by the current operating system to identify a process. | ```4756``` | 47 | | target_process_parent_injected_address | string | The memory address where the subprocess is injected | ```0xFFFFBC6422DD9C20``` | 48 | | target_process_parent_integrity_level | string | Integrity label assigned to a process | ```Medium``` | 49 | | target_process_parent_is_hidden | boolean | Describes if the process is hidden. | ```True``` | 50 | | target_process_parent_name | string | Name of the process derived from the Image file or executable file used to define the initial code and data mapped into the process' virtual address space. This does not contain the full patth of the Image file. | ```conhost.exe``` | 51 | | target_server_name | string | the name of the server on which the new process was run. Has "localhost" value if the process was run locally. | ```localhost``` | 52 | | target_user_aadid | string | The User Azure AD ID of the identity associated with a cloud network session. It applies to source and destination entities. 
| ```5e8b0f4d-2cd4-4e17-9467-b0f6a5c0c4d0``` | 53 | | target_user_cred_type | string | types of credentials which were presented for delegation | ```%%8098``` | 54 | | target_user_domain | string | The domain or computer name associated to the user in a session. In active directory, this would be the name of the domain the user belongs to. | ```CONTOSO``` | 55 | | target_user_identity | string | User Principal Name (UPN) or another type of account identifier for which 802.1x authentication request was made. | ```host/XXXXXXXX.redmond.corp.microsoft.com``` | 56 | | target_user_linked_logon_id | integer | A hexadecimal value of the paired logon session. | ```0x0``` | 57 | | target_user_logon_authentication_lan_package_name | string | The name of the LAN Manager sub-package (NTLM-family protocol name) that was used during logon. Possible values are: NTLM V1, NTLM V2, LM. Only populated if Authentication Package = NTLM. | ```-``` | 58 | | target_user_logon_authentication_package_name | string | The name of the authentication package which was used for the logon authentication process. Default packages loaded on LSA startup are located in "HKLM\SYSTEM\CurrentControlSet\Control\Lsa\OSConfig" registry key. Other packages can be loaded at runtime. When a new package is loaded a "4610: An authentication package has been loaded by the Local Security Authority" (typically for NTLM) or "4622: A security package has been loaded by the Local Security Authority" (typically for Kerberos) event is logged to indicate that a new package has been loaded along with the package name. | ```Negotiate``` | 59 | | target_user_logon_device_claims | string | list of device claims for new logon session | ```-``` | 60 | | target_user_logon_elevated_token | string | a "Yes" or "No" flag. If "Yes" then the session this event represents is elevated and has administrator privileges. 
| ```%%1842``` | 61 | | target_user_logon_guid | string | a GUID that can help you correlate this event with another event that can contain the same Logon GUID, "4769(S, F): A Kerberos service ticket was requested" event on a domain controller. It also can be used for correlation between a 4624 event and several other events (on the same computer) that can contain the same Logon GUID, "4648(S): A logon was attempted using explicit credentials" and "4964(S): Special groups have been assigned to a new logon." | ```{00000000-0000-0000-0000-000000000000}``` | 62 | | target_user_logon_id | integer | hexadecimal value that can help you correlate this event with recent events that might contain the same Logon ID | ```0x8dcdc``` | 63 | | target_user_logon_impersonation_level | string | Impersonation level | ```%%1833``` | 64 | | target_user_logon_key_length | integer | the length of NTLM Session Security key. Typically it has 128 bit or 56 bit length. This parameter is always 0 if "Authentication Package" = "Kerberos", because it is not applicable for Kerberos protocol. This field will also have "0" value if Kerberos was negotiated using Negotiate authentication package. | ```0``` | 65 | | target_user_logon_process_name | string | The name of the trusted logon process that was used for the logon. See event "4611: A trusted logon process has been registered with the Local Security Authority" description for more information. | ```User32``` | 66 | | target_user_logon_restricted_admin_mode | string | Only populated for RemoteInteractive logon type sessions. This is a Yes/No flag indicating if the credentials provided were passed using Restricted Admin mode. Restricted Admin mode was added in Win8.1/2012R2 but this flag was added to the event in Win10. If not a RemoteInteractive logon, then this will be "-" string. | ```-``` | 67 | | target_user_logon_transmitted_services | string | the list of transmitted services.
Transmitted services are populated if the logon was a result of a S4U (Service For User) logon process. S4U is a Microsoft extension to the Kerberos Protocol to allow an application service to obtain a Kerberos service ticket on behalf of a user - most commonly done by a front-end website to access an internal resource on behalf of a user. | ```-``` | 68 | | target_user_logon_type | integer | the type of logon which was performed | ```2``` | 69 | | target_user_logon_user_claims | string | list of user claims for new logon session. This field contains user claims if user account was logged in and device claims if computer account was logged in | ```ad://ext/cn:88d2b96fdb2b4c49 <%%1818> : "dadmin" ad://ext/Department:88d16a8edaa8c66b <%%1818> : "IT"``` | 70 | | target_user_logon_user_linked_id | integer | A hexadecimal value of the paired logon session. If there is no other logon session associated with this logon session, then the value is "0x0". | ```0x0``` | 71 | | target_user_logon_virtual_account | string | a "Yes" or "No" flag, which indicates if the account is a virtual account (e.g., "Managed Service Account"), which was introduced in Windows 7 and Windows Server 2008 R2 to provide the ability to identify the account that a given Service uses, instead of just using "NetworkService". | ```%%1843``` | 72 | | target_user_name | string | Name of the user associated with the main event (i.e. Network session). There could be a sense of direction depending how it is used together with other entities (i.e. src_user_name or dst_user_name) | ```wardog``` | 73 | | target_user_network_account_domain | string | Domain for the user that will be used for outbound (network) connections. | ```-``` | 74 | | target_user_network_account_name | string | User name used for outbound (network) connections | ```-``` | 75 | | target_user_password | string | User password if seen in the request. Commonly seen in network logs and authentication proxy/logs. 
| ```bobspassword``` | 76 | | target_user_reporter_domain | string | domain name of the user that reported the main event | ```WORKGROUP``` | 77 | | target_user_reporter_id | integer | unique identifier of the user that reported the main event | ```0x3e7``` | 78 | | target_user_reporter_name | string | the name of the account that reported information about the main event | ```WIN-GG82ULGC9GO$``` | 79 | | target_user_reporter_sid | string | SID of account that reported information about the main event | ```S-1-5-18``` | 80 | | target_user_security_package | string | the name of Security Package used during an authentication event. | ```CREDSSP``` | 81 | | target_user_session_id | integer | ID of the session the user belongs to. | ```1``` | 82 | | target_user_sid | string | Security identifier of the user. Typically, the identity used to authenticate a server. | ```S-1-5-21-1377283216-344919071-3415362939-500``` | 83 | | target_user_sid_list | string | the list of special group SIDs, which New Logon\Security ID is a member of. | ```{S-1-5-21-3457937927-2839227994-823803824-512}``` | 84 | | target_user_upn | string | In Active Directory, the User Principal Name (UPN) attribute is a user identifier for logging in, separate from a Windows domain login. | ```dadmin@contoso``` | 85 | -------------------------------------------------------------------------------- /docs/cdm/entities/threat.md: -------------------------------------------------------------------------------- 1 | # threat 2 | 3 | Event fields used to define/normalize metadata about threats in a network. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | threat_category | string | The category of a threat identified by a security system such as a Web Security Gateway or an IPS and is associated with this network session.
| ```Trojan``` | 10 | | threat_id | string | The ID of a threat identified by a security system such as a Web Security Gateway or an IPS and is associated with this network session. | ```Tr.124``` | 11 | | threat_name | string | The name of the threat or malware identified | ```EICAR Test File``` | 12 | -------------------------------------------------------------------------------- /docs/cdm/entities/tls.md: -------------------------------------------------------------------------------- 1 | # tls 2 | 3 | Event fields used to define/normalize metadata about a TLS(SSL) record. This document is a work in progress, but a foundational start is included. Specifically, the main foundations of TLS info are already in here. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | tls_cipher | string | The cipher (encryption) parameters used to make the TLS connection | ```TLS_RSA_WITH_AES_128_CBC_SHA``` | 10 | | tls_curve | string | Elliptic curve the server chose when using ECDH/ECDHE | ```TLS_RSA_WITH_AES_128_CBC_SHA``` | 11 | | tls_established | boolean | Indicates if the session has been established successfully, or if it was aborted during the handshake | ```true``` | 12 | | tls_next_protocol | string | Next protocol the server chose using the application layer next protocol extension, if present.
| ```spdy/3.1``` | 13 | | tls_resumed | boolean | If the session was resumed from a previously established connection | ```false``` | 14 | | tls_server_name | string | The name of the requested server/destination, this should be copied to dst_host_name | ```www.google.com``` | 15 | | tls_version | string | Version of TLS/SSL used (ie: SSLv3.0, TLSv1.1, TLSv1.2, TLSv1.3) | ```TLSv10``` | 16 | | tls_version_number | integer | Numeric value of the tls_version | ```````` | 17 | -------------------------------------------------------------------------------- /docs/cdm/entities/url.md: -------------------------------------------------------------------------------- 1 | # url 2 | 3 | Event fields used to define/normalize metadata about a URL/URI. There is a lot of ambiguity from the community on the difference between a URL and a URI. Granted, URL would normally include the domain, port (if applicable), user, password, query, fragment, and URI. 4 | However, there are many scenarios from log sources where one could not distinguish whether it was the full URL or just the URI.
5 | URL data can be seen in various log sources as defined in http.md as well as other applications such as SIP. URLs, especially in HTTP, have a best practice implementation however it is not necessary to adhere for connections/data to be established. 6 | 7 | ## Attributes 8 | 9 | | Name | Type | Description | Sample Value | 10 | |:---|:---|:---|:---| 11 | | url_category | string | The defined grouping of a URL (or could be just based on the domain in the URL) related to what it is (ie: adult, news, advertising, parked domains, etc) | ```Search Engines``` | 12 | | url_extension | string | The extension (.dll, .php, zip, .msi, .txt, etc) without the "." | ```php``` | 13 | | url_fragment | string | The portion of the URL after the last "#", this is defined in https://tools.ietf.org/html/rfc3986#section-3.5. This is also referred to as the "hash" in some implementations. This value does not always exist | ```````` | 14 | | url_hostname | string | The domain/host/hostname of the URL. This could be an IP address or any variation of a value but is more than likely a domain/hostname | ```google.com``` | 15 | | url_original | string | The entirety of the URL combined together and or the URL in the truest form from the log source. Some log sources will already parse out portions of the URL into their respective fields. Other logs will even parse out the portions of the URL into their respective field but also include the "original" URL. Always try to include this field, because HTTP/URLs never truly have to conform to any RFC/implementation and thus any parsing/logging implementation could have any number of assumptions/mistakes - therefore it is best to keep a original value | ```ftp://BigwheelPassword:BigwheelBobUser@google.com:8088/common/Current/client/search/greatsearch.php?hash=215696fc36392ca70f89228b98060afb%20processname=example.exe#gid=l1k4h``` | 16 | | url_path | string | Everything beginning with and after the first "/". 
This portion should usually exist in the log source / URL, even if the path is just "/". Also, even if the query or fragment have not been parsed yet then you still include them in this value | ```````` | 17 | | url_port | integer | The port in the URL. This is not to be confused with destination.md. In your ETL pipeline you should check if the value derived from the URL is actually an integer (unless properly verified in the data source)... because as mentioned throughout, URLs can be manipulated/mis-implemented in many different ways | ```8088``` | 18 | | url_query_names | string | The keys/fields derived from the query. Due to the limitless variations of implementations of a URL, providing a nested object of key/values is not recommended. Whether an attacker is injecting data into a URL or an incorrect implementation or malicious implementation - it's possible you could have keys/fields of values of anything you can imagine (ie: "%*%)%*#Nf..$2f>hr..n fa.fa s\\\\\"\\jhrwq": "somevalue") | ```[ "hash" , "processname" ]``` | 19 | | url_query_values | string | The values derived from the query. Due to the limitless variations of implementations of a URL, providing a nested object of key/values is not recommended. Whether an attacker is injecting data into a URL or an incorrect implementation or malicious implementation - it's possible you could have keys/fields of values of anything you can imagine (ie: "%*%)%*#Nf..$2f>hr..n fa.fa s\\\\\"\\jhrwq": "somevalue") | ```[ "215696fc36392ca70f89228b98060afb", "example.exe" ]``` | 20 | | url_scheme | string | Defines the network location (ie: smtp, ftp, smb, ldap, etc). This portion may not exist in many log sources. This is usually the value that comes before the first "://". This is also referred to as URN/origin | ```ftp``` | 21 | | url_user_name | string | The username defined in the URL.
This is meant to be distinguished from something such as the value in the Authorization header in an HTTP request (or even the Proxy Authentication HTTP header). This value should be copied to any.md | ```BigwheelBobUser``` | 22 | | url_user_password | string | The password defined in the URL. This is meant to be distinguished from something such as the value in the Authorization header in an HTTP request (or even the Proxy Authentication HTTP header) | ```BigwheelPassword``` | 23 | -------------------------------------------------------------------------------- /docs/cdm/entities/user.md: -------------------------------------------------------------------------------- 1 | # user 2 | 3 | Event fields used to define/normalize metadata about users in a network environment. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | user_aadid | string | The User Azure AD ID of the identity associated with a cloud network session. It applies to source and destination entities. | ```5e8b0f4d-2cd4-4e17-9467-b0f6a5c0c4d0``` | 10 | | user_cred_type | string | types of credentials which were presented for delegation | ```%%8098``` | 11 | | user_domain | string | The domain or computer name associated to the user in a session. In active directory, this would be the name of the domain the user belongs to. | ```CONTOSO``` | 12 | | user_identity | string | User Principal Name (UPN) or another type of account identifier for which 802.1x authentication request was made. | ```host/XXXXXXXX.redmond.corp.microsoft.com``` | 13 | | user_linked_logon_id | integer | A hexadecimal value of the paired logon session. | ```0x0``` | 14 | | user_logon_authentication_lan_package_name | string | The name of the LAN Manager sub-package (NTLM-family protocol name) that was used during logon. Possible values are: NTLM V1, NTLM V2, LM. Only populated if Authentication Package = NTLM. 
| ```-``` | 15 | | user_logon_authentication_package_name | string | The name of the authentication package which was used for the logon authentication process. Default packages loaded on LSA startup are located in "HKLM\SYSTEM\CurrentControlSet\Control\Lsa\OSConfig" registry key. Other packages can be loaded at runtime. When a new package is loaded a "4610: An authentication package has been loaded by the Local Security Authority" (typically for NTLM) or "4622: A security package has been loaded by the Local Security Authority" (typically for Kerberos) event is logged to indicate that a new package has been loaded along with the package name. | ```Negotiate``` | 16 | | user_logon_device_claims | string | list of device claims for new logon session | ```-``` | 17 | | user_logon_elevated_token | string | a "Yes" or "No" flag. If "Yes" then the session this event represents is elevated and has administrator privileges. | ```%%1842``` | 18 | | user_logon_guid | string | a GUID that can help you correlate this event with another event that can contain the same Logon GUID, "4769(S, F): A Kerberos service ticket was requested event on a domain controller. It also can be used for correlation between a 4624 event and several other events (on the same computer) that can contain the same Logon GUID, "4648(S): A logon was attempted using explicit credentials" and "4964(S): Special groups have been assigned to a new logon." | ```{00000000-0000-0000-0000-000000000000}``` | 19 | | user_logon_id | integer | hexadecimal value that can help you correlate this event with recent events that might contain the same Logon ID | ```0x8dcdc``` | 20 | | user_logon_impersonation_level | string | Impersonation level | ```%%1833``` | 21 | | user_logon_key_length | integer | the length of NTLM Session Security key. Typically it has 128 bit or 56 bit length. This parameter is always 0 if "Authentication Package" = "Kerberos", because it is not applicable for Kerberos protocol. 
This field will also have "0" value if Kerberos was negotiated using Negotiate authentication package. | ```0``` | 22 | | user_logon_process_name | string | The name of the trusted logon process that was used for the logon. See event "4611: A trusted logon process has been registered with the Local Security Authority" description for more information. | ```User32``` | 23 | | user_logon_restricted_admin_mode | string | Only populated for RemoteInteractive logon type sessions. This is a Yes/No flag indicating if the credentials provided were passed using Restricted Admin mode. Restricted Admin mode was added in Win8.1/2012R2 but this flag was added to the event in Win10. If not a RemoteInteractive logon, then this will be "-" string. | ```-``` | 24 | | user_logon_transmitted_services | string | the list of transmitted services. Transmitted services are populated if the logon was a result of a S4U (Service For User) logon process. S4U is a Microsoft extension to the Kerberos Protocol to allow an application service to obtain a Kerberos service ticket on behalf of a user - most commonly done by a front-end website to access an internal resource on behalf of a user. | ```-``` | 25 | | user_logon_type | integer | the type of logon which was performed | ```2``` | 26 | | user_logon_user_claims | string | list of user claims for new logon session. This field contains user claims if user account was logged in and device claims if computer account was logged in | ```ad://ext/cn:88d2b96fdb2b4c49 <%%1818> : "dadmin" ad://ext/Department:88d16a8edaa8c66b <%%1818> : "IT"``` | 27 | | user_logon_user_linked_id | integer | A hexadecimal value of the paired logon session. If there is no other logon session associated with this logon session, then the value is "0x0". 
| ```0x0``` | 28 | | user_logon_virtual_account | string | a "Yes" or "No" flag, which indicates if the account is a virtual account (e.g., "Managed Service Account"), which was introduced in Windows 7 and Windows Server 2008 R2 to provide the ability to identify the account that a given Service uses, instead of just using "NetworkService". | ```%%1843``` | 29 | | user_name | string | Name of the user associated with the main event (i.e. Network session). There could be a sense of direction depending how it is used together with other entities (i.e. src_user_name or dst_user_name) | ```wardog``` | 30 | | user_network_account_domain | string | Domain for the user that will be used for outbound (network) connections. | ```-``` | 31 | | user_network_account_name | string | User name used for outbound (network) connections | ```-``` | 32 | | user_password | string | User password if seen in the request. Commonly seen in network logs and authentication proxy/logs. | ```bobspassword``` | 33 | | user_reporter_domain | string | domain name of the user that reported the main event | ```WORKGROUP``` | 34 | | user_reporter_id | integer | unique identifier of the user that reported the main event | ```0x3e7``` | 35 | | user_reporter_name | string | the name of the account that reported information about the main event | ```WIN-GG82ULGC9GO$``` | 36 | | user_reporter_sid | string | SID of account that reported information about the main event | ```S-1-5-18``` | 37 | | user_security_package | string | the name of Security Package used during an authentication event. | ```CREDSSP``` | 38 | | user_session_id | integer | ID of the session the user belongs to. | ```1``` | 39 | | user_sid | string | Security identifier of the user. Typically, the identity used to authenticate a server. | ```S-1-5-21-1377283216-344919071-3415362939-500``` | 40 | | user_sid_list | string | the list of special group SIDs, which New Logon\Security ID is a member of. 
| ```{S-1-5-21-3457937927-2839227994-823803824-512}``` | 41 | | user_upn | string | In Active Directory, the User Principal Name (UPN) attribute is a user identifier for logging in, separate from a Windows domain login. | ```dadmin@contoso``` | 42 | -------------------------------------------------------------------------------- /docs/cdm/entities/user_agent.md: -------------------------------------------------------------------------------- 1 | # user_agent 2 | 3 | Event fields used to define/normalize metadata related to user agents in HTTP requests. 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | | user_agent_original | string | The User agent seen in the HTTP request. | ```Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36``` | 10 | -------------------------------------------------------------------------------- /docs/cdm/guidelines/data_types.md: -------------------------------------------------------------------------------- 1 | # Data Types 2 | 3 | Common data types used to define entity attributes 4 | 5 | | Name | Description | Sample Value | 6 | |:---|:---|:---| 7 | | string | A series of Unicode characters | `hello` | 8 | | integer | A whole number | `1` | 9 | | boolean | A value of true or false | `true` | 10 | | datetime | Date and time | `2020-07-01 20:10:05` | 11 | 12 | ## References 13 | 14 | * http://docs.oasis-open.org/cti/stix/v2.0/stix-v2.0-part3-cyber-observable-core.html -------------------------------------------------------------------------------- /docs/cdm/guidelines/domain_or_hostname_or_fqdn.md: -------------------------------------------------------------------------------- 1 | # Domain vs Host Name vs FQDN Implementation 2 | 3 | This guide will show you how to label FQDNs, Domains, and Hostnames (for both source/destination) commonly found in logs for Endpoint, DNS, HTTP, SSL, SMB, Radius, URLs, etc...
4 | However, there is an incredible amount of ambiguity, in logging, regarding the values for a Domain, Hostname, and FQDN (fully qualified domain name). 5 | 6 | Therefore, we are going to (try) to clear up this ambiguity in order to properly implement a schema. The order of this guideline is as follows (skip to [Implementation](./domain_or_hostname_or_fqdn.md#Implementation) if you have already read the definitions and problem framing) 7 | 1. Explain the [Common Definitions](./domain_or_hostname_or_fqdn.md#Common-Definitions), apart from this schema, for these three terms. 8 | 2. Show some [examples](./domain_or_hostname_or_fqdn.md#Examples-of-Ambiguity) that cause ambiguity in these common definitions. 9 | 3. Outline and guideline for how to perform the [Implementation](./domain_or_hostname_or_fqdn.md#Implementation) for this schema 10 | 11 | ## Common Definitions 12 | The following are the most common definitions for a Domain, Hostname, and FQDN. We will use the example value `bob-berto-pc.bigwheel.corporation.local` as the example to visualize the definitions. 13 | ### FQDN 14 | The absolute (entire) value of the DNS hierarchy from the lowest level to the top level domain (TLD). Consists of the Hostname and Domain. This is best defined in [this Wikipedia](https://en.wikipedia.org/w/index.php?title=Fully_qualified_domain_name&oldid=911195384#Syntax) article on FQDN. 15 | example FQDN value = `bob-berto-pc.bigwheel.corporation.local` 16 | ### Hostname 17 | The name of a host, device, node, or entity that is separate from the FQDN and Domain. Think of this in the context of running the "hostname" command. 18 | example Hostname value = `bob-berto-pc` 19 | ### Domain 20 | The (DNS) hierarchy that encompasses multiple hosts (ie: a Windows Active Directory environment). 
21 | example Domain value = `bigwheel.corporation.local` 22 | 23 | ## Examples of Ambiguity 24 | Let's use some common examples that will (hopefully) begin to illustrate the ambiguity in being able to determine what is "truly" the FQDN, Hostname, or Domain from a logging perspective. 25 | 26 | ### Background 27 | - Organization owns `corporation.local` 28 | - Active Directory forest at `bigwheel.corporation.local` 29 | - Another AD environment that is a subdomain of their AD forest, and is located at `finance-group.bigwheel.corporation.local` 30 | - External web server at `www.corporation.local` 31 | - Internal IIS server hosting a Wiki at `wiki.bigwheel.corporation.local` 32 | - You have logging for the endpoint, the IIS server, DNS, HTTP, SSL, Proxy, and other network logs. 33 | 34 | #### Scenario 1 35 | HTTP request for `wiki.bigwheel.corporation.local` was made by an endpoint. You now have the following log sources and their field name for this value: 36 | 1. Proxy log defining the field as `hostname` 37 | 2. Endpoint log defining the field as `DestinationDomain` 38 | 3. DNS log defining the field as `dns_query` 39 | 4.
IIS web server log defining two fields, **a)** `destination_hostname` with the value `wiki` and **b)** `destination_domain` with the value `bigwheel.corporation.local` 40 | ##### Scenario 1 Problem Framing 41 | Now, because you have 4 logs that you can (and should definitely) pivot between related to this 1 HTTP request, you set out to define these fields into a common format in order to accomplish this pivoting 42 | - The proxy log defining the field as `hostname` is incorrect, we know that the hostname is actually `wiki` 43 | - The endpoint log using the verbiage `DestinationDomain` is incorrect, we know that `Domain` is actually `bigwheel.corporation.local` 44 | - The DNS log at least took a passive stance and just labeled the value specific to the DNS application - however, we are left without being able to pivot from this DNS query to the other log sources (if we don't change it) 45 | - The IIS log correctly labels the fields. However, a very critical point is that in many scenarios you may not have this level of intimate knowledge into the environment. The endpoint log that belongs to the same (AD) domain as the web server had no knowledge of the destination's hostname or (AD) domain! Also, you may be just placing a network sensor for incident response. 46 | #### Scenario 2 47 | Endpoint makes an HTTP request to an external IP (Destination IP) with the HTTP Host header set via the command line in cURL of `mwi2xha9lpqn41lo`. For example, the command `curl --header "Host: mwi2xha9lpqn41lo" http://8.8.8.8/` was used. You now have the following log sources and their field name for this value: 48 | 1. Proxy log defining the field as `hostname` 49 | 2. Endpoint log defining the field as `DestinationDomain` 50 | ##### Scenario 2 Problem Framing 51 | Now, because the connection was direct to an IP, we don't have a DNS log but we have two log sources with the same value that we would want to pivot on.
52 | - The value has no clear indication of whether it's a FQDN, Hostname, or Domain. It is just random characters and doesn't even include any TLD. Whether this is malicious or not is irrelevant - this sort of thing happens consistently whether by mistake or malicious intent and for the purpose of this guideline - we are interested in normalizing fields into a schema before we take the next step of determining the intent of malicious, mistake, or bad practice/hygiene. 53 | - Also, let's say the Host header wasn't set, and instead the command `curl http://8.8.8.8/` was used. You now have two fields with the value `8.8.8.8` (outside of the Destination IP). Again, this is definitely not a FQDN, Hostname, or Domain - and you don't want to delete the field either. 54 | 55 | ------------------------------------------------------------------------------------------------------ 56 | ## Implementation 57 | Due to the ambiguity that will happen in log sources and not being able to always, let alone the majority of the time, distinguish the FQDN vs the Domain vs the Hostname as well as the confusion caused by log source's field names - we will define definitions in order that this delineation is NOT necessary between the three. We also provide specific examples of log sources and what to call the fields, in order to even further clear any ambiguity. 58 | 59 | These apply to both source and destination FQDNs, Domains, and hostnames. Therefore, if you only see destination verbiage below - just replace that with source for the applicable log scenario. 60 | 61 | ### Implementation Outline 62 | #### FQDN 63 | This is an optional field. Because there are many scenarios (as briefly outlined above) where one can NOT determine the true FQDN, we will leave this field as defined but should only be used if the log source has intimate knowledge that this is in fact the FQDN.
64 | #### Hostname 65 | This field should always exist if there is some sort of domain, FQDN, or hostname in the log/event. 66 | #### Domain 67 | This field is optional. Because there are many scenarios (as briefly outlined above) where one can NOT determine the true domain, we will leave this field as defined but should only be used if the log source has intimate knowledge that this is in fact the domain. 68 | 69 | #### Implementation Field Examples: 70 | - `dst_host_name` 71 | - `dst_domain` 72 | - `dst_fqdn` 73 | - `src_host_name` 74 | - `src_domain` 75 | - `src_fqdn` 76 | 77 | ### Implementation Examples 78 | 1. Sysmon [EventID:3 network connection event log](https://github.com/OTRF/OSSEM/blob/master/data_dictionaries/windows/sysmon/events/event-3.md) field for `DestinationHostName` should be set as `dst_host_name` 79 | 2. HTTP or Proxy or web server application logs (ie: IIS, Apache, NGINX, etc...), with the hostname/domain (also known as the HTTP Host header) should be set as `dst_host_name` 80 | this would include: 81 | - Zeek HTTP field `host` 82 | - Suricata HTTP field `hostname` 83 | - NGINX field `hostname` 84 | - IIS field `vhost` 85 | 3. TLS/SSL server name (SNI) should be set as `dst_host_name` 86 | - Zeek SSL field `server_name` 87 | - Suricata TLS field `sni` 88 | - NGINX field `hostname` 89 | - IIS field `vhost` 90 | 4. Kerberos service name should be set as `dst_host_name` 91 | 5. Sysmon [EventID:22 dns query event log](https://github.com/OTRF/OSSEM/blob/master/data_dictionaries/windows/sysmon/events/event-22.md) field for `QueryName` should be set as `dst_host_name` 92 | 6. DNS query name field should be set as `dst_host_name` 93 | - Zeek DNS field `query` 94 | - Suricata DNS field `rrname` 95 | 7. For events/logs with URLs or URIs and the HTTP Host header (option 2) doesn't exist, parse the hostname/domain portion out of the URL. 
96 | For example: 97 | - Zeek or Suricata HTTP log, skip this because it is defined in option 2 98 | - PaloAlto Threat log using the field `URL/Filename` field as [outlined here](https://docs.paloaltonetworks.com/pan-os/9-0/pan-os-admin/monitoring/use-syslog-for-monitoring/syslog-field-descriptions.html) , after first renaming to `url.original` as defined in [URL Schema](https://github.com/OTRF/OSSEM/blob/master/common_information_model/entities/url.md), you would parse the domain/host out of this field and then set it as `dst_host_name` 99 | 8. RDP client/source name should be set as `src_host_name` 100 | 9. Endpoint (ie: Windows/Linux) logs that do **NOT** apply to the following: 101 | a) defined in the use cases above 102 | b) already defined in [data dictionaries](https://github.com/OTRF/OSSEM/tree/master/data_dictionaries) (ex: [Windows Kerberos EventID:4768](https://github.com/OTRF/OSSEM/blob/master/data_dictionaries/windows/etw-providers/Microsoft-Windows-Security-Auditing/events/event-4768.md), [Windows Logon EventID:4624](https://github.com/OTRF/OSSEM/blob/master/data_dictionaries/windows/etw-providers/Microsoft-Windows-Security-Auditing/events/event-4624_v2.md)) 103 | c) logically fit into Target or and are NOT 104 | 1. FQDN and Domain values exist. 105 | - Set the field for the FQDN value as `dst_fqdn` 106 | - Copy the value for `dst_fqdn` into the field `dst_host_name` 107 | - Set the field for the Domain value as `dst_domain` 108 | 2. FQDN and Hostname and Domain values exist 109 | - Set the field for the FQDN value as `dst_fqdn` 110 | - Set the field for Domain value as `dst_domain` 111 | - Set the field for the Hostname value as `dst_host_name` 112 | 3. Hostname and Domain values exist 113 | - Set the field for the Domain value as `dst_domain` 114 | - Set the field for the Hostname value as `dst_host_name` 115 | 4. Hostname value only exists 116 | - Set the field for the Hostname value as `dst_host_name` 117 | 5. 
Domain value only exists 118 | - Set the field for the Domain value as `dst_domain` 119 | - Copy the value for `dst_domain` into the field `dst_host_name` 120 | 6. FQDN value only exists 121 | - Set the field for the FQDN value as `dst_fqdn` 122 | - Copy the value for `dst_fqdn` into the field `dst_host_name` -------------------------------------------------------------------------------- /docs/cdm/guidelines/entity_structure.md: -------------------------------------------------------------------------------- 1 | # Entity Structure 2 | 3 | Entities are documented in `YAML` format, and are the source files for every entity documentation in the common data model. These source files can be used to define the logic behind each entity and automate the creation of field names and documentation. 4 | 5 | **Example:** 6 | 7 | ```yaml 8 | name: hash 9 | prefix: 10 | - hash 11 | id: 42C1A34E-D474-468D-8EFB-09454CA8BFC2 12 | description: Event fields used to capture metadata about hashes of an image/binary/file. 13 | extends_entities: 14 | - process 15 | - file 16 | attributes: 17 | - name: md5 18 | type: string 19 | description: MD5 hash of the image/binary/file 20 | sample_value: 6A255BEBF3DBCD13585538ED47DBAFD7 21 | - name: sha1 22 | type: string 23 | description: SHA1 hash of the image/binary/file 24 | sample_value: B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2 25 | - name: sha256 26 | type: string 27 | description: SHA256 hash of the image/binary/file 28 | sample_value: 4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C 29 | references: [] 30 | tags: [] 31 | ``` 32 | 33 | ## Entity Definitions 34 | 35 | ```yaml 36 | name: hash 37 | prefix: 38 | - hash 39 | id: 42C1A34E-D474-468D-8EFB-09454CA8BFC2 40 | description: Event fields used to capture metadata about hashes of an image/binary/file. 41 | ``` 42 | * **name:** Name of the current entity. 
43 | * Name must be lower case 44 | * Multiple words in an entity name must be separated by an underscore (i.e `user_agent`) 45 | 46 | * **prefix:** Prefix used for every attribute defined under the current entity. 47 | * `hash`_md5 48 | * `hash`_sha1 49 | * `hash`_sha256 50 | 51 | You can specify more than one prefix. This is the case for entities that might have attributes that can be defined twice under the same entity. This is different from extending other entities since the additional prefix only makes sense to the current entity (i.e `process` and `process_parent`). 52 | * `process`_name 53 | * `process`_path 54 | * `process_parent`_name 55 | * `process_parent`_path 56 | 57 | * **id:** Unique identifier for the current entity (i.e `035E058E-5405-4B3B-9288-E78A63B40DAA`) 58 | * You can generate an ID value by running the following command `uuidgen` in macOS. 59 | 60 | * **description:** Description of the current entity. 61 | 62 | ## Entity Extensions 63 | 64 | ```yaml 65 | extends_entities: 66 | - process 67 | - file 68 | ``` 69 | 70 | * An entity can extend another entity. 71 | * This is the case of entities such as `Hash` extending other entities such as `Process` and `File`. 72 | * As we know, telemetry from processes could also provide `hash` information of the file backing up the process (i.e an executable). 73 | * Therefore, we can say that the `Hash` entity and its attributes (i.e. md5, sha1, sha256) could extend the `Process` entity. 74 | * We can describe this logic by leveraging the **`extends_entities`** property of the current entity. 75 | * It only accepts a `list` of entity names. 76 | * By default, in the example above, the `Hash` entity extended the `Process` entity by appending all possible field names from the `Hash` entity to the `Process` prefix values.
Since `Process` has two prefixes, we end up with something similar to: 77 | * `process_hash_md5` 78 | * `process_hash_sha256` 79 | * `process_parent_hash_md5` 80 | * `process_parent_hash_sha256` 81 | * This does not apply to every attribute in the extended entity (i.e `process_id`, `process_command_line`, `process_integrity`, etc). We can only extend the entity **prefix** values to create the entity field names. The following would not make sense: 82 | * `process_id_hash_md5` 83 | * `process_command_line_hash_256` 84 | 85 | ## Entity Attributes 86 | 87 | ```yaml 88 | attributes: 89 | - name: md5 90 | type: string 91 | description: MD5 hash of the image/binary/file 92 | sample_value: 6A255BEBF3DBCD13585538ED47DBAFD7 93 | - name: sha1 94 | type: string 95 | description: SHA1 hash of the image/binary/file 96 | sample_value: B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2 97 | - name: sha256 98 | type: string 99 | description: SHA256 hash of the image/binary/file 100 | sample_value: 4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C 101 | ``` 102 | 103 | * Entity attributes are provided under the **attributes** property of the current entity. 104 | * The **attributes** property is a list of dictionaries. 105 | * Each dictionary provides metadata about each attribute 106 | * **name:** Name of the attribute 107 | * **type:** Data type of the attribute 108 | * **description:** Description of the attribute 109 | * **sample_value:** An example of the expected attribute value 110 | * Attribute names must be lower case 111 | * Multiple words in an attribute must be separated by an underscore (i.e.
record_id) 112 | * Descriptions must be generic enough to cover entity extensions (extending other entities and itself) 113 | * For example, the description of the attribute `name` in a `Process` entity should fit `process_name`, `process_parent_name`, `source_process_name` 114 | * We can simply describe the attribute `name` in a `Process` entity as the `name of the application backing up the process in a system. Name without full path of the file or executable backing up the process. It can be leveraged in the context of child, parent, source and even target process` -------------------------------------------------------------------------------- /docs/cdm/guidelines/intro.md: -------------------------------------------------------------------------------- 1 | # Guidelines -------------------------------------------------------------------------------- /docs/cdm/guidelines/source_or_destination_or_target.md: -------------------------------------------------------------------------------- 1 | # Source or Destination or Target 2 | 3 | Several events that the OSSEM CDM project describes have a sense of direction. 4 | Usually in a network connection, this sense of direction is represented by `source` and `destination` to describe the `origin` of the connection and where the network packets are `sent to`. 5 | This concept of direction is not only represented in a `network connection`, but also other events such as `creation of a process` where an entity interacts with another entity. 6 | Therefore, the OSSEM project is also using the concept of `target` instead of `destination` when describing an interaction between entities that are not part of a network connection.
7 | 8 | ## Source and Destination Example 9 | 10 | When the metadata of the event describes a network connection and the same entity type is used: 11 | 12 | * `source.ip` -> `connected_to` -> `destination.ip` 13 | * `source.host` -> `connected_to` -> `destination.host` 14 | 15 | ## Source and Target Example 16 | 17 | When the metadata of the event does `not` describe a network connection and the same entity type is used: 18 | 19 | * `source.process` -> `created` -> `target.process` 20 | * `source.process` -> `accessed` -> `target.process` 21 | 22 | ## The use of Aliases 23 | 24 | When the metadata of the event does not use the same entity type, we can use `aliases` for both entities and query them without `source`, `destination` and `target` annotations. 25 | This also works when the event provides metadata of only one entity which could be the `source`, `destination` or `target`. 26 | 27 | ### Before 28 | `source.process` 29 | 30 | * `source.process` -> `created` -> `target.file` 31 | * `source.host` -> `connected_to` -> `target.ip` 32 | 33 | ### After 34 | `process` 35 | 36 | * `process` -> `created` -> `file` 37 | * `host` -> `connected_to` -> `ip` 38 | 39 | -------------------------------------------------------------------------------- /docs/cdm/guidelines/table_structure.md: -------------------------------------------------------------------------------- 1 | # Table Structure 2 | 3 | Tables are documented in `YAML` format, and are the source files for every table documentation in the common data model. These source files can be used to define the logic behind each table and automate the creation of field names and documentation. Tables are objects that group several entities together creating relationships among them to standardize diverse datasets that share a common definition. For example, the `NetworkSession` table leverages entities such as `http`, `url`, `network`, etc.
4 | 5 | **Example:** 6 | 7 | ```yaml 8 | name: NetworkSession 9 | id: 189BC2EE-44BF-4A8A-A257-5521C67D457B 10 | description: Event fields used to define network sessions in an endpoint. 11 | entities: 12 | - hash 13 | - user 14 | - name: file 15 | prefix: 16 | - file 17 | attributes: 18 | - name 19 | - path 20 | - name: custom 21 | entities: 22 | - name: threat 23 | attributes: 24 | - name: name 25 | type: string 26 | description: The name of the threat or malware identified 27 | sample_value: 'Win32.Small.ahif(90603579)' 28 | ``` 29 | ## Table Definitions 30 | 31 | ```yaml 32 | name: network_session 33 | id: 189BC2EE-44BF-4A8A-A257-5521C67D457B 34 | description: Event fields used to define network sessions in an endpoint. 35 | ``` 36 | * **name:** Name of the current table. 37 | * Name must be lower case 38 | * Multiple words in an entity name must be separated by an underscore (i.e `user_agent`) 39 | 40 | * **id:** Unique identifier for the current entity (i.e `035E058E-5405-4B3B-9288-E78A63B40DAA`) 41 | * You can generate an ID value by running the following command `uuidgen` in macOS. 42 | 43 | * **description:** Description of the current table. 44 | 45 | ## Entities 46 | 47 | ```yaml 48 | entities: 49 | - hash 50 | - user 51 | - name: file 52 | prefix: 53 | - file 54 | attributes: 55 | - name 56 | - path 57 | - name: custom 58 | entities: 59 | - name: threat 60 | prefix: 61 | - threat 62 | attributes: 63 | - name: name 64 | type: string 65 | description: The name of the threat or malware identified 66 | sample_value: 'Win32.Small.ahif(90603579)' 67 | ``` 68 | 69 | * As mentioned before, we can define tables by grouping more than one pre-defined entity. 70 | * Entities that form a table are added under the `entities` property. 71 | * The `entities` property expects a list of `strings` or `dictionaries` 72 | * `Strings` are used to directly call an entity and all its attributes. 
73 | * They must be lower case (Following the same guidelines defined in the `Entity Structure` documentation) 74 | * `Dictionaries` can be used for two things: 75 | 76 | * You can use a dictionary to define what specific attributes you want from existing entities. 77 | 78 | ```yaml 79 | - name: file 80 | prefix: 81 | - file 82 | attributes: 83 | - name 84 | - path 85 | ``` 86 | * We define the name of the entity and the valid prefix for the entity. 87 | * We define the entity to be flexible and be able to be very specific when pulling field names (prefix + attribute) from our already defined entities. 88 | * All we do next is just take the `prefix`, append it to the `attribute` and compare it with attributes in our official entities list. If we find a match then we add it to the table. 89 | * A table would end up with the following attributes from the `file` entity 90 | * `file_name` 91 | * `file_path` 92 | 93 | * You can also use a dictionary to introduce custom entities with custom attributes that do not exist currently in the common data model. This is good for collaboration purposes. 94 | 95 | * `Custom` dictionaries contain a list of dictionaries. 96 | * Each dictionary is a custom/new entity 97 | * Each entity name follows the same guidelines defined in the `Entity Structure` documentation. 98 | * Each entity has a `prefix` property (same to the entity concepts in this common data model) 99 | * Each dictionary/entity has an `attributes` property that allows users to introduce new attributes to the common data model 100 | * Each attribute contains the usual properties (`name`, `type`, `description`, `sample_value`) 101 | * Each attribute name follows the same guidelines defined in the `Entity Structure` documentation. 
-------------------------------------------------------------------------------- /docs/cdm/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | The OSSEM common data model (CDM) facilitates the normalization and consistency of diverse data sources by providing standardized and extensible data schemas. 4 | 5 | **This project is designed to:** 6 | 7 | * Simplify data management procedures by creating a structure around diverse data sources 8 | * Integrate diverse data sources and provide a standardized way for security analysts to query data 9 | * Extend the standard schemas to accommodate custom data entities and allow the community to contribute back -------------------------------------------------------------------------------- /docs/cdm/tables/intro.md: -------------------------------------------------------------------------------- 1 | # Tables -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/intro.md: -------------------------------------------------------------------------------- 1 | # linux dictionaries -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-1.md: -------------------------------------------------------------------------------- 1 | # Event ID 1: Process creation 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | The **process creation** event provides extended information about a newly created process. The full command line provides context on the process execution. The ProcessGUID field is a unique value for this process across a domain to make event correlation easier. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |RuleName|string|custom tag mapped to event.
i.e ATT&CK technique ID|`T1114`| 11 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T20:06:22.6500000Z`| 12 | |ProcessGuid|string|Process Guid of the process that got spawned/created (child)|`{844e14fa-3c3e-6167-98ab-cd236b550000}`| 13 | |ProcessId|integer|Process ID used by the os to identify the created process (child)|`5079`| 14 | |Image|string|File path of the process being spawned/created. Considered also the child or source process|`/usr/sbin/rsyslogd`| 15 | |FileVersion|string|Version of the image associated with the main process (child)|``| 16 | |Description|string|Description of the image associated with the main process (child)|``| 17 | |Product|string|Product name the image associated with the main process (child) belongs to|``| 18 | |Company|string|Company name the image associated with the main process (child) belongs to|``| 19 | |OriginalFileName|string|original file name|``| 20 | |CommandLine|string|Arguments which were passed to the executable associated with the main process|`/usr/sbin/rsyslogd -n`| 21 | |CurrentDirectory|string|Current working directory from which the main process executed.|``| 22 | |IntegrityLevel|string|Integrity label assigned to a process|`no level`| 23 | |User|string|Name of the account who created the process (child) .|`root`| 24 | |LogonGuid|string|Logon GUID of the user who created the new process. Value that can help you correlate this event with others that contain the same Logon GUID (Sysmon Events)|`{844e14fa-0000-0000-0000-000000000000}`| 25 | |LogonId|integer|Login ID of the user who created the new process. 
Value that can help you correlate this event with others that contain the same Logon ID|`0xf6219`| 26 | |TerminalSessionId|integer|ID of the session the user belongs to|`4294967295`| 27 | |Hashes|string|Hashes captured by sysmon driver|``| 28 | |ParentUser|string|Name of the account who created the process that spawned/created the main process (child)|`root`| 29 | |ParentProcessGuid|string|ProcessGUID of the process that spawned/created the main process (child)|`{A98268C1-9C2E-5ACD-0000-00100266AB00}`| 30 | |ParentProcessId|integer|Process ID of the process that spawned/created the main process (child)|`240`| 31 | |ParentImage|string|File path that spawned/created the main process|`/lib/systemd/systemd`| 32 | |ParentCommandLine|string|Arguments which were passed to the executable associated with the parent process|`/sbin/init`| 33 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-11.md: -------------------------------------------------------------------------------- 1 | # Event ID 11: FileCreate 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | **File create** operations are logged when a file is created or overwritten. This event is useful for monitoring autostart locations, like the temporary and download directories, which are common places malware drops during initial infection. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |RuleName|string|custom tag mapped to event. 
i.e ATT&CK technique ID|`T1114`| 11 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T20:06:22.6590000Z`| 12 | |ProcessGuid|string|Process Guid of the process that created the file|`{A98268C1-958A-5ACD-0000-0010C62F0100}`| 13 | |ProcessId|integer|Process ID used by the os to identify the process that created the file.|`1044`| 14 | |Image|string|File path of the process that created the file|`/usr/sbin/rsyslogd`| 15 | |TargetFilename|string|Name of the file|`/run/rsyslogd.pid.tmp`| 16 | |CreationUtcTime|date|File creation time|`2021-10-14T22:39:15.5650000Z`| 17 | |User|string|user that created the file|`root`| 18 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-16.md: -------------------------------------------------------------------------------- 1 | # Event ID 16 - Sysmon Config State Changed 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | This event logs when the local **sysmon configuration is updated**. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T21:04:30.3520000Z`| 11 | |Configuration|string|name of the sysmon config file being updated|`config2.xml`| 12 | |ConfigurationFileHash|string|hash (SHA1) of the sysmon config file being updated|``| 13 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-23.md: -------------------------------------------------------------------------------- 1 | # Event ID 23: FileDelete (A file delete was detected) 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | This event logs when a **file is deleted** by a process. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |RuleName|string|custom tag mapped to event. 
i.e ATT&CK technique ID|`T1114`| 11 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T20:06:22.6490000Z`| 12 | |ProcessGuid|string|Process Guid of the process that deleted the file|`{A98268C1-959E-5ACD-0000-0010236E0300}`| 13 | |ProcessId|integer|Process ID used by the os to identify the process that deleted the file|`1896`| 14 | |Image|string|File path of the process that deleted the file|`/lib/systemd/systemd`| 15 | |User|string|Name of the account who deleted the file.|`root`| 16 | |TargetFilename|string|full path name of the deleted file|`/run/systemd/units/invocation:rsyslog.service`| 17 | |Hashes|string|Hashes captured by sysmon driver of the deleted file|`SHA1=B0BF5AC2E81BBF597FAD5F349FEEB32CAC449FA2, MD5=6A255BEBF3DBCD13585538ED47DBAFD7, SHA256=4668BB2223FFB983A5F1273B9E3D9FA2C5CE4A0F1FB18CA5C1B285762020073C, IMPHASH=2505BD03D7BD285E50CE89CEC02B333B`| 18 | |IsExecutable|bool|TBD|`TBD`| 19 | |Archived|string|States if the file was archived when deleted|`True`| 20 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-3.md: -------------------------------------------------------------------------------- 1 | # Event ID 3: Network connection 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | The **network connection** event logs TCP/UDP connections on the machine. It is disabled by default. Each connection is linked to a process through the ProcessId and ProcessGUID fields. The event also contains the source and destination host names IP addresses, port numbers and IPv6 status. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |RuleName|string|custom tag mapped to event. 
i.e ATT&CK technique ID|`T1114`| 11 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T20:06:22.6600000Z`| 12 | |ProcessGuid|string|Process Guid of the process that made the network connection|`{A98268C1-957F-5ACD-0000-0010EB030000}`| 13 | |ProcessId|integer|Process ID used by the os to identify the process that made the network connection|`5079`| 14 | |Image|string|File path of the process that made the network connection|`/usr/sbin/rsyslogd`| 15 | |User|string|Name of the account who made the network connection. It usually contains domain name and user name|`root`| 16 | |Protocol|string|Protocol being used for the network connection|`udp`| 17 | |Initiated|boolean|Indicated process initiated tcp connection|`true`| 18 | |SourceIsIpv6|boolean|is the source ip an Ipv6|`false`| 19 | |SourceIp|ip|source ip address that made the network connection|`127.0.0.1`| 20 | |SourceHostname|string|name of the host that made the network connection|``| 21 | |SourcePort|integer|source port number|`43336`| 22 | |SourcePortName|string|name of the source port being used|``| 23 | |DestinationIsIpv6|boolean|is the destination ip an Ipv6|`false`| 24 | |DestinationIp|ip|ip address destination|`127.0.0.1`| 25 | |DestinationHostname|string|name of the host that received the network connection|``| 26 | |DestinationPort|integer|destination port number|`25224`| 27 | |DestinationPortName|string|name of the destination port|``| 28 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-4.md: -------------------------------------------------------------------------------- 1 | # Event ID 4: Sysmon service state changed 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | The **service state change** event reports the state of the Sysmon service (started or stopped).
6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |UtcTime|date|Time in UTC when event was created|`2021-10-15T00:26:12.0920000Z`| 11 | |State|string|sysmon service state (i.e. stopped)|`Started`| 12 | |Version|string|sysmon version|`1.0.0`| 13 | |SchemaVersion|string|sysmon config schema version|`4.81`| 14 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-5.md: -------------------------------------------------------------------------------- 1 | # Event ID 5: Process terminated 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | The **process terminate** event reports when a process terminates. It provides the UtcTime, ProcessGuid and ProcessId of the process. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |RuleName|string|custom tag mapped to event. i.e ATT&CK technique ID|`T1114`| 11 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T20:06:22.6470000Z`| 12 | |ProcessGuid|string|Process Guid of the process that terminated|`{A98268C1-9ECD-5ACD-0000-0010EF6BAF00}`| 13 | |ProcessId|integer|Process ID used by the os to identify the process that terminated|`2428`| 14 | |Image|string|File path of the process that terminated|`rsyslogd`| 15 | |User|string|Name of the account that terminated the process.|`syslog`| 16 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/event-9.md: -------------------------------------------------------------------------------- 1 | # Event ID 9: RawAccessRead 2 | ###### Version: 4.81 3 | 4 | ## Description 5 | The **RawAccessRead** event detects when a process conducts reading operations from the drive. This technique is often used by malware for data exfiltration of files that are locked for reading, as well as to avoid file access auditing tools. 
The event indicates the source process and target device. 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | |RuleName|string|custom tag mapped to event. i.e ATT&CK technique ID|`T1114`| 11 | |UtcTime|date|Time in UTC when event was created|`2021-10-13T20:14:04.3360000Z`| 12 | |ProcessGuid|string|Process Guid of the process that conducted reading operations from the drive|`{A98268C1-959B-5ACD-0000-0010EFD50200}`| 13 | |ProcessId|integer|Process ID used by the os to identify the process that conducted reading operations from the drive|`2708`| 14 | |Image|string|File path of the process that conducted reading operations from the drive|`/sbin/dumpe2fs`| 15 | |Device|string|Target device|`/dev/sda1`| 16 | |User|string|Name of the account that read.|`root`| 17 | -------------------------------------------------------------------------------- /docs/dd/dictionaries/linux/sysmon/intro.md: -------------------------------------------------------------------------------- 1 | # sysmon events -------------------------------------------------------------------------------- /docs/dd/guidelines/authoring_data_dictionaries.md: -------------------------------------------------------------------------------- 1 | # Authoring Guide 2 | This guide details the process of data dictionary authoring, by describing the structure and organization of events. 3 | 4 | Let's recap what a data dictionary event is, before going into the nitty-gritty details. 5 | 6 | ## Table of contents 7 | * [Definition](#definition) 8 | * [Structure](#structure) 9 | * [Standardization](#standardization) 10 | * [Organization](#organization) 11 | * [README Files](#readme-files) 12 | 13 | ## Definition 14 | Data dictionaries are atomic structures of base events emitted by log sources on a platform. These structures contain the **definition** of a base event and its **fields**. 15 | 16 | The event **definition** enables the entity to be fully functional by itself.
It describes the title, code, platform, log source, and other meta-data that provides context. The event **definition** plays a major role on how the event is consumed. 17 | 18 | The event **fields** contain the list of fields available. Each field has properties that provide context about the field, and ultimately enable it to be correlated with other OSSEM data like the [Common Data Model](https://ossemproject.com/cdm/intro.html) and [Detection Data Model](https://ossemproject.com/ddm/intro.html). 19 | 20 | OSSEM data dictionaries are structured to be as lean as possible, the reason is twofold: not only to avoid redundant information between events, but also to promote the adoption of external references (pointers). 21 | 22 | OSSEM data dictionaries are represented in YAML. Again, the goal is to find the best balance between human readability, and ease of automation, hence YAML was a relatively easy pick when choosing an OSSEM data language. 23 | 24 | ## Structure 25 | The event **definition** fields are: 26 | * Title: the event title if any, otherwise use the event code 27 | * Description: the description of the event 28 | * Platform: the platform where the log source is hosted 29 | * Log source: the log source that generates the event 30 | * Event code: the code or ID of the event 31 | * Event version: the version of the event 32 | * Reference list: text and link for external references relevant to the event 33 | * Tag list: tags applicable to the event 34 | 35 | For every **field** in the event, the properties are: 36 | * Standard name: the standard name assigned to the field name, if applicable 37 | * Name: the field name as per vendor documentation 38 | * Type: the field type as per vendor documentation 39 | * Description: the field description 40 | * Sample value: the field sample value, if applicable 41 | 42 | An example of a Windows Security Auditing Event 4616 follows: 43 | ``` 44 | title: 'Event ID 4616: The system time was changed.'
45 | description: This event generates every time system time was changed. 46 | platform: windows 47 | log_source: Microsoft-Windows-Security-Auditing 48 | event_code: '4616' 49 | event_version: '1' 50 | event_fields: 51 | - standard_name: user_sid 52 | name: SubjectUserSid 53 | type: SID 54 | description: SID of account that requested the "change system time" operation. 55 | sample_value: S-1-5-21-3457937927-2839227994-823803824-1104 56 | - standard_name: user_name 57 | name: SubjectUserName 58 | type: UnicodeString 59 | description: the name of the account that requested the "change system time" operation. 60 | sample_value: dadmin 61 | - standard_name: user_domain 62 | name: SubjectDomainName 63 | type: UnicodeString 64 | description: subject's domain or computer name. 65 | sample_value: CONTOSO 66 | - standard_name: user_logon_id 67 | name: SubjectLogonId 68 | type: HexInt64 69 | description: 'hexadecimal value that can help you correlate this event with recent 70 | events that might contain the same Logon ID, for example, "4624: An account was 71 | successfully logged on".' 72 | sample_value: '0x48f29' 73 | - standard_name: TBD 74 | name: PreviousTime 75 | type: FILETIME 76 | description: previous time in UTC time zone. 77 | sample_value: '2015-10-09T05:04:30.000941900Z' 78 | - standard_name: TBD 79 | name: NewTime 80 | type: FILETIME 81 | description: new time that was set in UTC time zone. 82 | sample_value: '2015-10-09T05:04:30.000000000Z' 83 | - standard_name: process_id 84 | name: ProcessId 85 | type: Pointer 86 | description: hexadecimal Process ID of the process that changed the system time. 87 | Process ID (PID) is a number used by the operating system to uniquely identify 88 | an active process. 89 | sample_value: '0x1074' 90 | - standard_name: process_path 91 | name: ProcessName 92 | type: UnicodeString 93 | description: full path and the name of the executable for the process. 
94 | sample_value: C:\Windows\WinSxS\amd64_microsoft-windows-com-surrogate-core_31bf3856ad364e35_6.3.9600.16384_none_25a8f00faa8f185c\dllhost.exe 95 | references: 96 | - text: MS SOURCE 97 | link: https://github.com/MicrosoftDocs/windows-itpro-docs/blob/public/windows/security/threat-protection/auditing/event-4616.md 98 | - text: MS Security Auditing Category - System 99 | link: https://docs.microsoft.com/en-us/windows/security/threat-protection/auditing/advanced-security-audit-policy-settings#system 100 | - text: MS Security Auditing Sub-category - Audit Security State Change 101 | link: https://github.com/MicrosoftDocs/windows-itpro-docs/tree/master/windows/security/threat-protection/auditing/audit-security-state-change.md 102 | tags: 103 | - etw_level_Informational 104 | - etw_task_task_0 105 | - version_1 106 | - System 107 | - Audit Security State Change 108 | ``` 109 | 110 | ### Standardization 111 | The **standard_name** is a special property of event fields, as it represents the first layer of data standardization on the event. 112 | 113 | In the example above (event 4616), the `SubjectUserSid` name was *translated* to `user_sid` standard name. This *translation* ensures the data dictionary is aligned with the [Common Data Model](https://ossemproject.com/cdm/intro.html) (CDM) [User schema](https://ossemproject.com/cdm/entities/user.html), reduces complexity, and enhances the development of detection analytics. 114 | 115 | Note that it's not mandatory that you define a standard name for every field on your event. Some good practices when defining standard names include: 116 | * Search for the field name in other OSSEM events. It's not uncommon that you can apply the same standard name to identical field names, especially if the log source is the same. 117 | * Check if the standard name already exists in one of the [Common Data Model](https://ossemproject.com/cdm/intro.html) entities schema.
118 | 119 | ## Organization 120 | OSSEM built-in data dictionaries are primarily organized in a file system folder structure, that ensures the grouping according to data dictionaries characteristics. While there is no limit to the folder depth, the **platform** and **log sources** folders must follow a predefined structure. 121 | 122 | Data dictionaries are located in the [OSSEM-DD sub-repository](https://github.com/OTRF/OSSEM-DD). The first level of organization is by **platform**. 123 | 124 | An example of a build-in **Sysmon** data dictionary follows: 125 | ``` 126 | . 127 | ├── OSSEM-DD <--------- sub-repository 128 | │   ├── README.yml 129 | │   ├── windows <--------- platform (operating system/sensor folder) 130 | │   │   ├── README.yml 131 | │   │   ├── sysmon <--------- log source folder 132 | │   │   │   ├── README.yml 133 | │   │   │   └── events <--------- events folder 134 | │   │   │   ├── event-1.yml 135 | │   │   │   ├── event-7.yml 136 | │   │   │   ├── event-8.yml <- data dictionary entry 137 | ... 138 | ``` 139 | 140 | Each **platform** folder contain sub-folder for the **log source**, which in turn *always* contain an `events/`folder where the events are stored. 141 | 142 | Since the **platform** and **log source** properties are already defined in the event, is fairly straightforward to figure out where to store your events. 143 | 144 | Because ensuring the consistency of this folder structure can be tricky, specially when dealing with dozens of log sources, OSSEM provides README files that provide additional information about the current folder. These files are particularly helpful when converting OSSEM to markdown, where they are used as indexes. 
145 | 146 | ### README Files 147 | Similarly to data dictionaries, README files are also defined in YAML and contain the following properties: 148 | * title: for example the log source title 149 | * description: for example the log source description 150 | * images: text and path to images relevant to the readme 151 | * references: text and link for external references relevant to the readme 152 | 153 | An example of a README follows: 154 | ``` 155 | title: Sysmon Event Logs 156 | description: System Monitor (Sysmon) is a Windows system service and device 157 | driver that, once installed on a system, remains resident across system 158 | reboots to monitor and log system activity to the Windows event log. It 159 | provides detailed information about process creations, network connections, 160 | and changes to file creation time. By collecting the events it generates 161 | using Windows Event Collection or SIEM agents and subsequently analyzing 162 | them, you can identify malicious or anomalous activity and understand how 163 | intruders and malware operate on your network. 164 | images: 165 | - title: Data model 166 | source: /resources/images/SysmonDataModel.png 167 | references: 168 | - text: Sysmon Source 169 | link: https://docs.microsoft.com/en-us/sysinternals/downloads/sysmon 170 | - text: TrustedSec Sysinternals Sysmon Community Guide 171 | link: https://github.com/trustedsec/SysmonCommunityGuide 172 | ``` 173 | 174 | [Go to top](#table-of-contents) -------------------------------------------------------------------------------- /docs/dd/guidelines/contributing_data_dictionaries.md: -------------------------------------------------------------------------------- 1 | # Contributing Guide 2 | This guide details the process of contributing data dictionaries.
3 | 4 | ## Table of Contents 5 | * [Official Contribution Format](#official-contribution-format) 6 | * [Other Contribution Formats](#other-contribution-formats) 7 | * [Markdown](#markdown) 8 | * [MS Excel](#ms-excel) 9 | * [How to contribute?](#how-to-contribute) 10 | 11 | ## Official Contribution Format 12 | Data dictionaries are stored in **yaml** format. You can use the following schema as a reference when creating a new data dictionary. 13 | 14 | ``` yaml 15 | title: Conn Log 16 | description: This event generates data most similar to network flow.Also, is very similar to firewall logs. 17 | platform: zeek 18 | log_source: network-protocols 19 | event_code: conn 20 | event_version: '0' 21 | attack_data_sources: 22 | - Network Traffic 23 | event_fields: 24 | - standard_name: '@timestamp' 25 | name: ts 26 | type: date_time 27 | description: Timestamp of the beginning of the event in epoch format 28 | sample_value: '1300475167.096535' 29 | - standard_name: event_duration 30 | name: duration 31 | type: float 32 | description: How long the connection lasted. For 3-way or 4-way connection tear-downs, this will not include the final ACK 33 | sample_value: '0.120338' 34 | references: 35 | - text: Zeek Source 36 | link: https://docs.zeek.org/en/stable/scripts/base/protocols/conn/main.zeek.html#base-protocols-conn-main-zeek 37 | - text: OSSEM-DD 38 | link: https://github.com/OTRF/OSSEM-DD 39 | tags: 40 | - Network data source 41 | - Network connection 42 | ``` 43 | ## Other Contribution Formats 44 | ### Markdown 45 | If you are not familiar with yaml files, you can use the following **markdown** template to contribute a data dictionary. 46 | 47 | ```markdown 48 | # Conn Log 49 | ## Description 50 | This event generates data most similar to network flow. 51 | Also, is very similar to firewall logs.
52 | ## Platform 53 | zeek 54 | ## Log Source 55 | network-protocols 56 | ## Event Code 57 | conn 58 | ## Event Version 59 | 0 60 | ## ATT&CK Data Sources 61 | Network Traffic 62 | ## Data Dictionary 63 | | Standard Name | Field Name | Type | Description | Sample Value | 64 | | --- | --- | --- | --- | --- | 65 | | @timestamp | ts | date_time | Timestamp of the beginning of the event in epoch format | 1300475167.096535 | 66 | | event_duration | duration | float | How long the connection lasted. For 3-way or 4-way connection tear-downs, this will not include the final ACK | 0.120338 | 67 | ## References 68 | - [Zeek Source](https://docs.zeek.org/en/stable/scripts/base/protocols/conn/main.zeek.html#base-protocols-conn-main-zeek) 69 | - [OSSEM-DD](https://github.com/OTRF/OSSEM-DD) 70 | ## Tags 71 | - Network data source 72 | - Network connection 73 | ``` 74 | 75 | *Note: To complete the attack_data_sources field, use a comma (,) to separate multiple data sources.* 76 | 77 | We have created a [python script](https://github.com/OTRF/OSSEM/tree/master/resources/scripts/md_to_yaml.py) that creates a data dictionary in yaml format per markdown file. The name of the markdown file will be used as the name of the yaml file. By running the following command, you can parse all the **.md** files in your current directory (except readme.md). 78 | 79 | ```bash 80 | python3 md_to_yaml.py 81 | ``` 82 | 83 | ### MS Excel 84 | If you love working in MS Excel, you can use the following [template](https://github.com/OTRF/OSSEM/tree/master/resources/scripts/templates/xlsx_to_yaml_template.xlsx) in order to document your proposed data dictionaries. 85 | 86 | *Note: To complete the attack_data_sources field, use a comma (,) to separate multiple data sources.* 87 | 88 | We have created a [python script](https://github.com/OTRF/OSSEM/tree/master/resources/scripts/xlsx_to_yaml.py) that creates a data dictionary in yaml format per sheet within the MS Excel file.
The name of the sheet will be used as the name of the yaml file. By running the following command, you can parse all the **.xlsx** files in your current directory. 89 | 90 | ```bash 91 | python3 xlsx_to_yaml.py 92 | ``` 93 | 94 | ## How to Contribute? 95 | All the data dictionaries in yaml format are stored in the [OSSEM-DD](https://github.com/OTRF/OSSEM-DD) sub-repository. You can add a new data dictionary following the organization section of the [data dictionary authoring guide](https://github.com/OTRF/OSSEM/blob/master/docs/dd/guidelines/authoring_data_dictionaries.md). -------------------------------------------------------------------------------- /docs/dd/guidelines/intro.md: -------------------------------------------------------------------------------- 1 | # Guidelines -------------------------------------------------------------------------------- /docs/dd/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction -------------------------------------------------------------------------------- /docs/dd/notebooks/intro.md: -------------------------------------------------------------------------------- 1 | # Notebooks -------------------------------------------------------------------------------- /docs/dm/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This part of the project focuses on defining the required data in the form of data objects and the relationships among them needed to facilitate the creation of data analytics and validate the detection of adversary techniques. We have also extended this concept to the MITRE-ATT&CK framework [here](https://github.com/OTRF/OSSEM-DM/tree/main/use-cases/mitre_attack).
4 | 5 | ## Available documents 6 | |File|Description| 7 | |---|---| 8 | |[OSSEM Event Mappings (YAML file)](https://github.com/OTRF/OSSEM-DM/blob/main/relationships/_all_ossem_relationships.yml)|Security event logs mapped to OSSEM relationships (Includes ATT&CK data sources metadata)| 9 | |[ATT&CK Event Mappings (MD file)](https://github.com/OTRF/OSSEM/tree/master/docs/dm/mitre_attack/attack_ds_events_mappings.md)|Security event logs mapped to ATT&CK Data Sources Objects| 10 | |[ATT&CK Event Mappings (YAML file)](https://github.com/OTRF/OSSEM-DM/blob/main/use-cases/mitre_attack/attack_relationships.yml)|Security event logs mapped to ATT&CK Data Sources Objects| 11 | |[ATT&CK Event Mappings (CSV file)](https://github.com/OTRF/OSSEM-DM/blob/main/use-cases/mitre_attack/attack_events_mapping.csv)|Security event logs mapped to ATT&CK Data Sources Objects| 12 | 13 | ## References 14 | * [Defining ATT&CK Data Sources, Part I: Enhancing the Current State](https://medium.com/mitre-attack/defining-attack-data-sources-part-i-4c39e581454f) 15 | * [Defining ATT&CK Data Sources, Part II: Operationalizing the Methodology](https://medium.com/mitre-attack/defining-attack-data-sources-part-ii-1fc98738ba5b) 16 | * [ATT&CK Data Sources GitHub repository](https://github.com/mitre-attack/attack-datasources) 17 | * [CAR Analytics](https://car.mitre.org/wiki/Main_Page) 18 | * [Common Information Model](https://github.com/Cyb3rWard0g/OSSEM/blob/master/common_information_model) 19 | * [STIX Cybox ObjectRelationshipVocab-1.1](http://stixproject.github.io/data-model/1.2/cyboxVocabs/ObjectRelationshipVocab-1.1/) 20 | * [Cybox Object](http://cyboxproject.github.io/documentation/objects/) 21 | * [STIX Version 2.0.
Part 4 - Cyber Observable Object](https://docs.oasis-open.org/cti/stix/v2.0/stix-v2.0-part4-cyber-observable-objects.html) 22 | * [Finding Cyber Threats with ATTCK Based Analytics](https://www.mitre.org/sites/default/files/publications/16-3713-finding-cyber-threats%20with%20att%26ck-based-analytics.pdf) 23 | * [CAR Analytics Data Model](https://car.mitre.org/wiki/Data_Model) 24 | * [Quantifying your hunt - not your parent's red teaming](http://www.irongeek.com/i.php?page=videos/bsidescharm2018/track-1-06-quantify-your-hunt-not-your-parents-red-teaming-devon-kerr) -------------------------------------------------------------------------------- /docs/dm/mitre_attack/intro.md: -------------------------------------------------------------------------------- 1 | # MITRE ATT&CK -------------------------------------------------------------------------------- /docs/images/logo/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/docs/images/logo/favicon.ico -------------------------------------------------------------------------------- /docs/images/logo/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/docs/images/logo/logo.png -------------------------------------------------------------------------------- /docs/intro.md: -------------------------------------------------------------------------------- 1 | # OSSEM 2 | 3 | [![Open Source Love](https://badges.frapsoft.com/os/v3/open-source.svg?v=103)](https://github.com/ellerbrock/open-source-badges/) 4 | ![Open_Threat_Research Community](https://img.shields.io/badge/Open_Threat_Research-Community-brightgreen.svg) 5 | [![Twitter](https://img.shields.io/twitter/follow/OSSEM_Project.svg?style=social&label=Follow)](https://twitter.com/OSSEM_Project) 6 | 7 | The Open Source 
Security Events Metadata (OSSEM) is a community-led project that focuses primarily on the documentation and standardization of security event logs from diverse data sources and operating systems. Security events are documented in a dictionary format and can be used as a reference while mapping data sources to data analytics used to validate the detection of adversarial techniques. In addition, the project provides a common data model (CDM) that can be used by data engineers during data normalization procedures to allow security analysts to query and analyze data across diverse data sources. Finally, the project also provides documentation about the structure and relationships identified in specific data sources to facilitate the development of data analytics. 8 | 9 | # Goals 10 | 11 | * Define and share a common data model in order to improve the data standardization and transformation of security event logs 12 | * Define and share data structures and relationships identified in security events logs 13 | * Provide detailed information in a dictionary format about several security event logs to the community 14 | * Learn more about security event logs (Windows, Linux, MacOS, Azure, AWS, etc) 15 | * Have fun and think more about the data structure in your SIEM when it comes down to detection!! 16 | 17 | # Project Structure 18 | 19 | There are three main folders: 20 | 21 | * **Common Data Model (CDM)**: 22 | * Facilitates the normalization of data sets by providing a standard way to parse security event logs. 23 | * It is organized by specific [schema entities](https://github.com/OTRF/OSSEM-CDM/tree/master/schemas/entities) identified in several data sources. 24 | * The definitions of each schema entity and its respective attributes (field names) are mostly general descriptions that could help and expedite event logs parsing procedures.
25 | * Besides data [schema entities](https://github.com/OTRF/OSSEM-CDM/tree/master/schemas/entities), it provides the concept of [schema tables](https://github.com/OTRF/OSSEM-CDM/tree/master/schemas/tables) to aggregate common entities that can be used to parse several data sources with similar context. For example, the HTTP, Port, and User Agent entities can be used to normalize data providing context about the network traffic metadata captured in a network environment. 26 | * **Data Dictionaries (DD)**: 27 | * Contains specific information about several security event logs organized by operating system and their respective data providers 28 | * Each dictionary describes a single event log and its corresponding event field names 29 | * It provides the foundational concepts to create a data wiki in an organization. 30 | * **Detection Data Model (DDM)**: 31 | * Focuses on defining the required data in the form of data objects and the relationships among them needed to facilitate the creation of data analytics and validate the detection of adversary techniques 32 | * Developed initially to extend the definitions of ATT&CK Data Sources. 33 | * [MITRE ATT&CKcon 2018: Hunters ATT&CKing with the Data](https://youtu.be/QCDBjFJ_C3g) 34 | * [MITRE ATT&CKcon 2.0: Ready to ATT&CK? Bring Your Own Data (BYOD) and Validate Your Data Analytics!](https://youtu.be/eM0c_Gil-38) 35 | * Initial work in this project has been migrated to ATT&CK and improved by [@Cyb3rPandaH](https://twitter.com/Cyb3rPandaH) 36 | * [Defining ATT&CK Data Sources, Part I: Enhancing the Current State](https://medium.com/mitre-attack/defining-attack-data-sources-part-i-4c39e581454f) 37 | * We are currently extending the model to map security events to the relationships identified in ATT&CK.
38 | 39 | # Author 40 | 41 | * Roberto Rodriguez [@Cyb3rWard0g](https://twitter.com/Cyb3rWard0g) 42 | 43 | # Current Committers 44 | 45 | * Jose Luis Rodriguez [@Cyb3rPandaH](https://twitter.com/Cyb3rPandaH) 46 | * Nate Guagenti [@neu5ron](https://twitter.com/neu5ron) 47 | * Ricardo Dias [@hxnoyd](https://twitter.com/hxnoyd) 48 | 49 | # Projects Using OSSEM 50 | 51 | * [HELK](https://github.com/Cyb3rWard0g/HELK) 52 | * [Azure Sentinel Normalization](https://docs.microsoft.com/en-us/azure/sentinel/normalization-schema) 53 | 54 | # Resources 55 | 56 | * [Ready to hunt? First, Show me your data!](https://cyberwardog.blogspot.com/2017/12/ready-to-hunt-first-show-me-your-data.html) 57 | * [What's new in Windows 10, versions 1507 and 1511](https://docs.microsoft.com/en-us/windows/whats-new/whats-new-windows-10-version-1507-and-1511#bkmk-lsass) 58 | * [Download Security Audit Events for Windows (Spreadsheet)](https://www.microsoft.com/en-us/download/details.aspx?id=50034) 59 | * [Advanced Security Audit Policy Settings](https://docs.microsoft.com/en-us/windows/security/threat-protection/auditing/advanced-security-audit-policy-settings) 60 | * [Monitoring Active Directory for Signs of Compromise](https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/monitoring-active-directory-for-signs-of-compromise#audit-account-management) 61 | * [Audit Policy Recommendations](https://docs.microsoft.com/en-us/windows-server/identity/ad-ds/plan/security-best-practices/audit-policy-recommendations) 62 | * [Use Windows Event Forwarding to help with intrusion detection](https://docs.microsoft.com/en-us/windows/security/threat-protection/use-windows-event-forwarding-to-assist-in-intrusion-detection) 63 | * [Minimum recommended minimum audit policy](https://docs.microsoft.com/en-us/windows/security/threat-protection/use-windows-event-forwarding-to-assist-in-intrusion-detection#a-href-idbkmk-appendixaaappendix-a---minimum-recommended-minimum-audit-policy) 
64 | * [Windows ITPro Docs - Threat Protection](https://github.com/MicrosoftDocs/windows-itpro-docs/tree/master/windows/security/threat-protection) 65 | -------------------------------------------------------------------------------- /resources/images/CarbonBlackDataModel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/CarbonBlackDataModel.png -------------------------------------------------------------------------------- /resources/images/EndgameDataModel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/EndgameDataModel.png -------------------------------------------------------------------------------- /resources/images/OSSEM_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/OSSEM_logo.png -------------------------------------------------------------------------------- /resources/images/SysmonDataModel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/SysmonDataModel.png -------------------------------------------------------------------------------- /resources/images/attck_datasource_eventlogs_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/attck_datasource_eventlogs_example.png -------------------------------------------------------------------------------- /resources/images/datasource_dataobject_example.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/datasource_dataobject_example.png -------------------------------------------------------------------------------- /resources/images/event-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-1.png -------------------------------------------------------------------------------- /resources/images/event-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-10.png -------------------------------------------------------------------------------- /resources/images/event-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-11.png -------------------------------------------------------------------------------- /resources/images/event-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-12.png -------------------------------------------------------------------------------- /resources/images/event-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-13.png -------------------------------------------------------------------------------- /resources/images/event-14.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-14.png -------------------------------------------------------------------------------- /resources/images/event-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-15.png -------------------------------------------------------------------------------- /resources/images/event-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-16.png -------------------------------------------------------------------------------- /resources/images/event-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-17.png -------------------------------------------------------------------------------- /resources/images/event-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-18.png -------------------------------------------------------------------------------- /resources/images/event-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-19.png -------------------------------------------------------------------------------- /resources/images/event-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-2.png 
-------------------------------------------------------------------------------- /resources/images/event-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-20.png -------------------------------------------------------------------------------- /resources/images/event-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-21.png -------------------------------------------------------------------------------- /resources/images/event-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-22.png -------------------------------------------------------------------------------- /resources/images/event-255.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-255.png -------------------------------------------------------------------------------- /resources/images/event-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-3.png -------------------------------------------------------------------------------- /resources/images/event-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-4.png -------------------------------------------------------------------------------- /resources/images/event-400.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-400.png -------------------------------------------------------------------------------- /resources/images/event-403.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-403.png -------------------------------------------------------------------------------- /resources/images/event-4103.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-4103.png -------------------------------------------------------------------------------- /resources/images/event-4104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-4104.png -------------------------------------------------------------------------------- /resources/images/event-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-5.png -------------------------------------------------------------------------------- /resources/images/event-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-6.png -------------------------------------------------------------------------------- /resources/images/event-600.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-600.png -------------------------------------------------------------------------------- /resources/images/event-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-7.png -------------------------------------------------------------------------------- /resources/images/event-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-8.png -------------------------------------------------------------------------------- /resources/images/event-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OTRF/OSSEM/4a2b70aa5db88db970a254b055932c8915d76cec/resources/images/event-9.png -------------------------------------------------------------------------------- /resources/scripts/md_to_yaml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Project: OSSEM Data Dictionaries 4 | # Author: Jose Rodriguez (@Cyb3rPandaH) 5 | # License: GPLv3 6 | 7 | # Importing libraries 8 | import yaml 9 | yaml.Dumper.ignore_aliases = lambda *args : True 10 | 11 | import glob 12 | import os 13 | from os import path 14 | 15 | # Creating a list with markdown files' names in your current directory 16 | md_files = glob.glob(path.join(path.dirname(__file__),"*.md")) 17 | 18 | # Parsing every markdown file in your current directory 19 | for md_file_path in md_files: 20 | # Getting name of file (includes extension .md) 21 | file_name = os.path.basename(md_file_path) 22 | # Getting name of file (without extension .md) 23 | file_name = file_name[:-3] 24 | # Not parsing README.md 
files 25 | if file_name.startswith('README'): 26 | continue 27 | # Getting content of markdown file 28 | file_content = [ line for line in open(md_file_path) ] 29 | # Defining a control variable to parse sections of the markdown file 30 | control = '' 31 | # Defining references 32 | event_fields_yaml = [] 33 | description_yaml = '' 34 | references_yaml = [] 35 | tags_yaml = [] 36 | # Parsing every line of markdown file 37 | for line in file_content: 38 | # Getting title 39 | if line.startswith('# '): 40 | title_yaml = line[2:].rstrip() 41 | # Updating control variable to identify sections of the markdown file 42 | if line.startswith('## Description'): 43 | control = 'description' 44 | continue 45 | if line.startswith('## Platform'): 46 | control = 'platform' 47 | continue 48 | if line.startswith('## Log Source'): 49 | control = 'log source' 50 | continue 51 | if line.startswith('## Event Code'): 52 | control = 'event code' 53 | continue 54 | if line.startswith('## Event Version'): 55 | control = 'event version' 56 | continue 57 | if line.startswith('## ATT&CK Data Sources'): 58 | control = 'attack data sources' 59 | continue 60 | if line.startswith('## Data Dictionary'): 61 | control = 'data dictionary' 62 | continue 63 | if line.startswith('## References'): 64 | control = 'references' 65 | continue 66 | if line.startswith('## Tags'): 67 | control = 'tags' 68 | continue 69 | # Getting values to create yaml file 70 | if control == 'description': 71 | description_yaml = description_yaml + line.rstrip() 72 | if control == 'platform': 73 | platform_yaml = line.rstrip() 74 | if control == 'log source': 75 | log_source_yaml = line.rstrip() 76 | if control == 'event code': 77 | event_code_yaml = line.rstrip() 78 | if control == 'event version': 79 | event_version_yaml = line.rstrip() 80 | if control == 'attack data sources': 81 | attack_data_sources_yaml = line.rstrip().split(',') 82 | if control == 'data dictionary': 83 | if line.startswith('|'): 84 | if not 
(line.startswith('| Standard Name') or line.startswith('| --')): 85 | data = line.split('|') 86 | dict = {'standard_name' : data[1].strip(' '), 87 | 'name' : data[2].strip(' '), 88 | 'type' : data[3].strip(' '), 89 | 'description' : data[4].strip(' '), 90 | 'sample_value' : data[5].strip(' ')} 91 | event_fields_yaml.append(dict) 92 | if control == 'references': 93 | references_dict_yaml = {'text':line[line.find('[') + 1 : line.find(']')], 94 | 'link':line[line.find('(') + 1 : line.find(')')]} 95 | references_yaml.append(references_dict_yaml) 96 | if control == 'tags': 97 | tags_yaml.append(line[2:].rstrip()) 98 | # Dictionary of data to create yaml file 99 | data_dict = {'title' : title_yaml, 100 | 'description' : description_yaml, 101 | 'platform' : platform_yaml, 102 | 'log_source' : log_source_yaml, 103 | 'event_code' : event_code_yaml, 104 | 'event_version' : event_version_yaml, 105 | 'attack_data_sources' : attack_data_sources_yaml, 106 | 'event_fields' : event_fields_yaml, 107 | 'references' : references_yaml, 108 | 'tags' : tags_yaml} 109 | # Creating yaml file 110 | with open(file_name + '.yaml', 'w') as file: 111 | yaml.dump(data_dict, file, sort_keys = False, width = float("inf")) -------------------------------------------------------------------------------- /resources/scripts/ossem2logstash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Author: Roberto Rodriguez (@Cyb3rWard0g) 4 | # License: GPLv3 5 | # Reference: 6 | 7 | from jinja2 import Template 8 | import copy 9 | import yaml 10 | import json 11 | import glob 12 | from os import path 13 | 14 | print("[+] Processing files inside {} directory".format('../../source/data_dictionaries/windows/sysmon/events')) 15 | 16 | # ******** Open every event yaml file available **************** 17 | print("[+] Opening Sysmon Events Yaml files..") 18 | yaml_files = sorted(glob.glob(path.join(path.dirname(__file__), 
#!/usr/bin/env python3

# Author: Roberto Rodriguez (@Cyb3rWard0g)
# License: GPLv3
# Reference:

"""Render the Logstash Sysmon config from the Sysmon event YAML dictionaries.

Every ``event-<N>.yml`` file in the Sysmon events directory is loaded, sorted
by its numeric event id, and handed to the ``templates/logstash/sysmon.conf``
Jinja2 template as ``renderyaml``. The rendered text is written to
``../parsers/logstash/sysmon.conf``.
"""

from jinja2 import Template
import copy
import yaml
import json
import glob
from os import path

# All paths are resolved against this script's directory so that the inputs,
# the template and the output refer to the same tree whatever the CWD is.
script_dir = path.dirname(__file__)
events_dir = path.join(script_dir, '../../source/data_dictionaries/windows/sysmon/events')


def event_number(yaml_path):
    """Return the integer N of an ``event-N.yml`` path (used as the sort key)."""
    return int(path.basename(yaml_path).split(".")[0].split('event-')[1])


print("[+] Processing files inside {} directory".format('../../source/data_dictionaries/windows/sysmon/events'))

# ******** Open every event yaml file available ****************
print("[+] Opening Sysmon Events Yaml files..")
yaml_files = sorted(glob.glob(path.join(events_dir, "*.yml")), key=event_number)
yaml_loaded = []
for yf in yaml_files:
    # Context manager so each handle is closed as soon as it has been parsed.
    with open(yf) as event_file:
        yaml_loaded.append(yaml.safe_load(event_file.read()))

# ******** Creating Logstash Config ********
print("\n[+] Creating Logstash config..")
print(" [>] Reading logstash template..")
with open(path.join(script_dir, "templates/logstash/sysmon.conf")) as template_file:
    yaml_template = Template(template_file.read())

# Create config file
print(" [>] Writing steps to config ..")
# Render from a deep copy so the loaded YAML objects are never handed to the template.
yaml_for_render = copy.deepcopy(yaml_loaded)

# Generate the config
config = yaml_template.render(renderyaml=yaml_for_render)
print("\n [>] Writing config report to sysmon.conf")
with open(path.join(script_dir, '../parsers/logstash/sysmon.conf'), 'w') as config_file:
    config_file.write(config)
#!/usr/bin/env python3

# Author: Roberto Rodriguez (@Cyb3rWard0g)
# License: GPLv3
# Reference:

"""Aggregate the ATT&CK data source event mappings and render their table.

All ``*.yml`` files in the event-mappings directory (except the aggregate file
itself) are merged into ``all_data_sources.yml``; the merged list is then
rendered through ``templates/attack/ds_mapping_template.md`` into
``docs/attack/windows/ds_mapping_table.md``.
"""

import yaml
import glob
from os import path
import copy
from jinja2 import Template

# Inputs, template and outputs are all resolved against this script's
# directory, so reads and writes always target the same tree.
script_dir = path.dirname(__file__)
mappings_dir = path.join(script_dir, '../../attack_data_sources/event-mappings')
aggregate_name = 'all_data_sources.yml'

print("[+] Processing files inside {} directory".format('../../attack_data_sources/event-mappings'))

# ******** Open every ATT&CK data source YAML ****************
print("[+] Opening ATT&CK YAML files..")
# A glob of "[!all_data_sources]*.yml" is a negated character class: it skips
# every file whose first character is one of a, l, _, d, t, s, o, u, r, c, e.
# Match everything and exclude only the aggregate file by name instead.
# Sorted so the aggregate file is written in a stable order.
ds_files = sorted(
    yf for yf in glob.glob(path.join(mappings_dir, "*.yml"))
    if path.basename(yf) != aggregate_name
)

# Initiate all data sources list
all_data_sources = []

# Loop through all data source records to create one file for easy consumption
for yf in ds_files:
    with open(yf) as ds_file:
        ds = yaml.safe_load(ds_file.read())
    # An empty YAML document loads as None; there is nothing to merge then.
    if ds:
        all_data_sources.extend(ds)

print("[+] Writing one ATT&CK data sources YAML files..")
with open(path.join(mappings_dir, aggregate_name), 'w') as file:
    yaml.dump(all_data_sources, file, sort_keys=False)

# ***** Creating Mappings Table *****
with open(path.join(script_dir, 'templates/attack/ds_mapping_template.md')) as template_file:
    table_template = Template(template_file.read())
print("[+] Creating data soures mappings table.")
yaml_for_render = copy.deepcopy(all_data_sources)
markdown = table_template.render(datasources=yaml_for_render)
with open(path.join(script_dir, '../../docs/attack/windows/ds_mapping_table.md'), 'w') as table_file:
    table_file.write(markdown)
#!/usr/bin/env python3

# Authors: Roberto Rodriguez (@Cyb3rWard0g) Ashwin Patil (@ashwinpatil)
# Community: Open Threat Research (@OTR_Community)
# License: GPL-3.0

"""Generate a KQL Sysmon parser from a Sysmon XML schema.

The schema is read from a local file or fetched from a URL, every event and
its fields (with their positional index) are collected, and the result is
rendered through ``templates/kql/sysmon_parser.txt`` into
``<output-path>/SysmonKQLParserV<target-version>.txt``.
"""

from jinja2 import Template
import copy
import argparse
import untangle
from datetime import date
import urllib.error
import urllib.request
import os
import logging

# ******** Setting up Argument Parsers ****************
# Initial description
text = "This script reads a local or remote sysmon.xml schema file and creates a Kusto Qquery Language (KQL) parser (function)"

# Initiate the parser
parser = argparse.ArgumentParser(description=text)

# Add arguments
parser.add_argument("-s", "--schema-file", help="sysmon xml schema file. It can be a local or remote file", type=str, required=True)
parser.add_argument("-t", "--target-version", help="sysmon version", type=str, required=True)
parser.add_argument("-o", "--output-path", help="path to where to write the new sysmon KQL parser. i.e. parsers/sysmon/", type=str, required=True)
# -d and -v share one destination; the WARNING default comes from -d.
parser.add_argument("-d", "--debug", help="Print lots of debugging statements", action="store_const", dest="loglevel", const=logging.DEBUG, default=logging.WARNING)
parser.add_argument("-v", "--verbose", help="Be verbose", action="store_const", dest="loglevel", const=logging.INFO)

# ******** Validating Input Arguments ****************
args = parser.parse_args()
schema_file = args.schema_file
sysmon_version = args.target_version
output_file_path = os.path.abspath(args.output_path)

# Setting Logging
logging.basicConfig(format='%(asctime)s [%(name)s][%(levelname)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=args.loglevel)
log = logging.getLogger('Sysmon Parser')

# Aggregate files from Input Paths
if os.path.isfile(schema_file):
    log.info(f'Local file Provided: {schema_file}')
    # untangle is given the path and reads the file itself.
    sysmon_schema = schema_file
else:
    # Anything that is not an existing file is treated as a URL. The request
    # is made once, and a bad value ends with a usage error instead of a
    # traceback or a silent exit.
    log.debug("Making a network connection")
    try:
        with urllib.request.urlopen(schema_file) as response:
            log.info(f'Url Provided: {schema_file}')
            log.debug("Reading contents of remote schema file")
            sysmon_schema = response.read().decode('utf-8')
    except (ValueError, OSError) as err:
        # ValueError: not a URL at all; OSError covers urllib.error.URLError.
        parser.error(f'cannot read schema file {schema_file!r}: {err}')

# ******** Processing Sysmon Schema ****************
# Parse Sysmon schema XML
log.info('Parsing Sysmon schema file')
obj = untangle.parse(sysmon_schema)

# Sysmon Manifest
log.debug("Getting Sysmon Manifest")
sysmon_manifest = obj.manifest
# Events Metadata
log.debug("Getting Sysmon Events Data")
eventlist = obj.manifest.events.event

# Initializing list
all_sysmon = []

# ******** Iterating over Sysmon Events ****************
log.info('Iterating over Sysmon events')
for item in eventlist:
    log.info('Processing Event: {} - {}'.format(item['name'], item['value']))
    sysmon_event = {'name': item['name'], 'id': item['value'], 'events': []}

    log.info('Iterating over event field names')
    # The index is the field's position inside the event; the template uses
    # it to address EventDetail.[index].
    for index, field in enumerate(item.data):
        log.debug('Field Name: {}'.format(field['name']))
        sysmon_event['events'].append({'name': field['name'], 'index': index})
    all_sysmon.append(sysmon_event)

# ******** Unique List of Events ****************
log.info('Creating a list of all unique field names')
# Seeded with the columns the template projects ahead of the event fields;
# first-seen order is preserved.
unique_fields = ['TimeGenerated', 'Source', 'Computer', 'UserName', 'EventID']
for sysevent in all_sysmon:
    for field in sysevent['events']:
        if field['name'] not in unique_fields:
            unique_fields.append(field['name'])

# ******** Open Sysmon KQL Parser template ****************
sysmon_parser_template = os.path.join(os.path.dirname(__file__), "templates/kql/sysmon_parser.txt")
log.info('Reading KQL parser template')
with open(sysmon_parser_template) as template_file:
    kql_parser_template = Template(template_file.read())

# ******** Processing Sysmon Events and Jinja template ****************
log.info('Processing Jinja template')
sysmon_for_render = copy.deepcopy(all_sysmon)
# Kept in its own name so the ArgumentParser bound to `parser` is not clobbered.
kql_parser = kql_parser_template.render(
    sysmon=sysmon_for_render,
    uniquesysmon=unique_fields,
    today=date.today(),
    sysmonversion=sysmon_version,
    schemaversion=sysmon_manifest['schemaversion'],
    binaryversion=sysmon_manifest['binaryversion'],
)

# ******** Creating File ****************
log.info('Creating Parser in: {}'.format(output_file_path))
output_file = os.path.join(output_file_path, f'SysmonKQLParserV{sysmon_version}.txt')
with open(output_file, 'w') as parser_file:
    parser_file.write(kql_parser)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__appname__ = 'OSSEM Converter'
__author__ = 'Ricardo Dias @hxnoyd'
__version__ = "0.1.6"

import argparse
import os

# NOTE: the third-party dependencies (PyYAML, natsort, jinja2) are imported
# inside the methods that use them, so this module can be imported and
# `--help` can run without them installed.


class ossemParser():
    """Load OSSEM YAML definitions and export them as YAML or Markdown.

    Parsed files are sorted into four contexts, named after the directory
    they live under: common information model (``cim_*``), data dictionaries
    (``data_dictionaries*``), detection data model (``ddm_*``) and ATT&CK data
    sources (``ds_*``). For each context, ``README.yml`` files go to the
    ``*_indexes`` list and every other ``.yml`` file to the main list.
    """

    # Bookkeeping keys added by read_yml(); never written back to YAML.
    _TEMPORARY_KEYS = ('rootpath', 'filepath', 'filename')

    def __init__(self):
        self.cim_entities = []
        self.cim_entities_indexes = []
        self.cim_ignore = []
        self.ignored_paths = []
        self.data_dictionaries = []
        self.data_dictionaries_indexes = []
        self.data_dictionaries_ignore = []
        self.ddm_list = []
        self.ddm_list_indexes = []
        self.ds_list = []
        self.ds_list_indexes = []

    def remove_new_lines(self, text):
        """Return ``text`` with newlines replaced by spaces.

        Falsy values (``None``, ``''``) are returned unchanged.
        """
        if not text:
            return text
        return text.replace('\n', ' ')

    @staticmethod
    def _load_yaml(file_path):
        """Parse one YAML file and return the loaded document."""
        import yaml

        # NOTE(security): yaml.Loader can construct arbitrary Python objects.
        # Kept for compatibility with existing data; only run this on trusted
        # files, or switch to yaml.SafeLoader once the data allows it.
        with open(file_path, 'r') as handle:
            return yaml.load(handle, Loader=yaml.Loader)

    def read_yml(self, context, root_path, file_path):
        """Read a YAML file and return it as a dict, or ``None`` on failure.

        Args:
            context: name of the context directory (e.g. ``data_dictionaries``).
            root_path: directory of the file as a list of path components;
                it must contain ``context``.
            file_path: path of the YAML file to read.

        Returns:
            The loaded mapping with three extra keys, or ``None`` when the
            file cannot be parsed, is empty, or is not a mapping:
            ``rootpath`` (components up to and including ``context``, joined
            with ``/``), ``filepath`` (the components below it) and
            ``filename`` (base name up to the first dot).

        Raises:
            ValueError: if ``context`` is not in ``root_path``.
        """
        split_at = root_path.index(context) + 1
        rootpath = root_path[:split_at]
        filepath = root_path[split_at:]

        try:
            yml_file = self._load_yaml(file_path)
        except ImportError:
            # A missing PyYAML is an environment problem, not a bad file.
            raise
        except Exception as err:
            # Best effort: a broken file is reported and skipped.
            print('[!] Failed parsing {}: {}'.format(file_path, err))
            return None

        # The bookkeeping keys below can only be attached to a mapping.
        if not yml_file or not isinstance(yml_file, dict):
            print('[!] Failed parsing {}'.format(file_path))
            return None

        yml_file['rootpath'] = '/'.join(rootpath)
        yml_file['filepath'] = '/'.join(filepath)
        yml_file['filename'] = os.path.basename(file_path).split('.')[0]
        return yml_file

    @staticmethod
    def _is_incomplete_entity(yml_data):
        """Return True when a CIM entity lacks data fields, title or description."""
        return (not yml_data.get('data_fields')
                or yml_data.get('title') is None
                or yml_data.get('description') is None)

    def parse_yaml(self, path):
        """Walk ``path`` and load every OSSEM YAML file found under it.

        ``README.yml`` files are stored as indexes; other ``.yml`` files whose
        name does not contain ``README`` are stored as entries. Incomplete CIM
        entities are skipped and their path is recorded in ``ignored_paths``.
        """
        cim = 'common_information_model'
        # (context directory, entry list, index list); the first context found
        # in a file's directory components wins, in this order.
        contexts = (
            (cim, self.cim_entities, self.cim_entities_indexes),
            ('data_dictionaries', self.data_dictionaries, self.data_dictionaries_indexes),
            ('detection_data_model', self.ddm_list, self.ddm_list_indexes),
            ('attack_data_sources', self.ds_list, self.ds_list_indexes),
        )

        for root, _dirs, files in os.walk(path):
            # Normalise the separator so the split also works where os.sep != '/'.
            parts = root.replace(os.sep, '/').split('/')
            for name in files:
                if not name.endswith('.yml'):
                    continue
                is_index = name == 'README.yml'
                if not is_index and 'README' in name:
                    continue

                filepath = os.path.join(root, name)
                for context, entries, indexes in contexts:
                    if context not in parts:
                        continue
                    # An ignored CIM file may still match a later context.
                    if context == cim and not is_index and name in self.cim_ignore:
                        continue

                    yml_data = self.read_yml(context, parts, filepath)
                    if yml_data:
                        if is_index:
                            indexes.append(yml_data)
                        elif context == cim and self._is_incomplete_entity(yml_data):
                            print('[!] Skipping {} because entity is incomplete'.format(filepath))
                            self.ignored_paths.append(filepath)
                        else:
                            entries.append(yml_data)
                    break

    def write_yml(self, root, filename, entry):
        """Write ``entry`` as ``<root>/<context>/<entry filepath>/<filename>``.

        The bookkeeping keys added by :meth:`read_yml` are left out of the
        file. ``entry`` itself is not modified, so it can still be exported
        again afterwards.
        """
        import yaml

        context = entry['rootpath'].split('/')[-1]
        yml_path = os.path.join(root, context, entry['filepath'])

        # create fullpath if needed
        os.makedirs(yml_path, exist_ok=True)

        filepath = os.path.join(yml_path, filename)

        # drop temporary fields from a copy, keeping the original key order
        payload = {key: value for key, value in entry.items()
                   if key not in self._TEMPORARY_KEYS}

        with open(filepath, 'w') as dd_yaml_file:
            dd_yaml_file.write(yaml.dump(payload, sort_keys=False))
        print('[*] Created {}'.format(filepath))

    def export_to_yaml(self, root):
        """Generate a YAML version of all parsed OSSEM data under ``root``.

        Returns:
            ``True`` once every entry and index has been written.
        """
        groups = (
            (self.data_dictionaries, self.data_dictionaries_indexes),
            (self.cim_entities, self.cim_entities_indexes),
            (self.ddm_list, self.ddm_list_indexes),
            (self.ds_list, self.ds_list_indexes),
        )
        for entries, indexes in groups:
            for entry in entries:
                self.write_yml(root, '{}.yml'.format(entry['filename']), entry)
            for entry in indexes:
                self.write_yml(root, 'README.yml', entry)

        return True

    @staticmethod
    def _find_index_file(directory):
        """Return the path of the README index in ``directory``, or ``None``.

        ``README.yml`` is the name parse_yaml() recognises; the lower-case
        spelling is accepted as well for existing trees.
        """
        for candidate in ('README.yml', 'readme.yml'):
            candidate_path = os.path.join(directory, candidate)
            if os.path.isfile(candidate_path):
                return candidate_path
        return None

    def write_markdown(self, root, entry, template, entry_type=False):
        """Render ``entry`` with ``template`` into a Markdown file under ``root``.

        Args:
            root: output root directory.
            entry: mapping produced by :meth:`read_yml`. It gains a
                ``sub_data_sets`` key and, for indexes with sub-directories,
                a ``data_set_type`` key.
            template: object with a ``render(entry=...)`` method.
            entry_type: falsy for a regular entry, written as
                ``<filename>.md``. Otherwise the name of the sub-directory
                holding the indexed files (``events``, ``entities``,
                ``tables``); the entry is then written as ``README.md`` and
                enriched with one row per sub data set.
        """
        import yaml
        from natsort import natsorted

        context = entry['rootpath'].split('/')[-1]
        md_path = os.path.join(root, context, entry['filepath'])
        root_path = entry['rootpath']
        sub_data_sets = []

        # create fullpath if needed
        os.makedirs(md_path, exist_ok=True)

        if entry_type:
            # enrich markdown with sub data set
            md_file_path = os.path.join(md_path, 'README.md')
            source_dir = os.path.join(root_path, entry['filepath'])
            for item in sorted(os.listdir(source_dir)):
                item_path = os.path.join(source_dir, item)
                if not os.path.isdir(item_path):
                    continue

                if item == entry_type:
                    # indexes pointing to events
                    entry['data_set_type'] = item  # TODO: capitalize
                    for event in natsorted(os.listdir(item_path)):
                        if not event.endswith('.yml'):
                            continue
                        event_file_path = os.path.join(item_path, event)

                        # skip file paths marked as ignored
                        if event_file_path in self.ignored_paths:
                            continue

                        try:
                            with open(event_file_path, 'r') as handle:
                                readme = yaml.load(handle, Loader=yaml.Loader)
                            if readme:
                                sub_data_sets.append({
                                    'title': readme['event_code'] if 'event_code' in readme else readme['title'],
                                    'link': '{}/{}.md'.format(item, event.split('.')[0]),
                                    'description': self.remove_new_lines(readme['description']),
                                    'tags': readme['tags'],
                                    'version': readme['event_version']})
                        except Exception as err:
                            # Best effort: a file that is unreadable or lacks
                            # one of the keys above is reported and left out.
                            print('[!] Failed parsing {}: {}'.format(event_file_path, err))
                else:
                    # indexes pointing to other indexes
                    entry['data_set_type'] = 'Data Set'
                    index_root_path = self._find_index_file(item_path)
                    if index_root_path is None:
                        print('[!] Skipping {} because it has no README.yml'.format(item_path))
                        continue
                    with open(index_root_path, 'r') as handle:
                        readme = yaml.load(handle, Loader=yaml.Loader)
                    if readme:
                        if readme['description']:
                            # keep only the first sentence of the description
                            desc = '{}.'.format(readme['description'].split('.')[0])
                        else:
                            desc = readme['description']
                        sub_data_sets.append({
                            'title': readme['title'],
                            'link': '{}/'.format(item),
                            'description': desc})
        else:
            md_file_path = os.path.join(md_path, '{}.md'.format(entry['filename']))

        entry['sub_data_sets'] = sub_data_sets

        with open(md_file_path, 'w') as md:
            md.write(template.render(entry=entry))

        print('[*] Created {}'.format(md_file_path))

    def export_to_markdown(self, root):
        """Generate the Markdown version of all parsed OSSEM data under ``root``."""
        from jinja2 import Environment, FileSystemLoader

        # Templates are looked up in ./templates/ first (as before), then in
        # the templates directory next to this script, so the tool also works
        # when started from another directory.
        script_templates = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'templates')
        env = Environment(loader=FileSystemLoader(['templates/', script_templates]))
        readme_template = env.get_template('readme_template.md')

        # (entries, entry template, indexes, sub-directory the index points to)
        groups = (
            (self.data_dictionaries, 'data_dictionary_template.md',
             self.data_dictionaries_indexes, 'events'),
            (self.cim_entities, 'cim_entity_template.md',
             self.cim_entities_indexes, 'entities'),
            (self.ddm_list, 'ddm_relationships_template.md',
             self.ddm_list_indexes, 'tables'),
            (self.ds_list, 'attack/ds_template.md',
             self.ds_list_indexes, 'tables'),
        )
        # Load every template up front so a missing one fails before any
        # file is written.
        loaded = [(entries, env.get_template(template_name), indexes, entry_type)
                  for entries, template_name, indexes, entry_type in groups]

        for entries, entry_template, indexes, entry_type in loaded:
            for entry in entries:
                self.write_markdown(root, entry, entry_template)
            for entry in indexes:
                self.write_markdown(root, entry, readme_template, entry_type)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='A tool to convert OSSEM data')
    # The --from-md and --to-yml options are currently disabled.
    parser.add_argument('--from-yml',
                        help='path to import OSSEM yaml data from')
    parser.add_argument('--to-md',
                        help='path to export OSSEM markdown data')

    args = parser.parse_args()
    ossem = ossemParser()

    if not args.to_md:
        print('[!] You forgot to select an output. Check the available output arguments with --help.')
    elif not args.from_yml:
        print('[!] You can only export to Markdown from YAML')
    else:
        print('[*] Parsing OSSEM from YAML')
        ossem.parse_yaml(args.from_yml)
        print('[*] Exporting OSSEM to Markdown')
        ossem.export_to_markdown(args.to_md)
You can only export to Markdown from YAML') 309 | else: 310 | print('[*] Parsing OSSEM from YAML') 311 | ossem.parse_yaml(args.from_yml) 312 | print('[*] Exporting OSSEM to Markdown') 313 | ossem.export_to_markdown(args.to_md) -------------------------------------------------------------------------------- /resources/scripts/templates/attack/ds_mapping_template.md: -------------------------------------------------------------------------------- 1 | # Data Modeling Table 2 | 3 | |Data Source|Sub Data Source|Source|Relationship|Target|EventID|Event Description|Event Channel| 4 | | :---| :---| :---| :---|:---|:---|:---|:---| 5 | {% for ds in datasources|sort(attribute='data_source') %}|{{ds['data_source']}} |{{ds['sub_data_source']}} |{{ds['source_data_element']}} |{{ds['relationship']}} |{{ds['target_data_element']}} |{{ds['event_id']}} |{{ds['event_id_description']}} |{{ds['event_channel']}} | 6 | {% endfor %} -------------------------------------------------------------------------------- /resources/scripts/templates/attack/ds_template.md: -------------------------------------------------------------------------------- 1 | # {{entry['title']}} 2 | {{entry['description']}} 3 | 4 | ## Data Fields 5 | |Data Source|Description| 6 | |---|---| 7 | {%- if entry['data_fields'] %} 8 | {%- for row in entry['data_fields'] %} 9 | |{{row['data_source']}}|{{row['description']}}| 10 | {%- endfor %} 11 | {% endif %} 12 | 13 | {% if entry['references'] %} 14 | ## References 15 | {%- for resource in entry['references'] %} 16 | * [{{resource['text']}}]({{resource['link']}}) 17 | {%- endfor %} 18 | {% endif %} 19 | {% if entry['tags'] %} 20 | ## Tags 21 | {%- for tag in entry['tags'] %} 22 | * {{tag}} 23 | {%- endfor %} 24 | {% endif %} -------------------------------------------------------------------------------- /resources/scripts/templates/attack_ds_event_mappings.md: -------------------------------------------------------------------------------- 1 | # ATT&CK DS Event Mappings 2
| 3 | |Data Source|Component|Source|Relationship|Target|EventID|Event Name|Event Platform|Log Provider|Log Channel|Audit Category|Audit Sub-Category|Enable Commands| GPO Audit Policy| 4 | | :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| 5 | {% for dr in ds_event_mappings|sort(attribute='name') %}{% for se in dr['security_events'] %}|{{dr['attack'].data_source}}|{{dr['attack'].data_component}}|{{dr['behavior'].source}}|{{dr['behavior'].relationship}}|{{dr['behavior'].target}}|{{se['event_id']}}|{{se['name']}}|{{se['platform']}}|{{se['log_provider']}}|{{se['log_channel']|default('None')}}|{{se['audit_category']|default('None')}}|{{se['audit_sub_category']|default('None')}}|{% if se['log_channel'] == "Security" %} `auditpol /set /subcategory:"{{se['audit_sub_category']}}" /success:enable /failure:enable` {% elif se['log_channel'] == "Microsoft-Windows-Sysmon/Operational" %} `<{{se['audit_category']}} onmatch="exclude" />` {% else %}None{% endif %}|{% if se['log_channel'] == "Security" %} Computer Configuration -> Windows Settings -> Security Settings -> Advanced Audit Policy Configuration -> System Audit Policies -> {{se['audit_category']}} -> Audit {{se['audit_sub_category']}} {% else %}None{% endif %}| 6 | {% endfor %}{% endfor %} -------------------------------------------------------------------------------- /resources/scripts/templates/cim_entity_template.md: -------------------------------------------------------------------------------- 1 | # {{entry['title']}} 2 | {{entry['description']}} 3 | 4 | ## Data Fields 5 | |Standard Name|Type|Description|Sample Value| 6 | |---|---|---|---| 7 | {%- for row in entry['data_fields'] %} 8 | |{{row['standard_name']}}|{{row['type']}}|{{row['description']}}|{{row['sample_value']}}| 9 | {%- endfor %} 10 | 11 | {%- if entry['references'] %} 12 | 13 | ## References 14 | {%- for reference in entry['references'] %} 15 | * [{{reference['text']}}]({{reference['link']}}) 16 | {%- endfor %} 17
| {% endif %} 18 | {%- if entry['tags'] %} 19 | ## Tags 20 | {%- for tag in entry['tags'] %} 21 | * {{tag}} 22 | {%- endfor %} 23 | {%- endif %} -------------------------------------------------------------------------------- /resources/scripts/templates/data_dictionary_template.md: -------------------------------------------------------------------------------- 1 | # {{entry['title']}} 2 | ###### Version: {{entry['event_version']}} 3 | 4 | ## Description 5 | {{entry['description']}} 6 | 7 | ## Data Dictionary 8 | |Field Name|Type|Description|Sample Value| 9 | |---|---|---|---| 10 | {%- for row in entry['event_fields'] %} 11 | |{{row['name']}}|{{row['type']}}|{{row['description']}}|`{{row['sample_value']}}`| 12 | {%- endfor %} 13 | {% if entry['references'] %} 14 | ## References 15 | {%- for resource in entry['references'] %} 16 | * [{{resource['text']}}]({{resource['link']}}) 17 | {%- endfor %} 18 | {% endif %} 19 | {%- if entry['tags'] %} 20 | ## Tags 21 | {%- for tag in entry['tags'] %} 22 | * {{tag}} 23 | {%- endfor %} 24 | {%- endif %} -------------------------------------------------------------------------------- /resources/scripts/templates/ddm_relationships_template.md: -------------------------------------------------------------------------------- 1 | # {{entry['title']}} 2 | {{entry['description']}} 3 | 4 | ## Data Fields 5 | {% if entry['title'] == 'Data Object Relationships' %} 6 | |Sub Data Sources|Data Objects (Origin)|Relationship|Data Objects (Destination)| 7 | |---|---|---|---| 8 | {%- for row in entry['data_fields'] %} 9 | |{{row['sub_data_sources']}}|{{row['data_objects_(origin)']}}|{{row['relationship']}}|{{row['data_objects_(destination)']}}| 10 | {%- endfor %} 11 | {% else %} 12 | |ATT&CK Data Source|Sub Data Source|Source Data Object|Relationship|Destination Data Object|EventID| 13 | |---|---|---|---|---|---| 14 | {%- for row in entry['data_fields'] %} 15 | 
|{{row['att&ck_data_source']}}|{{row['sub_data_source']}}|{{row['source_data_object']}}|{{row['relationship']}}|{{row['destination_data_object']}}|{{row['eventid']}}| 16 | {%- endfor %} 17 | {% endif %} 18 | 19 | {% if entry['references'] %} 20 | ## References 21 | {%- for reference in entry['references'] %} 22 | * [{{reference['text']}}]({{reference['link']}}) 23 | {%- endfor %} 24 | {% endif %} 25 | {% if entry['tags'] %} 26 | ## Tags 27 | {%- for tag in entry['tags'] %} 28 | * {{tag}} 29 | {%- endfor %} 30 | {% endif %} -------------------------------------------------------------------------------- /resources/scripts/templates/entity.md: -------------------------------------------------------------------------------- 1 | # {{entidad['name']}} 2 | 3 | {{entidad['description']}} 4 | 5 | ## Attributes 6 | 7 | | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---| 9 | {% for e in entidad['attributes'] | sort(attribute='name') %} | {{e['name']}} | {{e['type']}} | {{e['description']}} | ```{{e['sample_value']}}``` | 10 | {% endfor %} -------------------------------------------------------------------------------- /resources/scripts/templates/kql/sysmon_parser.txt: -------------------------------------------------------------------------------- 1 | // KQL Sysmon Event Parser 2 | // Last Updated Date: {{today}} 3 | // Sysmon Version: {{sysmonversion}}, Binary Version : {{binaryversion}}, Schema Version: {{schemaversion}} 4 | // 5 | // Authors: 6 | // Roberto Rodriguez (@Cyb3rWard0g), Ashwin Patil (@ashwinpatil), MSTIC R&D 7 | //{% raw %} 8 | let EventData = Event 9 | | where Source == "Microsoft-Windows-Sysmon" 10 | | extend RenderedDescription = tostring(split(RenderedDescription, ":")[0]) 11 | | project TimeGenerated, 12 | Source, 13 | EventID, 14 | Computer, 15 | UserName, 16 | EventData, 17 | RenderedDescription 18 | | extend EvData = parse_xml(EventData) 19 | | extend EventDetail = EvData.DataItem.EventData.Data 20 | | project-away EventData, 21 | 
EvData;{% endraw %} 22 | {% for event in sysmon %}// Event ID {{event['id']}} 23 | //-------------------------- 24 | let {{event['name']}}_{{event['id']}}{% raw %}=() { 25 | let processEvents = EventData 26 | | where EventID == {% endraw %}{{event['id']}} 27 | | extend {% for field in event['events'] if field['name'] not in ['Hashes','Hash'] %}{{field['name']}}{% raw %} = EventDetail.[{% endraw %}{{field['index']}}{% raw %}].["#text"]{% endraw %}{{ ", " if not loop.last else "" }} 28 | {% endfor -%} 29 | {% for field in event['events'] -%} 30 | {% if field['name'] in ['Hashes','Hash'] -%} 31 | {%- raw %}| extend {% endraw %}{{field['name']}}{% raw %} = extract_all(@"(?P<key>\w+)=(?P<value>[a-zA-Z0-9]+)", dynamic(["key","value"]), tostring(EventDetail.[{% endraw %}{{field['index']}}{% raw %}].["#text"])){% endraw %} 32 | {%- raw %}| mv-apply {% endraw %}{{field['name']}}{% raw %} on (summarize {% endraw %}{{field['name']}}{% raw %} = make_bag(pack(tostring({% endraw %}{{field['name']}}{% raw %}[0]), tostring({% endraw %}{{field['name']}}{% raw %}[1])))){% endraw %} 33 | {% endif %}{% endfor %} 34 | {%- raw %}| project-away EventDetail 35 | ; 36 | processEvents; 37 | }; 38 | {% endraw %} 39 | {%- endfor -%} 40 | {% raw %}(union isfuzzy=true{% endraw %} 41 | {% for event in sysmon -%} 42 | {{event['name']}}_{{event['id']}}{{ ", " if not loop.last else "" }} 43 | {% endfor -%}){% raw %} 44 | | extend Details = column_ifexists("Details", ""), 45 | RuleName = column_ifexists("RuleName", ""), 46 | PreviousCreationUtcTime=column_ifexists("PreviousCreationUtcTime", ""), 47 | Hashes = column_ifexists("Hashes", ""), 48 | Hash = column_ifexists("Hash", ""){% endraw %} 49 | {%raw%}| project {% endraw %}{% for uniqueevent in uniquesysmon %}{{ uniqueevent }}{{ ", " if not loop.last else "" }} 50 | {% endfor -%} -------------------------------------------------------------------------------- /resources/scripts/templates/logstash/sysmon.conf: 
-------------------------------------------------------------------------------- 1 | # Author: Roberto Rodriguez (@Cyb3rWard0g) 2 | # License: GPL-3.0 3 | 4 | filter { 5 | if [log_name] =~ /^[mM]icrosoft\-[wW]indows\-[sS]ysmon\/[oO]perational$/ { 6 | mutate { 7 | add_field => { 8 | "event_timezone" => "UTC" 9 | "etl_pipeline" => "winevent-sysmon-all-1531" 10 | "[@metadata][index_name]" => "sysmon" 11 | } 12 | # Sysmon uses its own timestamp using the field `UtcTime` 13 | rename => { "event_original_time" => "event_recorded_time" } 14 | } 15 | date { 16 | timezone => "UTC" 17 | match => [ "UtcTime", "YYYY-MM-dd HH:mm:ss.SSS" ] 18 | target=> "@timestamp" 19 | tag_on_failure => [ "_parsefailure", "parsefailure-critical", "parsefailure-date-@timestamp", "parsefailure-date-sysmon-UtcTime" ] 20 | add_field => { "event_original_time" => "%{@timestamp}" } 21 | } 22 | mutate { 23 | rename => { 24 | "[user][domain]" => "user_reporter_domain" 25 | "[user][identifier]" => "user_reporter_sid" 26 | "[user][name]" => "user_reporter_name" 27 | "[user][type]" => "user_reporter_type" 28 | } 29 | } 30 | if [RuleName] { 31 | kv { 32 | source => "RuleName" 33 | field_split => "," 34 | value_split => "=" 35 | prefix => "rule_" 36 | transform_key => "lowercase" 37 | } 38 | } 39 | if [Hashes] { 40 | kv { 41 | source => "Hashes" 42 | field_split => "," 43 | value_split => "=" 44 | prefix => "hash_" 45 | transform_key => "lowercase" 46 | } 47 | } 48 | if [User] { 49 | grok { 50 | match => { "User" => "%{GREEDYDATA:user_domain}\\%{GREEDYDATA:user_name}" } 51 | add_field => { "etl_pipeline" => "sysmon-all-extract_domain_and_user_name" } 52 | tag_on_failure => [ "_parsefailure", "parsefailure-grok-User-extract_domain_and_user_name" ] 53 | } 54 | mutate { 55 | rename => { "User" => "user_account" } 56 | } 57 | } 58 | {% for event in renderyaml -%} 59 | if [event_id] == {{event['event_code']}} { 60 | mutate { 61 | rename => { 62 | {% for field in event['event_fields'] -%} 63 | "{{field['name']}}" 
=> "{{field['standard_name']}}" 64 | {% endfor -%} 65 | } 66 | } 67 | } 68 | {% endfor %}date { 69 | timezone => "UTC" 70 | match => [ "CreationUtcTime", "YYYY-MM-dd HH:mm:ss.SSS" ] 71 | target => "file_creation_time" 72 | tag_on_failure => [ "_parsefailure", "parsefailure-date-file_creation_time", "parsefailure-date-sysmon-CreationUtcTime" ] 73 | } 74 | date { 75 | timezone => "UTC" 76 | match => [ "PreviousCreationUtcTime", "YYYY-MM-dd HH:mm:ss.SSS" ] 77 | target => "file_previous_creation_time" 78 | tag_on_failure => [ "_parsefailure", "parsefailure-date-file_previous_creation_time", "parsefailure-date-sysmon-PreviousCreationUtcTime" ] 79 | } 80 | mutate { 81 | remove_field => [ 82 | "Hashes", 83 | "ConfigurationFileHash", 84 | "UtcTime", 85 | "CreationUtcTime", 86 | "PreviousCreationUtcTime" 87 | ] 88 | } 89 | } 90 | } -------------------------------------------------------------------------------- /resources/scripts/templates/ossem_relationships_to_events.md: -------------------------------------------------------------------------------- 1 | # Relationships to Events 2 | 3 | |Source|Relationship|Target|EventID|Event Name|Event Platform|Log Provider|Log Channel|Audit Category|Audit Sub-Category|Enable Commands| GPO Audit Policy| 4 | | :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| :---| 5 | {% for dr in ds_event_mappings|sort(attribute='name') %}{% for se in dr['security_events'] %}|{{dr['behavior'].source}}|{{dr['behavior'].relationship}}|{{dr['behavior'].target}}|{{se['event_id']}}|{{se['name']}}|{{se['platform']}}|{{se['log_provider']}}|{{se['log_channel']|default('None')}}|{{se['audit_category']|default('None')}}|{{se['audit_sub_category']|default('None')}}|{% if se['log_channel'] == "Security" %} `auditpol /set /subcategory:"{{se['audit_sub_category']}}" /success:enable /failure:enable` {% elif se['log_channel'] == "Microsoft-Windows-Sysmon/Operational" %} `<{{se['audit_category']}} onmatch="exclude" />` {% else %}None{% endif %}|{% if 
se['log_channel'] == "Security" %} Computer Configuration -> Windows Settings -> Security Settings -> Advanced Audit Policy Configuration -> System Audit Policies -> {{se['audit_category']}} -> Audit {{se['audit_sub_category']}} {% else %}None{% endif %}| 6 | {% endfor %}{% endfor %} -------------------------------------------------------------------------------- /resources/scripts/templates/readme_template.md: -------------------------------------------------------------------------------- 1 | # {{entry['title']}} 2 | 3 | ## Description 4 | {{entry['description']}} 5 | 6 | {%- if entry['images'] %} 7 | {% for image in entry['images'] %} 8 | ## {{image['title']}} 9 | ![{{image['title']}}]({{image['source']}}) 10 | {%- endfor %} 11 | {%- endif %} 12 | 13 | ## Sub Data Sets 14 | {%- if entry['data_set_type'] == 'Data Set' %} 15 | |{{entry['data_set_type']}}|Description| 16 | |---|---| 17 | {%- else %} 18 | |{{entry['data_set_type']}}|Version|Description|Tags| 19 | |---|---|---|---| 20 | {%- endif %} 21 | {%- if entry['sub_data_sets'] %} 22 | {%- for row in entry['sub_data_sets'] %} 23 | {%- if entry['data_set_type'] == 'Data Set' %} 24 | |[{{row['title']}}]({{row['link']}})|{{row['description']}}| 25 | {%- else %} 26 | |[{{row['title']}}]({{row['link']}})|{{row['version']}}|{{row['description']}}|{{row['tags']|join(', ') if row['tags']}}| 27 | {%- endif %} 28 | {%- endfor %} 29 | {%- endif %} 30 | {% if entry['references'] %} 31 | ## References 32 | {%- for reference in entry['references'] %} 33 | * [{{reference['text']}}]({{reference['link']}}) 34 | {%- endfor %} 35 | {%- endif %} -------------------------------------------------------------------------------- /resources/scripts/templates/table.md: -------------------------------------------------------------------------------- 1 | # {{table_metadata['name']}} 2 | 3 | {{table_metadata['description']}} 4 | 5 | ## Attributes 6 | 7 | | Entity | Name | Type | Description | Sample Value | 8 | |:---|:---|:---|:---|:---| 9 
| {% for a in table_metadata['attributes'] | sort(attribute='name') %} | {{a['entity']}} | {{a['name']}} | {{a['type']}} | {{a['description']}} | {{a['sample_value']}} | 10 | {% endfor %} -------------------------------------------------------------------------------- /resources/scripts/templates/toc_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "format" : "jb-book", 3 | "root" : "intro", 4 | "parts" : [ 5 | { 6 | "caption" : "Data Dictionaries", 7 | "chapters": [ 8 | { 9 | "file": "dd/intro" 10 | }, 11 | { 12 | "file": "dd/guidelines/intro", 13 | "sections": [ 14 | { 15 | "file": "dd/guidelines/contributing_data_dictionaries" 16 | }, 17 | { 18 | "file": "dd/guidelines/authoring_data_dictionaries" 19 | } 20 | ] 21 | }, 22 | { 23 | "file": "dd/notebooks/intro", 24 | "sections": [ 25 | { 26 | "file": "dd/notebooks/security_events_correlation" 27 | } 28 | ] 29 | } 30 | ] 31 | }, 32 | { 33 | "caption": "Common Data Model", 34 | "chapters": [ 35 | { 36 | "file": "cdm/intro" 37 | }, 38 | { 39 | "file": "cdm/guidelines/intro", 40 | "sections": [ 41 | { 42 | "file": "cdm/guidelines/entity_structure" 43 | }, 44 | { 45 | "file": "cdm/guidelines/table_structure" 46 | }, 47 | { 48 | "file": "cdm/guidelines/data_types" 49 | }, 50 | { 51 | "file": "cdm/guidelines/domain_or_hostname_or_fqdn" 52 | } 53 | ] 54 | }, 55 | { 56 | "file": "cdm/entities/intro", 57 | "sections": [] 58 | }, 59 | { 60 | "file": "cdm/tables/intro", 61 | "sections": [] 62 | } 63 | ] 64 | }, 65 | { 66 | "caption": "Detection Model", 67 | "chapters": [ 68 | { 69 | "file": "dm/intro" 70 | }, 71 | { 72 | "file": "dm/ossem_relationships_to_events" 73 | }, 74 | { 75 | "file": "dm/mitre_attack/intro", 76 | "sections": [ 77 | { 78 | "file": "dm/mitre_attack/attack_techniques_to_events" 79 | }, 80 | { 81 | "file": "dm/mitre_attack/attack_ds_events_mappings" 82 | } 83 | ] 84 | } 85 | ] 86 | } 87 | ] 88 | } 
#!/usr/bin/env python3

# Project: OSSEM Data Dictionaries
# Author: Jose Rodriguez (@Cyb3rPandaH)
# License: GPLv3

"""Convert OSSEM data dictionary workbooks (.xlsx) into YAML files.

Every worksheet of every ``.xlsx`` file located next to this script is parsed
and written to ``<sheet name>.yaml`` (spaces replaced by underscores) in the
current working directory.
"""

# Importing libraries
import glob
import os

# Number of leading columns that carry data in a worksheet row:
# standard_name, name, type, description, sample_value.
FIELD_COLUMNS = 5

# Value of the first column that opens a section -> name of that section.
SECTION_MARKERS = {
    'standard_name': 'data dictionary',
    'references': 'references',
    'tags': 'tags',
}

# Metadata keys whose second-column value has trailing whitespace removed.
TEXT_METADATA_KEYS = ('title', 'description')

# Metadata keys whose second-column value is copied untouched.
PLAIN_METADATA_KEYS = ('platform', 'log_source', 'event_code', 'event_version')


def _strip_trailing(value):
    """Return *value* without trailing whitespace; non-strings pass through."""
    return value.rstrip() if isinstance(value, str) else value


def parse_sheet_rows(rows):
    """Build the data dictionary mapping described by one worksheet.

    Args:
        rows: Iterable of cell-value sequences, one per worksheet row (the
            shape produced by ``iter_rows(values_only=True)``).

    Returns:
        A dict with the keys ``title``, ``description``, ``platform``,
        ``log_source``, ``event_code``, ``event_version``,
        ``attack_data_sources``, ``event_fields``, ``references`` and
        ``tags``, in that order. Metadata that is absent from the sheet is
        ``None`` (``attack_data_sources`` is an empty list) instead of
        leaking the value parsed from a previous sheet.
    """
    metadata = {key: None for key in TEXT_METADATA_KEYS + PLAIN_METADATA_KEYS}
    attack_data_sources = []
    event_fields = []
    references = []
    tags = []
    # Name of the section the current row belongs to ('' before any marker).
    section = ''

    for raw_row in rows:
        # Pad short rows so that the five data columns can always be indexed.
        row = tuple(raw_row)
        row += (None,) * (FIELD_COLUMNS - len(row))

        # Rows without any data never contribute anything; skipping them up
        # front also keeps blank rows in the tags section from crashing.
        if all(cell is None for cell in row[:FIELD_COLUMNS]):
            continue

        key, value = row[0], row[1]

        # Section headers only switch the parsing mode.
        if key in SECTION_MARKERS:
            section = SECTION_MARKERS[key]
            continue

        # Metadata keys are recognised in whatever section they appear, so a
        # section row whose first column equals a metadata key also updates
        # that metadata value (behaviour kept from the original script).
        if key in TEXT_METADATA_KEYS:
            metadata[key] = _strip_trailing(value)
        elif key in PLAIN_METADATA_KEYS:
            metadata[key] = value
        elif key == 'attack_data_sources':
            attack_data_sources = [] if value is None else str(value).split(',')

        if section == 'data dictionary':
            event_fields.append({'standard_name': row[0],
                                 'name': row[1],
                                 'type': row[2],
                                 'description': row[3],
                                 'sample_value': row[4]})
        elif section == 'references':
            references.append({'text': row[0], 'link': row[1]})
        elif section == 'tags' and row[0] is not None:
            tags.append(_strip_trailing(row[0]))

    # Key order below is the order in which the YAML file is written.
    return {'title': metadata['title'],
            'description': metadata['description'],
            'platform': metadata['platform'],
            'log_source': metadata['log_source'],
            'event_code': metadata['event_code'],
            'event_version': metadata['event_version'],
            'attack_data_sources': attack_data_sources,
            'event_fields': event_fields,
            'references': references,
            'tags': tags}


def write_yaml(data_dict, yaml_path):
    """Write *data_dict* to *yaml_path* as YAML, keeping the key order."""
    # Imported here so that the parsing helpers above stay importable
    # without the third-party YAML dependency.
    import yaml

    class NoAliasDumper(yaml.Dumper):
        """Dumper that always writes values out instead of anchors/aliases."""

        def ignore_aliases(self, data):
            return True

    with open(yaml_path, 'w', encoding='utf-8') as file:
        yaml.dump(data_dict, file, Dumper=NoAliasDumper,
                  sort_keys=False, width=float("inf"))


def convert_workbook(excel_file_path):
    """Write one YAML file per worksheet of the workbook at *excel_file_path*.

    Files are named after the worksheet (spaces replaced by underscores) and
    created in the current working directory.
    """
    # Imported here so that the parsing helpers above stay importable
    # without the third-party spreadsheet dependency.
    import openpyxl

    # Getting content of ms excel file
    wb = openpyxl.load_workbook(excel_file_path)
    # Parsing every sheet within the file
    for sheet in wb.sheetnames:
        data_dict = parse_sheet_rows(wb[sheet].iter_rows(values_only=True))
        # Formatting sheet name and creating yaml file
        write_yaml(data_dict, sheet.replace(' ', '_') + '.yaml')


def main():
    """Convert every ms excel file stored in the directory of this script."""
    pattern = os.path.join(os.path.dirname(__file__), "*.xlsx")
    for excel_file_path in glob.glob(pattern):
        convert_workbook(excel_file_path)


if __name__ == "__main__":
    main()