├── .github └── workflows │ ├── release.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── poetry.lock ├── print-coverage.py ├── pyproject.toml ├── sigma ├── backends │ ├── kusto │ │ ├── __init__.py │ │ └── kusto.py │ └── microsoft365defender │ │ ├── __init__.py │ │ └── microsoft365defender.py └── pipelines │ ├── azuremonitor │ ├── __init__.py │ ├── azuremonitor.py │ ├── mappings.py │ ├── schema.py │ ├── tables.py │ └── transformations.py │ ├── kusto_common │ ├── __init__.py │ ├── conditions.py │ ├── errors.py │ ├── finalization.py │ ├── mappings.py │ ├── postprocessing.py │ ├── schema.py │ └── transformations.py │ ├── microsoft365defender │ ├── __init__.py │ └── microsoft365defender.py │ ├── microsoftxdr │ ├── __init__.py │ ├── mappings.py │ ├── microsoftxdr.py │ ├── schema.py │ ├── tables.py │ └── transformations.py │ └── sentinelasim │ ├── __init__.py │ ├── mappings.py │ ├── schema.py │ ├── sentinelasim.py │ ├── tables.py │ └── transformations.py ├── tests ├── test_backend_kusto.py ├── test_pipelines_azuremonitor.py ├── test_pipelines_microsoftxdr.py └── test_pipelines_sentinelasim.py └── utils ├── get_azure_monitor_schema_tables.py ├── get_microsoft_xdr_schema_tables.py └── get_sentinel_asim_schema_tables.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release to PyPI 2 | on: 3 | release: 4 | types: [published] 5 | push: 6 | tags: 7 | - v*.*.* 8 | 9 | jobs: 10 | build-and-publish: 11 | runs-on: ubuntu-20.04 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Install Poetry 15 | run: pipx install poetry 16 | - name: Set up Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: 3.8 20 | cache: poetry 21 | - name: Verify versioning 22 | run: | 23 | [ "$(poetry version -s)" == "${GITHUB_REF#refs/tags/v}" ] 24 | - name: Install dependencies 25 | run: poetry install 26 | - name: Run tests 27 | run: poetry run pytest 28 | - name: Build packages 29 | run: 
poetry build 30 | - name: Configure Poetry 31 | run: | 32 | poetry config repositories.testpypi https://test.pypi.org/legacy/ 33 | poetry config pypi-token.testpypi ${{ secrets.TEST_PYPI_API_TOKEN }} 34 | poetry config pypi-token.pypi "${{ secrets.PYPI_API_TOKEN }}" 35 | #- name: Publish to test PyPI 36 | # if: ${{ github.event_name == 'push' }} 37 | # run: poetry publish -r testpypi 38 | - name: Publish to PyPI 39 | if: ${{ github.event_name == 'release' }} 40 | run: poetry publish 41 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | push: 4 | branches: [ "*" ] 5 | pull_request: 6 | branches: [ "*" ] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | test: 11 | strategy: 12 | matrix: 13 | os: [ 'ubuntu-20.04' ] 14 | python-version: [ '3.8', '3.9', '3.10', '3.11'] 15 | runs-on: ${{ matrix.os }} 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Install Poetry 19 | run: pipx install poetry 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | cache: poetry 25 | - name: Install dependencies 26 | run: poetry install 27 | - name: Run tests 28 | run: poetry run pytest --cov=sigma --cov-report term --cov-report xml:cov.xml -vv 29 | - name: Store coverage for badge 30 | if: ${{ runner.os == 'Linux' }} 31 | run: poetry run python print-coverage.py >> $GITHUB_ENV 32 | - name: Create coverage badge 33 | if: ${{ github.repository == 'AttackIQ/pySigma-backend-microsoft365defender' && github.event_name == 'push' && runner.os == 'Linux' }} 34 | uses: schneegans/dynamic-badges-action@v1.1.0 35 | with: 36 | auth: ${{ secrets.GIST_SECRET }} 37 | gistID: 9c0879725c7f94387801390bbb0ac8d6 38 | filename: slincoln-aiq-pySigma-backend-microsoft365defender.json 39 | label: Coverage 40 | message: ${{ env.COVERAGE }} 41 | color: ${{ env.COVERAGE_COLOR 
}} 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .coverage* 2 | .vscode/ 3 | **/__pycache__ 4 | .pytest_cache/ 5 | cov.xml 6 | dist/ 7 | docs/_build 8 | .env 9 | .venv/ 10 | .python-version 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | GNU LESSER GENERAL PUBLIC LICENSE 3 | Version 3, 29 June 2007 4 | 5 | Copyright (C) 2007 Free Software Foundation, Inc. 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | 10 | This version of the GNU Lesser General Public License incorporates 11 | the terms and conditions of version 3 of the GNU General Public 12 | License, supplemented by the additional permissions listed below. 13 | 14 | 0. Additional Definitions. 15 | 16 | As used herein, "this License" refers to version 3 of the GNU Lesser 17 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 18 | General Public License. 19 | 20 | "The Library" refers to a covered work governed by this License, 21 | other than an Application or a Combined Work as defined below. 22 | 23 | An "Application" is any work that makes use of an interface provided 24 | by the Library, but which is not otherwise based on the Library. 25 | Defining a subclass of a class defined by the Library is deemed a mode 26 | of using an interface provided by the Library. 27 | 28 | A "Combined Work" is a work produced by combining or linking an 29 | Application with the Library. The particular version of the Library 30 | with which the Combined Work was made is also called the "Linked 31 | Version". 
32 | 33 | The "Minimal Corresponding Source" for a Combined Work means the 34 | Corresponding Source for the Combined Work, excluding any source code 35 | for portions of the Combined Work that, considered in isolation, are 36 | based on the Application, and not on the Linked Version. 37 | 38 | The "Corresponding Application Code" for a Combined Work means the 39 | object code and/or source code for the Application, including any data 40 | and utility programs needed for reproducing the Combined Work from the 41 | Application, but excluding the System Libraries of the Combined Work. 42 | 43 | 1. Exception to Section 3 of the GNU GPL. 44 | 45 | You may convey a covered work under sections 3 and 4 of this License 46 | without being bound by section 3 of the GNU GPL. 47 | 48 | 2. Conveying Modified Versions. 49 | 50 | If you modify a copy of the Library, and, in your modifications, a 51 | facility refers to a function or data to be supplied by an Application 52 | that uses the facility (other than as an argument passed when the 53 | facility is invoked), then you may convey a copy of the modified 54 | version: 55 | 56 | a) under this License, provided that you make a good faith effort to 57 | ensure that, in the event an Application does not supply the 58 | function or data, the facility still operates, and performs 59 | whatever part of its purpose remains meaningful, or 60 | 61 | b) under the GNU GPL, with none of the additional permissions of 62 | this License applicable to that copy. 63 | 64 | 3. Object Code Incorporating Material from Library Header Files. 65 | 66 | The object code form of an Application may incorporate material from 67 | a header file that is part of the Library. 
You may convey such object 68 | code under terms of your choice, provided that, if the incorporated 69 | material is not limited to numerical parameters, data structure 70 | layouts and accessors, or small macros, inline functions and templates 71 | (ten or fewer lines in length), you do both of the following: 72 | 73 | a) Give prominent notice with each copy of the object code that the 74 | Library is used in it and that the Library and its use are 75 | covered by this License. 76 | 77 | b) Accompany the object code with a copy of the GNU GPL and this license 78 | document. 79 | 80 | 4. Combined Works. 81 | 82 | You may convey a Combined Work under terms of your choice that, 83 | taken together, effectively do not restrict modification of the 84 | portions of the Library contained in the Combined Work and reverse 85 | engineering for debugging such modifications, if you also do each of 86 | the following: 87 | 88 | a) Give prominent notice with each copy of the Combined Work that 89 | the Library is used in it and that the Library and its use are 90 | covered by this License. 91 | 92 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 93 | document. 94 | 95 | c) For a Combined Work that displays copyright notices during 96 | execution, include the copyright notice for the Library among 97 | these notices, as well as a reference directing the user to the 98 | copies of the GNU GPL and this license document. 99 | 100 | d) Do one of the following: 101 | 102 | 0) Convey the Minimal Corresponding Source under the terms of this 103 | License, and the Corresponding Application Code in a form 104 | suitable for, and under terms that permit, the user to 105 | recombine or relink the Application with a modified version of 106 | the Linked Version to produce a modified Combined Work, in the 107 | manner specified by section 6 of the GNU GPL for conveying 108 | Corresponding Source. 
109 | 110 | 1) Use a suitable shared library mechanism for linking with the 111 | Library. A suitable mechanism is one that (a) uses at run time 112 | a copy of the Library already present on the user's computer 113 | system, and (b) will operate properly with a modified version 114 | of the Library that is interface-compatible with the Linked 115 | Version. 116 | 117 | e) Provide Installation Information, but only if you would otherwise 118 | be required to provide such information under section 6 of the 119 | GNU GPL, and only to the extent that such information is 120 | necessary to install and execute a modified version of the 121 | Combined Work produced by recombining or relinking the 122 | Application with a modified version of the Linked Version. (If 123 | you use option 4d0, the Installation Information must accompany 124 | the Minimal Corresponding Source and Corresponding Application 125 | Code. If you use option 4d1, you must provide the Installation 126 | Information in the manner specified by section 6 of the GNU GPL 127 | for conveying Corresponding Source.) 128 | 129 | 5. Combined Libraries. 130 | 131 | You may place library facilities that are a work based on the 132 | Library side by side in a single library together with other library 133 | facilities that are not Applications and are not covered by this 134 | License, and convey such a combined library under terms of your 135 | choice, if you do both of the following: 136 | 137 | a) Accompany the combined library with a copy of the same work based 138 | on the Library, uncombined with any other library facilities, 139 | conveyed under the terms of this License. 140 | 141 | b) Give prominent notice with the combined library that part of it 142 | is a work based on the Library, and explaining where to find the 143 | accompanying uncombined form of the same work. 144 | 145 | 6. Revised Versions of the GNU Lesser General Public License. 
146 | 147 | The Free Software Foundation may publish revised and/or new versions 148 | of the GNU Lesser General Public License from time to time. Such new 149 | versions will be similar in spirit to the present version, but may 150 | differ in detail to address new problems or concerns. 151 | 152 | Each version is given a distinguishing version number. If the 153 | Library as you received it specifies that a certain numbered version 154 | of the GNU Lesser General Public License "or any later version" 155 | applies to it, you have the option of following the terms and 156 | conditions either of that published version or of any later version 157 | published by the Free Software Foundation. If the Library as you 158 | received it does not specify a version number of the GNU Lesser 159 | General Public License, you may choose any version of the GNU Lesser 160 | General Public License ever published by the Free Software Foundation. 161 | 162 | If the Library as you received it specifies that a proxy can decide 163 | whether future versions of the GNU Lesser General Public License shall 164 | apply, that proxy's public statement of acceptance of any version is 165 | permanent authorization for you to choose that version for the 166 | Library. 
167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pySigma Kusto Query Language (KQL) Backend 2 | 3 | ![Tests](https://github.com/AttackIQ/pySigma-backend-microsoft365defender/actions/workflows/test.yml/badge.svg) 4 | ![Coverage Badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/slincoln-aiq/9c0879725c7f94387801390bbb0ac8d6/raw/slincoln-aiq-pySigma-backend-microsoft365defender.json) 5 | ![Status](https://img.shields.io/badge/Status-pre--release-orange) 6 | ![PyPI version](https://badge.fury.io/py/pysigma-backend-kusto.svg) 7 | ![Python versions](https://img.shields.io/pypi/pyversions/pysigma-backend-kusto.svg) 8 | ![pySigma version](https://img.shields.io/badge/pySigma-%3E%3D0.10.0-blue) 9 | ![License](https://img.shields.io/github/license/AttackIQ/pySigma-backend-microsoft365defender.svg) 10 | 11 | ## Contents 12 | 13 | - [pySigma Kusto Query Language (KQL) Backend](#pysigma-kusto-query-language-kql-backend) 14 | - [📖 Overview](#-overview) 15 | - [🔑 Key Features](#-key-features) 16 | - [🧑‍💻 Maintainer](#-maintainer) 17 | - [🚀 Quick Start](#-quick-start) 18 | - [📘 Usage](#-usage) 19 | - [🖥️ sigma-cli](#️-sigma-cli) 20 | - [🐍 Python Script](#-python-script) 21 | - [🛠️ Advanced Features](#️-advanced-features) 22 | - [🔄 Pipeline \& Backend Args (New in 0.2.0)](#-pipeline--backend-args-new-in-020) 23 | - [🗃️ Custom Table Names (New in 0.3.0) (Beta)](#️-custom-table-names-new-in-030-beta) 24 | - [🔄 Processing Pipelines](#-processing-pipelines) 25 | - [📊 Rule Support](#-rule-support) 26 | - [🖥️ Commonly Supported Categories](#️-commonly-supported-categories) 27 | - [🧪 Custom Transformations](#-custom-transformations) 28 | - [📊 Custom Postprocessing Item](#-custom-postprocessing-item) 29 | - [❓Frequently Asked Questions](#frequently-asked-questions) 30 | - [How do I set the table name for a 
rule?](#how-do-i-set-the-table-name-for-a-rule) 31 | - [How do I set the table name for a rule in YAML?](#how-do-i-set-the-table-name-for-a-rule-in-yaml) 32 | - [How is the table name determined for a rule?](#how-is-the-table-name-determined-for-a-rule) 33 | - [How are field mappings determined for a rule?](#how-are-field-mappings-determined-for-a-rule) 34 | - [What tables are supported for each pipeline?](#what-tables-are-supported-for-each-pipeline) 35 | - [I am receiving an `Invalid SigmaDetectionItem field name encountered` error. What does this mean?](#i-am-receiving-an-invalid-sigmadetectionitem-field-name-encountered-error-what-does-this-mean) 36 | - [My query\_table or custom field mapping isn't working](#my-query_table-or-custom-field-mapping-isnt-working) 37 | - [🤝 Contributing](#-contributing) 38 | - [📄 License](#-license) 39 | 40 | ## 📖 Overview 41 | 42 | The **pySigma Kusto Backend** transforms Sigma Rules into queries using [Kusto Query Language (KQL)](https://learn.microsoft.com/en-us/kusto/query/?view=microsoft-fabric). This backend supports multiple Microsoft products, including: 43 | 44 | - [Microsoft XDR Advanced Hunting Queries](https://learn.microsoft.com/en-us/defender-xdr/advanced-hunting-overview) (Formally Microsoft 365 Defender Advanced Hunting Queries) 45 | - [Azure Sentinel Advanced Security Information Model (ASIM) Queries](https://learn.microsoft.com/en-us/azure/sentinel/normalization) 46 | - [Azure Monitor Queries](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/get-started-queries) 47 | 48 | > **Note:** This backend was previously named **pySigma Microsoft 365 Defender Backend**. 
49 | 50 | ### 🔑 Key Features 51 | 52 | - **Backend**: `sigma.backends.kusto` with `KustoBackend` class 53 | - **Pipelines**: Provides `microsoft_xdr_pipeline`, `sentinelasim_pipeline`, and `azure_monitor_pipeline` for query tables and field renames 54 | - **Output**: Query strings in Kusto Query Language (KQL) 55 | 56 | ### 🧑‍💻 Maintainer 57 | 58 | - [Stephen Lincoln](https://github.com/slincoln-aiq) via [AttackIQ](https://github.com/AttackIQ) 59 | 60 | ## 🚀 Quick Start 61 | 62 | 1. Install the package: 63 | 64 | ```bash 65 | pip install pysigma-backend-kusto 66 | ``` 67 | 68 | > **Note:** This package requires `pySigma` version 0.10.0 or higher. 69 | 70 | 2. Convert a Sigma rule to MIcrosoft XDR KQL query using sigma-cli: 71 | 72 | ```bash 73 | sigma convert -t kusto -p microsoft_xdr path/to/your/rule.yml 74 | ``` 75 | 76 | 3. Or use in a Python script: 77 | 78 | ```python 79 | from sigma.rule import SigmaRule 80 | 81 | from sigma.backends.kusto import KustoBackend 82 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline 83 | 84 | # Load your Sigma rule 85 | rule = SigmaRule.from_yaml( 86 | """ 87 | title: Mimikatz CommandLine 88 | status: test 89 | logsource: 90 | category: process_creation 91 | product: windows 92 | detection: 93 | sel: 94 | CommandLine|contains: mimikatz.exe 95 | condition: sel 96 | """ 97 | ) 98 | 99 | # Convert the rule 100 | xdr_pipeline = microsoft_xdr_pipeline() 101 | backend = KustoBackend(processing_pipeline=xdr_pipeline) 102 | print(backend.convert_rule(rule)[0]) 103 | 104 | ``` 105 | 106 | ## 📘 Usage 107 | 108 | ### 🖥️ sigma-cli 109 | 110 | Use with `sigma-cli` per [typical sigma-cli usage](https://github.com/SigmaHQ/sigma-cli#usage): 111 | 112 | ```bash 113 | sigma convert -t kusto -p microsoft_xdr -f default -s ~/sigma/rules 114 | ``` 115 | 116 | ### 🐍 Python Script 117 | 118 | Use the backend and pipeline in a standalone Python script. 
Note, the backend automatically applies the pipeline, but 119 | you can manually add it if you would like. 120 | 121 | ```python 122 | from sigma.rule import SigmaRule 123 | from sigma.backends.kusto import KustoBackend 124 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline 125 | 126 | # Define an example rule as a YAML str 127 | sigma_rule = SigmaRule.from_yaml(""" 128 | title: Mimikatz CommandLine 129 | status: test 130 | logsource: 131 | category: process_creation 132 | product: windows 133 | detection: 134 | sel: 135 | CommandLine|contains: mimikatz.exe 136 | condition: sel 137 | """) 138 | # Create backend, which automatically adds the pipeline 139 | kusto_backend = KustoBackend() 140 | 141 | # Or apply the pipeline manually 142 | pipeline = microsoft_xdr_pipeline() 143 | pipeline.apply(sigma_rule) 144 | 145 | # Convert the rule 146 | print(sigma_rule.title + " KQL Query: \n") 147 | print(kusto_backend.convert_rule(sigma_rule)[0]) 148 | ``` 149 | 150 | Output: 151 | 152 | ```text 153 | Mimikatz CommandLine KQL Query: 154 | 155 | DeviceProcessEvents 156 | | where ProcessCommandLine contains "mimikatz.exe" 157 | ``` 158 | 159 | ## 🛠️ Advanced Features 160 | 161 | ### 🔄 Pipeline & Backend Args (New in 0.2.0) 162 | 163 | For the `microsoft_xdr_pipeline`: 164 | 165 | - `transform_parent_image`: Controls ParentImage field mapping behavior 166 | - When set to `True` (default), maps ParentImage to InitiatingProcessParentFileName 167 | - When set to `False`, maps ParentImage to InitiatingProcessFileName 168 | - Useful for adjusting field mappings based on specific rule requirements 169 | - Example usage: 170 | 171 | ```python 172 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline 173 | pipeline = microsoft_xdr_pipeline(transform_parent_image=False) 174 | ``` 175 | 176 | This argument allows fine-tuning of the ParentImage field mapping, which can be crucial for accurate rule conversion in certain scenarios. 
By default, it follows the behavior of mapping ParentImage to the parent process name, but setting it to `False` allows for mapping to the initiating process name instead. 177 | 178 | ### 🗃️ Custom Table Names (New in 0.3.0) (Beta) 179 | 180 | The `query_table` argument allows users to override table mappings and set custom table names. This is useful for converting Sigma rules where the rule category does not easily map to the default table names. 181 | 182 | #### YAML Pipelines 183 | 184 | To set a custom table name, ensure your pipeline has a priority of 9 or lower, as sigma-cli merges pipelines based on priority (default is 10). Field mappings in `mappings.py` will apply according to your specified table name, along with any custom field mapping transformations. 185 | 186 | ```YAML 187 | # test_table_name_pipeline.yml 188 | name: Custom Query Table Pipeline 189 | priority: 1 190 | transformations: 191 | - id: test_name_name 192 | type: set_state 193 | key: "query_table" 194 | val: ["DeviceProcessEvents"] 195 | ``` 196 | 197 | ```bash 198 | sigma convert -t kusto -p microsoft_xdr -p test_table_name_pipeline.yml test_rule.yml 199 | ``` 200 | 201 | #### Python Pipelines 202 | 203 | You can also set the table name in the pipeline via Python by passing the `query_table` parameter to the pipeline. 204 | 205 | ```python 206 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline 207 | my_pipeline = microsoft_xdr_pipeline(query_table="DeviceProcessEvents") 208 | ``` 209 | 210 | ## 🔄 Processing Pipelines 211 | 212 | This project includes three main processing pipelines, each designed for a specific Microsoft product: 213 | 214 | 1. **Microsoft XDR Pipeline** (formerly Microsoft 365 Defender) 215 | - Status: Production-ready 216 | - Supports a wide range of Sigma rule categories 217 | - All tables supported, but additional field mapping contributions welcome 218 | 219 | 2. 
**Sentinel ASIM Pipeline** 220 | - Status: Beta 221 | - Transforms rules for Microsoft Sentinel Advanced Security Information Model (ASIM) 222 | - All tables supported, but field mappings are limited 223 | 224 | 3. **Azure Monitor Pipeline** 225 | - Status: Alpha 226 | - Currently supports field mappings for `SecurityEvent` and `SigninLogs` tables only 227 | - All tables supported, but requires custom field mappings for other tables 228 | 229 | Each pipeline includes a `query_table` parameter for setting custom table names. 230 | 231 | ### 📊 Rule Support 232 | 233 | Rules are supported if either: 234 | 235 | - A valid table name is supplied via the `query_table` parameter or YAML pipeline 236 | - The rule's logsource category is supported and mapped in the pipeline's `mappings.py` file 237 | - The rule has an `EventID` or `EventCode` field in the `detection` section, and the eventid is present in the pipeline's `eventid_to_table_mappings` dictionary 238 | 239 | ### 🖥️ Commonly Supported Categories 240 | 241 | - process_creation 242 | - image_load 243 | - network_connection 244 | - file_access, file_change, file_delete, file_event, file_rename 245 | - registry_add, registry_delete, registry_event, registry_set 246 | 247 | Specific pipelines may support additional categories. Check each pipeline's `mappings.py` file for details. 248 | 249 | ## 🧪 Custom Transformations 250 | 251 | This package includes several custom `ProcessingPipeline` `Transformation` classes: 252 | 253 | 1. **DynamicFieldMappingTransformation** 254 | - Determines field mappings based on the `query_table` state parameter 255 | 256 | 2. **GenericFieldMappingTransformation** 257 | - Applies common field mappings across all tables in a pipeline 258 | 259 | 3. **BaseHashesValuesTransformation** 260 | - Transforms the Hashes field, removing hash algorithm prefixes 261 | 262 | 4. **ParentImageValueTransformation** 263 | - Extracts parent process name from Sysmon ParentImage field 264 | 265 | 5. 
**SplitDomainUserTransformation** 266 | - Splits User field into separate domain and username fields 267 | 268 | 6. **RegistryActionTypeValueTransformation** 269 | - Adjusts registry ActionType values for compatibility 270 | 271 | 7. **InvalidFieldTransformation** 272 | - Identifies unsupported or invalid fields in rules 273 | 274 | 8. **SetQueryTableStateTransformation** 275 | - Manages the `query_table` state based on rule category or custom settings 276 | 277 | ### 📊 Custom Postprocessing Item 278 | 279 | 1. **PrependQueryTablePostprocessingItem** 280 | 281 | - Adds table name as prefix to each query in a SigmaCollection, or single query in a SigmaRule 282 | 283 | ## ❓Frequently Asked Questions 284 | 285 | ### How do I set the table name for a rule? 286 | 287 | You can set the table name for a rule by adding the `query_table` parameter to the pipeline and setting it to the table name you want to use. 288 | 289 | ```python 290 | from sigma.pipelines.microsoftxdr import microsoft_xdr_pipeline 291 | pipeline = microsoft_xdr_pipeline(query_table="DeviceProcessEvents") 292 | ``` 293 | 294 | ### How do I set the table name for a rule in YAML? 295 | 296 | You can set the table name for a rule in YAML by adding the `query_table` parameter to the pipeline and setting it to the table name you want to use. 297 | 298 | ```YAML 299 | # test_table_name_pipeline.yml 300 | name: 301 | priority: 1 302 | transformations: 303 | - id: test_name_name 304 | type: set_state 305 | key: "query_table" 306 | val: ["DeviceProcessEvents"] 307 | ``` 308 | 309 | ```bash 310 | sigma convert -t kusto -p microsoft_xdr -p test_table_name_pipeline.yml test_rule.yml 311 | ``` 312 | 313 | ### How is the table name determined for a rule? 314 | 315 | The table name is set by the `SetQueryTableStateTransformation` transformation, which is the first transformation in each pipeline. The `query_table` is set to the pipeline's `state` parameter with the following priority: 316 | 1. 
The `query_table` parameter passed to the pipeline, if using a Python script/code. 317 | 2. The `query_table` parameter passed to the pipeline in a custom YAML pipeline, if using sigma-cli. 318 | 3. The `logsource.category` field in the rule, if the category is present in the pipeline's `category_to_table_mappings` dictionary. 319 | 4. The `EventID` or `EventCode` field, if present in the rule's `detection` section, and if the eventid is present in the pipeline's `eventid_to_table_mappings` dictionary. 320 | 5. If none of the above are present, an error is raised. 321 | 322 | ### How are field mappings determined for a rule? 323 | 324 | The field mappings are determined by the `DynamicFieldMappingTransformation` transformation. It will use the table name from the pipeline state's `query_table` key. The field mapping logic is defined in each pipeline's `mappings.py` file for each table. If a field is not found in the table, the `GenericFieldMappingTransformation` will apply generic field mappings. If a field is not found in the generic field mappings, the field will be kept the same. 325 | 326 | ### What tables are supported for each pipeline? 327 | 328 | The tables that are supported for each pipeline are defined in each pipeline's `tables.py` file. This file is automatically generated by the scripts in the `utils` folder. These scripts pull documentation from Microsoft to get all documented tables and their fields and schema. 329 | 330 | ### I am receiving an `Invalid SigmaDetectionItem field name encountered` error. What does this mean? 331 | 332 | This error means that the field name(s) provided in the error are not found in the tables fields defined in `tables.py` for the pipeline you are using. This probably means that a Sigma rule's field was not found in the field mappings for the table. To fix this error, you can supply your own custom field mappings to convert the unsupported field into a supported one. 
For example, in using YAML: 333 | 334 | ```YAML 335 | # custom_field_mapping_pipeline.yml 336 | name: Custom Field Mapping 337 | priority: 1 338 | transformations: 339 | - id: field_mapping 340 | type: field_name_mapping 341 | mapping: 342 | MyNotSupportedField: a_supported_field 343 | rule_conditions: 344 | - type: logsource 345 | service: sysmon 346 | ``` 347 | 348 | ```bash 349 | sigma convert -t kusto -p custom_field_mapping_pipeline.yml -p microsoft_xdr test_rule.yml 350 | ``` 351 | 352 | If you find the field mapping useful, please consider submitting a PR to add it to the pipeline's field mappings :) 353 | 354 | ### My query_table or custom field mapping isn't working 355 | 356 | Each pipeline in the project has a priority of 10. If you are trying to set the table name or custom field mappings, your pipeline needs to have a priority of 9 or less. You can set the priority in the YAML pipeline like so: 357 | 358 | ```YAML 359 | # test_table_name_pipeline.yml 360 | name: 361 | priority: 9 362 | transformations: 363 | - id: test_name_name 364 | type: set_state 365 | key: "query_table" 366 | val: ["DeviceProcessEvents"] 367 | ``` 368 | 369 | ## 🤝 Contributing 370 | 371 | Contributions are welcome, especially for table and field mappings! Please feel free to submit a Pull Request. 372 | 373 | 1. Fork the repository 374 | 2. Create your feature branch (`git checkout -b feature/AmazingFeature`) 375 | 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) 376 | 4. Push to the branch (`git push origin feature/AmazingFeature`) 377 | 5. Open a Pull Request 378 | 379 | Please make sure to update tests as appropriate. 380 | 381 | ## 📄 License 382 | 383 | This project is licensed under the GNU Lesser General Public License v3.0 - see the [LICENSE](LICENSE) file for details. 
384 | -------------------------------------------------------------------------------- /print-coverage.py: -------------------------------------------------------------------------------- 1 | # Prints code testing coverage as percentage for badge generation. 2 | import xml.etree.ElementTree as et 3 | 4 | tree = et.parse("cov.xml") 5 | root = tree.getroot() 6 | coverage = float(root.attrib["line-rate"]) * 100 7 | print(f"COVERAGE={coverage:3.4}%") 8 | if coverage >= 95.0: 9 | print("COVERAGE_COLOR=green") 10 | elif coverage >= 90.0: 11 | print("COVERAGE_COLOR=yellow") 12 | elif coverage >= 85.0: 13 | print("COVERAGE_COLOR=orange") 14 | else: 15 | print("COVERAGE_COLOR=red") 16 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pySigma-backend-kusto" 3 | version = "0.4.3" 4 | description = "pySigma Kusto backend" 5 | authors = ["Stephen Lincoln "] 6 | license = "LGPL-3.0-only" 7 | repository = "https://github.com/AttackIQ/pySigma-backend-kusto" 8 | packages = [ 9 | { include = "sigma" } 10 | ] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.8" 14 | pysigma = ">= 0.10.0" 15 | certifi = ">=2023.07.22" 16 | 17 | [tool.poetry.group.dev.dependencies] 18 | pytest = "^7.2.1" 19 | pytest-cov = "^4.0.0" 20 | coverage = "^7.2.1" 21 | requests = "^2.32.3" 22 | python-dotenv = "^1.0.1" 23 | beautifulsoup4 = "^4.12.3" 24 | ruff = "^0.6.9" 25 | black = "^24.8.0" 26 | 27 | [build-system] 28 | requires = ["poetry-core>=1.0.0"] 29 | build-backend = "poetry.core.masonry.api" 30 | 31 | [tool.pytest.ini_options] 32 | addopts = "-v --cov=sigma --cov-report=term-missing" 33 | testpaths = ["tests"] 34 | 35 | [tool.coverage.run] 36 | branch = true 37 | source = ["sigma"] 38 | omit = [ 39 | # omit anything in a tests directory 40 | "tests/*", 41 | ] 42 | 43 | [tool.coverage.report] 44 | exclude_lines = [ 45 | "pragma: no 
cover", 46 | "def __repr__", 47 | "raise NotImplementedError", 48 | "if __name__ == .__main__.:", 49 | "pass", 50 | "raise ImportError", 51 | ] 52 | show_missing = true 53 | fail_under = 80 54 | 55 | [tool.black] 56 | line-length = 120 57 | target-version = ['py311'] 58 | 59 | [tool.ruff] 60 | line-length = 120 # Same as Black 61 | # Assume Python 3.11 62 | target-version = "py311" 63 | # Exclude a variety of commonly ignored directories. 64 | exclude = [ 65 | ".bzr", 66 | ".direnv", 67 | ".eggs", 68 | ".git", 69 | ".git-rewrite", 70 | ".hg", 71 | ".ipynb_checkpoints", 72 | ".mypy_cache", 73 | ".nox", 74 | ".pants.d", 75 | ".pyenv", 76 | ".pytest_cache", 77 | ".pytype", 78 | ".ruff_cache", 79 | ".svn", 80 | ".tox", 81 | ".venv", 82 | ".vscode", 83 | "__pypackages__", 84 | "_build", 85 | "buck-out", 86 | "build", 87 | "dist", 88 | "node_modules", 89 | "site-packages", 90 | "venv", 91 | ".venv" 92 | ] 93 | 94 | [tool.ruff.lint] 95 | # Enable isort rules 96 | select = ["I"] 97 | 98 | # Sort imports 99 | [tool.ruff.lint.isort] 100 | combine-as-imports = true 101 | case-sensitive = false 102 | -------------------------------------------------------------------------------- /sigma/backends/kusto/__init__.py: -------------------------------------------------------------------------------- 1 | from .kusto import KustoBackend 2 | 3 | # TODO: add all backend classes that should be exposed to the user of your backend in the import statement above. 4 | 5 | backends = { # Mapping between backend identifiers and classes. This is used by the pySigma plugin system to recognize backends and expose them with the identifier. 
6 | "kusto": KustoBackend, 7 | } 8 | -------------------------------------------------------------------------------- /sigma/backends/kusto/kusto.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import ClassVar, Dict, Pattern, Tuple, Type, Union 3 | 4 | from sigma.conditions import ( 5 | ConditionAND, 6 | ConditionFieldEqualsValueExpression, 7 | ConditionItem, 8 | ConditionNOT, 9 | ConditionOR, 10 | ) 11 | from sigma.conversion.base import TextQueryBackend 12 | from sigma.conversion.deferred import DeferredQueryExpression 13 | from sigma.conversion.state import ConversionState 14 | from sigma.types import SigmaCompareExpression, SigmaNumber, SigmaString, SpecialChars 15 | 16 | 17 | class KustoBackend(TextQueryBackend): 18 | """Microsoft 365 Defender KQL Backend.""" 19 | 20 | # The backend generates grouping if required 21 | name: ClassVar[str] = "Kusto backend" 22 | identifier: ClassVar[str] = "kusto" 23 | formats: Dict[str, str] = { 24 | "default": "Kusto Query Language search strings", 25 | } 26 | 27 | requires_pipeline: bool = False # m365 pipeline is automatically applied 28 | 29 | # Operator precedence 30 | parenthesize = True 31 | precedence: ClassVar[Tuple[Type[ConditionItem], Type[ConditionItem], Type[ConditionItem]]] = ( 32 | ConditionNOT, 33 | ConditionAND, 34 | ConditionOR, 35 | ) 36 | group_expression: ClassVar[str] = ( 37 | "({expr})" # Expression for precedence override grouping as format string with {expr} placeholder 38 | ) 39 | # Generated query tokens 40 | token_separator: str = " " # separator inserted between all boolean operators 41 | or_token: ClassVar[str] = "or" 42 | and_token: ClassVar[str] = "and" 43 | not_token: ClassVar[str] = "not" 44 | eq_token: ClassVar[str] = " =~ " # Token inserted between field and value (without separator) 45 | 46 | # String output 47 | ## Fields 48 | ### Quoting 49 | field_quote: ClassVar[str] = ( 50 | "'" # Character used to quote field characters 
if field_quote_pattern matches (or not, depending on field_quote_pattern_negation). No field name quoting is done if not set. 51 | ) 52 | field_quote_pattern: ClassVar[Pattern] = re.compile( 53 | "^\\w+$" 54 | ) # Quote field names if this pattern (doesn't) matches, depending on field_quote_pattern_negation. Field name is always quoted if pattern is not set. 55 | field_quote_pattern_negation: ClassVar[bool] = ( 56 | True # Negate field_quote_pattern result. Field name is quoted if pattern doesn't matches if set to True (default). 57 | ) 58 | 59 | ### Escaping 60 | field_escape: ClassVar[str] = "" # Character to escape particular parts defined in field_escape_pattern. 61 | field_escape_quote: ClassVar[bool] = True # Escape quote string defined in field_quote 62 | field_escape_pattern: ClassVar[Pattern] = re.compile( 63 | "\\s" 64 | ) # All matches of this pattern are prepended with the string contained in field_escape. 65 | 66 | ## Values 67 | str_quote: ClassVar[str] = '"' # string quoting character (added as escaping character) 68 | escape_char: ClassVar[str] = "\\" # Escaping character for special characters inside string 69 | wildcard_multi: ClassVar[str] = "*" # Character used as multi-character wildcard 70 | wildcard_single: ClassVar[str] = "*" # Character used as single-character wildcard 71 | add_escaped: ClassVar[str] = "\\" # Characters quoted in addition to wildcards and string quote 72 | filter_chars: ClassVar[str] = "" # Characters filtered 73 | bool_values: ClassVar[Dict[bool, str]] = { # Values to which boolean values are mapped. 74 | True: "true", 75 | False: "false", 76 | } 77 | 78 | # String matching operators. if none is appropriate eq_token is used. 
79 | startswith_expression: ClassVar[str] = "{field} startswith {value}" 80 | endswith_expression: ClassVar[str] = "{field} endswith {value}" 81 | contains_expression: ClassVar[str] = "{field} contains {value}" 82 | wildcard_match_expression: ClassVar[Union[str, None]] = ( 83 | None # Special expression if wildcards can't be matched with the eq_token operator 84 | ) 85 | 86 | # Regular expressions 87 | re_expression: ClassVar[str] = ( 88 | '{field} matches regex "{regex}"' # Regular expression query as format string with placeholders {field} and {regex} 89 | ) 90 | re_escape_char: ClassVar[str] = "\\" # Character used for escaping in regular expressions 91 | re_escape: ClassVar[Tuple[str, ...]] = () # List of strings that are escaped 92 | re_escape_escape_char: bool = True # If True, the escape character is also escaped 93 | 94 | # cidr expressions 95 | cidr_wildcard: ClassVar[str] = "*" # Character used as single wildcard 96 | cidr_expression: ClassVar[str] = ( 97 | 'ipv4_is_in_range({field}, "{value}")' # CIDR expression query as format string with placeholders {field} = {value} 98 | ) 99 | cidr_in_list_expression: ClassVar[str] = ( 100 | 'ipv4_is_in_any_range({field}, "{value}")' # CIDR expression query as format string with placeholders {field} = in({list}) 101 | ) 102 | 103 | # Numeric comparison operators 104 | compare_op_expression: ClassVar[str] = ( 105 | "{field} {operator} {value}" # Compare operation query as format string with placeholders {field}, {operator} and {value} 106 | ) 107 | # Mapping between CompareOperators elements and strings used as replacement for {operator} in compare_op_expression 108 | compare_operators: ClassVar[Dict[SigmaCompareExpression.CompareOperators, str]] = { 109 | SigmaCompareExpression.CompareOperators.LT: "<", 110 | SigmaCompareExpression.CompareOperators.LTE: "<=", 111 | SigmaCompareExpression.CompareOperators.GT: ">", 112 | SigmaCompareExpression.CompareOperators.GTE: ">=", 113 | } 114 | 115 | # Null/None expressions 116 
| field_null_expression: ClassVar[str] = ( 117 | "isnull({field})" # Expression for field has null value as format string with {field} placeholder for field name 118 | ) 119 | 120 | # Field value in list, e.g. "field in (value list)" or "field containsall (value list)" 121 | convert_or_as_in: ClassVar[bool] = True # Convert OR as in-expression 122 | convert_and_as_in: ClassVar[bool] = True # Convert AND as in-expression 123 | in_expressions_allow_wildcards: ClassVar[bool] = ( 124 | True # Values in list can contain wildcards. If set to False (default) only plain values are converted into in-expressions. 125 | ) 126 | field_in_list_expression: ClassVar[str] = ( 127 | "{field} {op} ({list})" # Expression for field in list of values as format string with placeholders {field}, {op} and {list} 128 | ) 129 | or_in_operator: ClassVar[str] = ( 130 | "in~" # Operator used to convert OR into in-expressions. Must be set if convert_or_as_in is set 131 | ) 132 | and_in_operator: ClassVar[str] = ( 133 | "has_all" # Operator used to convert AND into in-expressions. 
Must be set if convert_and_as_in is set 134 | ) 135 | list_separator: ClassVar[str] = ", " # List element separator 136 | 137 | # Value not bound to a field 138 | unbound_value_str_expression: ClassVar[str] = ( 139 | "{value}" # Expression for string value not bound to a field as format string with placeholder {value} 140 | ) 141 | unbound_value_num_expression: ClassVar[str] = ( 142 | "{value}" # Expression for number value not bound to a field as format string with placeholder {value} 143 | ) 144 | unbound_value_re_expression: ClassVar[str] = ( 145 | "_=~{value}" # Expression for regular expression not bound to a field as format string with placeholder {value} 146 | ) 147 | 148 | # Query finalization: appending and concatenating deferred query part 149 | deferred_start: ClassVar[str] = "\n| " # String used as separator between main query and deferred parts 150 | deferred_separator: ClassVar[str] = "\n| " # String used to join multiple deferred query parts 151 | deferred_only_query: ClassVar[str] = "*" # String used as query if final query only contains deferred expression 152 | 153 | # We use =~ for eq_token so everything is case insensitive. 
But this cannot be used with ints/numbers in queries 154 | # So we can define a new token to use for SigmaNumeric types and override convert_condition_field_eq_val_num 155 | # to use it 156 | num_eq_token: ClassVar[str] = " == " 157 | 158 | # Override methods 159 | 160 | # For numeric values, need == instead of =~ 161 | def convert_condition_field_eq_val_num( 162 | self, cond: ConditionFieldEqualsValueExpression, state: ConversionState 163 | ) -> Union[str, DeferredQueryExpression]: 164 | """Conversion of field = number value expressions""" 165 | try: 166 | return self.escape_and_quote_field(cond.field) + self.num_eq_token + str(cond.value) 167 | except TypeError: # pragma: no cover 168 | raise NotImplementedError("Field equals numeric value expressions are not supported by the backend.") 169 | 170 | def convert_condition_as_in_expression( 171 | self, cond: Union[ConditionOR, ConditionAND], state: ConversionState 172 | ) -> Union[str, DeferredQueryExpression]: 173 | """Overridden method for conversion of field in value list conditions. 
174 | KQL doesn't really use wildcards, so if we have an 'as_in' condition where one or more of the values has a wildcard, 175 | we can still use the as_in condition, then append on the wildcard value(s) with a startswith, endswith, or contains 176 | expression 177 | """ 178 | 179 | field = self.escape_and_quote_field(cond.args[0].field) # type: ignore 180 | op1 = self.or_in_operator if isinstance(cond, ConditionOR) else self.and_in_operator 181 | op2 = self.or_token if isinstance(cond, ConditionOR) else self.and_token 182 | list_nonwildcard = self.list_separator.join( 183 | [ 184 | self.convert_value_str(arg.value, state) 185 | for arg in cond.args 186 | if isinstance(arg, ConditionFieldEqualsValueExpression) 187 | and ( 188 | (isinstance(arg.value, SigmaString) and not arg.value.contains_special()) 189 | or (isinstance(arg.value, SigmaNumber)) 190 | ) 191 | ] 192 | ) 193 | list_wildcards = [ 194 | arg.value 195 | for arg in cond.args 196 | if isinstance(arg, ConditionFieldEqualsValueExpression) 197 | and isinstance(arg.value, SigmaString) 198 | and arg.value.contains_special() 199 | ] 200 | as_in_expr = "" 201 | # Convert as_in and wildcard values separately 202 | if list_nonwildcard: 203 | as_in_expr = self.field_in_list_expression.format(field=field, op=op1, list=list_nonwildcard) 204 | wildcard_exprs_list = [] 205 | if list_wildcards: 206 | for arg in list_wildcards: 207 | new_cond = ConditionFieldEqualsValueExpression(field=field, value=arg) 208 | if arg[1:-1].contains_special(): # Wildcard in string, not at start or end. 
209 | # We need to get rid of all wildcards, and create a 'and contains' for each element in the list 210 | expr = f"{self.token_separator}{self.and_token}{self.token_separator}".join( 211 | [ 212 | self.contains_expression.format( 213 | field=field, value=self.convert_value_str(SigmaString(str(x)), state) 214 | ) 215 | for x in arg.s 216 | if not isinstance(x, SpecialChars) 217 | ] 218 | ) 219 | expr = self.group_expression.format(expr=expr) 220 | else: 221 | expr = self.convert_condition_field_eq_val_str(new_cond, state) 222 | wildcard_exprs_list.append(expr) 223 | wildcard_exprs = f"{self.token_separator}{op2}{self.token_separator}".join(wildcard_exprs_list) 224 | if as_in_expr and wildcard_exprs: 225 | return as_in_expr + self.token_separator + op2 + self.token_separator + wildcard_exprs 226 | return as_in_expr + wildcard_exprs 227 | 228 | def convert_condition_not(self, cond: ConditionNOT, state: ConversionState) -> Union[str, DeferredQueryExpression]: 229 | """Conversion of NOT conditions. Overridden to surround the group or expr of the 'not' negation with parens, 230 | as expected by KQL. 
231 | """ 232 | arg = cond.args[0] 233 | try: 234 | if arg.__class__ in self.precedence: # group if AND or OR condition is negated 235 | return self.not_token + "(" + str(self.convert_condition_group(arg, state)) + ")" # type: ignore 236 | else: 237 | expr = self.convert_condition(arg, state) # type: ignore 238 | if isinstance(expr, DeferredQueryExpression): # negate deferred expression and pass it to parent 239 | return expr.negate() 240 | else: # convert negated expression to string 241 | return self.not_token + "(" + expr + ")" 242 | except TypeError: # pragma: no cover 243 | raise NotImplementedError("Operator 'not' not supported by the backend") 244 | 245 | def convert_value_str(self, s: Union[SigmaString, SigmaNumber], state: ConversionState) -> str: 246 | """Convert a SigmaString into a plain string which can be used in query.""" 247 | if not isinstance(s, SigmaString): 248 | s = SigmaString(str(s)) 249 | converted = super().convert_value_str(s, state) 250 | # If we have a wildcard in a string, we need to un-escape it 251 | # See issue #13 252 | return re.sub(r"\\\*", r"*", converted) 253 | -------------------------------------------------------------------------------- /sigma/backends/microsoft365defender/__init__.py: -------------------------------------------------------------------------------- 1 | from ..kusto.kusto import KustoBackend 2 | 3 | # TODO: add all backend classes that should be exposed to the user of your backend in the import statement above. 4 | 5 | backends = { # Mapping between backend identifiers and classes. This is used by the pySigma plugin system to recognize backends and expose them with the identifier. 
6 | "microsoft365defender": KustoBackend, 7 | } 8 | -------------------------------------------------------------------------------- /sigma/backends/microsoft365defender/microsoft365defender.py: -------------------------------------------------------------------------------- 1 | from typing import ClassVar, Dict 2 | 3 | from sigma.backends.kusto.kusto import KustoBackend 4 | 5 | 6 | class Microsoft365DefenderBackend(KustoBackend): 7 | """Microsoft 365 Defender Kusto Backend.""" 8 | 9 | name: ClassVar[str] = "[DEPRECATED] Microsoft 365 Defender Backend" 10 | identifier: ClassVar[str] = "microsoft365defender" 11 | formats: ClassVar[Dict[str, str]] = { 12 | "default": "Microsoft 365 Defender Kusto Query Language search strings", 13 | } 14 | -------------------------------------------------------------------------------- /sigma/pipelines/azuremonitor/__init__.py: -------------------------------------------------------------------------------- 1 | from .azuremonitor import azure_monitor_pipeline 2 | 3 | pipelines = { 4 | "azure_monitor": azure_monitor_pipeline, 5 | } 6 | -------------------------------------------------------------------------------- /sigma/pipelines/azuremonitor/azuremonitor.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from sigma.pipelines.kusto_common.postprocessing import ( 4 | PrependQueryTablePostprocessingItem, 5 | ) 6 | from sigma.processing.conditions import ( 7 | ExcludeFieldCondition, 8 | IncludeFieldCondition, 9 | LogsourceCondition, 10 | RuleProcessingItemAppliedCondition, 11 | RuleProcessingStateCondition, 12 | ) 13 | from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline 14 | from sigma.processing.transformations import ( 15 | DropDetectionItemTransformation, 16 | ReplaceStringTransformation, 17 | RuleFailureTransformation, 18 | ) 19 | 20 | from ..kusto_common.errors import InvalidFieldTransformation 21 | from ..kusto_common.schema import 
# Schema object describing all supported Azure Monitor tables and their fields;
# used below to validate field names per table.
AZURE_MONITOR_SCHEMA = create_schema(AzureMonitorSchema, AZURE_MONITOR_TABLES)

# Drop ObjectType fields: they are not carried over into the generated query.
drop_fields_proc_item = ProcessingItem(
    identifier="azure_monitor_drop_fields",
    transformation=DropDetectionItemTransformation(),
    field_name_conditions=[IncludeFieldCondition(["ObjectType"])],
)

## Fieldmappings: rename Sigma field names to the columns of the selected
## Azure Monitor table (driven by the 'query_table' pipeline state).
fieldmappings_proc_item = ProcessingItem(
    identifier="azure_monitor_table_fieldmappings",
    transformation=DynamicFieldMappingTransformation(AZURE_MONITOR_FIELD_MAPPINGS),
)
# Fields that may contain registry key paths and therefore need the
# Sysmon-abbreviation -> full-hive-name replacements below.
REGISTRY_FIELDS = [
    "RegistryKey",
    "RegistryPreviousKey",
    "ObjectName",
]

## Field Value Replacements ProcessingItems
replacement_proc_items = [
    # Sysmon uses abbreviations in RegistryKey values, replace with full key names as the DeviceRegistryEvents schema
    # expects them to be
    # Note: Ensure this comes AFTER field mapping renames, as we're specifying DeviceRegistryEvent fields
    #
    # Do this one first, or else the HKLM only one will replace HKLM and mess up the regex
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_currentcontrolset",
        transformation=ReplaceStringTransformation(
            regex=r"(?i)(^HKLM\\SYSTEM\\CurrentControlSet)",
            replacement=r"HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet001",
        ),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_hklm",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKLM)", replacement=r"HKEY_LOCAL_MACHINE"),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_hku",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKU)", replacement=r"HKEY_USERS"),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    ProcessingItem(
        identifier="azure_monitor_registry_key_replace_hkcr",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKCR)", replacement=r"HKEY_LOCAL_MACHINE\\CLASSES"),
        field_name_conditions=[IncludeFieldCondition(REGISTRY_FIELDS)],
    ),
    # Map Sysmon registry EventType values to the ActionType values the table expects.
    ProcessingItem(
        identifier="azure_monitor_registry_actiontype_value",
        transformation=RegistryActionTypeValueTransformation(),
        field_name_conditions=[IncludeFieldCondition(["EventType"])],
    ),
    # Processing item to transform the Hashes field in the SecurityEvent table to get rid of the hash algorithm prefix in each value
    ProcessingItem(
        identifier="azure_monitor_securityevent_hashes_field_values",
        transformation=SecurityEventHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["FileHash"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "SecurityEvent")],
    ),
    # For every table EXCEPT SecurityEvent (note the negation), split Hashes
    # values into per-algorithm fields.
    ProcessingItem(
        identifier="azure_monitor_hashes_field_values",
        transformation=DefaultHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "SecurityEvent")],
        rule_condition_negation=True,
    ),
    # Processing item to essentially ignore initiated field
    ProcessingItem(
        identifier="azure_monitor_network_initiated_field",
        transformation=DropDetectionItemTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Initiated"])],
        rule_conditions=[LogsourceCondition(category="network_connection")],
    ),
]
def get_valid_fields(table_name):
    """Collect every field name accepted for *table_name*.

    Combines the table's native schema columns, its table-specific field
    mappings, the pipeline-wide generic mappings, and the special "Hashes"
    field handled separately by the hash-value transformations.
    """
    field_sources = (
        AZURE_MONITOR_SCHEMA.tables[table_name].fields,
        AZURE_MONITOR_FIELD_MAPPINGS.table_mappings.get(table_name, {}),
        AZURE_MONITOR_FIELD_MAPPINGS.generic_mappings,
    )
    valid = []
    for source in field_sources:
        valid.extend(source.keys())
    valid.append("Hashes")
    return valid
def azure_monitor_pipeline(query_table: Optional[str] = None) -> ProcessingPipeline:
    """Pipeline for transformations for SigmaRules to use in the Kusto Query Language backend.

    :param query_table: If specified, the table name will be used in the finalizer, otherwise the table name will be selected based on the category of the rule.
    :type query_table: Optional[str]

    :return: ProcessingPipeline for Microsoft Azure Monitor
    :rtype: ProcessingPipeline
    """

    # The query-table state must be set first: every subsequent item keys off
    # the 'query_table' pipeline state it establishes.
    set_query_table_item = ProcessingItem(
        identifier="azure_monitor_set_query_table",
        transformation=SetQueryTableStateTransformation(
            query_table, CATEGORY_TO_TABLE_MAPPINGS, EVENTID_CATEGORY_TO_TABLE_MAPPINGS
        ),
    )

    items = [
        set_query_table_item,
        fieldmappings_proc_item,
        drop_fields_proc_item,
        # generic_field_mappings_proc_item is intentionally disabled (see module-level comments).
    ]
    items.extend(replacement_proc_items)
    items.extend(rule_error_proc_items)
    items.extend(field_error_proc_items)

    return ProcessingPipeline(
        name="Generic Log Sources to Azure Monitor tables and fields",
        priority=10,
        items=items,
        allowed_backends=frozenset(["kusto"]),
        postprocessing_items=[PrependQueryTablePostprocessingItem],  # type: ignore
    )
class AzureMonitorFieldMappings(FieldMappings):
    """Field-mapping container for Azure Monitor; all behavior comes from FieldMappings."""

    pass


# Just map to SecurityEvent for now until we have more mappings for other tables
CATEGORY_TO_TABLE_MAPPINGS = {
    "process_creation": "SecurityEvent",
    "image_load": "SecurityEvent",
    "file_access": "SecurityEvent",
    "file_change": "SecurityEvent",
    "file_delete": "SecurityEvent",
    "file_event": "SecurityEvent",
    "file_rename": "SecurityEvent",
    "registry_add": "SecurityEvent",
    "registry_delete": "SecurityEvent",
    "registry_event": "SecurityEvent",
    "registry_set": "SecurityEvent",
    "network_connection": "SecurityEvent",
}


# Fallback table selection keyed by EventID category (see
# kusto_common.mappings.EVENTID_CATEGORIES) when no rule category matches.
EVENTID_CATEGORY_TO_TABLE_MAPPINGS = {
    "process": "SecurityEvent",
    "logon": "SecurityEvent",
    "registry": "SecurityEvent",
    "file": "SecurityEvent",
    "network": "SecurityEvent",
    "image_load": "SecurityEvent",
    "pipe": "SecurityEvent",
    "wmi": "SecurityEvent",
}
logsource_windows_registry_delete(), 61 | "registry_event": logsource_windows_registry_event(), 62 | "registry_set": logsource_windows_registry_set(), 63 | "network_connection": logsource_windows_network_connection(), 64 | } 65 | 66 | 67 | AZURE_MONITOR_FIELD_MAPPINGS = AzureMonitorFieldMappings( 68 | table_mappings={ 69 | "SecurityEvent": { 70 | "CommandLine": "CommandLine", 71 | "Image": "NewProcessName", 72 | "ParentImage": "ParentProcessName", 73 | "User": "SubjectUserName", 74 | "TargetFilename": "ObjectName", 75 | "SourceIp": "IpAddress", 76 | "DestinationIp": "DestinationIp", 77 | "DestinationPort": "DestinationPort", 78 | "SourcePort": "SourcePort", 79 | "SourceHostname": "WorkstationName", 80 | "DestinationHostname": "DestinationHostname", 81 | "EventID": "EventID", 82 | "ProcessId": "NewProcessId", 83 | "ProcessName": "NewProcessName", 84 | "LogonType": "LogonType", 85 | "TargetUserName": "TargetUserName", 86 | "TargetDomainName": "TargetDomainName", 87 | "TargetLogonId": "TargetLogonId", 88 | "Status": "Status", 89 | "SubStatus": "SubStatus", 90 | "ObjectType": "ObjectType", 91 | "ShareName": "ShareName", 92 | "AccessMask": "AccessMask", 93 | "ServiceName": "ServiceName", 94 | "TicketOptions": "TicketOptions", 95 | "TicketEncryptionType": "TicketEncryptionType", 96 | "TransmittedServices": "TransmittedServices", 97 | "WorkstationName": "WorkstationName", 98 | "LogonProcessName": "LogonProcessName", 99 | "LogonGuid": "LogonGuid", 100 | "Category": "EventSourceName", 101 | "Hashes": "FileHash", 102 | "TargetObject": "ObjectName", 103 | }, 104 | "SigninLogs": { 105 | "User": "UserPrincipalName", 106 | "TargetUserName": "UserPrincipalName", 107 | "src_ip": "IPAddress", 108 | "IpAddress": "IPAddress", 109 | "app": "AppDisplayName", 110 | "Application": "AppDisplayName", 111 | "AuthenticationMethod": "AuthenticationMethodsUsed", 112 | "Status": "Status", 113 | "ResultType": "ResultType", 114 | "ResultDescription": "ResultDescription", 115 | "UserAgent": 
"UserAgent", 116 | "Location": "Location", 117 | "ClientAppUsed": "ClientAppUsed", 118 | "DeviceDetail": "DeviceDetail", 119 | "CorrelationId": "CorrelationId", 120 | "ConditionalAccessStatus": "ConditionalAccessStatus", 121 | "RiskLevelAggregated": "RiskLevelAggregated", 122 | "RiskLevelDuringSignIn": "RiskLevelDuringSignIn", 123 | "RiskDetail": "RiskDetail", 124 | "RiskState": "RiskState", 125 | "MfaDetail": "MfaDetail", 126 | "NetworkLocationDetails": "NetworkLocationDetails", 127 | "AuthenticationProtocol": "AuthenticationProtocol", 128 | "AuthenticationRequirement": "AuthenticationRequirement", 129 | "SignInIdentifier": "SignInIdentifier", 130 | "SignInIdentifierType": "SignInIdentifierType", 131 | "ResourceDisplayName": "ResourceDisplayName", 132 | "ResourceIdentity": "ResourceIdentity", 133 | "AppId": "AppId", 134 | "AuthenticationProcessingDetails": "AuthenticationProcessingDetails", 135 | "IsInteractive": "IsInteractive", 136 | "TokenIssuerName": "TokenIssuerName", 137 | "TokenIssuerType": "TokenIssuerType", 138 | "UserType": "UserType", 139 | "IPAddress": "IPAddress", 140 | "AutonomousSystemNumber": "AutonomousSystemNumber", 141 | }, 142 | }, 143 | generic_mappings={}, 144 | ) 145 | -------------------------------------------------------------------------------- /sigma/pipelines/azuremonitor/schema.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from sigma.pipelines.kusto_common.schema import BaseSchema, FieldMappings 4 | 5 | 6 | @dataclass 7 | class AzureMonitorSchema(BaseSchema): 8 | pass 9 | 10 | 11 | @dataclass 12 | class AzureMonitorFieldMappings(FieldMappings): 13 | pass 14 | -------------------------------------------------------------------------------- /sigma/pipelines/azuremonitor/transformations.py: -------------------------------------------------------------------------------- 1 | from ..kusto_common.transformations import BaseHashesValuesTransformation 2 | 3 | 
@dataclass
class QueryTableSetCondition(RuleProcessingCondition):
    """Rule condition that matches once a query table has been recorded in the pipeline state."""

    def match(
        self,
        pipeline: "sigma.processing.pipeline.ProcessingPipeline",  # noqa: F821 # type: ignore
        rule: Union[SigmaRule, SigmaCorrelationRule],
    ) -> bool:
        """Return True if the pipeline's 'query_table' state is set (non-None)."""
        query_table = pipeline.state.get("query_table")
        return query_table is not None
class InvalidFieldTransformation(DetectionItemFailureTransformation):
    """
    Overrides the apply_detection_item() method from DetectionItemFailureTransformation to also include the field name
    in the error message.

    :param message: base error message supplied by the pipeline (inherited dataclass field).
    """

    def apply_detection_item(self, detection_item: SigmaDetectionItem) -> None:
        """Raise for detection items that carry a field name.

        :param detection_item: the detection item that failed validation.
        :raises SigmaTransformationError: always, when the item has a field name.
        """
        field_name = detection_item.field
        if field_name:  # If no field name is set, don't raise an error because its a keyword
            # Build the message locally instead of assigning to self.message:
            # the previous in-place mutation meant every additional invalid
            # detection item prepended its field name to the already-prefixed
            # instance message, producing duplicated prefixes when the
            # transformation fired more than once.
            raise SigmaTransformationError(
                f"Invalid SigmaDetectionItem field name encountered: {field_name}. " + self.message
            )


class InvalidHashAlgorithmError(Exception):
    """Raised when a Hashes value contains no supported hash algorithm."""

    pass
# Windows Security / Sysmon Event ID -> broad event category. Used to pick a
# backend table when a rule carries an EventID but no usable logsource category.
EVENTID_CATEGORIES = {
    "process": [1, 5, 10, 25, 4688, 4689, 4696],  # Process creation, termination, access, tampering
    "logon": [4624, 4625, 4634, 4647, 4648, 4778, 4779, 4800, 4801, 4802, 4803],  # Logon/logoff events
    "registry": [4656, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 12, 13, 14],  # Registry operations
    "file": [2, 11, 15, 23, 26, 27, 28, 29, 4656, 4658, 4660, 4663],  # File operations
    "network": [3, 22, 5140, 5145, 5156, 5157, 5158, 5159],  # Network and DNS events
    "image_load": [7],  # Image loaded
    "pipe": [17, 18],  # Pipe events
    "wmi": [19, 20, 21],  # WMI events
    "service": [4697, 4698, 4699, 4700, 4701, 4702],  # Service and scheduled task operations
    "account": [4720, 4722, 4723, 4724, 4725, 4726, 4738, 4740, 4767],  # Account management
}


def get_category_from_eventid(eventid: int) -> Optional[str]:
    """Return the event category for *eventid*, or None when it is unknown.

    Categories are checked in dictionary order, so an Event ID listed under
    several categories (e.g. 4656) resolves to the first category containing it.
    """
    for category, eventids in EVENTID_CATEGORIES.items():
        if eventid in eventids:
            return category
    return None


def get_table_from_eventid(eventid: int, category_table_mappings: dict) -> str:
    """Return the table name mapped to *eventid*'s category.

    :param eventid: Windows/Sysmon Event ID from the rule.
    :param category_table_mappings: pipeline-specific category -> table mapping.
    :return: the mapped table name, or "" when the Event ID is unknown or the
        category is absent from the mapping.
    """
    category = get_category_from_eventid(eventid)
    if category is None:
        return ""
    return category_table_mappings.get(category, "")
@dataclass
class FieldMappings:
    """Sigma-field -> backend-column mappings.

    table_mappings holds per-table renames (value may be a string or a list of
    candidate columns); generic_mappings is the table-independent fallback.
    """

    table_mappings: Dict[str, Dict[str, Union[str, List[str]]]] = field(default_factory=dict)
    generic_mappings: Dict[str, str] = field(default_factory=dict)

    def get_field_mapping(self, table_name: str, sigma_field: str) -> str:
        """Resolve *sigma_field* for *table_name*.

        Resolution order: table-specific mapping (first entry when the mapping
        is a list), then generic mapping, then the field name itself.
        """
        mapped = self.table_mappings.get(table_name, {}).get(sigma_field)
        if not mapped:
            return self.generic_mappings.get(sigma_field, sigma_field)
        if isinstance(mapped, list):
            return mapped[0]
        return mapped
class DynamicFieldMappingTransformation(FieldMappingTransformation):
    """
    Dynamically sets the mapping dictionary based on the pipeline state or rule's category.

    :param field_mappings: A FieldMappings schema object that contains the table_mappings and generic_mappings.
    :type field_mappings: FieldMappings schema object
    """

    def __init__(self, field_mappings: FieldMappings):
        # Parent is seeded with the generic mappings; the table-specific mapping
        # is swapped in per rule by set_dynamic_mapping() below.
        super().__init__(field_mappings.generic_mappings)  # type: ignore
        self.field_mappings = field_mappings

    def set_dynamic_mapping(self, pipeline):
        """
        Set the mapping dynamically based on the pipeline state 'query_table' or the rule's logsource category.
        """

        # We should always have a query_table in the pipeline state, will implement mapping based on rule category later if not
        if "query_table" in pipeline.state:
            query_table = pipeline.state["query_table"]
            # Unknown tables fall back to an empty mapping (no renames applied).
            self.mapping = self.field_mappings.table_mappings.get(query_table, {})
        else:
            # TODO: Implement mapping based on rule category
            pass

    def apply(
        self,
        pipeline: "sigma.processing.pipeline.ProcessingPipeline",  # noqa: F821 # type: ignore
        rule: Union["SigmaRule", "SigmaCorrelationRule"],  # noqa: F821 # type: ignore
    ) -> None:
        """Apply dynamic mapping before the field name transformations."""
        self.set_dynamic_mapping(pipeline)  # Dynamically update the mapping
        super().apply(pipeline, rule)  # Call parent method to continue the transformation process


class GenericFieldMappingTransformation(FieldMappingTransformation):
    """
    Transformation for applying generic field mappings after table-specific mappings.
    """

    def __init__(self, field_mappings: FieldMappings):
        super().__init__(field_mappings.generic_mappings)  # type: ignore

    def apply_detection_item(
        self, detection_item: SigmaDetectionItem
    ) -> Optional[Union[SigmaDetectionItem, SigmaString]]:
        # Rename the field in place when a generic mapping exists; otherwise
        # the detection item passes through unchanged.
        if detection_item.field in self.mapping:
            detection_item.field = self.mapping[detection_item.field]  # type: ignore
        return detection_item


class BaseHashesValuesTransformation(DetectionItemTransformation):
    """
    Base class for transforming the Hashes field to get rid of the hash algorithm prefix in each value and create new detection items for each hash type.
    """

    def __init__(self, valid_hash_algos: List[str], field_prefix: Optional[str] = None, drop_algo_prefix: bool = False):
        """
        :param valid_hash_algos: A list of valid hash algorithms that are supported by the table.
        :param field_prefix: The prefix to use for the new detection items.
        :param drop_algo_prefix: Whether to drop the algorithm prefix in the new field name, e.g. "FileHashSHA256" -> "FileHash".
        """
        self.valid_hash_algos = valid_hash_algos
        self.field_prefix = field_prefix or ""
        self.drop_algo_prefix = drop_algo_prefix

    def apply_detection_item(
        self, detection_item: SigmaDetectionItem
    ) -> Optional[Union[SigmaDetection, SigmaDetectionItem]]:
        """Split ALGO|VALUE / ALGO=VALUE hash strings into per-algorithm detection items, OR-linked.

        :raises InvalidHashAlgorithmError: when no value yields a recognizable algorithm.
        """
        to_return = []
        no_valid_hash_algo = True
        algo_dict = defaultdict(list)  # map to keep track of algos and lists of values
        if not isinstance(detection_item.value, list):
            detection_item.value = [detection_item.value]
        for d in detection_item.value:
            hash_value = d.to_plain().split("|")  # sometimes if ALGO|VALUE
            if len(hash_value) == 1:  # and sometimes its ALGO=VALUE
                hash_value = hash_value[0].split("=")
            if len(hash_value) == 2:
                # Explicit algorithm prefix: accept only algorithms the table supports.
                hash_algo = (
                    hash_value[0].lstrip("*").upper()
                    if hash_value[0].lstrip("*").upper() in self.valid_hash_algos
                    else ""
                )
                if hash_algo:
                    no_valid_hash_algo = False
                hash_value = hash_value[1]
            else:
                # Bare value: infer the algorithm from the hex-digest length.
                hash_value = hash_value[0]
                if len(hash_value) == 32:  # MD5
                    hash_algo = "MD5"
                    no_valid_hash_algo = False
                elif len(hash_value) == 40:  # SHA1
                    hash_algo = "SHA1"
                    no_valid_hash_algo = False
                elif len(hash_value) == 64:  # SHA256
                    hash_algo = "SHA256"
                    no_valid_hash_algo = False
                elif len(hash_value) == 128:  # SHA512
                    # NOTE(review): this branch marks the item valid and emits a
                    # SHA512-suffixed field even when "SHA512" is not in
                    # valid_hash_algos — confirm this is intended.
                    hash_algo = "SHA512"
                    no_valid_hash_algo = False
                else:  # Invalid algo, no fieldname for keyword search
                    hash_algo = ""
            field_name = self.field_prefix
            if not self.drop_algo_prefix:
                field_name += hash_algo
            algo_dict[field_name].append(hash_value)
        if no_valid_hash_algo:
            raise InvalidHashAlgorithmError(
                "No valid hash algo found in Hashes field. Please use one of the following: "
                + ", ".join(self.valid_hash_algos)
            )
        for k, v in algo_dict.items():
            if k:  # Filter out invalid hash algo types
                to_return.append(
                    SigmaDetectionItem(
                        field=k if k != "keyword" else None, modifiers=[], value=[SigmaString(x) for x in v]
                    )
                )
        return SigmaDetection(detection_items=to_return, item_linking=ConditionOR)
@dataclass
class SetQueryTableStateTransformation(Transformation):
    """Sets rule query table in pipeline state query_table key

    The following priority is used to determine the value to set:
    1) The value provided in the val argument
    2) If the query_table is already set in the pipeline state, use that value (e.g. set in a previous pipeline, like via YAML in sigma-cli for user-defined query tables)
    3) If the rule's logsource category is present in the category_to_table_mappings dictionary, use that value
    4) If the rule has an EventID, use the table name from the eventid_to_table_mappings dictionary
    5) If none of the above are present, raise an error

    :param val: The table name to set in the pipeline state. If not provided, the table name will be determined from the rule's logsource category.
    :param category_to_table_mappings: A dictionary mapping logsource categories to table names. If not provided, the default category_to_table_mappings will be used.
    :param event_id_category_to_table_mappings: A dictionary mapping Event ID categories to table names, used as the last-resort lookup.
    """

    val: Any = None
    category_to_table_mappings: Dict[str, Any] = field(default_factory=dict)
    event_id_category_to_table_mappings: Dict[str, Any] = field(default_factory=dict)

    def apply_detection_item(self, detection_item: SigmaDetectionItem) -> Optional[str]:
        """
        Return the query table derived from an EventID/EventCode detection item, or None when no table can be determined.
        """
        if detection_item.field == "EventID" or detection_item.field == "EventCode":
            for value in detection_item.value:
                # First Event ID that maps to a table wins.
                if table_name := get_table_from_eventid(
                    int(value.to_plain()), self.event_id_category_to_table_mappings
                ):
                    return table_name
        return None

    def apply_detection(self, detection: SigmaDetection) -> Optional[str]:
        """Walk the detection's items and return the first table name derived from an EventID/EventCode item."""
        for i, detection_item in enumerate(detection.detection_items):
            if isinstance(detection_item, SigmaDetection):  # recurse into nested detection items
                # NOTE(review): the recursive result is discarded, so a table
                # found only inside a nested detection is not returned — confirm
                # this is intended.
                self.apply_detection(detection_item)
            else:
                if (
                    self.processing_item is None
                    or self.processing_item.match_detection_item(self._pipeline, detection_item)
                ) and (r := self.apply_detection_item(detection_item)) is not None:
                    self.processing_item_applied(detection.detection_items[i])
                    return r

    def apply(self, pipeline: "ProcessingPipeline", rule: "SigmaRule") -> None:  # type: ignore # noqa: F821
        """Determine the rule's query table (see class docstring for priority) and store it in pipeline.state['query_table'].

        :raises SigmaTransformationError: when no table name can be determined.
        """
        super().apply(pipeline, rule)

        # Init table_name to None, will be set in the following if statements
        table_name = None
        # Set table_name based on the following priority:
        # 1) The value provided in the val argument
        if self.val:
            table_name = self.val
        # 2) If the query_table is already set in the pipeline state, use that value (e.g. set in a previous pipeline, like via YAML in sigma-cli for user-defined query tables)
        elif pipeline.state.get("query_table"):
            table_name = pipeline.state.get("query_table")
        # 3) If the rule's logsource category is present in the category_to_table_mappings dictionary, use that value
        elif rule.logsource.category:
            category = rule.logsource.category
            table_name = self.category_to_table_mappings.get(category)
        # 4) Check if the rule has an EventID, use the table name from the eventid_to_table_mappings dictionary
        else:
            for section_title, detection in rule.detection.detections.items():
                # We only want event types from selection sections, not filters
                if re.match(r"^sel.*", section_title.lower()):
                    if (r := self.apply_detection(detection)) is not None:
                        table_name = r
                        break

        if table_name:
            if isinstance(table_name, list):
                table_name = table_name[0]  # Use the first table if it's a list
            pipeline.state["query_table"] = table_name
        else:
            raise SigmaTransformationError(
                f"Unable to determine table name from rule. The query table is determined in the following order of priority:\n"
                f" 1) The value provided to processing pipeline's query_table parameter, if using a Python script.\n"
                f" 2) If the query_table is already set in the pipeline state, such as from a custom user-defined pipeline if using sigma-cli.\n"
                f" 3) If the rule's logsource category is present in the pipeline's category_to_table_mappings dictionary in mappings.py, use that value.\n"
                f" 4) If the rule has an EventID, use the table name from the pipeline's eventid_to_table_mappings dictionary in mappings.py.\n"
                f"For more details, see https://github.com/AttackIQ/pySigma-backend-kusto/blob/main/README.md#%EF%B8%8F-custom-table-names-new-in-030-beta."
            )
## Change field value AFTER field transformations from Sysmon values to values expected in the pipelines registry table action field
class RegistryActionTypeValueTransformation(ValueTransformation):
    """Custom ValueTransformation transformation. The Microsoft DeviceRegistryEvents table expect the ActionType to
    be a slightly different set of values than what Sysmon specified, so this will change them to the correct value."""

    value_mappings = {  # Sysmon EventType -> DeviceRegistryEvents ActionType
        "CreateKey": "RegistryKeyCreated",
        "DeleteKey": ["RegistryKeyDeleted", "RegistryValueDeleted"],
        "SetValue": "RegistryValueSet",
        "RenameKey": ["RegistryValueSet", "RegistryKeyCreated"],
    }

    def apply_value(self, field: str, val: SigmaType) -> Optional[Union[SigmaType, Iterable[SigmaType]]]:
        """Map a Sysmon EventType value to the DeviceRegistryEvents ActionType value(s); unmapped values pass through unchanged."""
        plain = val.to_plain()
        mapped = self.value_mappings.get(plain, plain)
        # Some Sysmon event types fan out to several ActionType candidates.
        if not isinstance(mapped, list):
            return SigmaString(mapped)
        return [SigmaString(entry) for entry in mapped]
def microsoft_365_defender_pipeline(
    transform_parent_image: Optional[bool] = True, query_table: Optional[str] = None
) -> ProcessingPipeline:
    """DEPRECATED: Use microsoft_xdr_pipeline instead.

    Thin compatibility shim kept for backwards compatibility; it forwards all
    arguments to microsoft_xdr_pipeline unchanged.

    :param transform_parent_image: forwarded to microsoft_xdr_pipeline.
    :param query_table: forwarded to microsoft_xdr_pipeline.
    :return: the ProcessingPipeline built by microsoft_xdr_pipeline.
    """
    # Local import keeps the module import-time behavior unchanged.
    import warnings

    # Actually signal the deprecation at runtime instead of only noting it in
    # the docstring, so callers see it via the standard warnings machinery.
    warnings.warn(
        "microsoft_365_defender_pipeline is deprecated; use microsoft_xdr_pipeline instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return microsoft_xdr_pipeline(transform_parent_image, query_table)
## Event ID category (from kusto_common EVENTID_CATEGORIES) -> Query Table Names
EVENTID_CATEGORY_TO_TABLE_MAPPINGS = {
    "process": "DeviceProcessEvents",
    "logon": "DeviceLogonEvents",
    "registry": "DeviceRegistryEvents",
    "file": "DeviceFileEvents",
    "network": "DeviceNetworkEvents",
    "image_load": "DeviceImageLoadEvents",
    "pipe": "DeviceEvents",
    "wmi": "DeviceEvents",
}

## Rule Categories -> RuleConditions
CATEGORY_TO_CONDITIONS_MAPPINGS = {
    "process_creation": logsource_windows_process_creation(),
    "image_load": logsource_windows_image_load(),
    "file_access": logsource_windows_file_access(),
    "file_change": logsource_windows_file_change(),
    "file_delete": logsource_windows_file_delete(),
    "file_event": logsource_windows_file_event(),
    "file_rename": logsource_windows_file_rename(),
    "registry_add": logsource_windows_registry_add(),
    "registry_delete": logsource_windows_registry_delete(),
    "registry_event": logsource_windows_registry_event(),
    "registry_set": logsource_windows_registry_set(),
    "network_connection": logsource_windows_network_connection(),
}


class MicrosoftXDRFieldMappings(FieldMappings):
    """Concrete FieldMappings type for the Microsoft XDR pipeline; resolution behavior is inherited unchanged."""

    pass
"Product": "ProcessVersionInfoProductName", 74 | "Company": "ProcessVersionInfoCompanyName", 75 | "OriginalFileName": "ProcessVersionInfoOriginalFileName", 76 | "CommandLine": "ProcessCommandLine", 77 | # CurrentDirectory: ? 78 | "User": "AccountName", 79 | # LogonGuid: ? 80 | "LogonId": "LogonId", 81 | # TerminalSessionId: ? 82 | "IntegrityLevel": "ProcessIntegrityLevel", 83 | "sha1": "SHA1", 84 | "sha256": "SHA256", 85 | "md5": "MD5", 86 | # 'ParentProcessGuid': ?, 87 | "ParentProcessId": "InitiatingProcessId", 88 | "ParentImage": "InitiatingProcessFolderPath", 89 | "ParentCommandLine": "InitiatingProcessCommandLine", 90 | "ParentUser": "InitiatingProcessAccountName", 91 | "GrandParentImage": "InitiatingProcessParentFileName", 92 | }, 93 | "DeviceImageLoadEvents": { 94 | # 'ProcessGuid': ?, 95 | "ProcessId": "InitiatingProcessId", 96 | "Image": "InitiatingProcessFolderPath", # File path of the process that loaded the image 97 | "ImageLoaded": "FolderPath", 98 | "FileVersion": "InitiatingProcessVersionInfoProductVersion", 99 | "Description": "InitiatingProcessVersionInfoFileDescription", 100 | "Product": "InitiatingProcessVersionInfoProductName", 101 | "Company": "InitiatingProcessVersionInfoCompanyName", 102 | "OriginalFileName": "InitiatingProcessVersionInfoOriginalFileName", 103 | # 'Hashes': ?, 104 | "sha1": "SHA1", 105 | "sha256": "SHA256", 106 | "md5": "MD5", 107 | # 'Signed': ? 108 | # 'Signature': ? 109 | # 'SignatureStatus': ? 
110 | "User": "InitiatingProcessAccountName", 111 | }, 112 | "DeviceFileEvents": { # file_*, Sysmon EventID 11 (create), 23 (delete) -> DeviceFileEvents table 113 | # 'ProcessGuid': ?, 114 | "ProcessId": "InitiatingProcessId", 115 | "Image": "InitiatingProcessFolderPath", 116 | "TargetFilename": "FolderPath", 117 | # 'CreationUtcTime': 'Timestamp', 118 | "User": "RequestAccountName", 119 | # 'Hashes': ?, 120 | "sha1": "SHA1", 121 | "sha256": "SHA256", 122 | "md5": "MD5", 123 | "ObjectName": "FolderPath", 124 | }, 125 | "DeviceNetworkEvents": { # network_connection, Sysmon EventID 3 -> DeviceNetworkEvents table 126 | # 'ProcessGuid': ?, 127 | "ProcessId": "InitiatingProcessId", 128 | "Image": "InitiatingProcessFolderPath", 129 | "User": "InitiatingProcessAccountName", 130 | "Protocol": "Protocol", 131 | # 'Initiated': ?, 132 | # 'SourceIsIpv6': ?, 133 | "SourceIp": "LocalIP", 134 | "SourceHostname": "DeviceName", 135 | "SourcePort": "LocalPort", 136 | # 'SourcePortName': ?, 137 | # 'DestinationIsIpv6': ?, 138 | "DestinationIp": "RemoteIP", 139 | "DestinationHostname": "RemoteUrl", 140 | "DestinationPort": "RemotePort", 141 | # 'DestinationPortName': ?, 142 | }, 143 | "DeviceRegistryEvents": { 144 | # registry_*, Sysmon EventID 12 (create/delete), 13 (value set), 14 (key/value rename) -> DeviceRegistryEvents table, 145 | "EventType": "ActionType", 146 | # 'ProcessGuid': ?, 147 | "ProcessId": "InitiatingProcessId", 148 | "Image": "InitiatingProcessFolderPath", 149 | "TargetObject": "RegistryKey", 150 | # 'NewName': ? 
151 | "Details": "RegistryValueData", 152 | "User": "InitiatingProcessAccountName", 153 | "ObjectName": "RegistryKey", 154 | }, 155 | }, 156 | generic_mappings={ 157 | "EventType": "ActionType", 158 | "User": "InitiatingProcessAccountName", 159 | "CommandLine": "InitiatingProcessCommandLine", 160 | "Image": "InitiatingProcessFolderPath", 161 | "ProcessName": "InitiatingProcessFolderPath", 162 | "SourceImage": "InitiatingProcessFolderPath", 163 | "ProcessId": "InitiatingProcessId", 164 | "md5": "InitiatingProcessMD5", 165 | "sha1": "InitiatingProcessSHA1", 166 | "sha256": "InitiatingProcessSHA256", 167 | "ParentProcessId": "InitiatingProcessParentId", 168 | "ParentCommandLine": "InitiatingProcessParentCommandLine", 169 | "Company": "InitiatingProcessVersionInfoCompanyName", 170 | "Description": "InitiatingProcessVersionInfoFileDescription", 171 | "OriginalFileName": "InitiatingProcessVersionInfoOriginalFileName", 172 | "Product": "InitiatingProcessVersionInfoProductName", 173 | }, 174 | ) 175 | -------------------------------------------------------------------------------- /sigma/pipelines/microsoftxdr/microsoftxdr.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from sigma.processing.conditions import ( 4 | DetectionItemProcessingItemAppliedCondition, 5 | ExcludeFieldCondition, 6 | IncludeFieldCondition, 7 | LogsourceCondition, 8 | RuleProcessingItemAppliedCondition, 9 | RuleProcessingStateCondition, 10 | ) 11 | from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline 12 | from sigma.processing.transformations import ( 13 | DropDetectionItemTransformation, 14 | FieldMappingTransformation, 15 | ReplaceStringTransformation, 16 | RuleFailureTransformation, 17 | ) 18 | 19 | from ..kusto_common.errors import InvalidFieldTransformation 20 | from ..kusto_common.postprocessing import PrependQueryTablePostprocessingItem 21 | from ..kusto_common.schema import create_schema 22 | from 
# Full schema (tables -> fields with types/descriptions) built from the scraped table definitions.
MICROSOFT_XDR_SCHEMA = create_schema(MicrosoftXDRSchema, MICROSOFT_XDR_TABLES)

# Mapping from ParentImage to InitiatingProcessParentFileName. Must be used alongside of ParentImageValueTransformation
parent_image_field_mapping = {"ParentImage": "InitiatingProcessParentFileName"}

# Drop EventID field: the table has already been chosen from it by the time
# field mappings run, and the XDR tables have no EventID column.
drop_eventid_proc_item = ProcessingItem(
    identifier="microsoft_xdr_drop_eventid",
    transformation=DropDetectionItemTransformation(),
    field_name_conditions=[IncludeFieldCondition(["EventID", "EventCode", "ObjectType"])],
)


## Fieldmappings
# Table-specific field renames, selected dynamically from the pipeline's
# query_table state by DynamicFieldMappingTransformation.
fieldmappings_proc_item = ProcessingItem(
    identifier="microsoft_xdr_table_fieldmappings",
    transformation=DynamicFieldMappingTransformation(MICROSOFT_XDR_FIELD_MAPPINGS),
)
## Generic Field Mappings, keep this last
## Exclude any fields already mapped, e.g. if a table mapping has been applied.
# This will fix the case where ProcessId is usually mapped to InitiatingProcessId, EXCEPT for the DeviceProcessEvents table where it stays as ProcessId.
# So we can map ProcessId to ProcessId in the DeviceProcessEvents table mapping, and prevent the generic mapping to InitiatingProcessId from being applied
# by adding a detection item condition that the table field mappings have been applied

generic_field_mappings_proc_item = ProcessingItem(
    identifier="microsoft_xdr_generic_fieldmappings",
    transformation=GenericFieldMappingTransformation(MICROSOFT_XDR_FIELD_MAPPINGS),
    # Negated condition: only apply the generic mapping to detection items the
    # table-specific mapping item did NOT already touch.
    detection_item_conditions=[DetectionItemProcessingItemAppliedCondition("microsoft_xdr_table_fieldmappings")],
    detection_item_condition_linking=any,
    detection_item_condition_negation=True,
)
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "PreviousRegistryKey"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_registry_key_replace_hkcr",
        # NOTE(review): HKCR is rewritten to HKEY_LOCAL_MACHINE\CLASSES here — confirm
        # this matches how DeviceRegistryEvents reports the classes hive.
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKCR)", replacement=r"HKEY_LOCAL_MACHINE\\CLASSES"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "PreviousRegistryKey"])],
    ),
    # Rewrite ActionType values via RegistryActionTypeValueTransformation
    # (presumably Sysmon event types -> XDR ActionType values; see kusto_common).
    ProcessingItem(
        identifier="microsoft_xdr_registry_actiontype_value",
        transformation=RegistryActionTypeValueTransformation(),
        field_name_conditions=[IncludeFieldCondition(["ActionType"])],
    ),
    # Extract Domain from Username fields
    ProcessingItem(
        identifier="microsoft_xdr_domain_username_extract",
        transformation=SplitDomainUserTransformation(),
        field_name_conditions=[IncludeFieldCondition(["AccountName", "InitiatingProcessAccountName"])],
    ),
    ProcessingItem(
        identifier="microsoft_xdr_hashes_field_values",
        transformation=XDRHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
    ),
    # Processing item to essentially ignore initiated field
    ProcessingItem(
        identifier="microsoft_xdr_network_initiated_field",
        transformation=DropDetectionItemTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Initiated"])],
        rule_conditions=[LogsourceCondition(category="network_connection")],
    ),
]

# ParentImage -> InitiatingProcessParentFileName
parent_image_proc_items = [
    # First apply fieldmapping from ParentImage to InitiatingProcessParentFileName for non process-creation rules
    ProcessingItem(
        identifier="microsoft_xdr_parent_image_fieldmapping",
        transformation=FieldMappingTransformation(parent_image_field_mapping),  # type: ignore
        rule_conditions=[
            # Exclude process_creation events, there's direct field mapping in this schema table
            LogsourceCondition(category="process_creation")
        ],
        rule_condition_negation=True,
    ),
    # Second, extract the parent process name from the full path
    ProcessingItem(
        identifier="microsoft_xdr_parent_image_name_value",
        transformation=ParentImageValueTransformation(),
        field_name_conditions=[
            IncludeFieldCondition(["InitiatingProcessParentFileName"]),
        ],
        rule_conditions=[
            # Exclude process_creation events, there's direct field mapping in this schema table
            LogsourceCondition(category="process_creation")
        ],
        rule_condition_negation=True,
    ),
]

# Exceptions/Errors ProcessingItems
# Catch-all for when the query table is not set, meaning the rule could not be mapped to a table or the table name was not set
rule_error_proc_items = [
    # Category Not Supported or Query Table Not Set
    ProcessingItem(
        identifier="microsoft_xdr_unsupported_rule_category_or_missing_query_table",
        transformation=RuleFailureTransformation(
            "Rule category not yet supported by the Microsoft XDR pipeline or query_table is not set."
165 | ), 166 | rule_conditions=[ 167 | RuleProcessingItemAppliedCondition("microsoft_xdr_set_query_table"), # type: ignore 168 | RuleProcessingStateCondition("query_table", None), # type: ignore 169 | ], 170 | rule_condition_linking=all, 171 | ) 172 | ] 173 | 174 | 175 | def get_valid_fields(table_name): 176 | return ( 177 | list(MICROSOFT_XDR_SCHEMA.tables[table_name].fields.keys()) 178 | + list(MICROSOFT_XDR_FIELD_MAPPINGS.table_mappings.get(table_name, {}).keys()) 179 | + list(MICROSOFT_XDR_FIELD_MAPPINGS.generic_mappings.keys()) 180 | + ["Hashes"] 181 | ) 182 | 183 | 184 | field_error_proc_items = [] 185 | 186 | for table_name in MICROSOFT_XDR_SCHEMA.tables.keys(): 187 | valid_fields = get_valid_fields(table_name) 188 | 189 | field_error_proc_items.append( 190 | ProcessingItem( 191 | identifier=f"microsoft_xdr_unsupported_fields_{table_name}", 192 | transformation=InvalidFieldTransformation( 193 | f"Please use valid fields for the {table_name} table, or the following fields that have keymappings in this " 194 | f"pipeline:\n{', '.join(sorted(set(valid_fields)))}" 195 | ), 196 | field_name_conditions=[ExcludeFieldCondition(fields=valid_fields)], 197 | rule_conditions=[ 198 | RuleProcessingItemAppliedCondition("microsoft_xdr_set_query_table"), 199 | RuleProcessingStateCondition("query_table", table_name), 200 | ], 201 | rule_condition_linking=all, 202 | ) 203 | ) 204 | 205 | # Add a catch-all error for custom table names 206 | field_error_proc_items.append( 207 | ProcessingItem( 208 | identifier="microsoft_xdr_unsupported_fields_custom", 209 | transformation=InvalidFieldTransformation( 210 | "Invalid field name for the custom table. Please ensure you're using valid fields for your custom table." 
),
        field_name_conditions=[
            ExcludeFieldCondition(fields=list(MICROSOFT_XDR_FIELD_MAPPINGS.generic_mappings.keys()) + ["Hashes"])
        ],
        rule_conditions=[
            RuleProcessingItemAppliedCondition("microsoft_xdr_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
)


def microsoft_xdr_pipeline(
    transform_parent_image: Optional[bool] = True, query_table: Optional[str] = None
) -> ProcessingPipeline:
    """Pipeline for transformations for SigmaRules to use in the Kusto Query Language backend.
    Field mappings based on documentation found here:
    https://learn.microsoft.com/en-us/microsoft-365/security/defender/advanced-hunting-query-language?view=o365-worldwide

    :param query_table: If specified, the table name will be used in the finalizer, otherwise the table name will be selected based on the category of the rule.
    :type query_table: Optional[str]
    :param transform_parent_image: If True, the ParentImage field will be mapped to InitiatingProcessParentFileName, and
    the parent process name in the ParentImage will be extracted and used. This is because the Microsoft 365 Defender
    table schema does not contain a InitiatingProcessParentFolderPath field like it does for InitiatingProcessFolderPath.
    i.e. ParentImage: C:\\Windows\\System32\\whoami.exe -> InitiatingProcessParentFileName: whoami.exe.
    Defaults to True
    :type transform_parent_image: Optional[bool]

    :return: ProcessingPipeline for the Microsoft XDR (365 Defender) tables
    :rtype: ProcessingPipeline
    """

    pipeline_items = [
        ProcessingItem(
            identifier="microsoft_xdr_set_query_table",
            transformation=SetQueryTableStateTransformation(
                query_table, CATEGORY_TO_TABLE_MAPPINGS, EVENTID_CATEGORY_TO_TABLE_MAPPINGS
            ),
        ),
        drop_eventid_proc_item,
        fieldmappings_proc_item,
        generic_field_mappings_proc_item,
        *replacement_proc_items,
        *rule_error_proc_items,
        *field_error_proc_items,
    ]

    if transform_parent_image:
        # Insert the ParentImage handling right after the four field-mapping items
        # (set_query_table, drop_eventid, fieldmappings, generic) and before the
        # value-replacement items, so the renamed field gets its value extracted.
        pipeline_items[4:4] = parent_image_proc_items

    return ProcessingPipeline(
        name="Generic Log Sources to Windows XDR tables and fields",
        priority=10,
        items=pipeline_items,
        allowed_backends=frozenset(["kusto"]),
        postprocessing_items=[PrependQueryTablePostprocessingItem],  # type: ignore
    )
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/schema.py:
--------------------------------------------------------------------------------
from dataclasses import dataclass

from sigma.pipelines.kusto_common.schema import BaseSchema, FieldMappings


# Marker subclasses: concrete schema/field-mapping types for this pipeline.
# All behavior lives in the kusto_common base classes.
@dataclass
class MicrosoftXDRSchema(BaseSchema):
    pass


@dataclass
class MicrosoftXDRFieldMappings(FieldMappings):
    pass
--------------------------------------------------------------------------------
/sigma/pipelines/microsoftxdr/transformations.py:
--------------------------------------------------------------------------------
from typing import Iterable, Optional, Union

from sigma.processing.transformations import (
    DetectionItemTransformation,
    ValueTransformation,
)
from sigma.rule import SigmaDetection, SigmaDetectionItem, SigmaString
from
sigma.types import SigmaType 9 | 10 | from ..kusto_common.transformations import BaseHashesValuesTransformation 11 | 12 | 13 | ## Custom DetectionItemTransformation to split domain and user, if applicable 14 | class SplitDomainUserTransformation(DetectionItemTransformation): 15 | """Custom DetectionItemTransformation transformation to split a User field into separate domain and user fields, 16 | if applicable. This is to handle the case where the Sysmon `User` field may contain a domain AND username, and 17 | Advanced Hunting queries separate out the domain and username into separate fields. 18 | If a matching field_name_condition field uses the schema DOMAIN\\USER, a new SigmaDetectionItem 19 | will be made for the Domain and put inside a SigmaDetection with the original User SigmaDetectionItem 20 | (minus the domain) for the matching SigmaDetectionItem. 21 | 22 | You should use this with a field_name_condition for `IncludeFieldName(['field', 'names', 'for', 'username']`)""" 23 | 24 | def apply_detection_item( 25 | self, detection_item: SigmaDetectionItem 26 | ) -> Optional[Union[SigmaDetection, SigmaDetectionItem]]: 27 | to_return = [] 28 | if not isinstance(detection_item.value, list): # Ensure its a list, but it most likely will be 29 | detection_item.value = list(detection_item.value) 30 | for d in detection_item.value: 31 | username = d.to_plain().split("\\") 32 | username_field_mappings = { 33 | "AccountName": "AccountDomain", 34 | "RequestAccountName": "RequestAccountDomain", 35 | "InitiatingProcessAccountName": "InitiatingProcessAccountDomain", 36 | } 37 | if len(username) == 2: 38 | domain = username[0] 39 | username = [SigmaString(username[1])] 40 | 41 | domain_field = username_field_mappings.get(detection_item.field, "InitiatingProcessAccountDomain") 42 | domain_value = [SigmaString(domain)] 43 | user_detection_item = SigmaDetectionItem( 44 | field=detection_item.field, 45 | modifiers=[], 46 | value=username, 47 | ) 48 | domain_detection_item = 
SigmaDetectionItem(field=domain_field, modifiers=[], value=domain_value) 49 | to_return.append(SigmaDetection(detection_items=[user_detection_item, domain_detection_item])) 50 | else: 51 | to_return.append( 52 | SigmaDetection( 53 | [ 54 | SigmaDetectionItem( 55 | field=detection_item.field, modifiers=detection_item.modifiers, value=username 56 | ) 57 | ] 58 | ) 59 | ) 60 | return SigmaDetection(to_return) 61 | 62 | 63 | # Extract parent process name from ParentImage after applying ParentImage field mapping 64 | class ParentImageValueTransformation(ValueTransformation): 65 | """Custom ValueTransformation transformation. Unfortunately, none of the table schemas have 66 | InitiatingProcessParentFolderPath like they do InitiatingProcessFolderPath. Due to this, we cannot directly map the 67 | Sysmon `ParentImage` field to a table field. However, InitiatingProcessParentFileName is an available field in 68 | nearly all tables, so we will extract the process name and use that instead. 69 | 70 | Use this transformation BEFORE mapping ParentImage to InitiatingProcessFileName 71 | """ 72 | 73 | def apply_value(self, field: str, val: SigmaType) -> Optional[Union[SigmaType, Iterable[SigmaType]]]: 74 | parent_process_name = str(val.to_plain().split("\\")[-1].split("/")[-1]) 75 | return SigmaString(parent_process_name) 76 | 77 | 78 | class XDRHashesValuesTransformation(BaseHashesValuesTransformation): 79 | """ 80 | Transforms the Hashes field in XDR Tables to create fields for each hash algorithm. 
    """

    def __init__(self):
        # Accept MD5/SHA1/SHA256; the empty field_prefix leaves the algorithm
        # names unprefixed (behavior defined in BaseHashesValuesTransformation).
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256"], field_prefix="")
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/__init__.py:
--------------------------------------------------------------------------------
from .sentinelasim import sentinel_asim_pipeline

# Registry of pipelines exposed by this package (pySigma plugin convention).
pipelines = {
    "sentinel_asim": sentinel_asim_pipeline,  # TODO: adapt identifier to something appropriate
}
--------------------------------------------------------------------------------
/sigma/pipelines/sentinelasim/mappings.py:
--------------------------------------------------------------------------------
from sigma.pipelines.common import (
    logsource_windows_file_access,
    logsource_windows_file_change,
    logsource_windows_file_delete,
    logsource_windows_file_event,
    logsource_windows_file_rename,
    logsource_windows_network_connection,
    logsource_windows_process_creation,
    logsource_windows_registry_add,
    logsource_windows_registry_delete,
    logsource_windows_registry_event,
    logsource_windows_registry_set,
)
from sigma.pipelines.kusto_common.schema import FieldMappings

# from .schema import MicrosoftXDRFieldMappings
from .tables import SENTINEL_ASIM_TABLES

# Get table names from the tables.py file
table_names = list(SENTINEL_ASIM_TABLES.keys())


# Rule Categories -> Query Table Names
# Use the table names from the tables.py file by looking for relevant terms in the table names;
# each entry falls back to the canonical ASIM parser name when no table matches.
CATEGORY_TO_TABLE_MAPPINGS = {
    "process_creation": next((table for table in table_names if "process" in table.lower()), "imProcessCreate"),
    # "image_load": next((table for table in table_names if 'image' in table.lower()), None),
    "file_access": next((table for table in table_names if "file" in table.lower()), "imFileEvent"),
    "file_change": next((table for table in
table_names if "file" in table.lower()), "imFileEvent"), 30 | "file_delete": next((table for table in table_names if "file" in table.lower()), "imFileEvent"), 31 | "file_event": next((table for table in table_names if "file" in table.lower()), "imFileEvent"), 32 | "file_rename": next((table for table in table_names if "file" in table.lower()), "imFileEvent"), 33 | "registry_add": next((table for table in table_names if "registry" in table.lower()), "imRegistry"), 34 | "registry_delete": next((table for table in table_names if "registry" in table.lower()), "imRegistry"), 35 | "registry_event": next((table for table in table_names if "registry" in table.lower()), "imRegistry"), 36 | "registry_set": next((table for table in table_names if "registry" in table.lower()), "imRegistry"), 37 | "network_connection": next((table for table in table_names if "network" in table.lower()), "imNetworkSession"), 38 | "proxy": next((table for table in table_names if "web" in table.lower()), "imWebSession"), 39 | "webserver": next((table for table in table_names if "web" in table.lower()), "imWebSession"), 40 | } 41 | 42 | 43 | EVENTID_CATEGORY_TO_TABLE_MAPPINGS = { 44 | "process": "imProcessCreate", 45 | "logon": "imAuthentication", 46 | "registry": "imRegistry", 47 | "file": "imFileEvent", 48 | "network": "imNetworkSession", 49 | "image_load": "imProcessCreate", 50 | "pipe": "imFileEvent", 51 | "wmi": "imProcessCreate", 52 | } 53 | 54 | 55 | ## Rule Categories -> RuleConditions 56 | CATEGORY_TO_CONDITIONS_MAPPINGS = { 57 | "process_creation": logsource_windows_process_creation(), 58 | # "image_load": logsource_windows_image_load(), 59 | "file_access": logsource_windows_file_access(), 60 | "file_change": logsource_windows_file_change(), 61 | "file_delete": logsource_windows_file_delete(), 62 | "file_event": logsource_windows_file_event(), 63 | "file_rename": logsource_windows_file_rename(), 64 | "registry_add": logsource_windows_registry_add(), 65 | "registry_delete": 
logsource_windows_registry_delete(), 66 | "registry_event": logsource_windows_registry_event(), 67 | "registry_set": logsource_windows_registry_set(), 68 | "network_connection": logsource_windows_network_connection(), 69 | } 70 | 71 | 72 | class SentinelASIMFieldMappings(FieldMappings): 73 | pass 74 | 75 | 76 | SENTINEL_ASIM_FIELD_MAPPINGS = SentinelASIMFieldMappings( 77 | table_mappings={ 78 | "imAuditEvent": { 79 | "CommandLine": "Operation", 80 | "User": "ActorUsername", 81 | "TargetFilename": "Object", 82 | "Image": "ActingAppName", 83 | "SourceIP": "SrcIpAddr", 84 | "DestinationIP": "TargetIpAddr", 85 | "DestinationHostname": "TargetHostname", 86 | "EventType": "EventType", 87 | "TargetObject": "Object", 88 | "NewValue": "NewValue", 89 | "OldValue": "OldValue", 90 | "type": "ObjectType", 91 | "SourceHostname": "SrcHostname", 92 | "TargetUsername": "TargetUsername", 93 | "ProcessName": "ActingAppName", 94 | "ProcessId": "ActingAppId", 95 | "LogonId": "ActorSessionId", 96 | "TargetLogonId": "TargetSessionId", 97 | "SubjectUserName": "ActorUsername", 98 | "ObjectName": "Object", 99 | "ObjectType": "ObjectType", 100 | "NewProcessName": "ActingAppName", 101 | "Status": "EventResultDetails", 102 | "IpAddress": ["SrcIpAddr", "TargetIpAddr"], 103 | "SourcePort": "SrcPortNumber", 104 | "DestinationPort": "TargetPortNumber", 105 | "Protocol": "LogonProtocol", 106 | }, 107 | "imAuthentication": { 108 | "User": ["ActorUsername", "TargetUsername"], # Alias field, can map to either 109 | "SourceHostname": "SrcHostname", 110 | "DestinationHostname": "TargetHostname", 111 | "SourceIP": "SrcIpAddr", 112 | "DestinationIP": "TargetIpAddr", 113 | "SourcePort": "SrcPortNumber", 114 | "DestinationPort": "TargetPortNumber", 115 | "Status": "EventResultDetails", 116 | "IpAddress": ["SrcIpAddr", "TargetIpAddr"], # Can map to either source or target IP 117 | "SubjectUserName": "ActorUsername", 118 | "WorkstationName": "SrcHostname", # This is an approximation 119 | "ComputerName": 
["SrcHostname", "TargetHostname"], # Can map to either source or target hostname 120 | "AuthenticationPackageName": "LogonProtocol", 121 | "LogonProcessName": "LogonMethod", 122 | "TargetUserSid": "TargetUserId", 123 | "TargetDomainName": "TargetDomain", 124 | "TargetOutboundDomainName": "TargetDomain", 125 | "ElevatedToken": "EventType", # This could map to "Elevate" in EventType 126 | "TargetUserPrincipalName": "TargetUsername", # This is an approximation 127 | "SubjectDomainName": "ActorScope", 128 | "SubjectUserSid": "ActorUserId", 129 | "SubjectLogonId": "ActorSessionId", 130 | "IpPort": ["SrcPortNumber", "TargetPortNumber"], # Can map to either source or target port 131 | "LmPackageName": "LogonProtocol", # This is an approximation 132 | "userAgent": "HttpUserAgent", 133 | # Common fields with specific relevance to this table 134 | "DvcHostname": ["SrcHostname", "TargetHostname"], # Can map to either source or target hostname 135 | "DvcIpAddr": ["SrcIpAddr", "TargetIpAddr"], # Can map to either source or target IP 136 | "DvcDomain": ["SrcDomain", "TargetDomain"], # Can map to either source or target domain 137 | "DvcDomainType": ["SrcDomainType", "TargetDomainType"], # Can map to either source or target domain type 138 | "DvcFQDN": ["SrcFQDN", "TargetFQDN"], # Can map to either source or target FQDN 139 | "DvcId": ["SrcDvcId", "TargetDvcId"], # Can map to either source or target device ID 140 | "DvcIdType": ["SrcDvcIdType", "TargetDvcIdType"], # Can map to either source or target device ID type 141 | "DvcDescription": ["SrcDescription", "TargetDescription"], # Can map to either source or target description 142 | "DvcOs": ["SrcDvcOs", "TargetDvcOs"], # Can map to either source or target OS 143 | }, 144 | "_Im_Dns": { 145 | "SourceIP": "SrcIpAddr", 146 | "DestinationIP": "DstIpAddr", 147 | "SourceHostname": "SrcHostname", 148 | "DestinationHostname": "DstHostname", 149 | "SourcePort": "SrcPortNumber", 150 | "DestinationPort": "DstPortNumber", 151 | "IpAddress": 
["SrcIpAddr", "DstIpAddr"], # Can map to either source or target IP 152 | "ProcessName": "SrcProcessName", 153 | "ProcessId": "SrcProcessId", 154 | "User": "SrcUsername", 155 | "ComputerName": ["SrcHostname", "DstHostname"], # Can map to either source or target hostname 156 | "Image": "SrcProcessName", 157 | "QueryName": "DnsQuery", 158 | "QueryStatus": "EventResultDetails", 159 | "QueryResults": "DnsResponseName", 160 | "Protocol": "NetworkProtocol", 161 | "c-useragent": "HttpUserAgent", 162 | "userAgent": "HttpUserAgent", 163 | "Category": "UrlCategory", 164 | "Status": "EventResultDetails", 165 | "Product": "EventProduct", 166 | "Company": "EventVendor", 167 | }, 168 | "imFileEvent": { 169 | "SourceIP": "SrcIpAddr", 170 | "DestinationIP": "DstIpAddr", 171 | "SourceHostname": "SrcHostname", 172 | "DestinationHostname": "DstHostname", 173 | "SourcePort": "SrcPortNumber", 174 | "User": "ActorUsername", 175 | "TargetFilename": "TargetFileName", 176 | "Image": "TargetFilePath", 177 | "ParentImage": "ActingProcessName", 178 | "CommandLine": "ActingProcessCommandLine", 179 | "ParentCommandLine": "ActingProcessCommandLine", 180 | "ProcessName": "ActingProcessName", 181 | "ProcessId": "ActingProcessId", 182 | "ParentProcessName": "ActingProcessName", 183 | "ParentProcessId": "ActingProcessId", 184 | "LogonId": "ActorSessionId", 185 | "TargetObject": "TargetFilePath", 186 | "Details": "TargetFilePath", 187 | "SubjectUserName": "ActorUsername", 188 | "ObjectName": "TargetFilePath", 189 | "OldFilePath": "SrcFilePath", 190 | "NewFilePath": "TargetFilePath", 191 | "OldFileName": "SrcFileName", 192 | "NewFileName": "TargetFileName", 193 | "c-uri": "TargetUrl", 194 | "c-useragent": "HttpUserAgent", 195 | "cs-method": "NetworkApplicationProtocol", 196 | "userAgent": "HttpUserAgent", 197 | "Category": "ThreatCategory", 198 | "OperationName": "EventType", 199 | "ProcessGuid": "ActingProcessGuid", 200 | "CreationUtcTime": "TargetFileCreationTime", 201 | }, 202 | "imNetworkSession": 
{ 203 | "SourceIP": "SrcIpAddr", 204 | "DestinationIP": "DstIpAddr", 205 | "DestinationIp": "DstIpAddr", 206 | "SourceHostname": "SrcHostname", 207 | "DestinationHostname": "DstHostname", 208 | "SourcePort": "SrcPortNumber", 209 | "DestinationPort": "DstPortNumber", 210 | "SourceMAC": "SrcMacAddr", 211 | "DestinationMAC": "DstMacAddr", 212 | "Protocol": "NetworkProtocol", 213 | "NetworkProtocol": "NetworkApplicationProtocol", 214 | "User": ["SrcUsername", "DstUsername"], 215 | "Image": ["SrcProcessName", "DstProcessName"], 216 | "ProcessName": ["SrcProcessName", "DstProcessName"], 217 | "ProcessId": ["SrcProcessId", "DstProcessId"], 218 | "ProcessGuid": ["SrcProcessGuid", "DstProcessGuid"], 219 | "LogonId": ["SrcUserId", "DstUserId"], 220 | "SourceUserName": "SrcUsername", 221 | "DestinationUserName": "DstUsername", 222 | "SourceImage": "SrcProcessName", 223 | "DestinationImage": "DstProcessName", 224 | "SourceProcessGUID": "SrcProcessGuid", 225 | "DestinationProcessGUID": "DstProcessGuid", 226 | "SourceProcessId": "SrcProcessId", 227 | "DestinationProcessId": "DstProcessId", 228 | "SourceThreadId": "SrcProcessId", 229 | "DestinationThreadId": "DstProcessId", 230 | "SourceIsIpv6": "NetworkProtocolVersion", 231 | "DestinationIsIpv6": "NetworkProtocolVersion", 232 | "Initiated": "NetworkDirection", 233 | "SourcePortName": "SrcAppName", 234 | "DestinationPortName": "DstAppName", 235 | "State": "EventSubType", 236 | "IpProtocol": "NetworkProtocol", 237 | "BytesReceived": "DstBytes", 238 | "BytesSent": "SrcBytes", 239 | "PacketsReceived": "DstPackets", 240 | "PacketsSent": "SrcPackets", 241 | "c-uri": "TargetUrl", 242 | "c-useragent": "HttpUserAgent", 243 | "cs-method": "NetworkApplicationProtocol", 244 | "cs-version": "NetworkProtocolVersion", 245 | "cs-Cookie": "HttpUserAgent", 246 | "cs-Referrer": "HttpUserAgent", 247 | "sc-status": "EventResultDetails", 248 | "userAgent": "HttpUserAgent", 249 | "Category": "ThreatCategory", 250 | "OperationName": "EventType", 251 | 
"Action": "DvcAction", 252 | "RuleName": "NetworkRuleName", 253 | }, 254 | "imProcessCreate": { # process_creation, Sysmon EventID 1 -> imProcessCreate table 255 | "Image": "TargetProcessName", 256 | "ParentImage": ["ParentProcessName", "ActingProcessName"], 257 | "CommandLine": "TargetProcessCommandLine", 258 | "ParentCommandLine": "ActingProcessCommandLine", 259 | "User": "TargetUsername", 260 | "LogonGuid": "TargetUserSessionGuid", 261 | "LogonId": "TargetUserSessionId", 262 | "SourceImage": "ActingProcessName", 263 | "TargetImage": "TargetProcessName", 264 | "SourceUser": "ActorUsername", 265 | "TargetUser": "TargetUsername", 266 | "SourceProcessId": "ActingProcessId", 267 | "TargetProcessId": "TargetProcessId", 268 | "SourceProcessGUID": "ActingProcessGuid", 269 | "TargetProcessGUID": "TargetProcessGuid", 270 | "ProcessId": "TargetProcessId", 271 | "ProcessGuid": "TargetProcessGuid", 272 | "ParentProcessId": ["ParentProcessId", "ActingProcessId"], 273 | "ParentProcessGuid": ["ParentProcessGuid", "ActingProcessGuid"], 274 | "ParentUser": "ActorUsername", 275 | "IntegrityLevel": "TargetProcessIntegrityLevel", 276 | "ParentProcessName": "ParentProcessName", 277 | "CurrentDirectory": "TargetProcessCurrentDirectory", 278 | "OriginalFileName": ["TargetProcessFileOriginalName", "TargetProcessFilename"], 279 | "Description": "TargetProcessFileDescription", 280 | "Product": "TargetProcessFileProduct", 281 | "Company": "TargetProcessFileCompany", 282 | "FileVersion": "TargetProcessFileVersion", 283 | "GrantedAccess": "TargetProcessTokenElevation", 284 | "CallTrace": "TargetProcessInjectedAddress", 285 | "ParentIntegrityLevel": "ParentProcessIntegrityLevel", 286 | "TerminalSessionId": "TargetUserSessionId", 287 | "sha1": "TargetProcessSHA1", 288 | "sha256": "TargetProcessSHA256", 289 | "md5": "TargetProcessMD5", 290 | "ProcessVersionInfoOriginalFileName": "TargetProcessFileVersion", 291 | "ProcessVersionInfoFileDescription": "TargetProcessFileDescription", 292 | 
"ProcessIntegrityLevel": "TargetProcessIntegrityLevel", 293 | "InitiatingProcessFolderPath": "ActingProcessName", 294 | "InitiatingProcessCommandLine": "ActingProcessCommandLine", 295 | }, 296 | "imRegistry": { 297 | "Image": "ActingProcessName", 298 | "ParentImage": "ParentProcessName", 299 | "User": "ActorUsername", 300 | "TargetObject": "RegistryKey", 301 | "Details": "RegistryValueData", 302 | "EventType": "EventType", 303 | "ProcessId": "ActingProcessId", 304 | "ProcessGuid": "ActingProcessGuid", 305 | "ParentProcessId": "ParentProcessId", 306 | "ParentProcessGuid": "ParentProcessGuid", 307 | "ObjectName": "RegistryKey", 308 | "ObjectValueName": "RegistryValue", 309 | "ObjectType": "RegistryValueType", 310 | "ObjectValue": "RegistryValueData", 311 | "OldName": "RegistryPreviousKey", 312 | "NewName": "RegistryKey", 313 | "OldValueType": "RegistryPreviousValueType", 314 | "NewValueType": "RegistryValueType", 315 | "OldValue": "RegistryPreviousValueData", 316 | "NewValue": "RegistryValueData", 317 | "ProcessName": "ActingProcessName", 318 | "SubjectUserName": "ActorUsername", 319 | "SubjectUserSid": "ActorUserId", 320 | "SubjectDomainName": "ActorScope", 321 | "SubjectLogonId": "ActorSessionId", 322 | }, 323 | "imWebSession": { 324 | "c-uri": "Url", 325 | "c-uri-query": "Url", 326 | "c-useragent": "HttpUserAgent", 327 | "cs-method": "HttpRequestMethod", 328 | "cs-version": "HttpVersion", 329 | "cs-host": "HttpHost", 330 | "cs-Referrer": "HttpReferrer", 331 | "sc-status": "HttpStatusCode", 332 | "cs-User-Agent": "HttpUserAgent", 333 | "r-dns": "HttpHost", 334 | "request": "Url", 335 | "request_body": "Url", 336 | "request_method": "HttpRequestMethod", 337 | "request_url": "Url", 338 | "request_url_query": "Url", 339 | "response_status_code": "HttpStatusCode", 340 | "url_category": "UrlCategory", 341 | "url_original": "UrlOriginal", 342 | "http_request_time": "HttpRequestTime", 343 | "http_response_time": "HttpResponseTime", 344 | "http_content_type": 
"HttpContentType", 345 | "http_user_agent": "HttpUserAgent", 346 | "http_referrer": "HttpReferrer", 347 | "x_forwarded_for": "HttpRequestXff", 348 | "file_name": "FileName", 349 | "file_hash": ["FileMD5", "FileSHA1", "FileSHA256", "FileSHA512"], 350 | "file_size": "FileSize", 351 | "file_type": "FileContentType", 352 | }, 353 | }, 354 | generic_mappings={ 355 | "EventID": "EventOriginalType", 356 | "EventType": "EventType", 357 | "Product": "EventProduct", 358 | "Vendor": "EventVendor", 359 | "DeviceName": "DvcHostname", 360 | "DeviceHostname": "DvcHostname", 361 | "Computer": "DvcHostname", 362 | "Hostname": "DvcHostname", 363 | "IpAddress": "DvcIpAddr", 364 | "SourceSystem": "EventProduct", 365 | "TimeGenerated": "EventStartTime", 366 | "ProcessName": "ActingProcessName", 367 | }, 368 | ) 369 | -------------------------------------------------------------------------------- /sigma/pipelines/sentinelasim/schema.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from sigma.pipelines.kusto_common.schema import BaseSchema, FieldMappings 4 | 5 | 6 | @dataclass 7 | class SentinelASIMSchema(BaseSchema): 8 | pass 9 | 10 | 11 | @dataclass 12 | class SentinelASIMFieldMappings(FieldMappings): 13 | pass 14 | -------------------------------------------------------------------------------- /sigma/pipelines/sentinelasim/sentinelasim.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from sigma.pipelines.kusto_common.postprocessing import ( 4 | PrependQueryTablePostprocessingItem, 5 | ) 6 | from sigma.processing.conditions import ( 7 | DetectionItemProcessingItemAppliedCondition, 8 | ExcludeFieldCondition, 9 | IncludeFieldCondition, 10 | LogsourceCondition, 11 | RuleProcessingItemAppliedCondition, 12 | RuleProcessingStateCondition, 13 | ) 14 | from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline 15 | from 
sigma.processing.transformations import ( 16 | DropDetectionItemTransformation, 17 | ReplaceStringTransformation, 18 | RuleFailureTransformation, 19 | ) 20 | 21 | from ..kusto_common.errors import InvalidFieldTransformation 22 | from ..kusto_common.schema import create_schema 23 | from ..kusto_common.transformations import ( 24 | DynamicFieldMappingTransformation, 25 | GenericFieldMappingTransformation, 26 | RegistryActionTypeValueTransformation, 27 | SetQueryTableStateTransformation, 28 | ) 29 | from .mappings import ( 30 | CATEGORY_TO_TABLE_MAPPINGS, 31 | EVENTID_CATEGORY_TO_TABLE_MAPPINGS, 32 | SENTINEL_ASIM_FIELD_MAPPINGS, 33 | ) 34 | from .schema import SentinelASIMSchema 35 | from .tables import SENTINEL_ASIM_TABLES 36 | from .transformations import ( 37 | FileEventHashesValuesTransformation, 38 | ProcessCreateHashesValuesTransformation, 39 | WebSessionHashesValuesTransformation, 40 | ) 41 | 42 | SENTINEL_ASIM_SCHEMA = create_schema(SentinelASIMSchema, SENTINEL_ASIM_TABLES) 43 | 44 | # Drop EventID field 45 | drop_eventid_proc_item = ProcessingItem( 46 | identifier="sentinel_asim_drop_eventid", 47 | transformation=DropDetectionItemTransformation(), 48 | field_name_conditions=[IncludeFieldCondition(["EventID", "EventCode", "ObjectType"])], 49 | ) 50 | 51 | ## Fieldmappings 52 | fieldmappings_proc_item = ProcessingItem( 53 | identifier="sentinel_asim_table_fieldmappings", 54 | transformation=DynamicFieldMappingTransformation(SENTINEL_ASIM_FIELD_MAPPINGS), 55 | ) 56 | 57 | ## Generic Field Mappings, keep this last 58 | ## Exclude any fields already mapped, e.g. if a table mapping has been applied. 59 | # This will fix the case where ProcessId is usually mapped to InitiatingProcessId, EXCEPT for the DeviceProcessEvent table where it stays as ProcessId. 
# So we can map ProcessId to ProcessId in the DeviceProcessEvents table mapping, and prevent the generic mapping to InitiatingProcessId from being applied
# by adding a detection item condition that the table field mappings have been applied

# Fallback renames shared by all tables; the negated detection-item condition
# restricts them to items the table-specific mapping above did NOT already rename.
generic_field_mappings_proc_item = ProcessingItem(
    identifier="sentinel_asim_generic_fieldmappings",
    transformation=GenericFieldMappingTransformation(SENTINEL_ASIM_FIELD_MAPPINGS),
    detection_item_conditions=[DetectionItemProcessingItemAppliedCondition("sentinel_asim_table_fieldmappings")],
    detection_item_condition_linking=any,
    detection_item_condition_negation=True,
)


## Field Value Replacements ProcessingItems
replacement_proc_items = [
    # Sysmon uses abbreviations in RegistryKey values, replace with full key names as the imRegistry schema
    # expects them to be
    # Note: Ensure this comes AFTER field mapping renames, as we're specifying ASIM registry fields
    #
    # Do this one first, or else the HKLM only one will replace HKLM and mess up the regex
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_currentcontrolset",
        transformation=ReplaceStringTransformation(
            regex=r"(?i)(^HKLM\\SYSTEM\\CurrentControlSet)",
            replacement=r"HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet001",
        ),
        # Fix: was ["RegistryKey", "PreviousRegistryKey"]; the ASIM field is named
        # "RegistryPreviousKey" (as used by every other item in this list), so the
        # CurrentControlSet rewrite silently never applied to previous-key values.
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_hklm",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKLM)", replacement=r"HKEY_LOCAL_MACHINE"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_hku",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKU)", replacement=r"HKEY_USERS"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_key_replace_hkcr",
        transformation=ReplaceStringTransformation(regex=r"(?i)(^HKCR)", replacement=r"HKEY_LOCAL_MACHINE\\CLASSES"),
        field_name_conditions=[IncludeFieldCondition(["RegistryKey", "RegistryPreviousKey"])],
    ),
    ProcessingItem(
        identifier="sentinel_asim_registry_actiontype_value",
        transformation=RegistryActionTypeValueTransformation(),
        field_name_conditions=[IncludeFieldCondition(["EventType"])],
    ),
    # Processing item to transform the Hashes field in the imProcessCreate table to get rid of the hash algorithm prefix in each value
    ProcessingItem(
        identifier="sentinel_asim_processcreate_hashes_field_values",
        transformation=ProcessCreateHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "imProcessCreate")],
    ),
    ProcessingItem(
        identifier="sentinel_asim_fileevent_hashes_field_values",
        transformation=FileEventHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "imFileEvent")],
    ),
    ProcessingItem(
        identifier="sentinel_asim_webrequest_hashes_field_values",
        transformation=WebSessionHashesValuesTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Hashes"])],
        rule_conditions=[RuleProcessingStateCondition("query_table", "imWebSession")],
    ),
    # Processing item to essentially ignore initiated field
    ProcessingItem(
        identifier="sentinel_asim_network_initiated_field",
        transformation=DropDetectionItemTransformation(),
        field_name_conditions=[IncludeFieldCondition(["Initiated"])],
        rule_conditions=[LogsourceCondition(category="network_connection")],
    ),
]

# Exceptions/Errors ProcessingItems
# Catch-all for when the query table is not set, meaning the rule could not be mapped to a table or the table name was not set
rule_error_proc_items = [
    # Category Not Supported or Query Table Not Set
    ProcessingItem(
        identifier="sentinel_asim_unsupported_rule_category_or_missing_query_table",
        transformation=RuleFailureTransformation(
            "Rule category not yet supported by the Sentinel ASIM pipeline or query_table is not set."
        ),
        rule_conditions=[
            RuleProcessingItemAppliedCondition("sentinel_asim_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
]


def get_valid_fields(table_name):
    """Return every field name accepted for ``table_name``: native schema columns,
    table-specific mapping sources, generic mapping sources, and the synthetic
    ``Hashes`` field handled by the hash-value transformations above."""
    return (
        list(SENTINEL_ASIM_SCHEMA.tables[table_name].fields.keys())
        + list(SENTINEL_ASIM_FIELD_MAPPINGS.table_mappings.get(table_name, {}).keys())
        + list(SENTINEL_ASIM_FIELD_MAPPINGS.generic_mappings.keys())
        + ["Hashes"]
    )


field_error_proc_items = []

# One error item per table: fail with a descriptive message listing the valid
# fields whenever a rule routed to that table uses an unknown field.
for table_name in SENTINEL_ASIM_SCHEMA.tables.keys():
    valid_fields = get_valid_fields(table_name)

    field_error_proc_items.append(
        ProcessingItem(
            identifier=f"sentinel_asim_unsupported_fields_{table_name}",
            transformation=InvalidFieldTransformation(
                f"Please use valid fields for the {table_name} table, or the following fields that have fieldmappings in this "
                f"pipeline:\n{', '.join(sorted(set(valid_fields)))}"
            ),
            field_name_conditions=[ExcludeFieldCondition(fields=valid_fields)],
            rule_conditions=[
                RuleProcessingItemAppliedCondition("sentinel_asim_set_query_table"),  # type: ignore
                RuleProcessingStateCondition("query_table", table_name),  # type: ignore
            ],
            rule_condition_linking=all,
        )
    )
# Add a catch-all error for custom table names
field_error_proc_items.append(
    ProcessingItem(
        identifier="sentinel_asim_unsupported_fields_custom",
        transformation=InvalidFieldTransformation(
            "Invalid field name for the custom table. Please ensure you're using valid fields for your custom table."
        ),
        # For custom tables we can only validate against the generic mappings and
        # the synthetic Hashes field; the custom table's own columns are unknown.
        field_name_conditions=[
            ExcludeFieldCondition(fields=list(SENTINEL_ASIM_FIELD_MAPPINGS.generic_mappings.keys()) + ["Hashes"])
        ],
        rule_conditions=[
            RuleProcessingItemAppliedCondition("sentinel_asim_set_query_table"),  # type: ignore
            RuleProcessingStateCondition("query_table", None),  # type: ignore
        ],
        rule_condition_linking=all,
    )
)


def sentinel_asim_pipeline(
    transform_parent_image: Optional[bool] = True, query_table: Optional[str] = None
) -> ProcessingPipeline:
    """Pipeline for transformations for SigmaRules to use in the Kusto Query Language backend.

    :param transform_parent_image: NOTE(review): this parameter is accepted but never
        referenced in the pipeline body below — presumably kept for signature parity
        with the other Kusto pipelines; confirm whether ParentImage handling is
        intentionally omitted for ASIM.
    :type transform_parent_image: Optional[bool]
    :param query_table: If specified, the table name will be used in the finalizer, otherwise the table name will be selected based on the category of the rule.
    :type query_table: Optional[str]

    :return: ProcessingPipeline for Microsoft Sentinel ASIM
    :rtype: ProcessingPipeline
    """

    pipeline_items = [
        # Must run first: stores the target ASIM table in the "query_table"
        # processing state that nearly every later item conditions on.
        ProcessingItem(
            identifier="sentinel_asim_set_query_table",
            transformation=SetQueryTableStateTransformation(
                query_table, CATEGORY_TO_TABLE_MAPPINGS, EVENTID_CATEGORY_TO_TABLE_MAPPINGS
            ),
        ),
        drop_eventid_proc_item,
        fieldmappings_proc_item,
        generic_field_mappings_proc_item,
        *replacement_proc_items,
        *rule_error_proc_items,
        *field_error_proc_items,
    ]

    return ProcessingPipeline(
        name="Generic Log Sources to Sentinel ASIM tables and fields",
        priority=10,
        items=pipeline_items,
        allowed_backends=frozenset(["kusto"]),
        postprocessing_items=[PrependQueryTablePostprocessingItem],  # type: ignore
    )


# ---------------- sigma/pipelines/sentinelasim/transformations.py ----------------
from ..kusto_common.transformations import BaseHashesValuesTransformation


class ProcessCreateHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field in imProcessCreate table to get rid of the hash algorithm prefix in each value.
    """

    def __init__(self):
        # Stripped values land in TargetProcess*-prefixed hash columns.
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512", "IMPHASH"], field_prefix="TargetProcess")


class FileEventHashesValuesTransformation(BaseHashesValuesTransformation):
    """
    Transforms the Hashes field in imFileEvent table to get rid of the hash algorithm prefix in each value.
    """

    def __init__(self):
        # Note: no IMPHASH here, unlike the process-create transformation above.
        super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512"], field_prefix="TargetFile")
16 | """ 17 | 18 | def __init__(self): 19 | super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512"], field_prefix="TargetFile") 20 | 21 | 22 | class WebSessionHashesValuesTransformation(BaseHashesValuesTransformation): 23 | """ 24 | Transforms the Hashes field in imWebSession table to get rid of the hash algorithm prefix in each value. 25 | """ 26 | 27 | def __init__(self): 28 | super().__init__(valid_hash_algos=["MD5", "SHA1", "SHA256", "SHA512"], field_prefix="File") 29 | -------------------------------------------------------------------------------- /tests/test_backend_kusto.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from sigma.backends.kusto import KustoBackend 4 | from sigma.collection import SigmaCollection 5 | from sigma.pipelines.microsoft365defender import microsoft_365_defender_pipeline 6 | 7 | 8 | @pytest.fixture 9 | def microsoft365defender_backend(): 10 | return KustoBackend(processing_pipeline=microsoft_365_defender_pipeline()) 11 | 12 | 13 | @pytest.fixture 14 | def kusto_backend_no_pipeline(): 15 | return KustoBackend() 16 | 17 | 18 | def test_kusto_and_expression(microsoft365defender_backend: KustoBackend): 19 | assert ( 20 | microsoft365defender_backend.convert( 21 | SigmaCollection.from_yaml( 22 | """ 23 | title: Test 24 | status: test 25 | logsource: 26 | category: process_creation 27 | product: windows 28 | detection: 29 | sel: 30 | CommandLine: valueA 31 | User: valueB 32 | condition: sel 33 | """ 34 | ) 35 | ) 36 | == ['DeviceProcessEvents\n| where ProcessCommandLine =~ "valueA" and AccountName =~ "valueB"'] 37 | ) 38 | 39 | 40 | def test_kusto_or_expression(microsoft365defender_backend: KustoBackend): 41 | assert ( 42 | microsoft365defender_backend.convert( 43 | SigmaCollection.from_yaml( 44 | """ 45 | title: Test 46 | status: test 47 | logsource: 48 | category: process_creation 49 | product: windows 50 | detection: 51 | sel1: 52 | CommandLine: valueA 53 | 
def test_kusto_and_or_expression(microsoft365defender_backend: KustoBackend):
    """Value lists become parenthesized in~ groups, ANDed across fields."""
    yaml_rule = """
        title: Test
        status: test
        logsource:
            category: process_creation
            product: windows
        detection:
            sel:
                CommandLine:
                    - valueA1
                    - valueA2
                ProcessId:
                    - valueB1
                    - valueB2
            condition: sel
    """
    expected = [
        'DeviceProcessEvents\n| where (ProcessCommandLine in~ ("valueA1", "valueA2")) and '
        '(ProcessId in~ ("valueB1", "valueB2"))'
    ]
    assert microsoft365defender_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected


def test_kusto_or_and_expression(microsoft365defender_backend: KustoBackend):
    """Each selection becomes an AND group; '1 of sel*' ORs the two groups."""
    yaml_rule = """
        title: Test
        status: test
        logsource:
            category: process_creation
            product: windows
        detection:
            sel1:
                CommandLine: valueA1
                ProcessId: valueB1
            sel2:
                CommandLine: valueA2
                ProcessId: valueB2
            condition: 1 of sel*
    """
    expected = [
        'DeviceProcessEvents\n| where (ProcessCommandLine =~ "valueA1" and ProcessId =~ "valueB1") or '
        '(ProcessCommandLine =~ "valueA2" and ProcessId =~ "valueB2")'
    ]
    assert microsoft365defender_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected


def test_kusto_in_expression(microsoft365defender_backend: KustoBackend):
    """Plain values collapse into in~; the wildcard value splits out as startswith."""
    yaml_rule = """
        title: Test
        status: test
        logsource:
            category: process_creation
            product: windows
        detection:
            sel:
                CommandLine:
                    - valueA
                    - valueB
                    - valueC*
            condition: sel
    """
    expected = [
        'DeviceProcessEvents\n| where ProcessCommandLine in~ ("valueA", "valueB") or '
        'ProcessCommandLine startswith "valueC"'
    ]
    assert microsoft365defender_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected
def test_kusto_regex_query(microsoft365defender_backend: KustoBackend):
    """The |re modifier renders as KQL 'matches regex'."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    sel:
                        CommandLine|re: foo.*bar
                        ProcessId: foo
                    condition: sel
            """
            )
        )
        == ['DeviceProcessEvents\n| where ProcessCommandLine matches regex "foo.*bar" and ProcessId =~ "foo"']
    )


def test_kusto_cidr_query(microsoft365defender_backend: KustoBackend):
    """The |cidr modifier renders as ipv4_is_in_range() on the mapped IP column."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    category: network_connection
                    product: windows
                detection:
                    sel:
                        SourceIp|cidr: 192.168.0.0/16
                    condition: sel
            """
            )
        )
        == ['DeviceNetworkEvents\n| where ipv4_is_in_range(LocalIP, "192.168.0.0/16")']
    )


def test_kusto_negation_basic(microsoft365defender_backend: KustoBackend):
    """'and not filter' wraps the filter group in not(...)."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
                title: Test
                status: test
                logsource:
                    product: windows
                    category: process_creation
                detection:
                    selection:
                        Image:
                            - '*\process.exe'
                        CommandLine:
                            - 'this'
                    filter:
                        CommandLine:
                            - 'notthis'
                    condition: selection and not filter
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where (FolderPath endswith "\\\\process.exe" and '
            'ProcessCommandLine =~ "this") and '
            '(not(ProcessCommandLine =~ "notthis"))'
        ]
    )


def test_kusto_negation_contains(microsoft365defender_backend: KustoBackend):
    """Negated '*x*' wildcard values render as not(... contains ...)."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
                title: Test
                status: test
                logsource:
                    product: windows
                    category: process_creation
                detection:
                    selection:
                        Image:
                            - '*\process.exe'
                        CommandLine:
                            - '*this*'
                    filter:
                        CommandLine:
                            - '*notthis*'
                    condition: selection and not filter
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where (FolderPath endswith "\\\\process.exe" and '
            'ProcessCommandLine contains "this") and '
            '(not(ProcessCommandLine contains "notthis"))'
        ]
    )
def test_kusto_grouping(microsoft365defender_backend: KustoBackend):
    """OR-lists on two fields render as two parenthesized groups joined by 'and'."""
    yaml_rule = r"""
        title: Net connection logic test
        status: test
        logsource:
            category: network_connection
            product: windows
        detection:
            selection:
                Image:
                    - '*\powershell.exe'
                    - '*\pwsh.exe'
                DestinationHostname:
                    - '*pastebin.com*'
                    - '*anothersite.com*'
            condition: selection
    """
    expected = [
        'DeviceNetworkEvents\n| where (InitiatingProcessFolderPath endswith "\\\\powershell.exe" or '
        'InitiatingProcessFolderPath endswith "\\\\pwsh.exe") and (RemoteUrl contains '
        '"pastebin.com" or RemoteUrl contains "anothersite.com")'
    ]
    assert microsoft365defender_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected


def test_kusto_escape_cmdline_slash(microsoft365defender_backend: KustoBackend):
    """Backslashes and escaped wildcards in rule values must survive KQL escaping."""
    yaml_rule = r"""
        title: Delete All Scheduled Tasks
        id: 220457c1-1c9f-4c2e-afe6-9598926222c1
        status: test
        description: Detects the usage of schtasks with the delete flag and the asterisk symbol to delete all tasks from the schedule of the local computer, including tasks scheduled by other users.
        references:
            - https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/schtasks-delete
        author: Nasreddine Bencherchali (Nextron Systems)
        date: 2022-09-09
        tags:
            - attack.impact
            - attack.t1489
        logsource:
            category: process_creation
            product: windows
        detection:
            selection:
                Image|endswith: '\schtasks.exe'
                CommandLine|contains|all:
                    - ' /delete '
                    - '/tn \*'
                    - ' /f'
            condition: selection
        falsepositives:
            - Unlikely
        level: high
    """
    expected = [
        'DeviceProcessEvents\n| where FolderPath endswith "\\\\schtasks.exe" and '
        '(ProcessCommandLine contains " /delete " and '
        'ProcessCommandLine contains "/tn *" and '
        'ProcessCommandLine contains " /f")'
    ]
    assert microsoft365defender_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected
def test_kusto_cmdline_filters(microsoft365defender_backend: KustoBackend):
    """Multi-selection condition with negated optional filters: embedded double
    quotes and backslashes in the filter values must be escaped in the output."""
    assert (
        microsoft365defender_backend.convert(
            SigmaCollection.from_yaml(
                r"""
                title: New Firewall Rule Added Via Netsh.EXE
                id: cd5cfd80-aa5f-44c0-9c20-108c4ae12e3c
                status: test
                description: Detects the addition of a new rule to the Windows firewall via netsh
                references:
                    - https://www.operationblockbuster.com/wp-content/uploads/2016/02/Operation-Blockbuster-RAT-and-Staging-Report.pdf
                author: Markus Neis, Sander Wiebing
                date: 2019-01-29
                modified: 2023-02-10
                tags:
                    - attack.defense_evasion
                    - attack.t1562.004
                    - attack.s0246
                logsource:
                    category: process_creation
                    product: windows
                detection:
                    selection_img:
                        - Image|endswith: '\netsh.exe'
                        - OriginalFileName: 'netsh.exe'
                    selection_cli:
                        CommandLine|contains|all:
                            - ' firewall '
                            - ' add '
                    filter_optional_dropbox:
                        CommandLine|contains:
                            - 'advfirewall firewall add rule name=Dropbox dir=in action=allow "program=?:\Program Files (x86)\Dropbox\Client\Dropbox.exe" enable=yes profile=Any'
                            - 'advfirewall firewall add rule name=Dropbox dir=in action=allow "program=?:\Program Files\Dropbox\Client\Dropbox.exe" enable=yes profile=Any'
                    condition: all of selection_* and not 1 of filter_optional_*
                falsepositives:
                    - Legitimate administration activity
                    - Software installations
                level: medium
            """
            )
        )
        == [
            'DeviceProcessEvents\n| where ((FolderPath endswith "\\\\netsh.exe" or '
            'ProcessVersionInfoOriginalFileName =~ "netsh.exe") and '
            '(ProcessCommandLine contains " firewall " and ProcessCommandLine contains " add ")) and '
            '(not(((ProcessCommandLine contains "advfirewall firewall add rule name=Dropbox dir=in action=allow '
            '\\"program=" and ProcessCommandLine contains ":\\\\Program Files (x86)\\\\Dropbox\\\\Client\\\\Dropbox.exe\\" '
            'enable=yes profile=Any") or (ProcessCommandLine contains "advfirewall firewall add rule name=Dropbox dir=in '
            'action=allow \\"program=" and ProcessCommandLine contains ":\\\\Program Files\\\\Dropbox\\\\Client\\\\Dropbox.exe\\" '
            'enable=yes profile=Any"))))'
        ]
    )
def test_kusto_sigmanumber_conversion(kusto_backend_no_pipeline: KustoBackend):
    """Numeric Sigma values render unquoted with '==' (no pipeline, no table prefix)."""
    assert (
        kusto_backend_no_pipeline.convert(
            SigmaCollection.from_yaml(
                """
                title: Test
                status: test
                logsource:
                    product: windows
                detection:
                    sel:
                        EventID: 1
                    condition: sel
            """
            )
        )
        == ["EventID == 1"]
    )


def test_kusto_sigmanumber_conversion_mixed_types(kusto_backend_no_pipeline: KustoBackend):
    """A mixed list of number and string values renders as a quoted in~ list."""
    assert (
        kusto_backend_no_pipeline.convert(
            SigmaCollection.from_yaml(
                r"""
                title: ETW Logging Disabled In .NET Processes - Sysmon Registry
                id: bf4fc428-dcc3-4bbd-99fe-2422aeee2544
                related:
                    - id: a4c90ea1-2634-4ca0-adbb-35eae169b6fc
                      type: similar
                status: test
                description: Potential adversaries stopping ETW providers recording loaded .NET assemblies.
                references:
                    - https://twitter.com/_xpn_/status/1268712093928378368
                    - https://social.msdn.microsoft.com/Forums/vstudio/en-US/0878832e-39d7-4eaf-8e16-a729c4c40975/what-can-i-use-e13c0d23ccbc4e12931bd9cc2eee27e4-for?forum=clr
                    - https://github.com/dotnet/runtime/blob/ee2355c801d892f2894b0f7b14a20e6cc50e0e54/docs/design/coreclr/jit/viewing-jit-dumps.md#setting-configuration-variables
                    - https://github.com/dotnet/runtime/blob/f62e93416a1799aecc6b0947adad55a0d9870732/src/coreclr/src/inc/clrconfigvalues.h#L35-L38
                    - https://github.com/dotnet/runtime/blob/7abe42dc1123722ed385218268bb9fe04556e3d3/src/coreclr/src/inc/clrconfig.h#L33-L39
                    - https://github.com/dotnet/runtime/search?p=1&q=COMPlus_&unscoped_q=COMPlus_
                    - https://bunnyinside.com/?term=f71e8cb9c76a
                    - http://managed670.rssing.com/chan-5590147/all_p1.html
                    - https://github.com/dotnet/runtime/blob/4f9ae42d861fcb4be2fcd5d3d55d5f227d30e723/docs/coding-guidelines/clr-jit-coding-conventions.md#1412-disabling-code
                    - https://blog.xpnsec.com/hiding-your-dotnet-complus-etwenabled/
                    - https://i.blackhat.com/EU-21/Wednesday/EU-21-Teodorescu-Veni-No-Vidi-No-Vici-Attacks-On-ETW-Blind-EDRs.pdf
                author: Roberto Rodriguez (Cyb3rWard0g), OTR (Open Threat Research)
                date: 2020-06-05
                modified: 2023-08-17
                tags:
                    - attack.defense-evasion
                    - attack.t1112
                    - attack.t1562
                logsource:
                    product: windows
                    category: registry_set
                detection:
                    selection_etw_enabled:
                        TargetObject|endswith: 'SOFTWARE\Microsoft\.NETFramework\ETWEnabled'
                        Details: 'DWORD (0x00000000)'
                    selection_complus:
                        TargetObject|endswith:
                            - '\COMPlus_ETWEnabled'
                            - '\COMPlus_ETWFlags'
                        Details:
                            - 0 # For REG_SZ type
                            - 'DWORD (0x00000000)'
                    condition: 1 of selection_*
                falsepositives:
                    - Unknown
                level: high
            """
            )
        )
        == [
            '(TargetObject endswith "SOFTWARE\\\\Microsoft\\\\.NETFramework\\\\ETWEnabled" and Details =~ "DWORD (0x00000000)") or ((TargetObject endswith "\\\\COMPlus_ETWEnabled" or '
            'TargetObject endswith "\\\\COMPlus_ETWFlags") and (Details in~ ("0", "DWORD (0x00000000)")))'
        ]
    )
# ---------------- tests/test_pipelines_azuremonitor.py ----------------
import pytest

from sigma.backends.kusto import KustoBackend
from sigma.collection import SigmaCollection
from sigma.exceptions import SigmaTransformationError
from sigma.pipelines.azuremonitor import azure_monitor_pipeline
from sigma.rule import SigmaRule


@pytest.fixture
def azure_backend():
    # Backend wired with the Azure Monitor pipeline (routes rules to SecurityEvent).
    return KustoBackend(processing_pipeline=azure_monitor_pipeline())


def test_azure_monitor_process_creation_field_mapping(azure_backend):
    """process_creation fields map to SecurityEvent columns (NewProcessName, etc.)."""
    yaml_rule = """
    title: Test Process Creation
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Image: C:\\Windows\\System32\\cmd.exe
            CommandLine: whoami
            User: SYSTEM
            ProcessId: 1234
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe" and CommandLine =~ "whoami" and SubjectUserName =~ "SYSTEM" and NewProcessId == 1234'
    ]

    # Both the collection-level and single-rule conversion paths must agree.
    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result


def test_azure_monitor_network_connection_field_mapping(azure_backend):
    """network_connection fields pass through to SecurityEvent unchanged names."""
    yaml_rule = """
    title: Test Network Connection
    status: test
    logsource:
        category: network_connection
        product: windows
    detection:
        sel:
            DestinationIp: 8.8.8.8
            DestinationPort: 53
            SourcePort: 12345
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where DestinationIp =~ "8.8.8.8" and DestinationPort == 53 and SourcePort == 12345'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
def test_azure_monitor_registry_event_field_mapping(azure_backend):
    """registry_event: TargetObject maps to ObjectName and HKLM expands to HKEY_LOCAL_MACHINE."""
    yaml_rule = """
    title: Test Registry Event
    status: test
    logsource:
        category: registry_event
        product: windows
    detection:
        sel:
            EventID: 13
            TargetObject: HKLM\\Software\\Microsoft\\Windows\\CurrentVersion\\Run
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where EventID == 13 and ObjectName =~ "HKEY_LOCAL_MACHINE\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run"'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result


def test_azure_monitor_file_event_field_mapping(azure_backend):
    """file_event: TargetFilename maps to ObjectName, Image to NewProcessName."""
    yaml_rule = """
    title: Test File Event
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            TargetFilename: C:\\suspicious\\file.exe
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where ObjectName =~ "C:\\\\suspicious\\\\file.exe" and NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result


def test_azure_monitor_hashes_transformation(azure_backend):
    """Hashes values lose their algo= prefix and collapse into one FileHash in~ list."""
    yaml_rule = """
    title: Test Hashes
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Hashes:
                - md5=1234567890abcdef1234567890abcdef
                - sha1=1234567890abcdef1234567890abcdef12345678
                - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
        condition: sel
    """
    expected_result = [
        'SecurityEvent\n| where FileHash in~ ("1234567890abcdef1234567890abcdef", "1234567890abcdef1234567890abcdef12345678", "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef")'
    ]

    assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
yaml_rule = """ 104 | title: Test Hashes 105 | status: test 106 | logsource: 107 | category: process_creation 108 | product: windows 109 | detection: 110 | sel: 111 | Hashes: 112 | - md5=1234567890abcdef1234567890abcdef 113 | - sha1=1234567890abcdef1234567890abcdef12345678 114 | - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef 115 | condition: sel 116 | """ 117 | expected_result = [ 118 | 'SecurityEvent\n| where FileHash in~ ("1234567890abcdef1234567890abcdef", "1234567890abcdef1234567890abcdef12345678", "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef")' 119 | ] 120 | 121 | assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 122 | assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 123 | 124 | 125 | def test_azure_monitor_registry_key_replacement(azure_backend): 126 | yaml_rule = """ 127 | title: Test Registry Key Replacement 128 | status: test 129 | logsource: 130 | category: registry_event 131 | product: windows 132 | detection: 133 | sel: 134 | TargetObject: 135 | - HKLM\\Software\\Microsoft\\Windows\\CurrentVersion\\Run 136 | - HKU\\Software\\Microsoft\\Windows\\CurrentVersion\\Run 137 | - HKCR\\Software\\Microsoft\\Windows\\CurrentVersion\\Run 138 | condition: sel 139 | """ 140 | expected_result = [ 141 | 'SecurityEvent\n| where ObjectName in~ ("HKEY_LOCAL_MACHINE\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run", "HKEY_USERS\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run", "HKEY_LOCAL_MACHINE\\\\CLASSES\\\\Software\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run")' 142 | ] 143 | 144 | assert azure_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 145 | assert azure_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 146 | 147 | 148 | def test_azure_monitor_unsupported_category(azure_backend): 149 | yaml_rule = """ 150 | title: Test Unsupported Category 151 | status: test 152 | 
def test_azure_monitor_invalid_field(azure_backend):
    """A field unknown to the resolved table raises, naming the table in the message."""
    yaml_rule = """
    title: Test Invalid Field
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            InvalidField: value
        condition: sel
    """
    with pytest.raises(
        SigmaTransformationError, match="Invalid SigmaDetectionItem field name encountered.*SecurityEvent"
    ):
        azure_backend.convert(SigmaCollection.from_yaml(yaml_rule))


def test_azure_monitor_custom_query_table():
    """An explicit query_table overrides category-based table selection."""
    yaml_rule = """
    title: Test Custom Query Table
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            CommandLine: whoami
        condition: sel
    """
    expected_result = ['CustomTable\n| where CommandLine =~ "whoami"']

    custom_backend = KustoBackend(processing_pipeline=azure_monitor_pipeline(query_table="CustomTable"))
    assert custom_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert custom_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result


def test_azure_monitor_pipeline_custom_table_invalid_category():
    """With a custom table set, an unknown category still converts (no routing error)."""
    yaml_rule = """
    title: Test
    status: test
    logsource:
        product: windows
        category: blah
    detection:
        sel:
            Image: actuallyafileevent.exe
        condition: sel
    """
    expected_result = ["SecurityEvent\n| " 'where NewProcessName =~ "actuallyafileevent.exe"']

    custom_backend = KustoBackend(processing_pipeline=azure_monitor_pipeline(query_table="SecurityEvent"))
    assert custom_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert custom_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
def test_azure_monitor_eventid_mapping(azure_backend):
    """Test that EventID is used to determine table when category is missing"""
    rule_yaml = """
    title: Test EventID Mapping
    status: test
    logsource:
        product: windows
    detection:
        sel:
            EventID: 1
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    # All EventIDs should map to SecurityEvent table
    want = [
        'SecurityEvent\n| where EventID == 1 and NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'
    ]

    got_collection = azure_backend.convert(SigmaCollection.from_yaml(rule_yaml))
    got_rule = azure_backend.convert_rule(SigmaRule.from_yaml(rule_yaml))
    assert got_collection == want
    assert got_rule == want


def test_azure_monitor_category_precedence(azure_backend):
    """Test that category takes precedence over EventID when both are present"""
    rule_yaml = """
    title: Test Category Precedence
    status: test
    logsource:
        category: file_event
        product: windows
    detection:
        sel:
            EventID: 1 # Process creation EventID, but should use file_event category
            Image: C:\\Windows\\System32\\cmd.exe
        condition: sel
    """
    # Should use SecurityEvent table based on category mapping
    want = [
        'SecurityEvent\n| where EventID == 1 and NewProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'
    ]

    got_collection = azure_backend.convert(SigmaCollection.from_yaml(rule_yaml))
    got_rule = azure_backend.convert_rule(SigmaRule.from_yaml(rule_yaml))
    assert got_collection == want
    assert got_rule == want
# ---------------- tests/test_pipelines_sentinelasim.py ----------------
import pytest

from sigma.backends.kusto import KustoBackend
from sigma.collection import SigmaCollection
from sigma.exceptions import SigmaTransformationError
from sigma.pipelines.sentinelasim import sentinel_asim_pipeline
from sigma.rule import SigmaRule


@pytest.fixture
def asim_backend():
    # Backend wired with the Sentinel ASIM pipeline (routes rules to im* tables).
    return KustoBackend(processing_pipeline=sentinel_asim_pipeline())


def test_sentinel_asim_process_creation_field_mapping(asim_backend):
    """process_creation maps to imProcessCreate with TargetProcess* columns."""
    yaml_rule = """
    title: Test Process Creation
    status: test
    logsource:
        category: process_creation
        product: windows
    detection:
        sel:
            Image: C:\\Windows\\System32\\cmd.exe
            CommandLine: whoami
            User: SYSTEM
            ProcessId: 1234
        condition: sel
    """
    expected_result = [
        'imProcessCreate\n| where TargetProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe" and TargetProcessCommandLine =~ "whoami" and TargetUsername =~ "SYSTEM" and TargetProcessId == 1234'
    ]

    # Both the collection-level and single-rule conversion paths must agree.
    assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result


def test_sentinel_asim_network_connection_field_mapping(asim_backend):
    """network_connection maps to imNetworkSession (Dst*/NetworkProtocol columns)."""
    yaml_rule = """
    title: Test Network Connection
    status: test
    logsource:
        category: network_connection
        product: windows
    detection:
        sel:
            DestinationIp: 8.8.8.8
            DestinationPort: 53
            Protocol: udp
        condition: sel
    """
    expected_result = [
        'imNetworkSession\n| where DstIpAddr =~ "8.8.8.8" and DstPortNumber == 53 and NetworkProtocol =~ "udp"'
    ]

    assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result
    assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result
expected_result 57 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 58 | 59 | 60 | def test_sentinel_asim_registry_event_field_mapping(asim_backend): 61 | yaml_rule = """ 62 | title: Test Registry Event 63 | status: test 64 | logsource: 65 | category: registry_event 66 | product: windows 67 | detection: 68 | sel: 69 | TargetObject: HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\Run 70 | EventType: SetValue 71 | condition: sel 72 | """ 73 | expected_result = [ 74 | 'imRegistry\n| where RegistryKey =~ "HKEY_LOCAL_MACHINE\\\\SOFTWARE\\\\Microsoft\\\\Windows\\\\CurrentVersion\\\\Run" and EventType =~ "RegistryValueSet"' 75 | ] 76 | 77 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 78 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 79 | 80 | 81 | def test_sentinel_asim_custom_table(): 82 | yaml_rule = """ 83 | title: Test Custom Table 84 | status: test 85 | logsource: 86 | category: process_creation 87 | product: windows 88 | detection: 89 | sel: 90 | Image: malware.exe 91 | condition: sel 92 | """ 93 | expected_result = ['imFileEvent\n| where TargetFilePath =~ "malware.exe"'] 94 | 95 | custom_backend = KustoBackend(processing_pipeline=sentinel_asim_pipeline(query_table="imFileEvent")) 96 | assert custom_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 97 | assert custom_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 98 | 99 | 100 | def test_sentinel_asim_unsupported_field(asim_backend): 101 | yaml_rule = """ 102 | title: Test Unsupported Field 103 | status: test 104 | logsource: 105 | category: process_creation 106 | product: windows 107 | detection: 108 | sel: 109 | UnsupportedField: value 110 | condition: sel 111 | """ 112 | with pytest.raises( 113 | SigmaTransformationError, match="Invalid SigmaDetectionItem field name encountered: UnsupportedField" 114 | ): 115 | 
asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) 116 | 117 | 118 | def test_sentinel_asim_file_event(asim_backend): 119 | yaml_rule = """ 120 | title: Test File Event 121 | status: test 122 | logsource: 123 | category: file_event 124 | product: windows 125 | detection: 126 | sel: 127 | Image: C:\\Windows\\explorer.exe 128 | condition: sel 129 | """ 130 | expected_result = ['imFileEvent\n| where TargetFilePath =~ "C:\\\\Windows\\\\explorer.exe"'] 131 | 132 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 133 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 134 | 135 | 136 | def test_sentinel_asim_pipeline_custom_table_invalid_category(): 137 | yaml_rule = """ 138 | title: Test Custom Table 139 | status: test 140 | logsource: 141 | category: blah 142 | product: windows 143 | detection: 144 | sel: 145 | Image: malware.exe 146 | condition: sel 147 | """ 148 | expected_result = ['imFileEvent\n| where TargetFilePath =~ "malware.exe"'] 149 | 150 | custom_backend = KustoBackend(processing_pipeline=sentinel_asim_pipeline(query_table="imFileEvent")) 151 | assert custom_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 152 | assert custom_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 153 | 154 | 155 | def test_sentinel_asim_processcreate_hashes_field_values(asim_backend): 156 | yaml_rule = """ 157 | title: Test ProcessCreate Hashes Field Values 158 | status: test 159 | logsource: 160 | category: process_creation 161 | product: windows 162 | detection: 163 | sel: 164 | Hashes: 165 | - md5=1234567890abcdef1234567890abcdef 166 | - sha1=1234567890abcdef1234567890abcdef12345678 167 | - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef 168 | - imphash=1234567890abcdef1234567890abcdef 169 | - sha512=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef 170 | condition: 
sel 171 | """ 172 | expected_result = [ 173 | 'imProcessCreate\n| where TargetProcessMD5 =~ "1234567890abcdef1234567890abcdef" or TargetProcessSHA1 =~ "1234567890abcdef1234567890abcdef12345678" or TargetProcessSHA256 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" or TargetProcessIMPHASH =~ "1234567890abcdef1234567890abcdef" or TargetProcessSHA512 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"' 174 | ] 175 | 176 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 177 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 178 | 179 | 180 | def test_sentinel_asim_fileevent_hashes_field_values(asim_backend): 181 | yaml_rule = """ 182 | title: Test FileEvent Hashes Field Values 183 | status: test 184 | logsource: 185 | category: file_event 186 | product: windows 187 | detection: 188 | sel: 189 | Hashes: 190 | - md5=1234567890abcdef1234567890abcdef 191 | - sha1=1234567890abcdef1234567890abcdef12345678 192 | - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef 193 | condition: sel 194 | """ 195 | expected_result = [ 196 | 'imFileEvent\n| where TargetFileMD5 =~ "1234567890abcdef1234567890abcdef" or TargetFileSHA1 =~ "1234567890abcdef1234567890abcdef12345678" or TargetFileSHA256 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"' 197 | ] 198 | 199 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 200 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 201 | 202 | 203 | def test_sentinel_asim_webrequest_hashes_field_values(asim_backend): 204 | yaml_rule = """ 205 | title: Test WebRequest Hashes Field Values 206 | status: test 207 | logsource: 208 | category: proxy 209 | product: windows 210 | detection: 211 | sel: 212 | Hashes: 213 | - md5=1234567890abcdef1234567890abcdef 214 | - 
sha1=1234567890abcdef1234567890abcdef12345678 215 | - sha256=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef 216 | condition: sel 217 | """ 218 | expected_result = [ 219 | 'imWebSession\n| where FileMD5 =~ "1234567890abcdef1234567890abcdef" or FileSHA1 =~ "1234567890abcdef1234567890abcdef12345678" or FileSHA256 =~ "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"' 220 | ] 221 | 222 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 223 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 224 | 225 | 226 | def test_sentinel_asim_pipeline_unsupported_rule_type(asim_backend): 227 | yaml_rule = """ 228 | title: test 229 | status: test 230 | logsource: 231 | category: invalid_category 232 | product: invalid_product 233 | detection: 234 | sel: 235 | field: whatever 236 | condition: sel 237 | """ 238 | with pytest.raises(SigmaTransformationError, match="Unable to determine table name from rule. 
"): 239 | asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) 240 | 241 | 242 | def test_sentinel_asim_eventid_mapping(asim_backend): 243 | """Test that EventID is used to determine table when category is missing""" 244 | yaml_rule = """ 245 | title: Test EventID Mapping 246 | status: test 247 | logsource: 248 | product: windows 249 | detection: 250 | sel: 251 | EventID: 1 252 | Image: C:\\Windows\\System32\\cmd.exe 253 | condition: sel 254 | """ 255 | # EventID 1 should map to process category -> imProcessCreate table 256 | expected_result = ['imProcessCreate\n| where TargetProcessName =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'] 257 | 258 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 259 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 260 | 261 | 262 | def test_sentinel_asim_category_precedence(asim_backend): 263 | """Test that category takes precedence over EventID when both are present""" 264 | yaml_rule = """ 265 | title: Test Category Precedence 266 | status: test 267 | logsource: 268 | category: file_event 269 | product: windows 270 | detection: 271 | sel: 272 | EventID: 1 # Process creation EventID, but should use file_event category 273 | Image: C:\\Windows\\System32\\cmd.exe 274 | condition: sel 275 | """ 276 | # Should use imFileEvent table based on category, not imProcessCreate from EventID 277 | expected_result = ['imFileEvent\n| where TargetFilePath =~ "C:\\\\Windows\\\\System32\\\\cmd.exe"'] 278 | 279 | assert asim_backend.convert(SigmaCollection.from_yaml(yaml_rule)) == expected_result 280 | assert asim_backend.convert_rule(SigmaRule.from_yaml(yaml_rule)) == expected_result 281 | -------------------------------------------------------------------------------- /utils/get_azure_monitor_schema_tables.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | import re 4 | from datetime import datetime, 
def fetch_content(file_name: str = None) -> str:
    """Fetch content from the GitHub contents API and decode it.

    :param file_name: Optional path under the tables directory; when omitted,
        the directory listing itself is fetched.
    :return: Decoded file text for single-file responses, the parsed JSON
        (a list of entries) for directory listings, the raw body when the
        payload is not JSON, or None when the request failed.
    """
    url = BASE_URL
    if file_name:
        url = f"{BASE_URL}/{file_name}"
    response = requests.get(url, headers=HEADERS)
    if response.ok:
        try:
            json_content = response.json()
            if isinstance(json_content, dict) and "content" in json_content:
                # Single-file responses carry base64-encoded content.
                return base64.b64decode(json_content["content"]).decode("utf-8")
            # Directory listing: return the JSON we already parsed instead of
            # re-parsing the body with a second response.json() call.
            return json_content
        except ValueError:
            # Body was not JSON at all; return it verbatim.
            return response.text
    print(f"Failed to retrieve content for {file_name}: {response.reason}")
    return None


def extract_table_urls(json_content: List[dict]) -> List[str]:
    """Extract entry names from a GitHub directory listing.

    :param json_content: List of entry dicts as returned by the contents API
        (annotation fixed: the API returns a list here, not a dict).
    :return: The "name" of each entry.
    """
    return [entry["name"] for entry in json_content]


def extract_table_schema(content: str, table_name: str = None) -> dict:
    """Extract a column schema from markdown table content.

    Tries a whitespace-tolerant pipe-table layout first, then a rigid one.

    :param content: Markdown page text.
    :param table_name: Used only for diagnostics in log messages.
    :return: {column: {"data_type": ..., "description": ...}}, or {} when no
        table could be found/parsed.
    """
    match = re.search(
        r"\|\s*Column\s*\|\s*Type\s*\|\s*Description\s*\|\n\|[-\s|]*\n((?:\|.*\|$\n?)+)", content, re.MULTILINE
    )
    if not match:
        # Fallback: rigid layout without padding around the header cells.
        match = re.search(
            r"\|Column\|Type\|Description\|[\r\n]+\|---\|---\|---\|[\n\r]+(.*?)(?=\n##|\Z)", content, re.DOTALL
        )
    if not match:
        print(f"Field table not found in {table_name}")
        return {}

    schema_data = {}
    for row in match.group(1).strip().split("\n"):
        columns = [col.strip() for col in row.strip().strip("|").split("|")]
        if len(columns) >= 2:
            # Description column is optional in some tables.
            schema_data[columns[0]] = {"data_type": columns[1], "description": columns[2] if len(columns) > 2 else ""}
    if not schema_data:
        print(f"Table schema could not be parsed from {table_name}")
    return schema_data


def process_table(file_path: str) -> dict:
    """Fetch one markdown table page and extract {table_name: schema}.

    Returns {} when the page cannot be fetched or no table name is found.
    """
    print(f"Processing table: {file_path}")
    content = fetch_content(file_path)
    if not content:
        return {}
    # Try to get table name from header after ---
    table_name = re.search(r"^title:.*-\s*(.+)$", content, re.MULTILINE)
    if not table_name:
        # Try to get table name from top text between ---
        table_name = re.search(r"^ms\.custom\:\s+(.+)", content, re.MULTILINE)
    table_name = table_name.group(1).strip() if table_name else None
    if not table_name:
        print(f"Table name not found in {file_path}")
        return {}
    return {table_name: extract_table_schema(content, table_name)}


def write_schema(output_file: str, schema_tables: Dict[str, dict]):
    """Write the schema tables to a Python file as AZURE_MONITOR_TABLES."""
    with open(output_file, "w") as f:
        f.write("# This file is auto-generated. Do not edit manually.\n")
        f.write(f"# Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC\n\n")
        f.write("AZURE_MONITOR_TABLES = {\n")
        for table, fields in schema_tables.items():
            f.write(f'    "{table}": {{\n')
            for field, info in fields.items():
                # repr() keeps arbitrary description text (quotes, newlines) valid Python.
                f.write(
                    f'        "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", "description": {repr(info["description"])}}},\n'
                )
            f.write("    },\n")
        f.write("}\n")


def get_all_includes_tables() -> dict:
    """Process every markdown table under the includes/ subdirectory."""
    tables_list = fetch_content("includes")
    if not tables_list:
        return {}
    table_urls = ["includes/" + url for url in extract_table_urls(tables_list) if url.endswith(".md")]
    return {table: schema for url in table_urls for table, schema in process_table(url).items() if schema}


def get_all_tables() -> dict:
    """Retrieve all tables from the top-level directory and process them."""
    tables_list = fetch_content()
    if not tables_list:
        return {}
    table_urls = [x for x in extract_table_urls(tables_list) if x.endswith(".md")]
    return {table: schema for url in table_urls for table, schema in process_table(url).items() if schema}
def fetch_content(file_name: str) -> str:
    """Fetch a file from the GitHub contents API and base64-decode it.

    :param file_name: Path relative to the defender-xdr docs directory.
    :return: Decoded file text, or None when the request failed.
    """
    url = f"{BASE_URL}/{file_name}"
    response = requests.get(url, headers=HEADERS)
    if response.ok:
        # Contents-API file responses carry base64-encoded content.
        return base64.b64decode(response.json()["content"]).decode("utf-8")
    print(f"Failed to retrieve content for {file_name}: {response.reason}")
    return None


def extract_table_urls(toc_content: str) -> List[str]:
    """Extract table page hrefs from the TOC.yml file.

    Walks the nested TOC down to the "Data schema" section.

    :raises ValueError: with the missing section name if the TOC layout
        changed, instead of the opaque TypeError the previous code raised
        when subscripting the None returned by next(..., None).
    """
    toc_data = yaml.safe_load(toc_content)
    data_schema_section = toc_data[0]["items"]
    for section_name in [
        "Investigate and respond to threats",
        "Search for threats with advanced hunting",
        "Data schema",
    ]:
        section = next((item for item in data_schema_section if item.get("name") == section_name), None)
        if section is None:
            raise ValueError(f"TOC section not found: {section_name!r}")
        data_schema_section = section["items"]
    # First two entries appear to be overview pages, not tables — TODO confirm
    # against the live TOC before changing the slice.
    return [item["href"] for item in data_schema_section[2:] if "href" in item]


def extract_table_schema(content: str) -> dict:
    """Extract the column schema table from markdown content.

    :return: {column: {"data_type": ..., "description": ...}}, or {} when the
        schema table is not present.
    """
    match = re.search(r"\|\s?Column name\s?\|\s?Data type\s?\|\s?Description\s?\|([\s\S]+?)\n\n", content)
    if not match:
        return {}

    schema_data = {}
    # [1:] skips the |---|---|---| separator row.
    for row in match.group(1).strip().split("\n")[1:]:
        columns = [col.strip() for col in row.strip("|").split("|")]
        if len(columns) == 3:
            schema_data[columns[0]] = {"data_type": columns[1], "description": columns[2]}
    return schema_data


def process_table(file_path: str) -> dict:
    """Fetch one table page and extract {table_name: schema}.

    NOTE(review): pages without an H1 all collapse onto the key "Unknown",
    so later ones would overwrite earlier ones — confirm whether that can
    occur in practice.
    """
    content = fetch_content(file_path)
    if not content:
        return {}

    table_name = re.search(r"^# (.+)", content, re.MULTILINE)
    table_name = table_name.group(1) if table_name else "Unknown"
    return {table_name: extract_table_schema(content)}


def write_schema(output_file: str, schema_tables: Dict[str, dict]):
    """Write the schema tables to a Python file as MICROSOFT_XDR_TABLES."""
    with open(output_file, "w") as f:
        f.write("# This file is auto-generated. Do not edit manually.\n")
        f.write(f"# Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC\n\n")
        f.write("MICROSOFT_XDR_TABLES = {\n")
        for table, fields in schema_tables.items():
            f.write(f'    "{table}": {{\n')
            for field, info in fields.items():
                # repr() keeps arbitrary description text valid Python source.
                f.write(
                    f'        "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", "description": {repr(info["description"])}}},\n'
                )
            f.write("    },\n")
        f.write("}\n")


def get_all_tables() -> dict:
    """Retrieve all tables from the TOC and process them."""
    toc_content = fetch_content("TOC.yml")
    if not toc_content:
        return {}
    table_urls = extract_table_urls(toc_content)
    return {table: schema for url in table_urls for table, schema in process_table(url).items()}
20 | """ 21 | response = requests.get(url) 22 | response.raise_for_status() 23 | 24 | return response 25 | 26 | 27 | def extract_asim_schema_hrefs(items: List[dict]) -> List[str]: 28 | """Extracts hrefs for ASIM schemas from the JSON data.""" 29 | for item in items: 30 | if item.get("toc_title") == "Reference": 31 | return extract_asim_schemas(item.get("children", [])) 32 | return [] 33 | 34 | 35 | def extract_asim_schemas(items: List[dict]) -> List[str]: 36 | """Finds the ASIM schemas section and returns the relevant hrefs.""" 37 | for item in items: 38 | if item.get("toc_title").lower() == "advanced security information model (asim)": 39 | return find_schema_hrefs(item.get("children", [])) 40 | return [] 41 | 42 | 43 | def find_schema_hrefs(items: List[dict]) -> List[str]: 44 | """Extracts the schema hrefs, excluding legacy schemas.""" 45 | hrefs = [] 46 | for item in items: 47 | if item.get("toc_title").lower() == "asim schemas": 48 | for schema in item.get("children", []): 49 | if schema.get("toc_title") != "Legacy network normalization schema": 50 | hrefs.append(schema.get("href")) 51 | return hrefs 52 | 53 | 54 | def get_sentinel_asim_schema_tables() -> List[str]: 55 | """Fetches the ASIM schema table hrefs from Azure Sentinel documentation.""" 56 | url = f"{BASE_URL}/toc.json" 57 | response = requests.get(url) 58 | response.raise_for_status() # Ensures proper error handling 59 | data = response.json() 60 | return extract_asim_schema_hrefs(data.get("items", [])) 61 | 62 | 63 | def extract_table_name_and_fields(url: str) -> Dict[str, List[Dict[str, str]]]: 64 | """ 65 | Extracts the table name and field schema from a Sentinel ASIM schema page. 66 | 67 | :param url: Full URL of the schema page. 68 | :return: A dictionary with the table name and a list of field schemas. 
69 | """ 70 | response = get_request(url) 71 | soup = BeautifulSoup(response.content, "html.parser") 72 | 73 | table_name = extract_table_name(soup) 74 | if table_name is None: 75 | print(f"No ASIM table found for {url}. Skipping...") 76 | return None 77 | 78 | field_data = extract_field_data(soup) 79 | 80 | return {table_name: field_data} 81 | 82 | 83 | def extract_table_name(soup: BeautifulSoup) -> Optional[str]: 84 | """ 85 | Extracts the table name from the BeautifulSoup object. 86 | 87 | :param soup: BeautifulSoup object of the schema page. 88 | :return: The extracted table name or None if not found. 89 | """ 90 | 91 | def extract_from_code(): 92 | code_element = soup.find("code", class_="lang-kql") 93 | if not code_element: 94 | return None 95 | table_name = code_element.text.strip().split()[0] 96 | return extract_table_name_from_string(table_name) 97 | 98 | def extract_from_text(): 99 | whole_text = soup.get_text() 100 | match = re.search(r"(?i)im(\w+)??", whole_text) 101 | return f"im{match.group(1)}" if match else None 102 | 103 | def extract_table_name_from_string(text): 104 | match = re.search(r"(?i)(im|_im_)(\w+)", text) 105 | return f"{match.group(1)}{match.group(2)}" if match else None 106 | 107 | return extract_from_code() or extract_from_text() 108 | 109 | 110 | def extract_field_data(soup: BeautifulSoup) -> List[Dict[str, str]]: 111 | """ 112 | Extracts field data from a Sentinel ASIM schema page. 113 | 114 | :param soup: BeautifulSoup object of the schema page. 115 | :return: A list of dictionaries with the field name and type. 
116 | """ 117 | # schema_details_section = soup.find(id="schema-details") 118 | field_data = {} 119 | 120 | # Loop through all tables in the section and its subsections 121 | tables = soup.find_all("table") 122 | for table in tables: 123 | # Each table has columns: Field, Class, Type, Description 124 | headers = [th.text.strip() for th in table.find_all("th")] 125 | if "Field" in headers and "Class" in headers: 126 | # Parse each row of the table 127 | for row in table.find_all("tr")[1:]: # Skip header row 128 | cols = [td.text.strip() for td in row.find_all("td")] 129 | if len(cols) == 4: # Ensure we have all four columns 130 | field_data[cols[0]] = {"class": cols[1], "data_type": cols[2], "description": cols[3]} 131 | return field_data 132 | 133 | 134 | def get_common_field_data() -> List[Dict[str, str]]: 135 | """ 136 | Extracts common field data from a Sentinel ASIM schema page. 137 | 138 | :return: A list of dictionaries with the field name and type. 139 | """ 140 | full_url = f"{BASE_URL}/normalization-common-fields" 141 | response = get_request(full_url) 142 | soup = BeautifulSoup(response.content, "html.parser") 143 | common_field_info = extract_field_data(soup) 144 | 145 | return common_field_info 146 | 147 | 148 | def write_schema(output_file: str, schema_tables: Dict[str, dict], common_field_data: Dict[str, dict]): 149 | """Write the schema tables to a Python file.""" 150 | with open(output_file, "w") as f: 151 | f.write("# This file is auto-generated. 
Do not edit manually.\n") 152 | f.write(f"# Last updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC\n\n") 153 | f.write("SENTINEL_ASIM_TABLES = {\n") 154 | for table, fields in schema_tables.items(): 155 | f.write(f' "{table}": {{\n') 156 | for field, info in fields.items(): 157 | f.write( 158 | f' "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", "description": {repr(info["description"])}, "class": "{info["class"].strip("`")}"}},\n' 159 | ) 160 | f.write(" },\n") 161 | f.write("}\n") 162 | f.write("SENTINEL_ASIM_COMMON_FIELDS = {\n") 163 | f.write(f' "COMMON": {{\n') 164 | for field, info in common_field_data.items(): 165 | f.write( 166 | f' "{field.strip("`")}": {{"data_type": "{info["data_type"].strip("`")}", "description": {repr(info["description"])}, "class": "{info["class"].strip("`")}"}},\n' 167 | ) 168 | f.write(" },\n") 169 | f.write("}\n") 170 | 171 | 172 | def process_asim_schemas() -> Tuple[Dict[str, dict], Dict[str, dict]]: 173 | """Processes all ASIM schemas and extracts table names and field schemas.""" 174 | tables = get_sentinel_asim_schema_tables() 175 | schema_data = {} 176 | common_field_data = get_common_field_data() 177 | 178 | for href in tables: 179 | full_url = f"{BASE_URL}/{href}" 180 | print(f"Processing {full_url}...") 181 | if schema_info := extract_table_name_and_fields(full_url): 182 | schema_data.update(schema_info) 183 | 184 | return schema_data, common_field_data 185 | 186 | 187 | if __name__ == "__main__": 188 | schema_data, common_field_data = process_asim_schemas() 189 | write_schema(OUTPUT_FILE, schema_data, common_field_data) 190 | print(f"Schema written to {OUTPUT_FILE}") 191 | --------------------------------------------------------------------------------